diff --git a/.gitattributes b/.gitattributes index e77d446ba6..82d852900b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,3 @@ # do not show up detailed difference on GitHub source/3rdparty/* linguist-generated=true +source/3rdparty/README.md linguist-generated=false diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index f13b187dfb..49918e47ac 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -21,10 +21,10 @@ body: validations: required: true - type: input - id: tf-version + id: backend-version attributes: - label: TensorFlow Version - description: "The version will be printed when running DeePMD-kit." + label: Backend and its version + description: "The backend and its version will be printed when running DeePMD-kit, e.g. TensorFlow v2.15.0." validations: required: true - type: dropdown diff --git a/.github/ISSUE_TEMPLATE/generic-issue.yml b/.github/ISSUE_TEMPLATE/generic-issue.yml index af9f01c64d..f84097580e 100644 --- a/.github/ISSUE_TEMPLATE/generic-issue.yml +++ b/.github/ISSUE_TEMPLATE/generic-issue.yml @@ -21,10 +21,10 @@ body: validations: required: true - type: input - id: tf-version + id: backend-version attributes: - label: TensorFlow Version - description: "The version will be printed when running DeePMD-kit." + label: Backend and its version + description: "The backend and its version will be printed when running DeePMD-kit, e.g. TensorFlow v2.15.0." 
validations: required: true - type: textarea diff --git a/.github/labeler.yml b/.github/labeler.yml index b0a85679de..b048574e77 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -1,39 +1,38 @@ -Python: -- changed-files: - - any-glob-to-any-file: - - deepmd/**/* - - deepmd_utils/**/* - - source/tests/**/* -Docs: -- changed-files: - - any-glob-to-any-file: doc/**/* -Examples: -- changed-files: - - any-glob-to-any-file: examples/**/* -Core: -- changed-files: - - any-glob-to-any-file: source/lib/**/* -CUDA: -- changed-files: - - any-glob-to-any-file: source/lib/src/gpu/**/* -ROCM: -- changed-files: - - any-glob-to-any-file: source/lib/src/gpu/**/* -OP: -- changed-files: - - any-glob-to-any-file: source/op/**/* -C++: -- changed-files: - - any-glob-to-any-file: source/api_cc/**/* -C: -- changed-files: - - any-glob-to-any-file: source/api_c/**/* -LAMMPS: -- changed-files: - - any-glob-to-any-file: source/lmp/**/* -Gromacs: -- changed-files: - - any-glob-to-any-file: source/gmx/**/* -i-Pi: -- changed-files: - - any-glob-to-any-file: source/ipi/**/* +Python: + - changed-files: + - any-glob-to-any-file: + - deepmd/**/* + - source/tests/**/* +Docs: + - changed-files: + - any-glob-to-any-file: doc/**/* +Examples: + - changed-files: + - any-glob-to-any-file: examples/**/* +Core: + - changed-files: + - any-glob-to-any-file: source/lib/**/* +CUDA: + - changed-files: + - any-glob-to-any-file: source/lib/src/gpu/**/* +ROCM: + - changed-files: + - any-glob-to-any-file: source/lib/src/gpu/**/* +OP: + - changed-files: + - any-glob-to-any-file: source/op/**/* +C++: + - changed-files: + - any-glob-to-any-file: source/api_cc/**/* +C: + - changed-files: + - any-glob-to-any-file: source/api_c/**/* +LAMMPS: + - changed-files: + - any-glob-to-any-file: source/lmp/**/* +Gromacs: + - changed-files: + - any-glob-to-any-file: source/gmx/**/* +i-Pi: + - changed-files: + - any-glob-to-any-file: source/ipi/**/* diff --git a/.github/workflows/build_cc.yml 
b/.github/workflows/build_cc.yml index f029517d80..adcb615a0a 100644 --- a/.github/workflows/build_cc.yml +++ b/.github/workflows/build_cc.yml @@ -1,6 +1,12 @@ on: push: + branches-ignore: + - "gh-readonly-queue/**" pull_request: + merge_group: +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true name: Build C++ jobs: buildcc: @@ -27,6 +33,10 @@ jobs: cache: 'pip' - uses: lukka/get-cmake@latest - run: python -m pip install tensorflow + - name: Download libtorch + run: | + wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.1.2%2Bcpu.zip -O libtorch.zip + unzip libtorch.zip - run: | wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \ && sudo dpkg -i cuda-keyring_1.0-1_all.deb \ @@ -48,13 +58,17 @@ jobs: && sudo apt-get update \ && sudo apt-get install -y rocm-dev hipcub-dev if: matrix.variant == 'rocm' - - run: source/install/build_cc.sh + - run: | + export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch + source/install/build_cc.sh env: DP_VARIANT: ${{ matrix.dp_variant }} DOWNLOAD_TENSORFLOW: "FALSE" CMAKE_GENERATOR: Ninja if: matrix.variant != 'clang' - - run: source/install/build_cc.sh + - run: | + export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch + source/install/build_cc.sh env: DP_VARIANT: cpu DOWNLOAD_TENSORFLOW: "FALSE" diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml index 23076e9bf5..18fd7a1ac1 100644 --- a/.github/workflows/build_wheel.yml +++ b/.github/workflows/build_wheel.yml @@ -2,7 +2,16 @@ name: Build and upload to PyPI on: push: + branches-ignore: + - "gh-readonly-queue/**" + tags: + - "v*" pull_request: + merge_group: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true jobs: determine-arm64-runner: @@ -41,12 +50,12 @@ jobs: cuda_version: 11.8 dp_pkg_name: deepmd-kit-cu11 # macos-x86-64 - - os: macos-latest + - os: 
macos-13 python: 311 platform_id: macosx_x86_64 dp_variant: cpu # macos-arm64 - - os: macos-latest + - os: macos-14 python: 311 platform_id: macosx_arm64 dp_variant: cpu @@ -68,8 +77,20 @@ jobs: - uses: docker/setup-qemu-action@v3 name: Setup QEMU if: matrix.platform_id == 'manylinux_aarch64' && matrix.os == 'ubuntu-latest' + # detect version in advance. See #3168 + - uses: actions/setup-python@v5 + name: Install Python + with: + python-version: '3.11' + cache: 'pip' + if: matrix.dp_pkg_name == 'deepmd-kit-cu11' + - run: | + python -m pip install setuptools_scm + python -c "from setuptools_scm import get_version;print('SETUPTOOLS_SCM_PRETEND_VERSION='+get_version())" >> $GITHUB_ENV + rm -rf .git + if: matrix.dp_pkg_name == 'deepmd-kit-cu11' - name: Build wheels - uses: pypa/cibuildwheel@v2.16 + uses: pypa/cibuildwheel@v2.17 env: CIBW_BUILD_VERBOSITY: 1 CIBW_ARCHS: all @@ -136,7 +157,7 @@ jobs: path: source/install/docker/dist merge-multiple: true - name: Log in to the Container registry - uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d + uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} @@ -144,12 +165,12 @@ jobs: - name: Extract metadata (tags, labels) for Docker id: meta - uses: docker/metadata-action@dbef88086f6cef02e264edb7dbf63250c17cef6c + uses: docker/metadata-action@v5 with: images: ghcr.io/deepmodeling/deepmd-kit - name: Build and push Docker image - uses: docker/build-push-action@4a13e500e55cf31b7a5d59a38ab2040ab0f42f56 + uses: docker/build-push-action@v5 with: context: source/install/docker push: ${{ github.repository_owner == 'deepmodeling' && github.event_name == 'push' && github.actor != 'dependabot[bot]' }} diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index a9a162432c..c912ece8d5 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -2,10 +2,14 @@ name: "CodeQL" on: push: + branches-ignore: + - "gh-readonly-queue/**" pull_request: schedule: - 
cron: '45 2 * * 2' - +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true jobs: analyze: name: Analyze @@ -37,6 +41,8 @@ jobs: && sudo apt-get update \ && sudo apt-get -y install cuda-cudart-dev-12-2 cuda-nvcc-12-2 python -m pip install tensorflow + wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.1.2%2Bcpu.zip -O libtorch.zip + unzip libtorch.zip env: DEBIAN_FRONTEND: noninteractive # Initializes the CodeQL tools for scanning. @@ -46,7 +52,9 @@ jobs: languages: ${{ matrix.language }} queries: security-extended,security-and-quality - name: "Run, Build Application using script" - run: source/install/build_cc.sh + run: | + export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch + source/install/build_cc.sh env: DP_VARIANT: cuda DOWNLOAD_TENSORFLOW: "FALSE" diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 877c780f1f..be43c5cff2 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -11,4 +11,4 @@ jobs: steps: - uses: actions/labeler@v5 with: - repo-token: "${{ secrets.GITHUB_TOKEN }}" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/package_c.yml b/.github/workflows/package_c.yml index 5594c79181..e11f773b3a 100644 --- a/.github/workflows/package_c.yml +++ b/.github/workflows/package_c.yml @@ -2,8 +2,15 @@ name: Build C library on: push: + branches-ignore: + - "gh-readonly-queue/**" + tags: + - "v*" pull_request: - + merge_group: +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true jobs: build_c: name: Build C library @@ -35,7 +42,7 @@ jobs: - name: Test C library run: ./source/install/docker_test_package_c.sh - name: Release - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 if: startsWith(github.ref, 'refs/tags/') with: files: ${{ matrix.filename }} diff --git a/.github/workflows/test_cc.yml 
b/.github/workflows/test_cc.yml index ef6fade8e5..5c5d260f42 100644 --- a/.github/workflows/test_cc.yml +++ b/.github/workflows/test_cc.yml @@ -1,11 +1,20 @@ on: push: + branches-ignore: + - "gh-readonly-queue/**" pull_request: + merge_group: +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true name: Test C++ jobs: testcc: name: Test C++ runs-on: ubuntu-latest + strategy: + matrix: + check_memleak: [true, false] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -18,28 +27,41 @@ jobs: mpi: mpich - uses: lukka/get-cmake@latest - run: python -m pip install tensorflow - - run: source/install/test_cc_local.sh + - name: Download libtorch + run: | + wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.1.2%2Bcpu.zip -O libtorch.zip + unzip libtorch.zip + # https://github.com/actions/runner-images/issues/9491 + - name: Fix kernel mmap rnd bits + run: sudo sysctl vm.mmap_rnd_bits=28 + if: ${{ matrix.check_memleak }} + - run: | + export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch + source/install/test_cc_local.sh env: OMP_NUM_THREADS: 1 TF_INTRA_OP_PARALLELISM_THREADS: 1 TF_INTER_OP_PARALLELISM_THREADS: 1 LMP_CXX11_ABI_0: 1 CMAKE_GENERATOR: Ninja + CXXFLAGS: ${{ matrix.check_memleak && '-fsanitize=leak' || '' }} # test lammps # ASE issue: https://gitlab.com/ase/ase/-/merge_requests/2843 # TODO: remove ase version when ase has new release - run: | python -m pip install -U pip - python -m pip install -e .[cpu,test,lmp] "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz" + python -m pip install -e .[cpu,test,lmp] mpi4py "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz" env: DP_BUILD_TESTING: 1 + if: ${{ !matrix.check_memleak }} - run: pytest --cov=deepmd source/lmp/tests env: OMP_NUM_THREADS: 1 
TF_INTRA_OP_PARALLELISM_THREADS: 1 TF_INTER_OP_PARALLELISM_THREADS: 1 LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp - LD_LIBRARY_PATH: ${{ github.workspace }}/dp_test/lib + LD_LIBRARY_PATH: ${{ github.workspace }}/dp_test/lib:${{ github.workspace }}/libtorch/lib + if: ${{ !matrix.check_memleak }} # test ipi - run: pytest --cov=deepmd source/ipi/tests env: @@ -47,10 +69,11 @@ jobs: TF_INTRA_OP_PARALLELISM_THREADS: 1 TF_INTER_OP_PARALLELISM_THREADS: 1 PATH: ${{ github.workspace }}/dp_test/bin:$PATH - LD_LIBRARY_PATH: ${{ github.workspace }}/dp_test/lib - - uses: codecov/codecov-action@v3 - with: - gcov: true + LD_LIBRARY_PATH: ${{ github.workspace }}/dp_test/lib:${{ github.workspace }}/libtorch/lib + if: ${{ !matrix.check_memleak }} + - uses: codecov/codecov-action@v4 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} pass: name: Pass testing C++ needs: [testcc] diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index e74c0abde2..15a20d889a 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -4,6 +4,14 @@ on: pull_request: types: - "labeled" + # to let the PR pass the test + - "opened" + - "reopened" + - "synchronize" + merge_group: +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true name: Test CUDA jobs: test_cuda: @@ -11,9 +19,9 @@ jobs: runs-on: nvidia # https://github.com/deepmodeling/deepmd-kit/pull/2884#issuecomment-1744216845 container: - image: nvidia/cuda:12.2.0-devel-ubuntu22.04 + image: nvidia/cuda:12.3.1-devel-ubuntu22.04 options: --gpus all - if: github.repository_owner == 'deepmodeling' && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch' + if: github.repository_owner == 'deepmodeling' && (github.event_name == 'pull_request' && github.event.label && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch' || github.event_name == 'merge_group') steps: - 
name: Make sudo and git work run: apt-get update && apt-get install -y sudo git @@ -27,24 +35,34 @@ jobs: with: mpi: mpich - uses: lukka/get-cmake@latest + with: + useLocalCache: true + useCloudCache: false + - name: Install wget and unzip + run: apt-get update && apt-get install -y wget unzip - run: | wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \ && sudo dpkg -i cuda-keyring_1.0-1_all.deb \ && sudo apt-get update \ - && sudo apt-get -y install cuda-12-2 libcudnn8=8.9.5.*-1+cuda12.2 + && sudo apt-get -y install cuda-12-3 libcudnn8=8.9.5.*-1+cuda12.3 if: false # skip as we use nvidia image - - name: Set PyPI mirror for Aliyun cloud machine - run: python -m pip config --user set global.index-url https://mirrors.aliyun.com/pypi/simple/ - run: python -m pip install -U "pip>=21.3.1,!=23.0.0" - - run: python -m pip install "tensorflow>=2.15.0rc0" - - run: python -m pip install -v -e .[gpu,test,lmp,cu12] "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz" + - run: python -m pip install "tensorflow>=2.15.0rc0" "torch>=2.2.0" + - run: python -m pip install -v -e .[gpu,test,lmp,cu12,torch] mpi4py "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz" env: - DP_BUILD_TESTING: 1 DP_VARIANT: cuda - CUDA_PATH: /usr/local/cuda-12.2 + DP_ENABLE_NATIVE_OPTIMIZATION: 1 - run: dp --version - - run: python -m pytest -s --cov=deepmd --cov=deepmd_utils source/tests --durations=0 - - run: source/install/test_cc_local.sh + - run: python -m pytest source/tests --durations=0 + env: + NUM_WORKERS: 0 + - name: Download libtorch + run: | + wget https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.2.1%2Bcu121.zip -O libtorch.zip + unzip libtorch.zip + - run: | + export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch + 
source/install/test_cc_local.sh env: OMP_NUM_THREADS: 1 TF_INTRA_OP_PARALLELISM_THREADS: 1 @@ -53,18 +71,25 @@ jobs: CMAKE_GENERATOR: Ninja DP_VARIANT: cuda DP_USE_MPICH2: 1 - CUDA_PATH: /usr/local/cuda-12.2 - run: | - export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/dp_test/lib:$CUDA_PATH/lib64:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/dp_test/lib:$GITHUB_WORKSPACE/libtorch/lib:$CUDA_PATH/lib64:$LD_LIBRARY_PATH export PATH=$GITHUB_WORKSPACE/dp_test/bin:$PATH - python -m pytest -s --cov=deepmd source/lmp/tests - python -m pytest -s --cov=deepmd source/ipi/tests + python -m pytest source/lmp/tests + python -m pytest source/ipi/tests env: OMP_NUM_THREADS: 1 TF_INTRA_OP_PARALLELISM_THREADS: 1 TF_INTER_OP_PARALLELISM_THREADS: 1 LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp - CUDA_PATH: /usr/local/cuda-12.2 - - uses: codecov/codecov-action@v3 + CUDA_VISIBLE_DEVICES: 0 + pass: + name: Pass testing on CUDA + needs: [test_cuda] + runs-on: ubuntu-latest + if: always() + steps: + - name: Decide whether the needed jobs succeeded or failed + uses: re-actors/alls-green@release/v1 with: - gcov: true + jobs: ${{ toJSON(needs) }} + allowed-skips: test_cuda diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml index 1bd78bfae0..60b5ecf0e0 100644 --- a/.github/workflows/test_python.yml +++ b/.github/workflows/test_python.yml @@ -1,6 +1,12 @@ on: push: + branches-ignore: + - "gh-readonly-queue/**" pull_request: + merge_group: +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true name: Test Python jobs: testpython: @@ -9,12 +15,12 @@ jobs: strategy: matrix: include: - - python: 3.7 - tf: 1.14 - python: 3.8 tf: + torch: - python: "3.11" tf: + torch: steps: - uses: actions/checkout@v4 @@ -23,25 +29,26 @@ jobs: python-version: ${{ matrix.python }} cache: 'pip' - uses: mpi4py/setup-mpi@v1 - if: ${{ matrix.tf == '' }} with: mpi: openmpi # 
https://github.com/pypa/pip/issues/11770 - run: python -m pip install -U "pip>=21.3.1,!=23.0.0" - - run: pip install -e .[cpu,test] + - run: pip install -e .[cpu,test,torch] env: TENSORFLOW_VERSION: ${{ matrix.tf }} DP_BUILD_TESTING: 1 - run: pip install horovod mpi4py - if: ${{ matrix.tf == '' }} env: HOROVOD_WITH_TENSORFLOW: 1 + HOROVOD_WITHOUT_PYTORCH: 1 HOROVOD_WITHOUT_GLOO: 1 - run: dp --version - - run: pytest --cov=deepmd --cov=deepmd_utils source/tests --durations=0 - - uses: codecov/codecov-action@v3 - with: - gcov: true + - run: pytest --cov=deepmd source/tests --durations=0 + env: + NUM_WORKERS: 0 + - uses: codecov/codecov-action@v4 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} pass: name: Pass testing Python needs: [testpython] diff --git a/.github/workflows/todo.yml b/.github/workflows/todo.yml new file mode 100644 index 0000000000..2608bb1071 --- /dev/null +++ b/.github/workflows/todo.yml @@ -0,0 +1,20 @@ +name: TODO workflow +on: + push: + branches: + - devel +jobs: + build: + if: github.repository_owner == 'deepmodeling' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Run tdg-github-action + uses: ribtoks/tdg-github-action@master + with: + TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO: ${{ github.repository }} + SHA: ${{ github.sha }} + REF: ${{ github.ref }} + EXCLUDE_PATTERN: "(source/3rdparty|.git)/.*" + COMMENT_ON_ISSUES: 1 diff --git a/.gitignore b/.gitignore index 82d3e4a7da..5e30cf3167 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ *.bz2 *.pyc *.pb +*.DS_Store tmp* CMakeCache.txt CMakeFiles diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d4e89f1129..f75d0db7ae 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,119 +1,121 @@ # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks repos: -- repo: https://github.com/pre-commit/pre-commit-hooks + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.5.0 
hooks: - - id: trailing-whitespace + - id: trailing-whitespace exclude: "^.+\\.pbtxt$" - - id: end-of-file-fixer + - id: end-of-file-fixer exclude: "^.+\\.pbtxt$" - - id: check-yaml - - id: check-json - - id: check-added-large-files - args: ['--maxkb=1024', '--enforce-all'] - # TODO: remove the following after resolved + - id: check-yaml + - id: check-json + - id: check-added-large-files + args: ["--maxkb=1024", "--enforce-all"] exclude: | - (?x)^( - source/tests/infer/dipolecharge_e.pbtxt| - source/tests/infer/deeppolar_new.pbtxt - )$ - - id: check-merge-conflict - - id: check-symlinks - - id: check-toml -# Python -- repo: https://github.com/PyCQA/isort + (?x)^( + source/tests/infer/dipolecharge_e.pbtxt| + source/tests/infer/deeppolar_new.pbtxt + )$ + - id: check-merge-conflict + - id: check-symlinks + - id: check-toml + # Python + - repo: https://github.com/PyCQA/isort rev: 5.13.2 hooks: - - id: isort - files: \.py$ - exclude: ^source/3rdparty -- repo: https://github.com/astral-sh/ruff-pre-commit + - id: isort + files: \.py$ + exclude: ^source/3rdparty + - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: v0.1.13 + rev: v0.3.4 hooks: - - id: ruff - args: ["--fix"] - exclude: ^source/3rdparty - types_or: [python, pyi, jupyter] - - id: ruff-format - exclude: ^source/3rdparty - types_or: [python, pyi, jupyter] -# numpydoc -- repo: https://github.com/Carreau/velin + - id: ruff + args: ["--fix"] + exclude: ^source/3rdparty + types_or: [python, pyi, jupyter] + - id: ruff-format + exclude: ^source/3rdparty + types_or: [python, pyi, jupyter] + # numpydoc + - repo: https://github.com/Carreau/velin rev: 0.0.12 hooks: - - id: velin - args: ["--write"] - exclude: ^source/3rdparty -# Python inside docs -- repo: https://github.com/asottile/blacken-docs + - id: velin + args: ["--write"] + exclude: ^source/3rdparty + # Python inside docs + - repo: https://github.com/asottile/blacken-docs rev: 1.16.0 hooks: - - id: blacken-docs -# C++ -- repo: https://github.com/pre-commit/mirrors-clang-format - rev: v17.0.6 + - id: blacken-docs + # C++ + - repo: https://github.com/pre-commit/mirrors-clang-format + rev: v18.1.2 hooks: - - id: clang-format + - id: clang-format exclude: ^source/3rdparty|source/lib/src/gpu/cudart/.+\.inc -# CSS -- repo: https://github.com/pre-commit/mirrors-csslint - rev: v1.0.5 + # markdown, yaml, CSS, javascript + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v4.0.0-alpha.8 + hooks: + - id: prettier + types_or: [markdown, yaml, css] + # workflow files cannot be modified by pre-commit.ci + exclude: ^(source/3rdparty|\.github/workflows|\.clang-format) + # Shell + - repo: https://github.com/scop/pre-commit-shfmt + rev: v3.8.0-1 + hooks: + - id: shfmt + # CMake + - repo: https://github.com/cheshirekow/cmake-format-precommit + rev: v0.6.13 + hooks: + - id: cmake-format + #- id: cmake-lint + # license header + - repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.5.5 hooks: - - id: csslint -# Shell -- repo: https://github.com/scop/pre-commit-shfmt - rev: v3.7.0-4 - hooks: - - id: shfmt -# CMake -- repo: 
https://github.com/cheshirekow/cmake-format-precommit - rev: v0.6.13 - hooks: - - id: cmake-format - #- id: cmake-lint -# license header -- repo: https://github.com/Lucas-C/pre-commit-hooks - rev: v1.5.4 - hooks: - # C++, js - - id: insert-license + # C++, js + - id: insert-license files: \.(c|cc|cpp|js|ts|h|hpp)$ args: - - --license-filepath - - .license-header.txt - - --comment-style - - // - - --no-extra-eol + - --license-filepath + - .license-header.txt + - --comment-style + - // + - --no-extra-eol exclude: ^source/3rdparty|source/lib/src/gpu/cudart/.+\.inc - # CSS - - id: insert-license + # CSS + - id: insert-license files: \.(css|scss)$ args: - - --license-filepath - - .license-header.txt - - --comment-style - - /*| *| */ - - --no-extra-eol - # Python - - id: insert-license + - --license-filepath + - .license-header.txt + - --comment-style + - /*| *| */ + - --no-extra-eol + # Python + - id: insert-license files: \.(py|pyx)$ args: - - --license-filepath - - .license-header.txt - - --comment-style - - "#" - - --no-extra-eol + - --license-filepath + - .license-header.txt + - --comment-style + - "#" + - --no-extra-eol exclude: ^source/3rdparty - # HTML - - id: insert-license + # HTML + - id: insert-license files: \.(html|vue|xml)$ args: - - --license-filepath - - .license-header.txt - - --comment-style - - - - --no-extra-eol + - --license-filepath + - .license-header.txt + - --comment-style + - + - --no-extra-eol ci: autoupdate_branch: devel diff --git a/CITATIONS.bib b/CITATIONS.bib index ac682b28f7..425c00ac42 100644 --- a/CITATIONS.bib +++ b/CITATIONS.bib @@ -105,6 +105,25 @@ @misc{Zhang_2022_DPA1 doi = {10.48550/arXiv.2208.08236}, } +@misc{Zhang_2023_DPA2, + annote = {DPA-2}, + author = {Duo Zhang and Xinzijian Liu and Xiangyu Zhang and Chengqian Zhang and + Chun Cai and Hangrui Bi and Yiming Du and Xuejian Qin and Jiameng Huang + and Bowen Li and Yifan Shan and Jinzhe Zeng and Yuzhi Zhang and Siyuan + Liu and Yifan Li and Junhan Chang and Xinyan Wang and 
Shuo Zhou and + Jianchuan Liu and Xiaoshan Luo and Zhenyu Wang and Wanrun Jiang and Jing + Wu and Yudi Yang and Jiyuan Yang and Manyi Yang and Fu-Qiang Gong and + Linshuang Zhang and Mengchao Shi and Fu-Zhi Dai and Darrin M. York and + Shi Liu and Tong Zhu and Zhicheng Zhong and Jian Lv and Jun Cheng and + Weile Jia and Mohan Chen and Guolin Ke and Weinan E and Linfeng Zhang + and Han Wang}, + title = {{DPA-2: Towards a universal large atomic model for molecular and material + simulation}}, + publisher = {arXiv}, + year = {2023}, + doi = {10.48550/arXiv.2312.15492}, +} + @article{Zhang_PhysPlasmas_2020_v27_p122704, annote = {frame-specific parameters (e.g. electronic temperature)}, author = {Zhang, Yuzhi and Gao, Chang and Liu, Qianrui and Zhang, Linfeng and Wang, Han and Chen, Mohan}, diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e43e23beb6..cb08609c2b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,6 +7,7 @@ Welcome to [DeePMD-kit](https://github.com/deepmodeling/deepmd-kit)! You can either make a code contribution, help improve our document or offer help to other users. Your help is always appreciated. Come and have fun! ### Code contribution + You can start from any one of the following items to help improve deepmd-kit - Smash a bug @@ -18,6 +19,7 @@ See [here](#before-you-contribute) for some before-hand heads-up. See [here](#how-to-contribute) to learn how to contribute. ### Document improvement + You can start from any one of the following items to help improve [DeePMD-kit Docs](https://deepmd.readthedocs.io/en/latest/?badge=latest): - Fix typos or format (punctuation, space, indentation, code block, etc.) 
@@ -26,21 +28,27 @@ You can start from any one of the following items to help improve [DeePMD-kit Do - Translate docs changes from English to Chinese ### Offer help + You can help other users of deepmd-kit in the following way - Submit, reply to, and resolve [issues](https://github.com/deepmodeling/deepmd-kit/issues) - (Advanced) Review Pull Requests created by others ## Before you contribute + ### Overview of DeePMD-kit + Currently, we maintain two main branch: + - master: stable branch with version tag -- devel : branch for developers +- devel : branch for developers ### Developer guide -See [here](doc/development/index.md) for coding conventions, API and other needs-to-know of the code. + +See [documentation](https://deepmd.readthedocs.io/) for coding conventions, API and other needs-to-know of the code. ## How to contribute + Please perform the following steps to create your Pull Request to this repository. If don't like to use commands, you can also use [GitHub Desktop](https://desktop.github.com/), which is easier to get started. Go to [git documentation](https://git-scm.com/doc) if you want to really master git. ### Step 1: Fork the repository @@ -51,79 +59,82 @@ Please perform the following steps to create your Pull Request to this repositor ### Step 2: Clone the forked repository to local storage and set configurations 1. Clone your own repo, not the public repo (from deepmodeling) ! And change the branch to devel. - ```bash - git clone https://github.com/$username/deepmd-kit.git - # Replace `$username` with your GitHub ID - git checkout devel - ``` + ```bash + git clone https://github.com/$username/deepmd-kit.git + # Replace `$username` with your GitHub ID + + git checkout devel + ``` 2. Add deepmodeling's repo as your remote repo, we can name it "upstream". And fetch upstream's latest codes to your workstation. 
- ```bash - git remote add upstream https://github.com/deepmodeling/deepmd-kit.git - # After you add a remote repo, your local repo will be automatically named "origin". - git fetch upstream + ```bash + git remote add upstream https://github.com/deepmodeling/deepmd-kit.git + # After you add a remote repo, your local repo will be automatically named "origin". - # If your current codes are behind the latest codes, you should merge latest codes first. - # Notice you should merge from "devel"! - git merge upstream/devel - ``` + git fetch upstream + + # If your current codes are behind the latest codes, you should merge latest codes first. + # Notice you should merge from "devel"! + git merge upstream/devel + ``` 3. Modify your codes and design unit tests. 4. Commit your changes - ```bash - git status # Checks the local status - git add ... # Adds the file(s) you want to commit. If you want to commit all changes, you can directly use `git add.` - git commit -m "commit-message: update the xx" - ``` + + ```bash + git status # Checks the local status + git add ... # Adds the file(s) you want to commit. If you want to commit all changes, you can directly use `git add .` + git commit -m "commit-message: update the xx" + ``` 5. Push the changed codes to your original repo on github. - ```bash - git push origin devel - ``` + ```bash + git push origin devel + ``` ### Alternatively: Create a new branch 1. Get your local master up-to-date with upstream/master. - ```bash - cd $working_dir/deepmd-kit - git fetch upstream - git checkout master - git rebase upstream/master - ``` + ```bash + cd $working_dir/deepmd-kit + git fetch upstream + git checkout master + git rebase upstream/master + ``` 2. Create a new branch based on the master branch. - ```bash - git checkout -b new-branch-name - ``` + ```bash + git checkout -b new-branch-name + ``` 3. Modify your codes and design unit tests. 4. Commit your changes - ```bash - git status # Checks the local status - git add ...
# Adds the file(s) you want to commit. If you want to commit all changes, you can directly use `git add.` - git commit -m "commit-message: update the xx" - ``` + ```bash + git status # Checks the local status + git add ... # Adds the file(s) you want to commit. If you want to commit all changes, you can directly use `git add .` + git commit -m "commit-message: update the xx" + ``` 5. Keep your branch in sync with upstream/master - ```bash - # While on your new branch - git fetch upstream - git rebase upstream/master - ``` + ```bash + # While on your new branch + git fetch upstream + git rebase upstream/master + ``` 6. Push your changes to the remote - ```bash - git push -u origin new-branch-name # "-u" is used to track the remote branch from origin - ``` + ```bash + git push -u origin new-branch-name # "-u" is used to track the remote branch from origin + ``` ### Step 3: Create a pull request @@ -133,4 +144,5 @@ Please perform the following steps to create your Pull Request to this repositor Now, your PR is successfully submitted! After this PR is merged, you will automatically become a contributor to DeePMD-kit.
## Contact us + E-mail: contact@deepmodeling.org diff --git a/README.md b/README.md index 81fdead098..3838f2596a 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ [DeePMD-kit logo](./doc/logo.md) --------------------------------------------------------------------------------- +--- + +# DeePMD-kit -DeePMD-kit Manual -======== [![GitHub release](https://img.shields.io/github/release/deepmodeling/deepmd-kit.svg?maxAge=86400)](https://github.com/deepmodeling/deepmd-kit/releases) [![offline packages](https://img.shields.io/github/downloads/deepmodeling/deepmd-kit/total?label=offline%20packages)](https://github.com/deepmodeling/deepmd-kit/releases) [![conda-forge](https://img.shields.io/conda/dn/conda-forge/deepmd-kit?color=red&label=conda-forge&logo=conda-forge)](https://anaconda.org/conda-forge/deepmd-kit) @@ -11,168 +11,93 @@ [![docker pull](https://img.shields.io/docker/pulls/deepmodeling/deepmd-kit)](https://hub.docker.com/r/deepmodeling/deepmd-kit) [![Documentation Status](https://readthedocs.org/projects/deepmd/badge/)](https://deepmd.readthedocs.io/) -# Table of contents -- [About DeePMD-kit](#about-deepmd-kit) - - [Highlights in v2.0](#highlights-in-deepmd-kit-v2.0) - - [Highlighted features](#highlighted-features) - - [License and credits](#license-and-credits) - - [Deep Potential in a nutshell](#deep-potential-in-a-nutshell) -- [Download and install](#download-and-install) -- [Use DeePMD-kit](#use-deepmd-kit) -- [Code structure](#code-structure) -- [Troubleshooting](#troubleshooting) - -# About DeePMD-kit +## About DeePMD-kit + DeePMD-kit is a package written in Python/C++, designed to minimize the effort required to build deep learning-based model of interatomic potential energy and force field and to perform molecular dynamics (MD). This brings new hopes to addressing the accuracy-versus-efficiency dilemma in molecular simulations. 
Applications of DeePMD-kit span from finite molecules to extended systems and from metallic systems to chemically bonded systems. For more information, check the [documentation](https://deepmd.readthedocs.io/). -# Highlights in DeePMD-kit v2.0 -* [Model compression](doc/freeze/compress.md). Accelerate the efficiency of model inference 4-15 times. -* [New descriptors](doc/model/overall.md). Including [`se_e2_r`](doc/model/train-se-e2-r.md) and [`se_e3`](doc/model/train-se-e3.md). -* [Hybridization of descriptors](doc/model/train-hybrid.md). Hybrid descriptor constructed from the concatenation of several descriptors. -* [Atom type embedding](doc/model/train-se-e2-a-tebd.md). Enable atom-type embedding to decline training complexity and refine performance. -* Training and inference of the dipole (vector) and polarizability (matrix). -* Split of training and validation dataset. -* Optimized training on GPUs. - -## Highlighted features -* **interfaced with TensorFlow**, one of the most popular deep learning frameworks, making the training process highly automatic and efficient, in addition, Tensorboard can be used to visualize training procedures. -* **interfaced with high-performance classical MD and quantum (path-integral) MD packages**, i.e., LAMMPS and i-PI, respectively. -* **implements the Deep Potential series models**, which have been successfully applied to finite and extended systems including organic molecules, metals, semiconductors, insulators, etc. -* **implements MPI and GPU supports**, making it highly efficient for high-performance parallel and distributed computing. -* **highly modularized**, easy to adapt to different descriptors for deep learning-based potential energy models. - -## License and credits +### Highlighted features + +- **interfaced with multiple backends**, including TensorFlow and PyTorch, the most popular deep learning frameworks, making the training process highly automatic and efficient. 
+- **interfaced with high-performance classical MD and quantum (path-integral) MD packages**, including LAMMPS, i-PI, AMBER, CP2K, GROMACS, OpenMM, and ABACUS. +- **implements the Deep Potential series models**, which have been successfully applied to finite and extended systems, including organic molecules, metals, semiconductors, insulators, etc. +- **implements MPI and GPU supports**, making it highly efficient for high-performance parallel and distributed computing. +- **highly modularized**, easy to adapt to different descriptors for deep learning-based potential energy models. + +### License and credits + The project DeePMD-kit is licensed under [GNU LGPLv3.0](./LICENSE). If you use this code in any future publications, please cite the following publications for general purpose: + - Han Wang, Linfeng Zhang, Jiequn Han, and Weinan E. "DeePMD-kit: A deep learning package for many-body potential energy representation and molecular dynamics." Computer Physics Communications 228 (2018): 178-184. -[![doi:10.1016/j.cpc.2018.03.016](https://img.shields.io/badge/DOI-10.1016%2Fj.cpc.2018.03.016-blue)](https://doi.org/10.1016/j.cpc.2018.03.016) -[![Citations](https://citations.njzjz.win/10.1016/j.cpc.2018.03.016)](https://badge.dimensions.ai/details/doi/10.1016/j.cpc.2018.03.016) + [![doi:10.1016/j.cpc.2018.03.016](https://img.shields.io/badge/DOI-10.1016%2Fj.cpc.2018.03.016-blue)](https://doi.org/10.1016/j.cpc.2018.03.016) + [![Citations](https://citations.njzjz.win/10.1016/j.cpc.2018.03.016)](https://badge.dimensions.ai/details/doi/10.1016/j.cpc.2018.03.016) - Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A.
Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang. "DeePMD-kit v2: A software package for deep potential models." J. Chem. Phys. 159 (2023): 054801. -[![doi:10.1063/5.0155600](https://img.shields.io/badge/DOI-10.1063%2F5.0155600-blue)](https://doi.org/10.1063/5.0155600) -[![Citations](https://citations.njzjz.win/10.1063/5.0155600)](https://badge.dimensions.ai/details/doi/10.1063/5.0155600) + [![doi:10.1063/5.0155600](https://img.shields.io/badge/DOI-10.1063%2F5.0155600-blue)](https://doi.org/10.1063/5.0155600) + [![Citations](https://citations.njzjz.win/10.1063/5.0155600)](https://badge.dimensions.ai/details/doi/10.1063/5.0155600) In addition, please follow [the bib file](CITATIONS.bib) to cite the methods you used. -## Deep Potential in a nutshell -The goal of Deep Potential is to employ deep learning techniques and realize an inter-atomic potential energy model that is general, accurate, computationally efficient and scalable. The key component is to respect the extensive and symmetry-invariant properties of a potential energy model by assigning a local reference frame and a local environment to each atom. Each environment contains a finite number of atoms, whose local coordinates are arranged in a symmetry-preserving way. These local coordinates are then transformed, through a sub-network, to so-called *atomic energy*. Summing up all the atomic energies gives the potential energy of the system. +### Highlights in major versions + +#### Initial version -The initial proof of concept is in the [Deep Potential][1] paper, which employed an approach that was devised to train the neural network model with the potential energy only. With typical *ab initio* molecular dynamics (AIMD) datasets this is insufficient to reproduce the trajectories. The Deep Potential Molecular Dynamics ([DeePMD][2]) model overcomes this limitation. 
In addition, the learning process in DeePMD improves significantly over the Deep Potential method thanks to the introduction of a flexible family of loss functions. The NN potential constructed in this way reproduces accurately the AIMD trajectories, both classical and quantum (path integral), in extended and finite systems, at a cost that scales linearly with system size and is always several orders of magnitude lower than that of equivalent AIMD simulations. +The goal of Deep Potential is to employ deep learning techniques and realize an inter-atomic potential energy model that is general, accurate, computationally efficient and scalable. The key component is to respect the extensive and symmetry-invariant properties of a potential energy model by assigning a local reference frame and a local environment to each atom. Each environment contains a finite number of atoms, whose local coordinates are arranged in a symmetry-preserving way. These local coordinates are then transformed, through a sub-network, to so-called _atomic energy_. Summing up all the atomic energies gives the potential energy of the system. + +The initial proof of concept is in the [Deep Potential][1] paper, which employed an approach that was devised to train the neural network model with the potential energy only. With typical _ab initio_ molecular dynamics (AIMD) datasets this is insufficient to reproduce the trajectories. The Deep Potential Molecular Dynamics ([DeePMD][2]) model overcomes this limitation. In addition, the learning process in DeePMD improves significantly over the Deep Potential method thanks to the introduction of a flexible family of loss functions. The NN potential constructed in this way reproduces accurately the AIMD trajectories, both classical and quantum (path integral), in extended and finite systems, at a cost that scales linearly with system size and is always several orders of magnitude lower than that of equivalent AIMD simulations. 
Although highly efficient, the original Deep Potential model satisfies the extensive and symmetry-invariant properties of a potential energy model at the price of introducing discontinuities in the model. This has negligible influence on a trajectory from canonical sampling but might not be sufficient for calculations of dynamical and mechanical properties. These points motivated us to develop the Deep Potential-Smooth Edition ([DeepPot-SE][3]) model, which replaces the non-smooth local frame with a smooth and adaptive embedding network. DeepPot-SE shows great ability in modeling many kinds of systems that are of interest in the fields of physics, chemistry, biology, and materials science. In addition to building up potential energy models, DeePMD-kit can also be used to build up coarse-grained models. In these models, the quantity that we want to parameterize is the free energy, or the coarse-grained potential, of the coarse-grained particles. See the [DeePCG paper][4] for more details. -See [our latest paper](https://doi.org/10.48550/arXiv.2304.09409) for details of all features. - -# Download and install - -Please follow our [GitHub](https://github.com/deepmodeling/deepmd-kit) webpage to download the [latest released version](https://github.com/deepmodeling/deepmd-kit/tree/master) and [development version](https://github.com/deepmodeling/deepmd-kit/tree/devel). - -DeePMD-kit offers multiple installation methods. It is recommended to use easy methods like [offline packages](doc/install/easy-install.md#offline-packages), [conda](doc/install/easy-install.md#with-conda) and [docker](doc/install/easy-install.md#with-docker). - -One may manually install DeePMD-kit by following the instructions on [installing the Python interface](doc/install/install-from-source.md#install-the-python-interface) and [installing the C++ interface](doc/install/install-from-source.md#install-the-c-interface). 
The C++ interface is necessary when using DeePMD-kit with LAMMPS, i-PI or GROMACS. - - -# Use DeePMD-kit - -A quick start on using DeePMD-kit can be found [here](doc/getting-started/quick_start.ipynb). - -A full [document](doc/train/train-input-auto.rst) on options in the training input script is available. - -# Advanced - -- [Installation](doc/install/index.md) - - [Easy install](doc/install/easy-install.md) - - [Install from source code](doc/install/install-from-source.md) - - [Install from pre-compiled C library](doc/install/install-from-c-library.md) - - [Install LAMMPS](doc/install/install-lammps.md) - - [Install i-PI](doc/install/install-ipi.md) - - [Install GROMACS](doc/install/install-gromacs.md) - - [Building conda packages](doc/install/build-conda.md) - - [Install Node.js interface](doc/install/install-nodejs.md) - - [Easy install the latest development version](doc/install/easy-install-dev.md) -- [Data](doc/data/index.md) - - [System](doc/data/system.md) - - [Formats of a system](doc/data/data-conv.md) - - [Prepare data with dpdata](doc/data/dpdata.md) -- [Model](doc/model/index.md) - - [Overall](doc/model/overall.md) - - [Descriptor `"se_e2_a"`](doc/model/train-se-e2-a.md) - - [Descriptor `"se_e2_r"`](doc/model/train-se-e2-r.md) - - [Descriptor `"se_e3"`](doc/model/train-se-e3.md) - - [Descriptor `"se_atten"`](doc/model/train-se-atten.md) - - [Descriptor `"se_atten_v2"`](doc/model/train-se-atten.md#descriptor-se_atten_v2) - - [Descriptor `"hybrid"`](doc/model/train-hybrid.md) - - [Descriptor `sel`](doc/model/sel.md) - - [Fit energy](doc/model/train-energy.md) - - [Fit spin energy](doc/model/train-energy-spin.md) - - [Fit `tensor` like `Dipole` and `Polarizability`](doc/model/train-fitting-tensor.md) - - [Fit electronic density of states (DOS)](doc/model/train-fitting-dos.md) - - [Train a Deep Potential model using `type embedding` approach](doc/model/train-se-e2-a-tebd.md) - - [Deep potential long-range](doc/model/dplr.md) - - [Deep Potential - Range 
Correction (DPRc)](doc/model/dprc.md) - - [Linear model](doc/model/linear.md) - - [Interpolation or combination with a pairwise potential](doc/model/pairtab.md) -- [Training](doc/train/index.md) - - [Training a model](doc/train/training.md) - - [Advanced options](doc/train/training-advanced.md) - - [Parallel training](doc/train/parallel-training.md) - - [Multi-task training](doc/train/multi-task-training.md) - - [TensorBoard Usage](doc/train/tensorboard.md) - - [Known limitations of using GPUs](doc/train/gpu-limitations.md) - - [Training Parameters](doc/train-input-auto.rst) -- [Freeze and Compress](doc/freeze/index.rst) - - [Freeze a model](doc/freeze/freeze.md) - - [Compress a model](doc/freeze/compress.md) -- [Test](doc/test/index.rst) - - [Test a model](doc/test/test.md) - - [Calculate Model Deviation](doc/test/model-deviation.md) -- [Inference](doc/inference/index.rst) - - [Python interface](doc/inference/python.md) - - [C++ interface](doc/inference/cxx.md) - - [Node.js interface](doc/inference/nodejs.md) -- [Integrate with third-party packages](doc/third-party/index.rst) - - [Use deep potential with ASE](doc/third-party/ase.md) - - [Run MD with LAMMPS](doc/third-party/lammps-command.md) - - [Run path-integral MD with i-PI](doc/third-party/ipi.md) - - [Run MD with GROMACS](doc/third-party/gromacs.md) - - [Interfaces out of DeePMD-kit](doc/third-party/out-of-deepmd-kit.md) -- [Use NVNMD](doc/nvnmd/index.md) - -# Code structure +#### v1 + +- Code refactor to make it highly modularized. +- GPU support for descriptors. + +#### v2 + +- Model compression. Accelerate the efficiency of model inference 4-15 times. +- New descriptors. Including `se_e2_r`, `se_e3`, and `se_atten` (DPA-1). +- Hybridization of descriptors. Hybrid descriptor constructed from the concatenation of several descriptors. +- Atom type embedding. Enable atom-type embedding to decline training complexity and refine performance. 
+- Training and inference of the dipole (vector) and polarizability (matrix). +- Split of training and validation dataset. +- Optimized training on GPUs, including CUDA and ROCm. +- Non-von-Neumann. +- C API to interface with the third-party packages. + +See [our latest paper](https://doi.org/10.1063/5.0155600) for details of all features until v2.2.3. + +#### v3 + +- Multiple backends supported. Add a PyTorch backend. +- The DPA-2 model. + +## Install and use DeePMD-kit + +Please read the [online documentation](https://deepmd.readthedocs.io/) for how to install and use DeePMD-kit. + +## Code structure The code is organized as follows: -* `data/raw`: tools manipulating the raw data files. -* `examples`: examples. -* `deepmd`: DeePMD-kit python modules. -* `source/api_cc`: source code of DeePMD-kit C++ API. -* `source/ipi`: source code of i-PI client. -* `source/lib`: source code of DeePMD-kit library. -* `source/lmp`: source code of Lammps module. -* `source/gmx`: source code of Gromacs plugin. -* `source/op`: TensorFlow op implementation. working with the library. - - -# Troubleshooting - -- [Model compatibility](doc/troubleshooting/model_compatability.md) -- [Installation](doc/troubleshooting/installation.md) -- [The temperature undulates violently during the early stages of MD](doc/troubleshooting/md_energy_undulation.md) -- [MD: cannot run LAMMPS after installing a new version of DeePMD-kit](doc/troubleshooting/md_version_compatibility.md) -- [Do we need to set rcut < half boxsize?](doc/troubleshooting/howtoset_rcut.md) -- [How to set sel?](doc/troubleshooting/howtoset_sel.md) -- [How to control the parallelism of a job?](doc/troubleshooting/howtoset_num_nodes.md) -- [How to tune Fitting/embedding-net size?](doc/troubleshooting/howtoset_netsize.md) -- [Why does a model have low precision?](doc/troubleshooting/precision.md) +- `examples`: examples. +- `deepmd`: DeePMD-kit python modules. +- `source/lib`: source code of the core library. 
+- `source/op`: Operator (OP) implementation. +- `source/api_cc`: source code of DeePMD-kit C++ API. +- `source/api_c`: source code of the C API. +- `source/nodejs`: source code of the Node.js API. +- `source/ipi`: source code of i-PI client. +- `source/lmp`: source code of Lammps module. +- `source/gmx`: source code of Gromacs plugin. # Contributing See [DeePMD-kit Contributing Guide](CONTRIBUTING.md) to become a contributor! 🤓 - [1]: https://arxiv.org/abs/1707.01478 [2]: https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.120.143001 [3]: https://arxiv.org/abs/1805.09003 diff --git a/backend/dp_backend.py b/backend/dp_backend.py index d28afdb239..2ca0ff2f93 100644 --- a/backend/dp_backend.py +++ b/backend/dp_backend.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """A PEP-517 backend to find TensorFlow.""" + from typing import ( List, ) diff --git a/backend/dynamic_metadata.py b/backend/dynamic_metadata.py index ab955c3cf8..2a66ff065c 100644 --- a/backend/dynamic_metadata.py +++ b/backend/dynamic_metadata.py @@ -1,4 +1,8 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import sys +from pathlib import ( + Path, +) from typing import ( Dict, List, @@ -12,6 +16,11 @@ get_argument_from_env, ) +if sys.version_info >= (3, 11): + import tomllib +else: + import tomli as tomllib + __all__ = ["dynamic_metadata"] @@ -22,70 +31,24 @@ def __dir__() -> List[str]: def dynamic_metadata( field: str, settings: Optional[Dict[str, object]] = None, -) -> str: +): assert field in ["optional-dependencies", "entry-points", "scripts"] _, _, find_libpython_requires, extra_scripts, tf_version = get_argument_from_env() + with Path("pyproject.toml").open("rb") as f: + pyproject = tomllib.load(f) + if field == "scripts": return { - "dp": "deepmd_utils.main:main", + **pyproject["tool"]["deepmd_build_backend"]["scripts"], **extra_scripts, } elif field == "optional-dependencies": + optional_dependencies = pyproject["tool"]["deepmd_build_backend"][ + "optional-dependencies" + 
] + optional_dependencies["lmp"].extend(find_libpython_requires) + optional_dependencies["ipi"].extend(find_libpython_requires) return { - "test": [ - "dpdata>=0.1.9", - "ase", - "pytest", - "pytest-cov", - "pytest-sugar", - "dpgui", - ], - "docs": [ - "sphinx>=3.1.1", - "sphinx_rtd_theme>=1.0.0rc1", - "sphinx_markdown_tables", - "myst-nb>=1.0.0rc0", - "myst-parser>=0.19.2", - "breathe", - "exhale", - "numpydoc", - "ase", - "deepmodeling-sphinx>=0.1.0", - "dargs>=0.3.4", - "sphinx-argparse", - "pygments-lammps", - "sphinxcontrib-bibtex", - ], - "lmp": [ - "lammps~=2023.8.2.2.0", - *find_libpython_requires, - ], - "ipi": [ - "i-PI", - *find_libpython_requires, - ], - "gui": [ - "dpgui", - ], + **optional_dependencies, **get_tf_requirement(tf_version), - "cu11": [ - "nvidia-cuda-runtime-cu11", - "nvidia-cublas-cu11", - "nvidia-cufft-cu11", - "nvidia-curand-cu11", - "nvidia-cusolver-cu11", - "nvidia-cusparse-cu11", - "nvidia-cudnn-cu11", - "nvidia-cuda-nvcc-cu11", - ], - "cu12": [ - "nvidia-cuda-runtime-cu12", - "nvidia-cublas-cu12", - "nvidia-cufft-cu12", - "nvidia-curand-cu12", - "nvidia-cusolver-cu12", - "nvidia-cusparse-cu12", - "nvidia-cudnn-cu12", - "nvidia-cuda-nvcc-cu12", - ], } diff --git a/backend/find_pytorch.py b/backend/find_pytorch.py new file mode 100644 index 0000000000..f039b6f289 --- /dev/null +++ b/backend/find_pytorch.py @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import os +import site +from functools import ( + lru_cache, +) +from importlib.machinery import ( + FileFinder, +) +from importlib.util import ( + find_spec, +) +from pathlib import ( + Path, +) +from sysconfig import ( + get_path, +) +from typing import ( + Optional, +) + + +@lru_cache +def find_pytorch() -> Optional[str]: + """Find PyTorch library. + + Tries to find PyTorch in the order of: + + 1. Environment variable `PYTORCH_ROOT` if set + 2. The current Python environment. + 3. user site packages directory if enabled + 4. 
+ system site packages directory (purelib) + + Considering the default PyTorch package still uses old CXX11 ABI, we + cannot install it automatically. + + Returns + ------- + str, optional + PyTorch library path if found. + """ + if os.environ.get("DP_ENABLE_PYTORCH", "0") == "0": + return None + pt_spec = None + + if (pt_spec is None or not pt_spec) and os.environ.get("PYTORCH_ROOT") is not None: + site_packages = Path(os.environ.get("PYTORCH_ROOT")).parent.absolute() + pt_spec = FileFinder(str(site_packages)).find_spec("torch") + + # get pytorch spec + # note: isolated build will not work for backend + if pt_spec is None or not pt_spec: + pt_spec = find_spec("torch") + + if not pt_spec and site.ENABLE_USER_SITE: + # first search PyTorch from user site-packages before global site-packages + site_packages = site.getusersitepackages() + if site_packages: + pt_spec = FileFinder(site_packages).find_spec("torch") + + if not pt_spec: + # purelib gets site-packages path + site_packages = get_path("purelib") + if site_packages: + pt_spec = FileFinder(site_packages).find_spec("torch") + + # get install dir from spec + try: + pt_install_dir = pt_spec.submodule_search_locations[0] # type: ignore + # AttributeError if pt_spec is None + # TypeError if submodule_search_locations are None + # IndexError if submodule_search_locations is an empty list + except (AttributeError, TypeError, IndexError): + pt_install_dir = None + return pt_install_dir diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py index 08a73f7252..4d63f3118d 100644 --- a/backend/find_tensorflow.py +++ b/backend/find_tensorflow.py @@ -28,7 +28,7 @@ ) -@lru_cache() +@lru_cache def find_tensorflow() -> Tuple[Optional[str], List[str]]: """Find TensorFlow library. @@ -47,15 +47,11 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]: list of str TensorFlow requirement if not found. Empty if found.
""" + if os.environ.get("DP_ENABLE_TENSORFLOW", "1") == "0": + return None, [] requires = [] tf_spec = None - if os.environ.get("CIBUILDWHEEL", "0") == "1" and os.environ.get( - "CIBW_BUILD", "" - ).endswith("macosx_arm64"): - # cibuildwheel cross build - site_packages = Path(os.environ.get("RUNNER_TEMP")) / "tensorflow" - tf_spec = FileFinder(str(site_packages)).find_spec("tensorflow") if (tf_spec is None or not tf_spec) and os.environ.get( "TENSORFLOW_ROOT" @@ -87,6 +83,7 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]: # TypeError if submodule_search_locations are None # IndexError if submodule_search_locations is an empty list except (AttributeError, TypeError, IndexError): + tf_version = "" if os.environ.get("CIBUILDWHEEL", "0") == "1": cuda_version = os.environ.get("CUDA_VERSION", "12.2") if cuda_version == "" or cuda_version in SpecifierSet(">=12,<13"): @@ -103,15 +100,16 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]: "tensorflow-cpu>=2.5.0rc0,<2.15; platform_machine=='x86_64' and platform_system == 'Linux'", ] ) + tf_version = "2.14.1" else: raise RuntimeError("Unsupported CUDA version") - requires.extend(get_tf_requirement()["cpu"]) + requires.extend(get_tf_requirement(tf_version)["cpu"]) # setuptools will re-find tensorflow after installing setup_requires tf_install_dir = None return tf_install_dir, requires -@lru_cache() +@lru_cache def get_tf_requirement(tf_version: str = "") -> dict: """Get TensorFlow requirement (CPU) when TF is not installed. @@ -127,6 +125,12 @@ def get_tf_requirement(tf_version: str = "") -> dict: dict TensorFlow requirement, including cpu and gpu. 
""" + if tf_version is None: + return { + "cpu": [], + "gpu": [], + "mpi": [], + } if tf_version == "": tf_version = os.environ.get("TENSORFLOW_VERSION", "") @@ -134,6 +138,11 @@ def get_tf_requirement(tf_version: str = "") -> dict: extra_select = {} if not (tf_version == "" or tf_version in SpecifierSet(">=2.12", prereleases=True)): extra_requires.append("protobuf<3.20") + # keras 3 is not compatible with tf.compat.v1 + if tf_version == "" or tf_version in SpecifierSet(">=2.15.0rc0", prereleases=True): + extra_requires.append("tf-keras; python_version>='3.9'") + # only TF>=2.16 is compatible with Python 3.12 + extra_requires.append("tf-keras>=2.16.0rc0; python_version>='3.12'") if tf_version == "" or tf_version in SpecifierSet(">=1.15", prereleases=True): extra_select["mpi"] = [ "horovod", @@ -189,7 +198,7 @@ def get_tf_requirement(tf_version: str = "") -> dict: } -@lru_cache() +@lru_cache def get_tf_version(tf_path: Union[str, Path]) -> str: """Get TF version from a TF Python library path. diff --git a/backend/read_env.py b/backend/read_env.py index 079211d4d7..c97c854a13 100644 --- a/backend/read_env.py +++ b/backend/read_env.py @@ -13,13 +13,16 @@ Version, ) +from .find_pytorch import ( + find_pytorch, +) from .find_tensorflow import ( find_tensorflow, get_tf_version, ) -@lru_cache() +@lru_cache def get_argument_from_env() -> Tuple[str, list, list, dict, str]: """Get the arguments from environment variables. 
@@ -78,18 +81,41 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]: cmake_args.append(f"-DLAMMPS_VERSION={dp_lammps_version}") if dp_ipi == "1": cmake_args.append("-DENABLE_IPI:BOOL=TRUE") - extra_scripts["dp_ipi"] = "deepmd.entrypoints.ipi:dp_ipi" + extra_scripts["dp_ipi"] = "deepmd.tf.entrypoints.ipi:dp_ipi" - tf_install_dir, _ = find_tensorflow() - tf_version = get_tf_version(tf_install_dir) - if tf_version == "" or Version(tf_version) >= Version("2.12"): + if os.environ.get("DP_ENABLE_TENSORFLOW", "1") == "1": + tf_install_dir, _ = find_tensorflow() + tf_version = get_tf_version(tf_install_dir) + if tf_version == "" or Version(tf_version) >= Version("2.12"): + find_libpython_requires = [] + else: + find_libpython_requires = ["find_libpython"] + cmake_args.extend( + [ + "-DENABLE_TENSORFLOW=ON", + f"-DTENSORFLOW_VERSION={tf_version}", + f"-DTENSORFLOW_ROOT:PATH={tf_install_dir}", + ] + ) + else: find_libpython_requires = [] + cmake_args.append("-DENABLE_TENSORFLOW=OFF") + tf_version = None + + if os.environ.get("DP_ENABLE_PYTORCH", "0") == "1": + pt_install_dir = find_pytorch() + if pt_install_dir is None: + raise RuntimeError("Cannot find installed PyTorch.") + cmake_args.extend( + [ + "-DENABLE_PYTORCH=ON", + f"-DCMAKE_PREFIX_PATH={pt_install_dir}", + ] + ) else: - find_libpython_requires = ["find_libpython"] - cmake_args.append(f"-DTENSORFLOW_VERSION={tf_version}") + cmake_args.append("-DENABLE_PYTORCH=OFF") cmake_args = [ - f"-DTENSORFLOW_ROOT:PATH={tf_install_dir}", "-DBUILD_PY_IF:BOOL=TRUE", *cmake_args, ] diff --git a/codecov.yml b/codecov.yml index 3654859423..8f639ec037 100644 --- a/codecov.yml +++ b/codecov.yml @@ -20,7 +20,6 @@ component_management: name: Python paths: - deepmd/** - - deepmd_utils/** - component_id: module_op name: OP paths: diff --git a/data/raw/copy_raw.py b/data/raw/copy_raw.py index 642865db86..69ccdf5c63 100755 --- a/data/raw/copy_raw.py +++ b/data/raw/copy_raw.py @@ -85,7 +85,7 @@ def _main(): ) args = 
parser.parse_args() - print("# copy the system by %s copies" % args.ncopies) + print("# copy the system by %s copies" % args.ncopies) # noqa: T201 assert np.all( np.array(args.ncopies, dtype=int) >= np.array([1, 1, 1], dtype=int) ), "number of copies should be larger than or equal to 1" diff --git a/data/raw/shuffle_raw.py b/data/raw/shuffle_raw.py index 51bb7466c9..b4fc1457e5 100755 --- a/data/raw/shuffle_raw.py +++ b/data/raw/shuffle_raw.py @@ -37,7 +37,7 @@ def _main(): outpath = args.OUTPUT if not os.path.isdir(inpath): - print("# no input dir " + inpath + ", exit") + print("# no input dir " + inpath + ", exit") # noqa: T201 return if not os.path.isdir(outpath): @@ -47,16 +47,16 @@ def _main(): raws = detect_raw(inpath) if len(raws) == 0: - print("# no file to shuffle, exit") + print("# no file to shuffle, exit") # noqa: T201 return assert "box.raw" in raws tmp = np.loadtxt(os.path.join(inpath, "box.raw")) tmp = np.reshape(tmp, [-1, 9]) nframe = tmp.shape[0] - print(nframe) + print(nframe) # noqa: T201 - print( + print( # noqa: T201 "# will shuffle raw files " + str(raws) + " in dir " diff --git a/deepmd/__init__.py b/deepmd/__init__.py index 0190bbc124..1ce4beb723 100644 --- a/deepmd/__init__.py +++ b/deepmd/__init__.py @@ -1,61 +1,45 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -"""Root of the deepmd package, exposes all public classes and submodules.""" +"""DeePMD-kit is a package written in Python/C++, designed to +minimize the effort required to build deep learning-based model +of interatomic potential energy and force field and to perform +molecular dynamics (MD). -try: - from importlib import ( - metadata, - ) -except ImportError: # for Python<3.8 - import importlib_metadata as metadata - -import deepmd.utils.network as network - -from . 
import ( - cluster, - descriptor, - fit, - loss, - nvnmd, - utils, -) -from .env import ( - set_mkl, -) -from .infer import ( - DeepEval, - DeepPotential, -) -from .infer.data_modifier import ( - DipoleChargeModifier, -) - -set_mkl() +The top module (deepmd.__init__) should not import any third-party +modules for performance. +""" try: - from deepmd_utils._version import version as __version__ + from deepmd._version import version as __version__ except ImportError: from .__about__ import ( __version__, ) -# load third-party plugins -try: - eps = metadata.entry_points(group="deepmd") -except TypeError: - eps = metadata.entry_points().get("deepmd", []) -for ep in eps: - ep.load() + +def DeepPotential(*args, **kwargs): + """Factory function that forwards to DeepEval (for compatbility + and performance). + + Parameters + ---------- + *args + positional arguments + **kwargs + keyword arguments + + Returns + ------- + DeepEval + potentials + """ + from deepmd.infer import ( + DeepPotential, + ) + + return DeepPotential(*args, **kwargs) + __all__ = [ "__version__", - "descriptor", - "fit", - "loss", - "utils", - "cluster", - "network", - "DeepEval", "DeepPotential", - "DipoleChargeModifier", - "nvnmd", ] diff --git a/deepmd/__main__.py b/deepmd/__main__.py index 6026b1c269..a31379b5e3 100644 --- a/deepmd/__main__.py +++ b/deepmd/__main__.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Package dp entry point.""" -from .entrypoints.main import ( +from deepmd.main import ( main, ) diff --git a/deepmd/backend/__init__.py b/deepmd/backend/__init__.py new file mode 100644 index 0000000000..2b3f24c5ed --- /dev/null +++ b/deepmd/backend/__init__.py @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Backends. + +Avoid directly importing third-party libraries in this module for performance. 
+""" + +# copy from dpdata +from importlib import ( + import_module, + metadata, +) +from pathlib import ( + Path, +) + +PACKAGE_BASE = "deepmd.backend" +NOT_LOADABLE = ("__init__.py",) + +for module_file in Path(__file__).parent.glob("*.py"): + if module_file.name not in NOT_LOADABLE: + module_name = f".{module_file.stem}" + import_module(module_name, PACKAGE_BASE) + +# https://setuptools.readthedocs.io/en/latest/userguide/entry_point.html +try: + eps = metadata.entry_points(group="deepmd.backend") +except TypeError: + eps = metadata.entry_points().get("deepmd.backend", []) +for ep in eps: + plugin = ep.load() diff --git a/deepmd/backend/backend.py b/deepmd/backend/backend.py new file mode 100644 index 0000000000..8f7bca319e --- /dev/null +++ b/deepmd/backend/backend.py @@ -0,0 +1,204 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from abc import ( + abstractmethod, +) +from enum import ( + Flag, + auto, +) +from typing import ( + TYPE_CHECKING, + Callable, + ClassVar, + Dict, + List, + Type, +) + +from deepmd.utils.plugin import ( + PluginVariant, + make_plugin_registry, +) + +if TYPE_CHECKING: + from argparse import ( + Namespace, + ) + + from deepmd.infer.deep_eval import ( + DeepEvalBackend, + ) + from deepmd.utils.neighbor_stat import ( + NeighborStat, + ) + + +class Backend(PluginVariant, make_plugin_registry("backend")): + r"""General backend class. + + Examples + -------- + >>> @Backend.register("tf") + >>> @Backend.register("tensorflow") + >>> class TensorFlowBackend(Backend): + ... pass + """ + + @staticmethod + def get_backend(key: str) -> Type["Backend"]: + """Get the backend by key. + + Parameters + ---------- + key : str + the key of a backend + + Returns + ------- + Backend + the backend + """ + return Backend.get_class_by_type(key) + + @staticmethod + def get_backends() -> Dict[str, Type["Backend"]]: + """Get all the registered backend names. 
+ + Returns + ------- + list + all the registered backends + """ + return Backend.get_plugins() + + @staticmethod + def get_backends_by_feature( + feature: "Backend.Feature", + ) -> Dict[str, Type["Backend"]]: + """Get all the registered backend names with a specific feature. + + Parameters + ---------- + feature : Backend.Feature + the feature flag + + Returns + ------- + list + all the registered backends with the feature + """ + return { + key: backend + for key, backend in Backend.get_backends().items() + if backend.features & feature + } + + @staticmethod + def detect_backend_by_model(filename: str) -> Type["Backend"]: + """Detect the backend of the given model file. + + Parameters + ---------- + filename : str + The model file name + """ + filename = str(filename).lower() + for backend in Backend.get_backends().values(): + for suffix in backend.suffixes: + if filename.endswith(suffix): + return backend + raise ValueError(f"Cannot detect the backend of the model file {filename}.") + + class Feature(Flag): + """Feature flag to indicate whether the backend supports certain features.""" + + ENTRY_POINT = auto() + """Support entry point hook.""" + DEEP_EVAL = auto() + """Support Deep Eval backend.""" + NEIGHBOR_STAT = auto() + """Support neighbor statistics.""" + IO = auto() + """Support IO hook.""" + + name: ClassVar[str] = "Unknown" + """The formal name of the backend. + + To be consistent, this name should be also registered in the plugin system.""" + + features: ClassVar[Feature] = Feature(0) + """The features of the backend.""" + suffixes: ClassVar[List[str]] = [] + """The supported suffixes of the saved model. + + The first element is considered as the default suffix.""" + + @abstractmethod + def is_available(self) -> bool: + """Check if the backend is available. + + Returns + ------- + bool + Whether the backend is available. 
+ """ + + @property + @abstractmethod + def entry_point_hook(self) -> Callable[["Namespace"], None]: + """The entry point hook of the backend. + + Returns + ------- + Callable[[Namespace], None] + The entry point hook of the backend. + """ + pass + + @property + @abstractmethod + def deep_eval(self) -> Type["DeepEvalBackend"]: + """The Deep Eval backend of the backend. + + Returns + ------- + type[DeepEvalBackend] + The Deep Eval backend of the backend. + """ + pass + + @property + @abstractmethod + def neighbor_stat(self) -> Type["NeighborStat"]: + """The neighbor statistics of the backend. + + Returns + ------- + type[NeighborStat] + The neighbor statistics of the backend. + """ + pass + + @property + @abstractmethod + def serialize_hook(self) -> Callable[[str], dict]: + """The serialize hook to convert the model file to a dictionary. + + Returns + ------- + Callable[[str], dict] + The serialize hook of the backend. + """ + pass + + @property + @abstractmethod + def deserialize_hook(self) -> Callable[[str, dict], None]: + """The deserialize hook to convert the dictionary to a model file. + + Returns + ------- + Callable[[str, dict], None] + The deserialize hook of the backend. 
+ """ + pass diff --git a/deepmd/backend/dpmodel.py b/deepmd/backend/dpmodel.py new file mode 100644 index 0000000000..64df95586d --- /dev/null +++ b/deepmd/backend/dpmodel.py @@ -0,0 +1,122 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + TYPE_CHECKING, + Callable, + ClassVar, + List, + Type, +) + +from deepmd.backend.backend import ( + Backend, +) + +if TYPE_CHECKING: + from argparse import ( + Namespace, + ) + + from deepmd.infer.deep_eval import ( + DeepEvalBackend, + ) + from deepmd.utils.neighbor_stat import ( + NeighborStat, + ) + + +@Backend.register("dp") +@Backend.register("dpmodel") +@Backend.register("np") +@Backend.register("numpy") +class DPModelBackend(Backend): + """DPModel backend that uses NumPy as the reference implementation.""" + + name = "DPModel" + """The formal name of the backend.""" + features: ClassVar[Backend.Feature] = ( + Backend.Feature.DEEP_EVAL | Backend.Feature.NEIGHBOR_STAT | Backend.Feature.IO + ) + """The features of the backend.""" + suffixes: ClassVar[List[str]] = [".dp"] + """The suffixes of the backend.""" + + def is_available(self) -> bool: + """Check if the backend is available. + + Returns + ------- + bool + Whether the backend is available. + """ + return True + + @property + def entry_point_hook(self) -> Callable[["Namespace"], None]: + """The entry point hook of the backend. + + Returns + ------- + Callable[[Namespace], None] + The entry point hook of the backend. + """ + raise NotImplementedError(f"Unsupported backend: {self.name}") + + @property + def deep_eval(self) -> Type["DeepEvalBackend"]: + """The Deep Eval backend of the backend. + + Returns + ------- + type[DeepEvalBackend] + The Deep Eval backend of the backend. + """ + from deepmd.dpmodel.infer.deep_eval import ( + DeepEval, + ) + + return DeepEval + + @property + def neighbor_stat(self) -> Type["NeighborStat"]: + """The neighbor statistics of the backend. 
+ + Returns + ------- + type[NeighborStat] + The neighbor statistics of the backend. + """ + from deepmd.dpmodel.utils.neighbor_stat import ( + NeighborStat, + ) + + return NeighborStat + + @property + def serialize_hook(self) -> Callable[[str], dict]: + """The serialize hook to convert the model file to a dictionary. + + Returns + ------- + Callable[[str], dict] + The serialize hook of the backend. + """ + from deepmd.dpmodel.utils.network import ( + load_dp_model, + ) + + return load_dp_model + + @property + def deserialize_hook(self) -> Callable[[str, dict], None]: + """The deserialize hook to convert the dictionary to a model file. + + Returns + ------- + Callable[[str, dict], None] + The deserialize hook of the backend. + """ + from deepmd.dpmodel.utils.network import ( + save_dp_model, + ) + + return save_dp_model diff --git a/deepmd/backend/pytorch.py b/deepmd/backend/pytorch.py new file mode 100644 index 0000000000..fb7d30e994 --- /dev/null +++ b/deepmd/backend/pytorch.py @@ -0,0 +1,126 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from importlib.util import ( + find_spec, +) +from typing import ( + TYPE_CHECKING, + Callable, + ClassVar, + List, + Type, +) + +from deepmd.backend.backend import ( + Backend, +) + +if TYPE_CHECKING: + from argparse import ( + Namespace, + ) + + from deepmd.infer.deep_eval import ( + DeepEvalBackend, + ) + from deepmd.utils.neighbor_stat import ( + NeighborStat, + ) + + +@Backend.register("pt") +@Backend.register("pytorch") +class PyTorchBackend(Backend): + """PyTorch backend.""" + + name = "PyTorch" + """The formal name of the backend.""" + features: ClassVar[Backend.Feature] = ( + Backend.Feature.ENTRY_POINT + | Backend.Feature.DEEP_EVAL + | Backend.Feature.NEIGHBOR_STAT + | Backend.Feature.IO + ) + """The features of the backend.""" + suffixes: ClassVar[List[str]] = [".pth", ".pt"] + """The suffixes of the backend.""" + + def is_available(self) -> bool: + """Check if the backend is available. 
+ + Returns + ------- + bool + Whether the backend is available. + """ + return find_spec("torch") is not None + + @property + def entry_point_hook(self) -> Callable[["Namespace"], None]: + """The entry point hook of the backend. + + Returns + ------- + Callable[[Namespace], None] + The entry point hook of the backend. + """ + from deepmd.pt.entrypoints.main import main as deepmd_main + + return deepmd_main + + @property + def deep_eval(self) -> Type["DeepEvalBackend"]: + """The Deep Eval backend of the backend. + + Returns + ------- + type[DeepEvalBackend] + The Deep Eval backend of the backend. + """ + from deepmd.pt.infer.deep_eval import DeepEval as DeepEvalPT + + return DeepEvalPT + + @property + def neighbor_stat(self) -> Type["NeighborStat"]: + """The neighbor statistics of the backend. + + Returns + ------- + type[NeighborStat] + The neighbor statistics of the backend. + """ + from deepmd.pt.utils.neighbor_stat import ( + NeighborStat, + ) + + return NeighborStat + + @property + def serialize_hook(self) -> Callable[[str], dict]: + """The serialize hook to convert the model file to a dictionary. + + Returns + ------- + Callable[[str], dict] + The serialize hook of the backend. + """ + from deepmd.pt.utils.serialization import ( + serialize_from_file, + ) + + return serialize_from_file + + @property + def deserialize_hook(self) -> Callable[[str, dict], None]: + """The deserialize hook to convert the dictionary to a model file. + + Returns + ------- + Callable[[str, dict], None] + The deserialize hook of the backend. 
+ """ + from deepmd.pt.utils.serialization import ( + deserialize_to_file, + ) + + return deserialize_to_file diff --git a/deepmd/backend/suffix.py b/deepmd/backend/suffix.py new file mode 100644 index 0000000000..273fbc0951 --- /dev/null +++ b/deepmd/backend/suffix.py @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import functools +import operator +from pathlib import ( + Path, +) +from typing import ( + Optional, + Type, + Union, +) + +from deepmd.backend.backend import ( + Backend, +) + + +def format_model_suffix( + filename: str, + feature: Optional[Backend.Feature] = None, + preferred_backend: Optional[Union[str, Type["Backend"]]] = None, + strict_prefer: Optional[bool] = None, +) -> str: + """Check and format the suffixes of a filename. + + When preferred_backend is not given, this method checks the suffix of the filename + is within the suffixes of the any backends (with the given feature) and doesn't do formating. + When preferred_backend is given, strict_prefer must be given. + If strict_prefer is True and the suffix is not within the suffixes of the preferred backend, + or strict_prefer is False and the suffix is not within the suffixes of the any backend with the given feature, + the filename will be formatted with the preferred suffix of the preferred backend. + + Parameters + ---------- + filename : str + The filename to be formatted. + feature : Backend.Feature, optional + The feature of the backend, by default None + preferred_backend : str or type of Backend, optional + The preferred backend, by default None + strict_prefer : bool, optional + Whether to strictly prefer the preferred backend, by default None + + Returns + ------- + str + The formatted filename with the correct suffix. + + Raises + ------ + ValueError + When preferred_backend is not given and the filename is not supported by any backend. 
+ """ + if preferred_backend is not None and strict_prefer is None: + raise ValueError("strict_prefer must be given when preferred_backend is given.") + if isinstance(preferred_backend, str): + preferred_backend = Backend.get_backend(preferred_backend) + if preferred_backend is not None and strict_prefer: + all_backends = [preferred_backend] + elif feature is None: + all_backends = list(Backend.get_backends().values()) + else: + all_backends = list(Backend.get_backends_by_feature(feature).values()) + + all_suffixes = set( + functools.reduce( + operator.iconcat, [backend.suffixes for backend in all_backends], [] + ) + ) + pp = Path(filename) + current_suffix = pp.suffix + if current_suffix not in all_suffixes: + if preferred_backend is not None: + return str(pp) + preferred_backend.suffixes[0] + raise ValueError(f"Unsupported model file format: {filename}") + return filename diff --git a/deepmd/backend/tensorflow.py b/deepmd/backend/tensorflow.py new file mode 100644 index 0000000000..15b03ee7c8 --- /dev/null +++ b/deepmd/backend/tensorflow.py @@ -0,0 +1,135 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from importlib.util import ( + find_spec, +) +from typing import ( + TYPE_CHECKING, + Callable, + ClassVar, + List, + Type, +) + +from deepmd.backend.backend import ( + Backend, +) + +if TYPE_CHECKING: + from argparse import ( + Namespace, + ) + + from deepmd.infer.deep_eval import ( + DeepEvalBackend, + ) + from deepmd.utils.neighbor_stat import ( + NeighborStat, + ) + + +@Backend.register("tf") +@Backend.register("tensorflow") +class TensorFlowBackend(Backend): + """TensorFlow backend.""" + + name = "TensorFlow" + """The formal name of the backend.""" + features: ClassVar[Backend.Feature] = ( + Backend.Feature.ENTRY_POINT + | Backend.Feature.DEEP_EVAL + | Backend.Feature.NEIGHBOR_STAT + | Backend.Feature.IO + ) + """The features of the backend.""" + suffixes: ClassVar[List[str]] = [".pb"] + """The suffixes of the backend.""" + + def is_available(self) -> bool: 
+ """Check if the backend is available. + + Returns + ------- + bool + Whether the backend is available. + """ + # deepmd.env imports expensive numpy + # avoid import outside the method + from deepmd.env import ( + GLOBAL_CONFIG, + ) + + return ( + find_spec("tensorflow") is not None + and GLOBAL_CONFIG["enable_tensorflow"] != "0" + ) + + @property + def entry_point_hook(self) -> Callable[["Namespace"], None]: + """The entry point hook of the backend. + + Returns + ------- + Callable[[Namespace], None] + The entry point hook of the backend. + """ + from deepmd.tf.entrypoints.main import main as deepmd_main + + return deepmd_main + + @property + def deep_eval(self) -> Type["DeepEvalBackend"]: + """The Deep Eval backend of the backend. + + Returns + ------- + type[DeepEvalBackend] + The Deep Eval backend of the backend. + """ + from deepmd.tf.infer.deep_eval import DeepEval as DeepEvalTF + + return DeepEvalTF + + @property + def neighbor_stat(self) -> Type["NeighborStat"]: + """The neighbor statistics of the backend. + + Returns + ------- + type[NeighborStat] + The neighbor statistics of the backend. + """ + from deepmd.tf.utils.neighbor_stat import ( + NeighborStat, + ) + + return NeighborStat + + @property + def serialize_hook(self) -> Callable[[str], dict]: + """The serialize hook to convert the model file to a dictionary. + + Returns + ------- + Callable[[str], dict] + The serialize hook of the backend. + """ + from deepmd.tf.utils.serialization import ( + serialize_from_file, + ) + + return serialize_from_file + + @property + def deserialize_hook(self) -> Callable[[str, dict], None]: + """The deserialize hook to convert the dictionary to a model file. + + Returns + ------- + Callable[[str, dict], None] + The deserialize hook of the backend. 
+ """ + from deepmd.tf.utils.serialization import ( + deserialize_to_file, + ) + + return deserialize_to_file diff --git a/deepmd/calculator.py b/deepmd/calculator.py index b9c0a81006..2d3e7ce831 100644 --- a/deepmd/calculator.py +++ b/deepmd/calculator.py @@ -19,8 +19,8 @@ all_changes, ) -from deepmd import ( - DeepPotential, +from deepmd.infer import ( + DeepPot, ) if TYPE_CHECKING: @@ -53,7 +53,7 @@ class DP(Calculator): Compute potential energy >>> from ase import Atoms - >>> from deepmd.calculator import DP + >>> from deepmd.tf.calculator import DP >>> water = Atoms('H2O', >>> positions=[(0.7601, 1.9270, 1), >>> (1.9575, 1, 1), @@ -89,7 +89,7 @@ def __init__( **kwargs, ) -> None: Calculator.__init__(self, label=label, **kwargs) - self.dp = DeepPotential(str(Path(model).resolve()), neighbor_list=neighbor_list) + self.dp = DeepPot(str(Path(model).resolve()), neighbor_list=neighbor_list) if type_dict: self.type_dict = type_dict else: diff --git a/deepmd/cluster/slurm.py b/deepmd/cluster/slurm.py deleted file mode 100644 index 5264622232..0000000000 --- a/deepmd/cluster/slurm.py +++ /dev/null @@ -1,59 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -"""MOdule to get resources on SLURM cluster. - -References ----------- -https://github.com/deepsense-ai/tensorflow_on_slurm #### -""" - -import os -from typing import ( - List, - Optional, - Tuple, -) - -import hostlist - -from deepmd.cluster import ( - local, -) - -__all__ = ["get_resource"] - - -def get_resource() -> Tuple[str, List[str], Optional[List[int]]]: - """Get SLURM resources: nodename, nodelist, and gpus. 
- - Returns - ------- - Tuple[str, List[str], Optional[List[int]]] - nodename, nodelist, and gpus - - Raises - ------ - RuntimeError - if number of nodes could not be retrieved - ValueError - list of nodes is not of the same length sa number of nodes - ValueError - if current nodename is not found in node list - """ - nodelist = hostlist.expand_hostlist(os.environ["SLURM_JOB_NODELIST"]) - nodename = os.environ["SLURMD_NODENAME"] - num_nodes_env = os.getenv("SLURM_JOB_NUM_NODES") - if num_nodes_env: - num_nodes = int(num_nodes_env) - else: - raise RuntimeError("Could not get SLURM number of nodes") - - if len(nodelist) != num_nodes: - raise ValueError( - f"Number of slurm nodes {len(nodelist)} not equal to {num_nodes}" - ) - if nodename not in nodelist: - raise ValueError( - f"Nodename({nodename}) not in nodelist({nodelist}). This should not happen!" - ) - gpus = local.get_gpus() - return nodename, nodelist, gpus diff --git a/deepmd/common.py b/deepmd/common.py index 54e3d0a6f8..098bb0ed11 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -1,46 +1,44 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -"""Collection of functions and classes used throughout the whole package.""" - +import glob +import json +import os +import platform +import shutil import warnings -from functools import ( - wraps, +from hashlib import ( + sha1, +) +from pathlib import ( + Path, ) from typing import ( TYPE_CHECKING, Any, - Callable, + Dict, + List, + Optional, + Set, + TypeVar, Union, + get_args, ) -import tensorflow -from tensorflow.python.framework import ( - tensor_util, -) +try: + from typing import Literal # python >=3.8 +except ImportError: + from typing_extensions import Literal # type: ignore + +import numpy as np +import yaml from deepmd.env import ( - GLOBAL_TF_FLOAT_PRECISION, - op_module, - tf, + GLOBAL_NP_FLOAT_PRECISION, ) -from deepmd_utils.common import ( - add_data_requirement, - data_requirement, - expand_sys_str, - get_np_precision, - j_loader, - j_must_have, - 
make_default_mesh, - select_idx_map, +from deepmd.utils.path import ( + DPPath, ) -if TYPE_CHECKING: - from deepmd_utils.common import ( - _ACTIVATION, - _PRECISION, - ) - __all__ = [ - # from deepmd_utils.common "data_requirement", "add_data_requirement", "select_idx_map", @@ -49,238 +47,311 @@ "j_loader", "expand_sys_str", "get_np_precision", - # from self - "PRECISION_DICT", + "VALID_PRECISION", + "VALID_ACTIVATION", +] + +_PRECISION = Literal["default", "float16", "float32", "float64"] +_ACTIVATION = Literal[ + "relu", + "relu6", + "softplus", + "sigmoid", + "tanh", "gelu", "gelu_tf", - "ACTIVATION_FN_DICT", - "get_activation_func", - "get_precision", - "safe_cast_tensor", - "cast_precision", - "clear_session", + "none", + "linear", ] +# get_args is new in py38 +VALID_PRECISION: Set[_PRECISION] = set(get_args(_PRECISION)) +VALID_ACTIVATION: Set[_ACTIVATION] = set(get_args(_ACTIVATION)) -# define constants -PRECISION_DICT = { - "default": GLOBAL_TF_FLOAT_PRECISION, - "float16": tf.float16, - "float32": tf.float32, - "float64": tf.float64, - "bfloat16": tf.bfloat16, -} +if TYPE_CHECKING: + _DICT_VAL = TypeVar("_DICT_VAL") + __all__.extend( + [ + "_DICT_VAL", + "_PRECISION", + "_ACTIVATION", + ] + ) -def gelu(x: tf.Tensor) -> tf.Tensor: - """Gaussian Error Linear Unit. +# TODO: refactor data_requirement to make it not a global variable +# this is not a good way to do things. This is some global variable to which +# anyone can write and there is no good way to keep track of the changes +data_requirement = {} - This is a smoother version of the RELU, implemented by custom operator. + +def add_data_requirement( + key: str, + ndof: int, + atomic: bool = False, + must: bool = False, + high_prec: bool = False, + type_sel: Optional[bool] = None, + repeat: int = 1, + default: float = 0.0, + dtype: Optional[np.dtype] = None, + output_natoms_for_type_sel: bool = False, +): + """Specify data requirements for training. 
+ + Parameters + ---------- + key : str + type of data stored in corresponding `*.npy` file e.g. `forces` or `energy` + ndof : int + number of the degrees of freedom, this is tied to `atomic` parameter e.g. forces + have `atomic=True` and `ndof=3` + atomic : bool, optional + specifies whwther the `ndof` keyworrd applies to per atom quantity or not, + by default False + must : bool, optional + specifi if the `*.npy` data file must exist, by default False + high_prec : bool, optional + if true load data to `np.float64` else `np.float32`, by default False + type_sel : bool, optional + select only certain type of atoms, by default None + repeat : int, optional + if specify repaeat data `repeat` times, by default 1 + default : float, optional, default=0. + default value of data + dtype : np.dtype, optional + the dtype of data, overwrites `high_prec` if provided + output_natoms_for_type_sel : bool, optional + if True and type_sel is True, the atomic dimension will be natoms instead of nsel + """ + data_requirement[key] = { + "ndof": ndof, + "atomic": atomic, + "must": must, + "high_prec": high_prec, + "type_sel": type_sel, + "repeat": repeat, + "default": default, + "dtype": dtype, + "output_natoms_for_type_sel": output_natoms_for_type_sel, + } + + +def select_idx_map(atom_types: np.ndarray, select_types: np.ndarray) -> np.ndarray: + """Build map of indices for element supplied element types from all atoms list. 
Parameters ---------- - x : tf.Tensor - float Tensor to perform activation + atom_types : np.ndarray + array specifing type for each atoms as integer + select_types : np.ndarray + types of atoms you want to find indices for Returns ------- - tf.Tensor - `x` with the GELU activation applied + np.ndarray + indices of types of atoms defined by `select_types` in `atom_types` array - References - ---------- - Original paper - https://arxiv.org/abs/1606.08415 + Warnings + -------- + `select_types` array will be sorted before finding indices in `atom_types` """ - return op_module.gelu_custom(x) + sort_select_types = np.sort(select_types) + idx_map = [] + for ii in sort_select_types: + idx_map.append(np.where(atom_types == ii)[0]) + return np.concatenate(idx_map) -def gelu_tf(x: tf.Tensor) -> tf.Tensor: - """Gaussian Error Linear Unit. +def make_default_mesh(pbc: bool, mixed_type: bool) -> np.ndarray: + """Make mesh. - This is a smoother version of the RELU, implemented by TF. + Only the size of mesh matters, not the values: + * 6 for PBC, no mixed types + * 0 for no PBC, no mixed types + * 7 for PBC, mixed types + * 1 for no PBC, mixed types Parameters ---------- - x : tf.Tensor - float Tensor to perform activation + pbc : bool + if True, the mesh will be made for periodic boundary conditions + mixed_type : bool + if True, the mesh will be made for mixed types Returns ------- - tf.Tensor - `x` with the GELU activation applied - - References - ---------- - Original paper - https://arxiv.org/abs/1606.08415 + np.ndarray + mesh """ + mesh_size = int(pbc) * 6 + int(mixed_type) + default_mesh = np.zeros(mesh_size, dtype=np.int32) + return default_mesh - def gelu_wrapper(x): - try: - return tensorflow.nn.gelu(x, approximate=True) - except AttributeError: - warnings.warn( - "TensorFlow does not provide an implementation of gelu, please upgrade your TensorFlow version. Fallback to the custom gelu operator." 
- ) - return op_module.gelu_custom(x) - - return (lambda x: gelu_wrapper(x))(x) - - -ACTIVATION_FN_DICT = { - "relu": tf.nn.relu, - "relu6": tf.nn.relu6, - "softplus": tf.nn.softplus, - "sigmoid": tf.sigmoid, - "tanh": tf.nn.tanh, - "gelu": gelu, - "gelu_tf": gelu_tf, - "None": None, - "none": None, -} - - -def get_activation_func( - activation_fn: Union["_ACTIVATION", None], -) -> Union[Callable[[tf.Tensor], tf.Tensor], None]: - """Get activation function callable based on string name. - Parameters - ---------- - activation_fn : _ACTIVATION - one of the defined activation functions +# TODO: rename j_must_have to j_deprecated and only warn about deprecated keys +# maybe rename this to j_deprecated and only warn about deprecated keys, +# if the deprecated_key argument is left empty function puppose is only custom +# error since dict[key] already raises KeyError when the key is missing +def j_must_have( + jdata: Dict[str, "_DICT_VAL"], key: str, deprecated_key: List[str] = [] +) -> "_DICT_VAL": + """Assert that supplied dictionary conaines specified key. Returns ------- - Callable[[tf.Tensor], tf.Tensor] - correspondingg TF callable + _DICT_VAL + value that was store unde supplied key Raises ------ RuntimeError - if unknown activation function is specified + if the key is not present """ - if activation_fn is None: - return None - if activation_fn not in ACTIVATION_FN_DICT: - raise RuntimeError(f"{activation_fn} is not a valid activation function") - return ACTIVATION_FN_DICT[activation_fn] + if key not in jdata.keys(): + for ii in deprecated_key: + if ii in jdata.keys(): + warnings.warn(f"the key {ii} is deprecated, please use {key} instead") + return jdata[ii] + else: + raise RuntimeError(f"json database must provide key {key}") + else: + return jdata[key] -def get_precision(precision: "_PRECISION") -> Any: - """Convert str to TF DType constant. +def j_loader(filename: Union[str, Path]) -> Dict[str, Any]: + """Load yaml or json settings file. 
Parameters ---------- - precision : _PRECISION - one of the allowed precisions + filename : Union[str, Path] + path to file Returns ------- - tf.python.framework.dtypes.DType - appropriate TF constant + Dict[str, Any] + loaded dictionary Raises ------ - RuntimeError - if supplied precision string does not have acorresponding TF constant + TypeError + if the supplied file is of unsupported type """ - if precision not in PRECISION_DICT: - raise RuntimeError(f"{precision} is not a valid precision") - return PRECISION_DICT[precision] - + filepath = Path(filename) + if filepath.suffix.endswith("json"): + with filepath.open() as fp: + return json.load(fp) + elif filepath.suffix.endswith(("yml", "yaml")): + with filepath.open() as fp: + return yaml.safe_load(fp) + else: + raise TypeError("config file must be json, or yaml/yml") -def safe_cast_tensor( - input: tf.Tensor, from_precision: tf.DType, to_precision: tf.DType -) -> tf.Tensor: - """Convert a Tensor from a precision to another precision. - If input is not a Tensor or without the specific precision, the method will not - cast it. +# TODO port expand_sys_str completely to pathlib when all callers are ported +def expand_sys_str(root_dir: Union[str, Path]) -> List[str]: + """Recursively iterate over directories taking those that contain `type.raw` file. 
Parameters ---------- - input : tf.Tensor - input tensor - from_precision : tf.DType - Tensor data type that is casted from - to_precision : tf.DType - Tensor data type that casts to + root_dir : Union[str, Path] + starting directory Returns ------- - tf.Tensor - casted Tensor + List[str] + list of string pointing to system directories """ - if tensor_util.is_tensor(input) and input.dtype == from_precision: - return tf.cast(input, to_precision) - return input + root_dir = DPPath(root_dir) + matches = [str(d) for d in root_dir.rglob("*") if (d / "type.raw").is_file()] + if (root_dir / "type.raw").is_file(): + matches.append(str(root_dir)) + return matches -def cast_precision(func: Callable) -> Callable: - """A decorator that casts and casts back the input - and output tensor of a method. +def get_np_precision(precision: "_PRECISION") -> np.dtype: + """Get numpy precision constant from string. - The decorator should be used in a classmethod. - - The decorator will do the following thing: - (1) It casts input Tensors from `GLOBAL_TF_FLOAT_PRECISION` - to precision defined by property `precision`. - (2) It casts output Tensors from `precision` to - `GLOBAL_TF_FLOAT_PRECISION`. - (3) It checks inputs and outputs and only casts when - input or output is a Tensor and its dtype matches - `GLOBAL_TF_FLOAT_PRECISION` and `precision`, respectively. - If it does not match (e.g. it is an integer), the decorator - will do nothing on it. + Parameters + ---------- + precision : _PRECISION + string name of numpy constant or default Returns ------- - Callable - a decorator that casts and casts back the input and - output tensor of a method + np.dtype + numpy presicion constant - Examples - -------- - >>> class A: - ... @property - ... def precision(self): - ... return tf.float32 - ... - ... @cast_precision - ... def f(x: tf.Tensor, y: tf.Tensor) -> tf.Tensor: - ... 
return x ** 2 + y + Raises + ------ + RuntimeError + if string is invalid """ + if precision == "default": + return GLOBAL_NP_FLOAT_PRECISION + elif precision == "float16": + return np.float16 + elif precision == "float32": + return np.float32 + elif precision == "float64": + return np.float64 + else: + raise RuntimeError(f"{precision} is not a valid precision") + + +def symlink_prefix_files(old_prefix: str, new_prefix: str): + """Create symlinks from old checkpoint prefix to new one. - @wraps(func) - def wrapper(self, *args, **kwargs): - # only convert tensors - returned_tensor = func( - self, - *[ - safe_cast_tensor(vv, GLOBAL_TF_FLOAT_PRECISION, self.precision) - for vv in args - ], - **{ - kk: safe_cast_tensor(vv, GLOBAL_TF_FLOAT_PRECISION, self.precision) - for kk, vv in kwargs.items() - }, - ) - if isinstance(returned_tensor, tuple): - return tuple( - safe_cast_tensor(vv, self.precision, GLOBAL_TF_FLOAT_PRECISION) - for vv in returned_tensor - ) + On Windows this function will copy files instead of creating symlinks. + + Parameters + ---------- + old_prefix : str + old checkpoint prefix, all files with this prefix will be symlinked + new_prefix : str + new checkpoint prefix + """ + original_files = glob.glob(old_prefix + ".*") + for ori_ff in original_files: + new_ff = new_prefix + ori_ff[len(old_prefix) :] + try: + # remove old one + os.remove(new_ff) + except OSError: + pass + if platform.system() != "Windows": + # by default one does not have access to create symlink on Windows + os.symlink(os.path.relpath(ori_ff, os.path.dirname(new_ff)), new_ff) else: - return safe_cast_tensor( - returned_tensor, self.precision, GLOBAL_TF_FLOAT_PRECISION - ) + shutil.copyfile(ori_ff, new_ff) + + +def get_hash(obj) -> str: + """Get hash of object. 
+ + Parameters + ---------- + obj + object to hash + """ + return sha1(json.dumps(obj).encode("utf-8")).hexdigest() + - return wrapper +def j_get_type(data: dict, class_name: str = "object") -> str: + """Get the type from the data. + Parameters + ---------- + data : dict + the data + class_name : str, optional + the name of the class for error message, by default "object" -def clear_session(): - """Reset all state generated by DeePMD-kit.""" - tf.reset_default_graph() - # TODO: remove this line when data_requirement is not a global variable - data_requirement.clear() + Returns + ------- + str + the type + """ + try: + return data["type"] + except KeyError as e: + raise KeyError(f"the type of the {class_name} should be set by `type`") from e diff --git a/deepmd/descriptor/se.py b/deepmd/descriptor/se.py deleted file mode 100644 index 598f6f9ff8..0000000000 --- a/deepmd/descriptor/se.py +++ /dev/null @@ -1,162 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - Tuple, -) - -from deepmd.env import ( - tf, -) -from deepmd.utils.graph import ( - get_embedding_net_variables_from_graph_def, - get_tensor_by_name_from_graph, -) - -from .descriptor import ( - Descriptor, -) - - -class DescrptSe(Descriptor): - """A base class for smooth version of descriptors. - - Notes - ----- - All of these descriptors have an environmental matrix and an - embedding network (:meth:`deepmd.utils.network.embedding_net`), so - they can share some similiar methods without defining them twice. - - Attributes - ---------- - embedding_net_variables : dict - initial embedding network variables - descrpt_reshape : tf.Tensor - the reshaped descriptor - descrpt_deriv : tf.Tensor - the descriptor derivative - rij : tf.Tensor - distances between two atoms - nlist : tf.Tensor - the neighbor list - - """ - - def _identity_tensors(self, suffix: str = "") -> None: - """Identify tensors which are expected to be stored and restored. 
- - Notes - ----- - These tensors will be indentitied: - self.descrpt_reshape : o_rmat - self.descrpt_deriv : o_rmat_deriv - self.rij : o_rij - self.nlist : o_nlist - Thus, this method should be called during building the descriptor and - after these tensors are initialized. - - Parameters - ---------- - suffix : str - The suffix of the scope - """ - self.descrpt_reshape = tf.identity(self.descrpt_reshape, name="o_rmat" + suffix) - self.descrpt_deriv = tf.identity( - self.descrpt_deriv, name="o_rmat_deriv" + suffix - ) - self.rij = tf.identity(self.rij, name="o_rij" + suffix) - self.nlist = tf.identity(self.nlist, name="o_nlist" + suffix) - - def get_tensor_names(self, suffix: str = "") -> Tuple[str]: - """Get names of tensors. - - Parameters - ---------- - suffix : str - The suffix of the scope - - Returns - ------- - Tuple[str] - Names of tensors - """ - return ( - f"o_rmat{suffix}:0", - f"o_rmat_deriv{suffix}:0", - f"o_rij{suffix}:0", - f"o_nlist{suffix}:0", - ) - - def pass_tensors_from_frz_model( - self, - descrpt_reshape: tf.Tensor, - descrpt_deriv: tf.Tensor, - rij: tf.Tensor, - nlist: tf.Tensor, - ): - """Pass the descrpt_reshape tensor as well as descrpt_deriv tensor from the frz graph_def. - - Parameters - ---------- - descrpt_reshape - The passed descrpt_reshape tensor - descrpt_deriv - The passed descrpt_deriv tensor - rij - The passed rij tensor - nlist - The passed nlist tensor - """ - self.rij = rij - self.nlist = nlist - self.descrpt_deriv = descrpt_deriv - self.descrpt_reshape = descrpt_reshape - - def init_variables( - self, - graph: tf.Graph, - graph_def: tf.GraphDef, - suffix: str = "", - ) -> None: - """Init the embedding net variables with the given dict. 
- - Parameters - ---------- - graph : tf.Graph - The input frozen model graph - graph_def : tf.GraphDef - The input frozen model graph_def - suffix : str, optional - The suffix of the scope - """ - self.embedding_net_variables = get_embedding_net_variables_from_graph_def( - graph_def, suffix=suffix - ) - self.davg = get_tensor_by_name_from_graph( - graph, "descrpt_attr%s/t_avg" % suffix - ) - self.dstd = get_tensor_by_name_from_graph( - graph, "descrpt_attr%s/t_std" % suffix - ) - - @property - def precision(self) -> tf.DType: - """Precision of filter network.""" - return self.filter_precision - - @classmethod - def update_sel(cls, global_jdata: dict, local_jdata: dict): - """Update the selection and perform neighbor statistics. - - Parameters - ---------- - global_jdata : dict - The global data, containing the training section - local_jdata : dict - The local data refer to the current class - """ - from deepmd.entrypoints.train import ( - update_one_sel, - ) - - # default behavior is to update sel which is a list - local_jdata_cpy = local_jdata.copy() - return update_one_sel(global_jdata, local_jdata_cpy, False) diff --git a/deepmd/dpmodel/__init__.py b/deepmd/dpmodel/__init__.py new file mode 100644 index 0000000000..6a7bdb3585 --- /dev/null +++ b/deepmd/dpmodel/__init__.py @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from .common import ( + DEFAULT_PRECISION, + PRECISION_DICT, + NativeOP, +) +from .model import ( + DPModel, +) +from .output_def import ( + FittingOutputDef, + ModelOutputDef, + OutputVariableDef, + fitting_check_output, + get_deriv_name, + get_hessian_name, + get_reduce_name, + model_check_output, +) + +__all__ = [ + "DPModel", + "PRECISION_DICT", + "DEFAULT_PRECISION", + "NativeOP", + "ModelOutputDef", + "FittingOutputDef", + "OutputVariableDef", + "model_check_output", + "fitting_check_output", + "get_reduce_name", + "get_deriv_name", + "get_hessian_name", +] diff --git a/deepmd/dpmodel/atomic_model/__init__.py 
b/deepmd/dpmodel/atomic_model/__init__.py new file mode 100644 index 0000000000..37f6b8bf28 --- /dev/null +++ b/deepmd/dpmodel/atomic_model/__init__.py @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""The atomic model provides the prediction of some property on each +atom. All the atomic models are not supposed to be directly accessed +by users, but it provides a convenient interface for the +implementation of models. + +Taking the energy models for example, the developeres only needs to +implement the atomic energy prediction via an atomic model, and the +model can be automatically made by the `deepmd.dpmodel.make_model` +method. The `DPModel` is made by +``` +DPModel = make_model(DPAtomicModel) +``` + +""" + +from .base_atomic_model import ( + BaseAtomicModel, +) +from .dp_atomic_model import ( + DPAtomicModel, +) +from .linear_atomic_model import ( + DPZBLLinearEnergyAtomicModel, + LinearEnergyAtomicModel, +) +from .make_base_atomic_model import ( + make_base_atomic_model, +) +from .pairtab_atomic_model import ( + PairTabAtomicModel, +) + +__all__ = [ + "make_base_atomic_model", + "BaseAtomicModel", + "DPAtomicModel", + "PairTabAtomicModel", + "LinearEnergyAtomicModel", + "DPZBLLinearEnergyAtomicModel", +] diff --git a/deepmd/dpmodel/atomic_model/base_atomic_model.py b/deepmd/dpmodel/atomic_model/base_atomic_model.py new file mode 100644 index 0000000000..42d1e67138 --- /dev/null +++ b/deepmd/dpmodel/atomic_model/base_atomic_model.py @@ -0,0 +1,149 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Dict, + List, + Optional, + Tuple, +) + +import numpy as np + +from deepmd.dpmodel.output_def import ( + FittingOutputDef, + OutputVariableDef, +) +from deepmd.dpmodel.utils import ( + AtomExcludeMask, + PairExcludeMask, +) + +from .make_base_atomic_model import ( + make_base_atomic_model, +) + +BaseAtomicModel_ = make_base_atomic_model(np.ndarray) + + +class BaseAtomicModel(BaseAtomicModel_): + def __init__( + self, + 
atom_exclude_types: List[int] = [], + pair_exclude_types: List[Tuple[int, int]] = [], + ): + super().__init__() + self.reinit_atom_exclude(atom_exclude_types) + self.reinit_pair_exclude(pair_exclude_types) + + def reinit_atom_exclude( + self, + exclude_types: List[int] = [], + ): + self.atom_exclude_types = exclude_types + if exclude_types == []: + self.atom_excl = None + else: + self.atom_excl = AtomExcludeMask(self.get_ntypes(), self.atom_exclude_types) + + def reinit_pair_exclude( + self, + exclude_types: List[Tuple[int, int]] = [], + ): + self.pair_exclude_types = exclude_types + if exclude_types == []: + self.pair_excl = None + else: + self.pair_excl = PairExcludeMask(self.get_ntypes(), self.pair_exclude_types) + + def atomic_output_def(self) -> FittingOutputDef: + old_def = self.fitting_output_def() + old_list = list(old_def.get_data().values()) + return FittingOutputDef( + old_list # noqa:RUF005 + + [ + OutputVariableDef( + name="mask", + shape=[1], + reduciable=False, + r_differentiable=False, + c_differentiable=False, + ) + ] + ) + + def forward_common_atomic( + self, + extended_coord: np.ndarray, + extended_atype: np.ndarray, + nlist: np.ndarray, + mapping: Optional[np.ndarray] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + ) -> Dict[str, np.ndarray]: + """Common interface for atomic inference. + + This method accept extended coordinates, extended atom typs, neighbor list, + and predict the atomic contribution of the fit property. + + Parameters + ---------- + extended_coord + extended coodinates, shape: nf x (nall x 3) + extended_atype + extended atom typs, shape: nf x nall + for a type < 0 indicating the atomic is virtual. + nlist + neighbor list, shape: nf x nloc x nsel + mapping + extended to local index mapping, shape: nf x nall + fparam + frame parameters, shape: nf x dim_fparam + aparam + atomic parameter, shape: nf x nloc x dim_aparam + + Returns + ------- + ret_dict + dict of output atomic properties. 
+ should implement the definition of `fitting_output_def`. + ret_dict["mask"] of shape nf x nloc will be provided. + ret_dict["mask"][ff,ii] == 1 indicating the ii-th atom of the ff-th frame is real. + ret_dict["mask"][ff,ii] == 0 indicating the ii-th atom of the ff-th frame is virtual. + + """ + _, nloc, _ = nlist.shape + atype = extended_atype[:, :nloc] + if self.pair_excl is not None: + pair_mask = self.pair_excl.build_type_exclude_mask(nlist, extended_atype) + # exclude neighbors in the nlist + nlist = np.where(pair_mask == 1, nlist, -1) + + ext_atom_mask = self.make_atom_mask(extended_atype) + ret_dict = self.forward_atomic( + extended_coord, + np.where(ext_atom_mask, extended_atype, 0), + nlist, + mapping=mapping, + fparam=fparam, + aparam=aparam, + ) + + # nf x nloc + atom_mask = ext_atom_mask[:, :nloc].astype(np.int32) + if self.atom_excl is not None: + atom_mask *= self.atom_excl.build_type_exclude_mask(atype) + + for kk in ret_dict.keys(): + out_shape = ret_dict[kk].shape + ret_dict[kk] = ( + ret_dict[kk].reshape([out_shape[0], out_shape[1], -1]) + * atom_mask[:, :, None] + ).reshape(out_shape) + ret_dict["mask"] = atom_mask + + return ret_dict + + def serialize(self) -> dict: + return { + "atom_exclude_types": self.atom_exclude_types, + "pair_exclude_types": self.pair_exclude_types, + } diff --git a/deepmd/dpmodel/atomic_model/dp_atomic_model.py b/deepmd/dpmodel/atomic_model/dp_atomic_model.py new file mode 100644 index 0000000000..8a40f8d238 --- /dev/null +++ b/deepmd/dpmodel/atomic_model/dp_atomic_model.py @@ -0,0 +1,206 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +from typing import ( + Dict, + List, + Optional, +) + +import numpy as np + +from deepmd.dpmodel.descriptor.base_descriptor import ( + BaseDescriptor, +) +from deepmd.dpmodel.fitting.base_fitting import ( + BaseFitting, +) +from deepmd.dpmodel.output_def import ( + FittingOutputDef, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from 
.base_atomic_model import ( + BaseAtomicModel, +) + + +@BaseAtomicModel.register("standard") +class DPAtomicModel(BaseAtomicModel): + """Model give atomic prediction of some physical property. + + Parameters + ---------- + descriptor + Descriptor + fitting_net + Fitting net + type_map + Mapping atom type to the name (str) of the type. + For example `type_map[1]` gives the name of the type 1. + + """ + + def __init__( + self, + descriptor, + fitting, + type_map: List[str], + **kwargs, + ): + self.type_map = type_map + self.descriptor = descriptor + self.fitting = fitting + self.type_map = type_map + super().__init__(**kwargs) + + def fitting_output_def(self) -> FittingOutputDef: + """Get the output def of the fitting net.""" + return self.fitting.output_def() + + def get_rcut(self) -> float: + """Get the cut-off radius.""" + return self.descriptor.get_rcut() + + def get_sel(self) -> List[int]: + """Get the neighbor selection.""" + return self.descriptor.get_sel() + + def get_type_map(self) -> List[str]: + """Get the type map.""" + return self.type_map + + def mixed_types(self) -> bool: + """If true, the model + 1. assumes total number of atoms aligned across frames; + 2. uses a neighbor list that does not distinguish different atomic types. + + If false, the model + 1. assumes total number of atoms of each atom type aligned across frames; + 2. uses a neighbor list that distinguishes different atomic types. + + """ + return self.descriptor.mixed_types() + + def set_out_bias(self, out_bias: np.ndarray, add=False) -> None: + """ + Modify the output bias for the atomic model. + + Parameters + ---------- + out_bias : np.ndarray + The new bias to be applied. + add : bool, optional + Whether to add the new bias to the existing one. + If False, the output bias will be directly replaced by the new bias. + If True, the new bias will be added to the existing one. 
+ """ + self.fitting["bias_atom_e"] = ( + out_bias + self.fitting["bias_atom_e"] if add else out_bias + ) + + def get_out_bias(self) -> np.ndarray: + """Return the output bias of the atomic model.""" + return self.fitting["bias_atom_e"] + + def forward_atomic( + self, + extended_coord: np.ndarray, + extended_atype: np.ndarray, + nlist: np.ndarray, + mapping: Optional[np.ndarray] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + ) -> Dict[str, np.ndarray]: + """Models' atomic predictions. + + Parameters + ---------- + extended_coord + coodinates in extended region + extended_atype + atomic type in extended region + nlist + neighbor list. nf x nloc x nsel + mapping + mapps the extended indices to local indices. nf x nall + fparam + frame parameter. nf x ndf + aparam + atomic parameter. nf x nloc x nda + + Returns + ------- + result_dict + the result dict, defined by the `FittingOutputDef`. + + """ + nframes, nloc, nnei = nlist.shape + atype = extended_atype[:, :nloc] + descriptor, rot_mat, g2, h2, sw = self.descriptor( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + ) + ret = self.fitting( + descriptor, + atype, + gr=rot_mat, + g2=g2, + h2=h2, + fparam=fparam, + aparam=aparam, + ) + return ret + + def serialize(self) -> dict: + dd = super().serialize() + dd.update( + { + "@class": "Model", + "type": "standard", + "@version": 1, + "type_map": self.type_map, + "descriptor": self.descriptor.serialize(), + "fitting": self.fitting.serialize(), + } + ) + return dd + + @classmethod + def deserialize(cls, data) -> "DPAtomicModel": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("@class") + data.pop("type") + descriptor_obj = BaseDescriptor.deserialize(data.pop("descriptor")) + fitting_obj = BaseFitting.deserialize(data.pop("fitting")) + type_map = data.pop("type_map") + obj = cls(descriptor_obj, fitting_obj, type_map=type_map, **data) + return obj + + def 
get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + return self.fitting.get_dim_fparam() + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return self.fitting.get_dim_aparam() + + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + return self.fitting.get_sel_type() + + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + return False diff --git a/deepmd/dpmodel/atomic_model/linear_atomic_model.py b/deepmd/dpmodel/atomic_model/linear_atomic_model.py new file mode 100644 index 0000000000..93a885f3ab --- /dev/null +++ b/deepmd/dpmodel/atomic_model/linear_atomic_model.py @@ -0,0 +1,441 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +from typing import ( + Dict, + List, + Optional, + Tuple, + Union, +) + +import numpy as np + +from deepmd.dpmodel.utils.nlist import ( + build_multiple_neighbor_list, + get_multiple_nlist_key, + nlist_distinguish_types, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from ..output_def import ( + FittingOutputDef, + OutputVariableDef, +) +from .base_atomic_model import ( + BaseAtomicModel, +) +from .dp_atomic_model import ( + DPAtomicModel, +) +from .pairtab_atomic_model import ( + PairTabAtomicModel, +) + + +class LinearEnergyAtomicModel(BaseAtomicModel): + """Linear model make linear combinations of several existing models. + + Parameters + ---------- + models : list[DPAtomicModel or PairTabAtomicModel] + A list of models to be combined. PairTabAtomicModel must be used together with a DPAtomicModel. 
+ type_map : list[str] + Mapping atom type to the name (str) of the type. + For example `type_map[1]` gives the name of the type 1. + """ + + def __init__( + self, + models: List[BaseAtomicModel], + type_map: List[str], + **kwargs, + ): + self.models = models + sub_model_type_maps = [md.get_type_map() for md in models] + err_msg = [] + self.mapping_list = [] + common_type_map = set(type_map) + self.type_map = type_map + for tpmp in sub_model_type_maps: + if not common_type_map.issubset(set(tpmp)): + err_msg.append( + f"type_map {tpmp} is not a subset of type_map {type_map}" + ) + self.mapping_list.append(self.remap_atype(tpmp, self.type_map)) + assert len(err_msg) == 0, "\n".join(err_msg) + self.mixed_types_list = [model.mixed_types() for model in self.models] + super().__init__(**kwargs) + + def mixed_types(self) -> bool: + """If true, the model + 1. assumes total number of atoms aligned across frames; + 2. uses a neighbor list that does not distinguish different atomic types. + + If false, the model + 1. assumes total number of atoms of each atom type aligned across frames; + 2. uses a neighbor list that distinguishes different atomic types. + + """ + return True + + def get_rcut(self) -> float: + """Get the cut-off radius.""" + return max(self.get_model_rcuts()) + + def get_type_map(self) -> List[str]: + """Get the type map.""" + return self.type_map + + def get_model_rcuts(self) -> List[float]: + """Get the cut-off radius for each individual models.""" + return [model.get_rcut() for model in self.models] + + def get_sel(self) -> List[int]: + return [max([model.get_nsel() for model in self.models])] + + def get_model_nsels(self) -> List[int]: + """Get the processed sels for each individual models.
Not distinguishing types.""" + return [model.get_nsel() for model in self.models] + + def get_model_sels(self) -> List[Union[int, List[int]]]: + """Get the sels for each individual models.""" + return [model.get_sel() for model in self.models] + + def _sort_rcuts_sels(self) -> Tuple[List[float], List[int]]: + # sort the pair of rcut and sels in ascending order, first based on sel, then on rcut. + zipped = sorted( + zip(self.get_model_rcuts(), self.get_model_nsels()), + key=lambda x: (x[1], x[0]), + ) + return [p[0] for p in zipped], [p[1] for p in zipped] + + def forward_atomic( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[np.ndarray] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + ) -> Dict[str, np.ndarray]: + """Return atomic prediction. + + Parameters + ---------- + extended_coord + coordinates in extended region, (nframes, nall * 3) + extended_atype + atomic type in extended region, (nframes, nall) + nlist + neighbor list, (nframes, nloc, nsel). + mapping + maps the extended indices to local indices. + fparam + frame parameter. (nframes, ndf) + aparam + atomic parameter. (nframes, nloc, nda) + + Returns + ------- + result_dict + the result dict, defined by the fitting net output def.
+ """ + nframes, nloc, nnei = nlist.shape + extended_coord = extended_coord.reshape(nframes, -1, 3) + sorted_rcuts, sorted_sels = self._sort_rcuts_sels() + nlists = build_multiple_neighbor_list( + extended_coord, + nlist, + sorted_rcuts, + sorted_sels, + ) + raw_nlists = [ + nlists[get_multiple_nlist_key(rcut, sel)] + for rcut, sel in zip(self.get_model_rcuts(), self.get_model_nsels()) + ] + nlists_ = [ + nl if mt else nlist_distinguish_types(nl, extended_atype, sel) + for mt, nl, sel in zip( + self.mixed_types_list, raw_nlists, self.get_model_sels() + ) + ] + ener_list = [] + + for i, model in enumerate(self.models): + type_map_model = self.mapping_list[i] + ener_list.append( + model.forward_atomic( + extended_coord, + type_map_model[extended_atype], + nlists_[i], + mapping, + fparam, + aparam, + )["energy"] + ) + self.weights = self._compute_weight(extended_coord, extended_atype, nlists_) + self.atomic_bias = None + if self.atomic_bias is not None: + raise NotImplementedError("Need to add bias in a future PR.") + else: + fit_ret = { + "energy": np.sum(np.stack(ener_list) * np.stack(self.weights), axis=0), + } # (nframes, nloc, 1) + return fit_ret + + @staticmethod + def remap_atype(ori_map: List[str], new_map: List[str]) -> np.ndarray: + """ + This method is used to map the atype from the common type_map to the original type_map of + individual AtomicModels. + + Parameters + ---------- + ori_map : List[str] + The original type map of an AtomicModel. + new_map : List[str] + The common type map of the DPZBLLinearEnergyAtomicModel, created by the `get_type_map` method, + must be a subset of the ori_map.
+ + Returns + ------- + np.ndarray + """ + type_2_idx = {atp: idx for idx, atp in enumerate(ori_map)} + # this maps the atype in the new map to the original map + mapping = np.array([type_2_idx[new_map[idx]] for idx in range(len(new_map))]) + return mapping + + def fitting_output_def(self) -> FittingOutputDef: + return FittingOutputDef( + [ + OutputVariableDef( + name="energy", + shape=[1], + reduciable=True, + r_differentiable=True, + c_differentiable=True, + ) + ] + ) + + def serialize(self) -> dict: + return { + "@class": "Model", + "type": "linear", + "@version": 1, + "models": [model.serialize() for model in self.models], + "type_map": self.type_map, + } + + @classmethod + def deserialize(cls, data: dict) -> "LinearEnergyAtomicModel": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("@class") + data.pop("type") + type_map = data.pop("type_map") + models = [ + BaseAtomicModel.get_class_by_type(model["type"]).deserialize(model) + for model in data["models"] + ] + data.pop("models") + return cls(models, type_map, **data) + + def _compute_weight( + self, + extended_coord: np.ndarray, + extended_atype: np.ndarray, + nlists_: List[np.ndarray], + ) -> List[np.ndarray]: + """This should be a list of user defined weights that matches the number of models to be combined.""" + nmodels = len(self.models) + return [np.ones(1) / nmodels for _ in range(nmodels)] + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + # tricky... + return max([model.get_dim_fparam() for model in self.models]) + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return max([model.get_dim_aparam() for model in self.models]) + + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. 
+ If returning an empty list, all atom types are selected. + """ + if any(model.get_sel_type() == [] for model in self.models): + return [] + # join all the selected types + return list(set().union(*[model.get_sel_type() for model in self.models])) + + def set_out_bias(self, out_bias: np.ndarray, add=False) -> None: + """ + Modify the output bias for all the models in the linear atomic model. + + Parameters + ---------- + out_bias : np.ndarray + The new bias to be applied. + add : bool, optional + Whether to add the new bias to the existing one. + If False, the output bias will be directly replaced by the new bias. + If True, the new bias will be added to the existing one. + """ + for model in self.models: + model.set_out_bias(out_bias, add=add) + + def get_out_bias(self) -> np.ndarray: + """Return the weighted output bias of the linear atomic model.""" + # TODO add get_out_bias for linear atomic model + raise NotImplementedError + + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + return False + + +class DPZBLLinearEnergyAtomicModel(LinearEnergyAtomicModel): + """Model linearly combine a list of AtomicModels. + + Parameters + ---------- + dp_model + The DPAtomicModel being combined. + zbl_model + The PairTable model being combined. + sw_rmin + The lower boundary of the interpolation between short-range tabulated interaction and DP. + sw_rmax + The upper boundary of the interpolation between short-range tabulated interaction and DP. + type_map + Mapping atom type to the name (str) of the type. + For example `type_map[1]` gives the name of the type 1. + smin_alpha + The short-range tabulated interaction will be switched according to the distance of the nearest neighbor. + This distance is calculated by softmin.
+ """ + + def __init__( + self, + dp_model: DPAtomicModel, + zbl_model: PairTabAtomicModel, + sw_rmin: float, + sw_rmax: float, + type_map: List[str], + smin_alpha: Optional[float] = 0.1, + **kwargs, + ): + models = [dp_model, zbl_model] + super().__init__(models, type_map, **kwargs) + self.dp_model = dp_model + self.zbl_model = zbl_model + + self.sw_rmin = sw_rmin + self.sw_rmax = sw_rmax + self.smin_alpha = smin_alpha + + def serialize(self) -> dict: + dd = BaseAtomicModel.serialize(self) + dd.update( + { + "@class": "Model", + "type": "zbl", + "@version": 2, + "models": LinearEnergyAtomicModel( + models=[self.models[0], self.models[1]], type_map=self.type_map + ).serialize(), + "sw_rmin": self.sw_rmin, + "sw_rmax": self.sw_rmax, + "smin_alpha": self.smin_alpha, + } + ) + return dd + + @classmethod + def deserialize(cls, data) -> "DPZBLLinearEnergyAtomicModel": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 2, 1) + data.pop("@class") + data.pop("type") + sw_rmin = data.pop("sw_rmin") + sw_rmax = data.pop("sw_rmax") + smin_alpha = data.pop("smin_alpha") + linear_model = LinearEnergyAtomicModel.deserialize(data.pop("models")) + dp_model, zbl_model = linear_model.models + type_map = linear_model.type_map + + return cls( + dp_model=dp_model, + zbl_model=zbl_model, + sw_rmin=sw_rmin, + sw_rmax=sw_rmax, + type_map=type_map, + smin_alpha=smin_alpha, + **data, + ) + + def _compute_weight( + self, + extended_coord: np.ndarray, + extended_atype: np.ndarray, + nlists_: List[np.ndarray], + ) -> List[np.ndarray]: + """ZBL weight. + + Returns + ------- + List[np.ndarray] + the atomic ZBL weight for interpolation. (nframes, nloc, 1) + """ + assert ( + self.sw_rmax > self.sw_rmin + ), "The upper boundary `sw_rmax` must be greater than the lower boundary `sw_rmin`." 
+ + dp_nlist = nlists_[0] + zbl_nlist = nlists_[1] + + zbl_nnei = zbl_nlist.shape[-1] + dp_nnei = dp_nlist.shape[-1] + + # use the larger rr based on nlist + nlist_larger = zbl_nlist if zbl_nnei >= dp_nnei else dp_nlist + masked_nlist = np.clip(nlist_larger, 0, None) + pairwise_rr = PairTabAtomicModel._get_pairwise_dist( + extended_coord, masked_nlist + ) + + numerator = np.sum( + pairwise_rr * np.exp(-pairwise_rr / self.smin_alpha), axis=-1 + ) # masked nnei will be zero, no need to handle + denominator = np.sum( + np.where( + nlist_larger != -1, + np.exp(-pairwise_rr / self.smin_alpha), + np.zeros_like(nlist_larger), + ), + axis=-1, + ) # handle masked nnei. + with np.errstate(divide="ignore", invalid="ignore"): + sigma = numerator / denominator + u = (sigma - self.sw_rmin) / (self.sw_rmax - self.sw_rmin) + coef = np.zeros_like(u) + left_mask = sigma < self.sw_rmin + mid_mask = (self.sw_rmin <= sigma) & (sigma < self.sw_rmax) + right_mask = sigma >= self.sw_rmax + coef[left_mask] = 1 + with np.errstate(invalid="ignore"): + smooth = -6 * u**5 + 15 * u**4 - 10 * u**3 + 1 + coef[mid_mask] = smooth[mid_mask] + coef[right_mask] = 0 + self.zbl_weight = coef + return [1 - np.expand_dims(coef, -1), np.expand_dims(coef, -1)] diff --git a/deepmd/dpmodel/atomic_model/make_base_atomic_model.py b/deepmd/dpmodel/atomic_model/make_base_atomic_model.py new file mode 100644 index 0000000000..3e02a5d076 --- /dev/null +++ b/deepmd/dpmodel/atomic_model/make_base_atomic_model.py @@ -0,0 +1,225 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + Dict, + List, + Optional, +) + +from deepmd.dpmodel.output_def import ( + FittingOutputDef, +) +from deepmd.utils.plugin import ( + PluginVariant, + make_plugin_registry, +) + + +def make_base_atomic_model( + t_tensor, + fwd_method_name: str = "forward_atomic", +): + """Make the base class for the atomic model. + + Parameters + ---------- + t_tensor + The type of the tensor. 
used in the type hint. + fwd_method_name + Name of the forward method. For dpmodels, it should be "call". + For torch models, it should be "forward". + + """ + + class BAM(ABC, PluginVariant, make_plugin_registry("atomic model")): + """Base Atomic Model provides the interfaces of an atomic model.""" + + @abstractmethod + def fitting_output_def(self) -> FittingOutputDef: + """Get the output def of developer implemented atomic models.""" + pass + + def atomic_output_def(self) -> FittingOutputDef: + """Get the output def of the atomic model. + + By default it is the same as FittingOutputDef, but it + allows model level wrapper of the output defined by the developer. + + """ + return self.fitting_output_def() + + @abstractmethod + def get_rcut(self) -> float: + """Get the cut-off radius.""" + pass + + @abstractmethod + def get_type_map(self) -> List[str]: + """Get the type map.""" + pass + + def get_ntypes(self) -> int: + """Get the number of atom types.""" + return len(self.get_type_map()) + + @abstractmethod + def get_sel(self) -> List[int]: + """Returns the number of selected atoms for each type.""" + pass + + def get_nsel(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return sum(self.get_sel()) + + def get_nnei(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return self.get_nsel() + + @abstractmethod + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + + @abstractmethod + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + + @abstractmethod + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. 
+ """ + + @abstractmethod + def set_out_bias(self, out_bias: t_tensor, add=False) -> None: + """ + Modify the output bias for the atomic model. + + Parameters + ---------- + out_bias : t_tensor + The new bias to be applied. + add : bool, optional + Whether to add the new bias to the existing one. + If False, the output bias will be directly replaced by the new bias. + If True, the new bias will be added to the existing one. + """ + + @abstractmethod + def get_out_bias(self) -> t_tensor: + """Return the output bias of the atomic model.""" + + @abstractmethod + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + + @abstractmethod + def mixed_types(self) -> bool: + """If true, the model + 1. assumes total number of atoms aligned across frames; + 2. uses a neighbor list that does not distinguish different atomic types. + + If false, the model + 1. assumes total number of atoms of each atom type aligned across frames; + 2. uses a neighbor list that distinguishes different atomic types. + + """ + pass + + @abstractmethod + def fwd( + self, + extended_coord: t_tensor, + extended_atype: t_tensor, + nlist: t_tensor, + mapping: Optional[t_tensor] = None, + fparam: Optional[t_tensor] = None, + aparam: Optional[t_tensor] = None, + ) -> Dict[str, t_tensor]: + pass + + @abstractmethod + def serialize(self) -> dict: + pass + + @classmethod + @abstractmethod + def deserialize(cls, data: dict): + pass + + def make_atom_mask( + self, + atype: t_tensor, + ) -> t_tensor: + """The atoms with type < 0 are treated as virutal atoms, + which serves as place-holders for multi-frame calculations + with different number of atoms in different frames. + + Parameters + ---------- + atype + Atom types. >= 0 for real atoms <0 for virtual atoms. + + Returns + ------- + mask + True for real atoms and False for virutal atoms. 
+ + """ + # supposed to be supported by all backends + return atype >= 0 + + def do_grad_r( + self, + var_name: Optional[str] = None, + ) -> bool: + """Tell if the output variable `var_name` is r_differentiable. + if var_name is None, returns if any of the variable is r_differentiable. + + """ + odef = self.fitting_output_def() + if var_name is None: + require: List[bool] = [] + for vv in odef.keys(): + require.append(self.do_grad_(vv, "r")) + return any(require) + else: + return self.do_grad_(var_name, "r") + + def do_grad_c( + self, + var_name: Optional[str] = None, + ) -> bool: + """Tell if the output variable `var_name` is c_differentiable. + if var_name is None, returns if any of the variable is c_differentiable. + + """ + odef = self.fitting_output_def() + if var_name is None: + require: List[bool] = [] + for vv in odef.keys(): + require.append(self.do_grad_(vv, "c")) + return any(require) + else: + return self.do_grad_(var_name, "c") + + def do_grad_(self, var_name: str, base: str) -> bool: + """Tell if the output variable `var_name` is differentiable.""" + assert var_name is not None + assert base in ["c", "r"] + if base == "c": + return self.fitting_output_def()[var_name].c_differentiable + return self.fitting_output_def()[var_name].r_differentiable + + setattr(BAM, fwd_method_name, BAM.fwd) + delattr(BAM, "fwd") + + return BAM diff --git a/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py b/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py new file mode 100644 index 0000000000..30ab58928b --- /dev/null +++ b/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py @@ -0,0 +1,406 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +from typing import ( + Dict, + List, + Optional, + Union, +) + +import numpy as np + +from deepmd.dpmodel.output_def import ( + FittingOutputDef, + OutputVariableDef, +) +from deepmd.utils.pair_tab import ( + PairTab, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .base_atomic_model 
import ( + BaseAtomicModel, +) + + +@BaseAtomicModel.register("pairtab") +class PairTabAtomicModel(BaseAtomicModel): + """Pairwise tabulation energy model. + + This model can be used to tabulate the pairwise energy between atoms for either + short-range or long-range interactions, such as D3, LJ, ZBL, etc. It should not + be used alone, but rather as one submodel of a linear (sum) model, such as + DP+D3. + + Do not put the model on the first model of a linear model, since the linear + model fetches the type map from the first model. + + At this moment, the model does not smooth the energy at the cutoff radius, so + one needs to make sure the energy has been smoothed to zero. + + Parameters + ---------- + tab_file : str + The path to the tabulation file. + rcut : float + The cutoff radius. + sel : int or list[int] + The maxmum number of atoms in the cut-off radius. + type_map : list[str] + Mapping atom type to the name (str) of the type. + For example `type_map[1]` gives the name of the type 1. + """ + + def __init__( + self, + tab_file: str, + rcut: float, + sel: Union[int, List[int]], + type_map: List[str], + **kwargs, + ): + super().__init__() + self.tab_file = tab_file + self.rcut = rcut + self.type_map = type_map + + self.tab = PairTab(self.tab_file, rcut=rcut) + self.type_map = type_map + self.ntypes = len(type_map) + + if self.tab_file is not None: + self.tab_info, self.tab_data = self.tab.get() + nspline, ntypes_tab = self.tab_info[-2:].astype(int) + self.tab_data = self.tab_data.reshape(ntypes_tab, ntypes_tab, nspline, 4) + if self.ntypes != ntypes_tab: + raise ValueError( + "The `type_map` provided does not match the number of columns in the table." 
+ ) + else: + self.tab_info, self.tab_data = None, None + + if isinstance(sel, int): + self.sel = sel + elif isinstance(sel, list): + self.sel = sum(sel) + else: + raise TypeError("sel must be int or list[int]") + + def fitting_output_def(self) -> FittingOutputDef: + return FittingOutputDef( + [ + OutputVariableDef( + name="energy", + shape=[1], + reduciable=True, + r_differentiable=True, + c_differentiable=True, + ) + ] + ) + + def get_rcut(self) -> float: + return self.rcut + + def get_type_map(self) -> List[str]: + return self.type_map + + def get_sel(self) -> List[int]: + return [self.sel] + + def get_nsel(self) -> int: + return self.sel + + def mixed_types(self) -> bool: + """If true, the model + 1. assumes total number of atoms aligned across frames; + 2. uses a neighbor list that does not distinguish different atomic types. + + If false, the model + 1. assumes total number of atoms of each atom type aligned across frames; + 2. uses a neighbor list that distinguishes different atomic types. + + """ + # to match DPA1 and DPA2. + return True + + def set_out_bias(self, out_bias: np.ndarray, add=False) -> None: + """ + Modify the output bias for the atomic model. + + Parameters + ---------- + out_bias : torch.Tensor + The new bias to be applied. + add : bool, optional + Whether to add the new bias to the existing one. + If False, the output bias will be directly replaced by the new bias. + If True, the new bias will be added to the existing one. 
+ """ + self.bias_atom_e = out_bias + self.bias_atom_e if add else out_bias + + def get_out_bias(self) -> np.ndarray: + """Return the output bias of the atomic model.""" + return self.bias_atom_e + + def serialize(self) -> dict: + dd = BaseAtomicModel.serialize(self) + dd.update( + { + "@class": "Model", + "type": "pairtab", + "@version": 1, + "tab": self.tab.serialize(), + "rcut": self.rcut, + "sel": self.sel, + "type_map": self.type_map, + } + ) + return dd + + @classmethod + def deserialize(cls, data) -> "PairTabAtomicModel": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("@class") + data.pop("type") + rcut = data.pop("rcut") + sel = data.pop("sel") + type_map = data.pop("type_map") + tab = PairTab.deserialize(data.pop("tab")) + tab_model = cls(None, rcut, sel, type_map, **data) + tab_model.tab = tab + tab_model.tab_info = tab_model.tab.tab_info + nspline, ntypes = tab_model.tab_info[-2:].astype(int) + tab_model.tab_data = tab_model.tab.tab_data.reshape(ntypes, ntypes, nspline, 4) + return tab_model + + def forward_atomic( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[np.ndarray] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + ) -> Dict[str, np.ndarray]: + nframes, nloc, nnei = nlist.shape + extended_coord = extended_coord.reshape(nframes, -1, 3) + + # this will mask all -1 in the nlist + mask = nlist >= 0 + masked_nlist = nlist * mask + + atype = extended_atype[:, :nloc] # (nframes, nloc) + pairwise_rr = self._get_pairwise_dist( + extended_coord, masked_nlist + ) # (nframes, nloc, nnei) + self.tab_data = self.tab_data.reshape( + self.tab.ntypes, self.tab.ntypes, self.tab.nspline, 4 + ) + + # (nframes, nloc, nnei) + j_type = extended_atype[ + np.arange(extended_atype.shape[0])[:, None, None], masked_nlist + ] + + raw_atomic_energy = self._pair_tabulated_inter( + nlist, atype, j_type, pairwise_rr + ) + atomic_energy = 0.5 * np.sum( + 
np.where(nlist != -1, raw_atomic_energy, np.zeros_like(raw_atomic_energy)), + axis=-1, + ).reshape(nframes, nloc, 1) + + return {"energy": atomic_energy} + + def _pair_tabulated_inter( + self, + nlist: np.ndarray, + i_type: np.ndarray, + j_type: np.ndarray, + rr: np.ndarray, + ) -> np.ndarray: + """Pairwise tabulated energy. + + Parameters + ---------- + nlist : np.ndarray + The unmasked neighbour list. (nframes, nloc) + i_type : np.ndarray + The integer representation of atom type for all local atoms for all frames. (nframes, nloc) + j_type : np.ndarray + The integer representation of atom type for all neighbour atoms of all local atoms for all frames. (nframes, nloc, nnei) + rr : np.ndarray + The salar distance vector between two atoms. (nframes, nloc, nnei) + + Returns + ------- + np.ndarray + The masked atomic energy for all local atoms for all frames. (nframes, nloc, nnei) + + Raises + ------ + Exception + If the distance is beyond the table. + + Notes + ----- + This function is used to calculate the pairwise energy between two atoms. + It uses a table containing cubic spline coefficients calculated in PairTab. + """ + nframes, nloc, nnei = nlist.shape + rmin = self.tab_info[0] + hh = self.tab_info[1] + hi = 1.0 / hh + + nspline = int(self.tab_info[2] + 0.1) + + uu = (rr - rmin) * hi # this is broadcasted to (nframes,nloc,nnei) + + # if nnei of atom 0 has -1 in the nlist, uu would be 0. + # this is to handle the nlist where the mask is set to 0, so that we don't raise exception for those atoms. + uu = np.where(nlist != -1, uu, nspline + 1) + + if np.any(uu < 0): + raise Exception("coord go beyond table lower boundary") + + idx = uu.astype(int) + + uu -= idx + table_coef = self._extract_spline_coefficient( + i_type, j_type, idx, self.tab_data, nspline + ) + table_coef = table_coef.reshape(nframes, nloc, nnei, 4) + ener = self._calculate_ener(table_coef, uu) + # here we need to overwrite energy to zero at rcut and beyond. 
+ mask_beyond_rcut = rr >= self.rcut + # also overwrite values beyond extrapolation to zero + extrapolation_mask = rr >= self.tab.rmin + nspline * self.tab.hh + ener[mask_beyond_rcut] = 0 + ener[extrapolation_mask] = 0 + + return ener + + @staticmethod + def _get_pairwise_dist(coords: np.ndarray, nlist: np.ndarray) -> np.ndarray: + """Get pairwise distance `dr`. + + Parameters + ---------- + coords : np.ndarray + The coordinate of the atoms, shape of (nframes, nall, 3). + nlist + The masked nlist, shape of (nframes, nloc, nnei). + + Returns + ------- + np.ndarray + The pairwise distance between the atoms (nframes, nloc, nnei). + """ + batch_indices = np.arange(nlist.shape[0])[:, None, None] + neighbor_atoms = coords[batch_indices, nlist] + loc_atoms = coords[:, : nlist.shape[1], :] + pairwise_dr = loc_atoms[:, :, None, :] - neighbor_atoms + pairwise_rr = np.sqrt(np.sum(np.power(pairwise_dr, 2), axis=-1)) + + return pairwise_rr + + @staticmethod + def _extract_spline_coefficient( + i_type: np.ndarray, + j_type: np.ndarray, + idx: np.ndarray, + tab_data: np.ndarray, + nspline: int, + ) -> np.ndarray: + """Extract the spline coefficient from the table. + + Parameters + ---------- + i_type : np.ndarray + The integer representation of atom type for all local atoms for all frames. (nframes, nloc) + j_type : np.ndarray + The integer representation of atom type for all neighbour atoms of all local atoms for all frames. (nframes, nloc, nnei) + idx : np.ndarray + The index of the spline coefficient. (nframes, nloc, nnei) + tab_data : np.ndarray + The table storing all the spline coefficient. (ntype, ntype, nspline, 4) + nspline : int + The number of splines in the table. + + Returns + ------- + np.ndarray + The spline coefficient. (nframes, nloc, nnei, 4), shape may be squeezed. 
+ """ + # (nframes, nloc, nnei) + expanded_i_type = np.broadcast_to( + i_type[:, :, np.newaxis], + (i_type.shape[0], i_type.shape[1], j_type.shape[-1]), + ) + + # (nframes, nloc, nnei, nspline, 4) + expanded_tab_data = tab_data[expanded_i_type, j_type] + + # (nframes, nloc, nnei, 1, 4) + expanded_idx = np.broadcast_to( + idx[..., np.newaxis, np.newaxis], (*idx.shape, 1, 4) + ) + clipped_indices = np.clip(expanded_idx, 0, nspline - 1).astype(int) + + # (nframes, nloc, nnei, 4) + final_coef = np.squeeze( + np.take_along_axis(expanded_tab_data, clipped_indices, 3) + ) + + # when the spline idx is beyond the table, all spline coefficients are set to `0`, and the resulting ener corresponding to the idx is also `0`. + final_coef[expanded_idx.squeeze() > nspline] = 0 + return final_coef + + @staticmethod + def _calculate_ener(coef: np.ndarray, uu: np.ndarray) -> np.ndarray: + """Calculate energy using spline coeeficients. + + Parameters + ---------- + coef : np.ndarray + The spline coefficients. (nframes, nloc, nnei, 4) + uu : np.ndarray + The atom displancemnt used in interpolation and extrapolation (nframes, nloc, nnei) + + Returns + ------- + np.ndarray + The atomic energy for all local atoms for all frames. (nframes, nloc, nnei) + """ + a3, a2, a1, a0 = coef[..., 0], coef[..., 1], coef[..., 2], coef[..., 3] + etmp = (a3 * uu + a2) * uu + a1 # this should be elementwise operations. + ener = etmp * uu + a0 # this energy has the extrapolated value when rcut > rmax + return ener + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + return 0 + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return 0 + + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. 
+ """ + return [] + + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + return False diff --git a/deepmd/dpmodel/common.py b/deepmd/dpmodel/common.py new file mode 100644 index 0000000000..8030432385 --- /dev/null +++ b/deepmd/dpmodel/common.py @@ -0,0 +1,67 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from abc import ( + ABC, + abstractmethod, +) + +import ml_dtypes +import numpy as np + +from deepmd.common import ( + VALID_PRECISION, +) +from deepmd.env import ( + GLOBAL_ENER_FLOAT_PRECISION, + GLOBAL_NP_FLOAT_PRECISION, +) + +PRECISION_DICT = { + "float16": np.float16, + "float32": np.float32, + "float64": np.float64, + "half": np.float16, + "single": np.float32, + "double": np.float64, + "int32": np.int32, + "int64": np.int64, + "default": GLOBAL_NP_FLOAT_PRECISION, + # NumPy doesn't have bfloat16 (and does't plan to add) + # ml_dtypes is a solution, but it seems not supporting np.save/np.load + # hdf5 hasn't supported bfloat16 as well (see https://forum.hdfgroup.org/t/11975) + "bfloat16": ml_dtypes.bfloat16, +} +assert VALID_PRECISION.issubset(PRECISION_DICT.keys()) + +RESERVED_PRECISON_DICT = { + np.float16: "float16", + np.float32: "float32", + np.float64: "float64", + np.int32: "int32", + np.int64: "int64", + ml_dtypes.bfloat16: "bfloat16", +} +assert set(RESERVED_PRECISON_DICT.keys()) == set(PRECISION_DICT.values()) +DEFAULT_PRECISION = "float64" + + +class NativeOP(ABC): + """The unit operation of a native model.""" + + @abstractmethod + def call(self, *args, **kwargs): + """Forward pass in NumPy implementation.""" + pass + + def __call__(self, *args, **kwargs): + """Forward pass in NumPy implementation.""" + return self.call(*args, **kwargs) + + +__all__ = [ + "GLOBAL_NP_FLOAT_PRECISION", + "GLOBAL_ENER_FLOAT_PRECISION", + "PRECISION_DICT", + "RESERVED_PRECISON_DICT", + "DEFAULT_PRECISION", + "NativeOP", +] diff --git 
a/deepmd/dpmodel/descriptor/__init__.py b/deepmd/dpmodel/descriptor/__init__.py new file mode 100644 index 0000000000..a19a2aa034 --- /dev/null +++ b/deepmd/dpmodel/descriptor/__init__.py @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from .hybrid import ( + DescrptHybrid, +) +from .make_base_descriptor import ( + make_base_descriptor, +) +from .se_e2_a import ( + DescrptSeA, +) +from .se_r import ( + DescrptSeR, +) + +__all__ = [ + "DescrptSeA", + "DescrptSeR", + "DescrptHybrid", + "make_base_descriptor", +] diff --git a/deepmd/dpmodel/descriptor/base_descriptor.py b/deepmd/dpmodel/descriptor/base_descriptor.py new file mode 100644 index 0000000000..7429d3f213 --- /dev/null +++ b/deepmd/dpmodel/descriptor/base_descriptor.py @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +import numpy as np + +from .make_base_descriptor import ( + make_base_descriptor, +) + +BaseDescriptor = make_base_descriptor(np.ndarray, "call") diff --git a/deepmd/dpmodel/descriptor/hybrid.py b/deepmd/dpmodel/descriptor/hybrid.py new file mode 100644 index 0000000000..96640d75c8 --- /dev/null +++ b/deepmd/dpmodel/descriptor/hybrid.py @@ -0,0 +1,244 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Any, + Dict, + List, + Optional, + Union, +) + +import numpy as np + +from deepmd.dpmodel.common import ( + NativeOP, +) +from deepmd.dpmodel.descriptor.base_descriptor import ( + BaseDescriptor, +) +from deepmd.dpmodel.utils.nlist import ( + nlist_distinguish_types, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + + +@BaseDescriptor.register("hybrid") +class DescrptHybrid(BaseDescriptor, NativeOP): + """Concate a list of descriptors to form a new descriptor. + + Parameters + ---------- + list : list : List[Union[BaseDescriptor, Dict[str, Any]]] + Build a descriptor from the concatenation of the list of descriptors. + The descriptor can be either an object or a dictionary. 
+ """ + + def __init__( + self, + list: List[Union[BaseDescriptor, Dict[str, Any]]], + ) -> None: + super().__init__() + # warning: list is conflict with built-in list + descrpt_list = list + if descrpt_list == [] or descrpt_list is None: + raise RuntimeError( + "cannot build descriptor from an empty list of descriptors." + ) + formatted_descript_list = [] + for ii in descrpt_list: + if isinstance(ii, BaseDescriptor): + formatted_descript_list.append(ii) + elif isinstance(ii, dict): + formatted_descript_list.append(BaseDescriptor(**ii)) + else: + raise NotImplementedError + self.descrpt_list = formatted_descript_list + self.numb_descrpt = len(self.descrpt_list) + for ii in range(1, self.numb_descrpt): + assert ( + self.descrpt_list[ii].get_ntypes() == self.descrpt_list[0].get_ntypes() + ), f"number of atom types in {ii}th descrptor {self.descrpt_list[0].__class__.__name__} does not match others" + # if hybrid sel is larger than sub sel, the nlist needs to be cut for each type + hybrid_sel = self.get_sel() + self.nlist_cut_idx: List[np.ndarray] = [] + if self.mixed_types() and not all( + descrpt.mixed_types() for descrpt in self.descrpt_list + ): + self.sel_no_mixed_types = np.max( + [ + descrpt.get_sel() + for descrpt in self.descrpt_list + if not descrpt.mixed_types() + ], + axis=0, + ).tolist() + else: + self.sel_no_mixed_types = None + for ii in range(self.numb_descrpt): + if self.mixed_types() == self.descrpt_list[ii].mixed_types(): + hybrid_sel = self.get_sel() + else: + assert self.sel_no_mixed_types is not None + hybrid_sel = self.sel_no_mixed_types + sub_sel = self.descrpt_list[ii].get_sel() + start_idx = np.cumsum(np.pad(hybrid_sel, (1, 0), "constant"))[:-1] + end_idx = start_idx + np.array(sub_sel) + cut_idx = np.concatenate( + [range(ss, ee) for ss, ee in zip(start_idx, end_idx)] + ) + self.nlist_cut_idx.append(cut_idx) + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return np.max([descrpt.get_rcut() for descrpt in 
self.descrpt_list]).item() + + def get_sel(self) -> List[int]: + """Returns the number of selected atoms for each type.""" + if self.mixed_types(): + return [ + np.max( + [descrpt.get_nsel() for descrpt in self.descrpt_list], axis=0 + ).item() + ] + else: + return np.max( + [descrpt.get_sel() for descrpt in self.descrpt_list], axis=0 + ).tolist() + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.descrpt_list[0].get_ntypes() + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + return np.sum([descrpt.get_dim_out() for descrpt in self.descrpt_list]).item() + + def get_dim_emb(self) -> int: + """Returns the output dimension.""" + return np.sum([descrpt.get_dim_emb() for descrpt in self.descrpt_list]).item() + + def mixed_types(self): + """Returns if the descriptor requires a neighbor list that distinguish different + atomic types or not. + """ + return any(descrpt.mixed_types() for descrpt in self.descrpt_list) + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + raise NotImplementedError + + def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + """Update mean and stddev for descriptor elements.""" + for descrpt in self.descrpt_list: + descrpt.compute_input_stats(merged, path) + + def call( + self, + coord_ext, + atype_ext, + nlist, + mapping: Optional[np.ndarray] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + coord_ext + The extended coordinates of atoms. shape: nf x (nallx3) + atype_ext + The extended aotm types. shape: nf x nall + nlist + The neighbor list. shape: nf x nloc x nnei + mapping + The index mapping, not required by this descriptor. 
+ + Returns + ------- + descriptor + The descriptor. shape: nf x nloc x (ng x axis_neuron) + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3. + g2 + The rotationally invariant pair-partical representation. + h2 + The rotationally equivariant pair-partical representation. + sw + The smooth switch function. + """ + out_descriptor = [] + out_gr = [] + out_g2 = None + out_h2 = None + out_sw = None + if self.sel_no_mixed_types is not None: + nl_distinguish_types = nlist_distinguish_types( + nlist, + atype_ext, + self.sel_no_mixed_types, + ) + else: + nl_distinguish_types = None + for descrpt, nci in zip(self.descrpt_list, self.nlist_cut_idx): + # cut the nlist to the correct length + if self.mixed_types() == descrpt.mixed_types(): + nl = nlist[:, :, nci] + else: + # mixed_types is True, but descrpt.mixed_types is False + assert nl_distinguish_types is not None + nl = nl_distinguish_types[:, :, nci] + odescriptor, gr, g2, h2, sw = descrpt(coord_ext, atype_ext, nl, mapping) + out_descriptor.append(odescriptor) + if gr is not None: + out_gr.append(gr) + + out_descriptor = np.concatenate(out_descriptor, axis=-1) + out_gr = np.concatenate(out_gr, axis=-2) if out_gr else None + return out_descriptor, out_gr, out_g2, out_h2, out_sw + + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict) -> dict: + """Update the selection and perform neighbor statistics. 
+ + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + local_jdata_cpy = local_jdata.copy() + local_jdata_cpy["list"] = [ + BaseDescriptor.update_sel(global_jdata, sub_jdata) + for sub_jdata in local_jdata["list"] + ] + return local_jdata_cpy + + def serialize(self) -> dict: + return { + "@class": "Descriptor", + "type": "hybrid", + "@version": 1, + "list": [descrpt.serialize() for descrpt in self.descrpt_list], + } + + @classmethod + def deserialize(cls, data: dict) -> "DescrptHybrid": + data = data.copy() + class_name = data.pop("@class") + assert class_name == "Descriptor" + class_type = data.pop("type") + assert class_type == "hybrid" + check_version_compatibility(data.pop("@version"), 1, 1) + obj = cls( + list=[BaseDescriptor.deserialize(ii) for ii in data["list"]], + ) + return obj diff --git a/deepmd/dpmodel/descriptor/make_base_descriptor.py b/deepmd/dpmodel/descriptor/make_base_descriptor.py new file mode 100644 index 0000000000..940bd0cd27 --- /dev/null +++ b/deepmd/dpmodel/descriptor/make_base_descriptor.py @@ -0,0 +1,159 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + Callable, + List, + Optional, + Union, +) + +from deepmd.common import ( + j_get_type, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.plugin import ( + PluginVariant, + make_plugin_registry, +) + + +def make_base_descriptor( + t_tensor, + fwd_method_name: str = "forward", +): + """Make the base class for the descriptor. + + Parameters + ---------- + t_tensor + The type of the tensor. used in the type hint. + fwd_method_name + Name of the forward method. For dpmodels, it should be "call". + For torch models, it should be "forward". 
+ + """ + + class BD(ABC, PluginVariant, make_plugin_registry("descriptor")): + """Base descriptor provides the interfaces of descriptor.""" + + def __new__(cls, *args, **kwargs): + if cls is BD: + cls = cls.get_class_by_type(j_get_type(kwargs, cls.__name__)) + return super().__new__(cls) + + @abstractmethod + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + pass + + @abstractmethod + def get_sel(self) -> List[int]: + """Returns the number of selected neighboring atoms for each type.""" + pass + + def get_nsel(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return sum(self.get_sel()) + + def get_nnei(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return self.get_nsel() + + @abstractmethod + def get_ntypes(self) -> int: + """Returns the number of element types.""" + pass + + @abstractmethod + def get_dim_out(self) -> int: + """Returns the output descriptor dimension.""" + pass + + @abstractmethod + def get_dim_emb(self) -> int: + """Returns the embedding dimension of g2.""" + pass + + @abstractmethod + def mixed_types(self) -> bool: + """Returns if the descriptor requires a neighbor list that distinguish different + atomic types or not. + """ + pass + + @abstractmethod + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. 
+ """ + pass + + def compute_input_stats( + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, + ): + """Update mean and stddev for descriptor elements.""" + raise NotImplementedError + + @abstractmethod + def fwd( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[t_tensor] = None, + ): + """Calculate descriptor.""" + pass + + @abstractmethod + def serialize(self) -> dict: + """Serialize the obj to dict.""" + pass + + @classmethod + def deserialize(cls, data: dict) -> "BD": + """Deserialize the model. + + Parameters + ---------- + data : dict + The serialized data + + Returns + ------- + BD + The deserialized descriptor + """ + if cls is BD: + return BD.get_class_by_type(data["type"]).deserialize(data) + raise NotImplementedError("Not implemented in class %s" % cls.__name__) + + @classmethod + @abstractmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. 
+ + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + # call subprocess + cls = cls.get_class_by_type(j_get_type(local_jdata, cls.__name__)) + return cls.update_sel(global_jdata, local_jdata) + + setattr(BD, fwd_method_name, BD.fwd) + delattr(BD, "fwd") + + return BD diff --git a/deepmd_utils/model_format/se_e2_a.py b/deepmd/dpmodel/descriptor/se_e2_a.py similarity index 55% rename from deepmd_utils/model_format/se_e2_a.py rename to deepmd/dpmodel/descriptor/se_e2_a.py index b9143ee360..8d926034dd 100644 --- a/deepmd_utils/model_format/se_e2_a.py +++ b/deepmd/dpmodel/descriptor/se_e2_a.py @@ -1,8 +1,23 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import itertools + import numpy as np +from deepmd.dpmodel.utils.update_sel import ( + UpdateSel, +) +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + try: - from deepmd_utils._version import version as __version__ + from deepmd._version import version as __version__ except ImportError: __version__ = "unknown" @@ -11,22 +26,29 @@ Any, List, Optional, + Tuple, ) -from .common import ( +from deepmd.dpmodel import ( DEFAULT_PRECISION, + PRECISION_DICT, NativeOP, ) -from .env_mat import ( - EnvMat, -) -from .network import ( +from deepmd.dpmodel.utils import ( EmbeddingNet, + EnvMat, NetworkCollection, + PairExcludeMask, +) + +from .base_descriptor import ( + BaseDescriptor, ) -class DescrptSeA(NativeOP): +@BaseDescriptor.register("se_e2_a") +@BaseDescriptor.register("se_a") +class DescrptSeA(NativeOP, BaseDescriptor): r"""DeepPot-SE constructed from all information (both angular and radial) of atomic configurations. The embedding takes the distance between atoms as input. 
@@ -65,7 +87,7 @@ class DescrptSeA(NativeOP): :math:`\mathcal{G}^i_< \in \mathbb{R}^{N \times M_2}` takes first :math:`M_2` columns of :math:`\mathcal{G}^i`. The equation of embedding network :math:`\mathcal{N}` can be found at - :meth:`deepmd.utils.network.embedding_net`. + :meth:`deepmd.tf.utils.network.embedding_net`. Parameters ---------- @@ -73,7 +95,7 @@ class DescrptSeA(NativeOP): The cut-off radius :math:`r_c` rcut_smth From where the environment matrix should be smoothed :math:`r_s` - sel : list[str] + sel : list[int] sel[i] specifies the maxmum number of type i atoms in the cut-off radius neuron : list[int] Number of neurons in each hidden layers of the embedding net :math:`\mathcal{N}` @@ -89,6 +111,8 @@ class DescrptSeA(NativeOP): exclude_types : List[List[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. + env_protection: float + Protection parameter to prevent division by zero errors during environment matrix calculations. set_davg_zero Set the shift of embedding net input to zero. activation_function @@ -120,23 +144,22 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: List[str], + sel: List[int], neuron: List[int] = [24, 48, 96], axis_neuron: int = 8, resnet_dt: bool = False, trainable: bool = True, type_one_side: bool = True, exclude_types: List[List[int]] = [], + env_protection: float = 0.0, set_davg_zero: bool = False, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, spin: Optional[Any] = None, + # consistent with argcheck, not used though + seed: Optional[int] = None, ) -> None: ## seed, uniform_seed, multi_task, not included. 
- if not type_one_side: - raise NotImplementedError("type_one_side == False not implemented") - if exclude_types != []: - raise NotImplementedError("exclude_types is not implemented") if spin is not None: raise NotImplementedError("spin is not implemented") @@ -149,11 +172,13 @@ def __init__( self.resnet_dt = resnet_dt self.trainable = trainable self.type_one_side = type_one_side - self.exclude_types = exclude_types + self.env_protection = env_protection self.set_davg_zero = set_davg_zero self.activation_function = activation_function self.precision = precision self.spin = spin + # order matters, placed after the assignment of self.ntypes + self.reinit_exclude(exclude_types) in_dim = 1 # not considiering type embedding self.embeddings = NetworkCollection( @@ -161,19 +186,24 @@ def __init__( ndim=(1 if self.type_one_side else 2), network_type="embedding_network", ) - for ii in range(self.ntypes): - self.embeddings[(ii,)] = EmbeddingNet( + for embedding_idx in itertools.product( + range(self.ntypes), repeat=self.embeddings.ndim + ): + self.embeddings[embedding_idx] = EmbeddingNet( in_dim, self.neuron, self.activation_function, self.resnet_dt, self.precision, ) - self.env_mat = EnvMat(self.rcut, self.rcut_smth) + self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection) self.nnei = np.sum(self.sel) - self.nneix4 = self.nnei * 4 - self.davg = np.zeros([self.ntypes, self.nneix4]) - self.dstd = np.ones([self.ntypes, self.nneix4]) + self.davg = np.zeros( + [self.ntypes, self.nnei, 4], dtype=PRECISION_DICT[self.precision] + ) + self.dstd = np.ones( + [self.ntypes, self.nnei, 4], dtype=PRECISION_DICT[self.precision] + ) self.orig_sel = self.sel def __setitem__(self, key, value): @@ -192,22 +222,73 @@ def __getitem__(self, key): else: raise KeyError(key) + @property + def dim_out(self): + """Returns the output dimension of this descriptor.""" + return self.get_dim_out() + + def get_dim_out(self): + """Returns the output dimension of this descriptor.""" 
+ return self.neuron[-1] * self.axis_neuron + + def get_dim_emb(self): + """Returns the embedding (g2) dimension of this descriptor.""" + return self.neuron[-1] + + def get_rcut(self): + """Returns cutoff radius.""" + return self.rcut + + def get_sel(self): + """Returns the number of selected atoms for each type.""" + return self.sel + + def mixed_types(self): + """Returns if the descriptor requires a neighbor list that distinguishes different + atomic types or not. + """ + return False + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not starting from a checkpoint (resume is False), + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + raise NotImplementedError + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.ntypes + + def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + """Update mean and stddev for descriptor elements.""" + raise NotImplementedError + def cal_g( self, ss, - ll, + embedding_idx, ): - nf, nloc, nnei = ss.shape[0:3] - ss = ss.reshape(nf, nloc, nnei, 1) - # nf x nloc x nnei x ng - gg = self.embeddings[(ll,)].call(ss) + nf_times_nloc, nnei = ss.shape[0:2] + ss = ss.reshape(nf_times_nloc, nnei, 1) + # (nf x nloc) x nnei x ng + gg = self.embeddings[embedding_idx].call(ss) return gg + def reinit_exclude( + self, + exclude_types: List[Tuple[int, int]] = [], + ): + self.exclude_types = exclude_types + self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) + def call( self, coord_ext, atype_ext, nlist, + mapping: Optional[np.ndarray] = None, ): """Compute the descriptor. @@ -219,35 +300,79 @@ def call( The extended aotm types. shape: nf x nall nlist The neighbor list. shape: nf x nloc x nnei + mapping + The index mapping from extended to local region. not used by this descriptor. Returns ------- descriptor - The descriptor.
shape: nf x nloc x ng x axis_neuron + The descriptor. shape: nf x nloc x (ng x axis_neuron) + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3 + g2 + The rotationally invariant pair-partical representation. + this descriptor returns None + h2 + The rotationally equivariant pair-partical representation. + this descriptor returns None + sw + The smooth switch function. """ + del mapping # nf x nloc x nnei x 4 rr, ww = self.env_mat.call(coord_ext, atype_ext, nlist, self.davg, self.dstd) nf, nloc, nnei, _ = rr.shape sec = np.append([0], np.cumsum(self.sel)) ng = self.neuron[-1] - gr = np.zeros([nf, nloc, ng, 4]) - for tt in range(self.ntypes): - tr = rr[:, :, sec[tt] : sec[tt + 1], :] + gr = np.zeros([nf * nloc, ng, 4], dtype=PRECISION_DICT[self.precision]) + exclude_mask = self.emask.build_type_exclude_mask(nlist, atype_ext) + # merge nf and nloc axis, so for type_one_side == False, + # we don't require atype is the same in all frames + exclude_mask = exclude_mask.reshape(nf * nloc, nnei) + rr = rr.reshape(nf * nloc, nnei, 4) + + for embedding_idx in itertools.product( + range(self.ntypes), repeat=self.embeddings.ndim + ): + if self.type_one_side: + (tt,) = embedding_idx + ti_mask = np.s_[:] + else: + ti, tt = embedding_idx + ti_mask = atype_ext[:, :nloc].ravel() == ti + mm = exclude_mask[ti_mask, sec[tt] : sec[tt + 1]] + tr = rr[ti_mask, sec[tt] : sec[tt + 1], :] + tr = tr * mm[:, :, None] ss = tr[..., 0:1] - gg = self.cal_g(ss, tt) - # nf x nloc x ng x 4 - gr += np.einsum("flni,flnj->flij", gg, tr) + gg = self.cal_g(ss, embedding_idx) + gr_tmp = np.einsum("lni,lnj->lij", gg, tr) + gr[ti_mask] += gr_tmp + gr = gr.reshape(nf, nloc, ng, 4) + # nf x nloc x ng x 4 gr /= self.nnei gr1 = gr[:, :, : self.axis_neuron, :] # nf x nloc x ng x ng1 grrg = np.einsum("flid,fljd->flij", gr, gr1) # nf x nloc x (ng x ng1) - grrg = grrg.reshape(nf, nloc, ng * self.axis_neuron) - return grrg + grrg = 
grrg.reshape(nf, nloc, ng * self.axis_neuron).astype( + GLOBAL_NP_FLOAT_PRECISION + ) + return grrg, gr[..., 1:], None, None, ww def serialize(self) -> dict: + """Serialize the descriptor to dict.""" + if not self.type_one_side and self.exclude_types: + for embedding_idx in itertools.product(range(self.ntypes), repeat=2): + # not actually used; to match serilization data from TF to pass the test + if embedding_idx in self.emask: + self.embeddings[embedding_idx].clear() + return { + "@class": "Descriptor", + "type": "se_e2_a", + "@version": 1, "rcut": self.rcut, "rcut_smth": self.rcut_smth, "sel": self.sel, @@ -257,9 +382,11 @@ def serialize(self) -> dict: "trainable": self.trainable, "type_one_side": self.type_one_side, "exclude_types": self.exclude_types, + "env_protection": self.env_protection, "set_davg_zero": self.set_davg_zero, "activation_function": self.activation_function, - "precision": self.precision, + # make deterministic + "precision": np.dtype(PRECISION_DICT[self.precision]).name, "spin": self.spin, "env_mat": self.env_mat.serialize(), "embeddings": self.embeddings.serialize(), @@ -271,7 +398,11 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data: dict) -> "DescrptSeA": + """Deserialize from dict.""" data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("@class", None) + data.pop("type", None) variables = data.pop("@variables") embeddings = data.pop("embeddings") env_mat = data.pop("env_mat") @@ -280,5 +411,18 @@ def deserialize(cls, data: dict) -> "DescrptSeA": obj["davg"] = variables["davg"] obj["dstd"] = variables["dstd"] obj.embeddings = NetworkCollection.deserialize(embeddings) - obj.env_mat = EnvMat.deserialize(env_mat) return obj + + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. 
+ + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + local_jdata_cpy = local_jdata.copy() + return UpdateSel().update_one_sel(global_jdata, local_jdata_cpy, False) diff --git a/deepmd/dpmodel/descriptor/se_r.py b/deepmd/dpmodel/descriptor/se_r.py new file mode 100644 index 0000000000..9c9b4e096e --- /dev/null +++ b/deepmd/dpmodel/descriptor/se_r.py @@ -0,0 +1,353 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import numpy as np + +from deepmd.dpmodel.utils.update_sel import ( + UpdateSel, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +try: + from deepmd._version import version as __version__ +except ImportError: + __version__ = "unknown" + +import copy +from typing import ( + Any, + List, + Optional, +) + +from deepmd.dpmodel import ( + DEFAULT_PRECISION, + PRECISION_DICT, + NativeOP, +) +from deepmd.dpmodel.utils import ( + EmbeddingNet, + EnvMat, + NetworkCollection, + PairExcludeMask, +) +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, +) + +from .base_descriptor import ( + BaseDescriptor, +) + + +@BaseDescriptor.register("se_e2_r") +@BaseDescriptor.register("se_r") +class DescrptSeR(NativeOP, BaseDescriptor): + r"""DeepPot-SE_R constructed from only the radial information of atomic configurations. + + + Parameters + ---------- + rcut + The cut-off radius :math:`r_c` + rcut_smth + From where the environment matrix should be smoothed :math:`r_s` + sel : list[int] + sel[i] specifies the maximum number of type i atoms in the cut-off radius + neuron : list[int] + Number of neurons in each hidden layer of the embedding net :math:`\mathcal{N}` + resnet_dt + Time-step `dt` in the resnet construction: + y = x + dt * \phi (Wx + b) + trainable + If the weights of embedding net are trainable. + type_one_side + Try to build N_types embedding nets.
Otherwise, building N_types^2 embedding nets + exclude_types : List[List[int]] + The excluded pairs of types which have no interaction with each other. + For example, `[[0, 1]]` means no interaction between type 0 and type 1. + set_davg_zero + Set the shift of embedding net input to zero. + activation_function + The activation function in the embedding net. Supported options are |ACTIVATION_FN| + precision + The precision of the embedding net parameters. Supported options are |PRECISION| + multi_task + If the model has multi fitting nets to train. + spin + The deepspin object. + + Limitations + ----------- + The current implementation does not support the following features + + 1. type_one_side == False + 2. spin is not None + 3. multi_task (not accepted by the constructor) + + References + ---------- + .. [1] Linfeng Zhang, Jiequn Han, Han Wang, Wissam A. Saidi, Roberto Car, and E. Weinan. 2018. + End-to-end symmetry preserving inter-atomic potential energy model for finite and extended + systems. In Proceedings of the 32nd International Conference on Neural Information Processing + Systems (NIPS'18). Curran Associates Inc., Red Hook, NY, USA, 4441-4451. + """ + + def __init__( + self, + rcut: float, + rcut_smth: float, + sel: List[int], + neuron: List[int] = [24, 48, 96], + resnet_dt: bool = False, + trainable: bool = True, + type_one_side: bool = True, + exclude_types: List[List[int]] = [], + env_protection: float = 0.0, + set_davg_zero: bool = False, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + spin: Optional[Any] = None, + # consistent with argcheck, not used though + seed: Optional[int] = None, + ) -> None: + ## seed, uniform_seed, multi_task, not included.
+ if not type_one_side: + raise NotImplementedError("type_one_side == False not implemented") + if spin is not None: + raise NotImplementedError("spin is not implemented") + + self.rcut = rcut + self.rcut_smth = rcut_smth + self.sel = sel + self.ntypes = len(self.sel) + self.neuron = neuron + self.resnet_dt = resnet_dt + self.trainable = trainable + self.type_one_side = type_one_side + self.exclude_types = exclude_types + self.set_davg_zero = set_davg_zero + self.activation_function = activation_function + self.precision = precision + self.spin = spin + self.emask = PairExcludeMask(self.ntypes, self.exclude_types) + self.env_protection = env_protection + + in_dim = 1 # not considiering type embedding + self.embeddings = NetworkCollection( + ntypes=self.ntypes, + ndim=(1 if self.type_one_side else 2), + network_type="embedding_network", + ) + if not self.type_one_side: + raise NotImplementedError("type_one_side == False not implemented") + for ii in range(self.ntypes): + self.embeddings[(ii,)] = EmbeddingNet( + in_dim, + self.neuron, + self.activation_function, + self.resnet_dt, + self.precision, + ) + self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection) + self.nnei = np.sum(self.sel) + self.davg = np.zeros( + [self.ntypes, self.nnei, 1], dtype=PRECISION_DICT[self.precision] + ) + self.dstd = np.ones( + [self.ntypes, self.nnei, 1], dtype=PRECISION_DICT[self.precision] + ) + self.orig_sel = self.sel + + def __setitem__(self, key, value): + if key in ("avg", "data_avg", "davg"): + self.davg = value + elif key in ("std", "data_std", "dstd"): + self.dstd = value + else: + raise KeyError(key) + + def __getitem__(self, key): + if key in ("avg", "data_avg", "davg"): + return self.davg + elif key in ("std", "data_std", "dstd"): + return self.dstd + else: + raise KeyError(key) + + @property + def dim_out(self): + """Returns the output dimension of this descriptor.""" + return self.get_dim_out() + + def get_dim_out(self): + """Returns the output 
dimension of this descriptor.""" + return self.neuron[-1] + + def get_dim_emb(self): + """Returns the embedding (g2) dimension of this descriptor.""" + raise NotImplementedError + + def get_rcut(self): + """Returns cutoff radius.""" + return self.rcut + + def get_sel(self): + """Returns the number of selected atoms for each type.""" + return self.sel + + def mixed_types(self): + """Returns if the descriptor requires a neighbor list that distinguishes different + atomic types or not. + """ + return False + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not starting from a checkpoint (resume is False), + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + raise NotImplementedError + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.ntypes + + def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + """Update mean and stddev for descriptor elements.""" + raise NotImplementedError + + def cal_g( + self, + ss, + ll, + ): + nf, nloc, nnei = ss.shape[0:3] + ss = ss.reshape(nf, nloc, nnei, 1) + # nf x nloc x nnei x ng + gg = self.embeddings[(ll,)].call(ss) + return gg + + def call( + self, + coord_ext, + atype_ext, + nlist, + mapping: Optional[np.ndarray] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + coord_ext + The extended coordinates of atoms. shape: nf x (nallx3) + atype_ext + The extended atom types. shape: nf x nall + nlist + The neighbor list. shape: nf x nloc x nnei + mapping + The index mapping from extended to local region. not used by this descriptor. + + Returns + ------- + descriptor + The descriptor. shape: nf x nloc x ng + gr + The rotationally equivariant and permutationally invariant single particle + representation. this descriptor returns None + g2 + The rotationally invariant pair-particle representation.
+ this descriptor returns None + h2 + The rotationally equivariant pair-particle representation. + this descriptor returns None + sw + The smooth switch function. + """ + del mapping + # nf x nloc x nnei x 1 + rr, ww = self.env_mat.call( + coord_ext, atype_ext, nlist, self.davg, self.dstd, True + ) + nf, nloc, nnei, _ = rr.shape + sec = np.append([0], np.cumsum(self.sel)) + + ng = self.neuron[-1] + xyz_scatter = np.zeros([nf, nloc, ng], dtype=PRECISION_DICT[self.precision]) + exclude_mask = self.emask.build_type_exclude_mask(nlist, atype_ext) + for tt in range(self.ntypes): + mm = exclude_mask[:, :, sec[tt] : sec[tt + 1]] + tr = rr[:, :, sec[tt] : sec[tt + 1], :] + tr = tr * mm[:, :, :, None] + gg = self.cal_g(tr, tt) + gg = np.mean(gg, axis=2) + # nf x nloc x ng + xyz_scatter += gg * (self.sel[tt] / self.nnei) + + res_rescale = 1.0 / 5.0 + res = xyz_scatter * res_rescale + res = res.reshape(nf, nloc, -1).astype(GLOBAL_NP_FLOAT_PRECISION) + return res, None, None, None, ww + + def serialize(self) -> dict: + """Serialize the descriptor to dict.""" + return { + "@class": "Descriptor", + "type": "se_r", + "@version": 1, + "rcut": self.rcut, + "rcut_smth": self.rcut_smth, + "sel": self.sel, + "neuron": self.neuron, + "resnet_dt": self.resnet_dt, + "trainable": self.trainable, + "type_one_side": self.type_one_side, + "exclude_types": self.exclude_types, + "env_protection": self.env_protection, + "set_davg_zero": self.set_davg_zero, + "activation_function": self.activation_function, + # make deterministic + "precision": np.dtype(PRECISION_DICT[self.precision]).name, + "spin": self.spin, + "env_mat": self.env_mat.serialize(), + "embeddings": self.embeddings.serialize(), + "@variables": { + "davg": self.davg, + "dstd": self.dstd, + }, + } + + @classmethod + def deserialize(cls, data: dict) -> "DescrptSeR": + """Deserialize from dict.""" + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("@class", None) +
data.pop("type", None) + variables = data.pop("@variables") + embeddings = data.pop("embeddings") + env_mat = data.pop("env_mat") + obj = cls(**data) + + obj["davg"] = variables["davg"] + obj["dstd"] = variables["dstd"] + obj.embeddings = NetworkCollection.deserialize(embeddings) + return obj + + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. + + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + local_jdata_cpy = local_jdata.copy() + return UpdateSel().update_one_sel(global_jdata, local_jdata_cpy, False) diff --git a/deepmd/dpmodel/fitting/__init__.py b/deepmd/dpmodel/fitting/__init__.py new file mode 100644 index 0000000000..866a710a3b --- /dev/null +++ b/deepmd/dpmodel/fitting/__init__.py @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from .dipole_fitting import ( + DipoleFitting, +) +from .dos_fitting import ( + DOSFittingNet, +) +from .ener_fitting import ( + EnergyFittingNet, +) +from .invar_fitting import ( + InvarFitting, +) +from .make_base_fitting import ( + make_base_fitting, +) +from .polarizability_fitting import ( + PolarFitting, +) + +__all__ = [ + "InvarFitting", + "make_base_fitting", + "DipoleFitting", + "EnergyFittingNet", + "PolarFitting", + "DOSFittingNet", +] diff --git a/deepmd/dpmodel/fitting/base_fitting.py b/deepmd/dpmodel/fitting/base_fitting.py new file mode 100644 index 0000000000..bb1853a4a0 --- /dev/null +++ b/deepmd/dpmodel/fitting/base_fitting.py @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import numpy as np + +from .make_base_fitting import ( + make_base_fitting, +) + +BaseFitting = make_base_fitting(np.ndarray, fwd_method_name="call") diff --git a/deepmd/dpmodel/fitting/dipole_fitting.py b/deepmd/dpmodel/fitting/dipole_fitting.py new file mode 100644 index 0000000000..6d6324770c --- /dev/null 
+++ b/deepmd/dpmodel/fitting/dipole_fitting.py @@ -0,0 +1,224 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +from typing import ( + Any, + Dict, + List, + Optional, +) + +import numpy as np + +from deepmd.dpmodel import ( + DEFAULT_PRECISION, +) +from deepmd.dpmodel.fitting.base_fitting import ( + BaseFitting, +) +from deepmd.dpmodel.output_def import ( + FittingOutputDef, + OutputVariableDef, + fitting_check_output, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .general_fitting import ( + GeneralFitting, +) + + +@BaseFitting.register("dipole") +@fitting_check_output +class DipoleFitting(GeneralFitting): + r"""Fitting the rotationally equivariant dipole of the system. + + Parameters + ---------- + var_name + The name of the output variable. + ntypes + The number of atom types. + dim_descrpt + The dimension of the input descriptor. + embedding_width : int + The dimension of rotation matrix, m1. + neuron + Number of neurons :math:`N` in each hidden layer of the fitting net + resnet_dt + Time-step `dt` in the resnet construction: + :math:`y = x + dt * \phi (Wx + b)` + numb_fparam + Number of frame parameters + numb_aparam + Number of atomic parameters + rcond + The condition number for the regression of atomic energy. + tot_ener_zero + Force the total energy to zero. Useful for the charge fitting. + trainable + If the weights of fitting net are trainable. + Suppose that we have :math:`N_l` hidden layers in the fitting net, + this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable. + activation_function + The activation function :math:`\boldsymbol{\phi}` in the fitting net. Supported options are |ACTIVATION_FN| + precision + The precision of the fitting net parameters. Supported options are |PRECISION| + layer_name : list[Optional[str]], optional + The name of each layer.
If two layers, either in the same fitting or different fittings, + have the same name, they will share the same neural network parameters. + use_aparam_as_mask: bool, optional + If True, the atomic parameters will be used as a mask that determines whether the atom is real/virtual. + And the aparam will not be used as the atomic parameters for embedding. + mixed_types + If true, use a uniform fitting net for all atom types, otherwise use + different fitting nets for different atom types. + exclude_types + Atomic contributions of the excluded atom types are set to zero. + r_differentiable + If the variable is differentiated with respect to coordinates of atoms. + Only reducible variables are differentiable. + c_differentiable + If the variable is differentiated with respect to the cell tensor (pbc case). + Only reducible variables are differentiable. + """ + + def __init__( + self, + var_name: str, + ntypes: int, + dim_descrpt: int, + embedding_width: int, + neuron: List[int] = [120, 120, 120], + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + rcond: Optional[float] = None, + tot_ener_zero: bool = False, + trainable: Optional[List[bool]] = None, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + layer_name: Optional[List[Optional[str]]] = None, + use_aparam_as_mask: bool = False, + spin: Any = None, + mixed_types: bool = False, + exclude_types: List[int] = [], + r_differentiable: bool = True, + c_differentiable: bool = True, + old_impl=False, + # not used + seed: Optional[int] = None, + ): + # seed, uniform_seed are not included + if tot_ener_zero: + raise NotImplementedError("tot_ener_zero is not implemented") + if spin is not None: + raise NotImplementedError("spin is not implemented") + if use_aparam_as_mask: + raise NotImplementedError("use_aparam_as_mask is not implemented") + if layer_name is not None: + raise NotImplementedError("layer_name is not implemented") + + self.embedding_width = embedding_width +
self.r_differentiable = r_differentiable + self.c_differentiable = c_differentiable + super().__init__( + var_name=var_name, + ntypes=ntypes, + dim_descrpt=dim_descrpt, + neuron=neuron, + resnet_dt=resnet_dt, + numb_fparam=numb_fparam, + numb_aparam=numb_aparam, + rcond=rcond, + tot_ener_zero=tot_ener_zero, + trainable=trainable, + activation_function=activation_function, + precision=precision, + layer_name=layer_name, + use_aparam_as_mask=use_aparam_as_mask, + spin=spin, + mixed_types=mixed_types, + exclude_types=exclude_types, + ) + self.old_impl = False + + def _net_out_dim(self): + """Set the FittingNet output dim.""" + return self.embedding_width + + def serialize(self) -> dict: + data = super().serialize() + data["type"] = "dipole" + data["embedding_width"] = self.embedding_width + data["old_impl"] = self.old_impl + data["r_differentiable"] = self.r_differentiable + data["c_differentiable"] = self.c_differentiable + return data + + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + return super().deserialize(data) + + def output_def(self): + return FittingOutputDef( + [ + OutputVariableDef( + self.var_name, + [3], + reduciable=True, + r_differentiable=self.r_differentiable, + c_differentiable=self.c_differentiable, + ), + ] + ) + + def call( + self, + descriptor: np.ndarray, + atype: np.ndarray, + gr: Optional[np.ndarray] = None, + g2: Optional[np.ndarray] = None, + h2: Optional[np.ndarray] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + ) -> Dict[str, np.ndarray]: + """Calculate the fitting. + + Parameters + ---------- + descriptor + input descriptor. shape: nf x nloc x nd + atype + the atom type. shape: nf x nloc + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3 + g2 + The rotationally invariant pair-partical representation. 
+ shape: nf x nloc x nnei x ng + h2 + The rotationally equivariant pair-partical representation. + shape: nf x nloc x nnei x 3 + fparam + The frame parameter. shape: nf x nfp. nfp being `numb_fparam` + aparam + The atomic parameter. shape: nf x nloc x nap. nap being `numb_aparam` + + """ + nframes, nloc, _ = descriptor.shape + assert gr is not None, "Must provide the rotation matrix for dipole fitting." + # (nframes, nloc, m1) + out = self._call_common(descriptor, atype, gr, g2, h2, fparam, aparam)[ + self.var_name + ] + # (nframes * nloc, 1, m1) + out = out.reshape(-1, 1, self.embedding_width) + # (nframes * nloc, m1, 3) + gr = gr.reshape(nframes * nloc, -1, 3) + # (nframes, nloc, 3) + out = np.einsum("bim,bmj->bij", out, gr).squeeze(-2).reshape(nframes, nloc, 3) + return {self.var_name: out} diff --git a/deepmd/dpmodel/fitting/dos_fitting.py b/deepmd/dpmodel/fitting/dos_fitting.py new file mode 100644 index 0000000000..7c86d392b0 --- /dev/null +++ b/deepmd/dpmodel/fitting/dos_fitting.py @@ -0,0 +1,93 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +from typing import ( + TYPE_CHECKING, + List, + Optional, + Union, +) + +import numpy as np + +from deepmd.dpmodel.common import ( + DEFAULT_PRECISION, +) +from deepmd.dpmodel.fitting.invar_fitting import ( + InvarFitting, +) + +if TYPE_CHECKING: + from deepmd.dpmodel.fitting.general_fitting import ( + GeneralFitting, + ) + +from deepmd.utils.version import ( + check_version_compatibility, +) + + +@InvarFitting.register("dos") +class DOSFittingNet(InvarFitting): + def __init__( + self, + ntypes: int, + dim_descrpt: int, + numb_dos: int = 300, + neuron: List[int] = [120, 120, 120], + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + bias_dos: Optional[np.ndarray] = None, + rcond: Optional[float] = None, + trainable: Union[bool, List[bool]] = True, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + mixed_types: bool = False, + exclude_types: List[int] = [], 
+ # not used + seed: Optional[int] = None, + ): + if bias_dos is not None: + self.bias_dos = bias_dos + else: + self.bias_dos = np.zeros((ntypes, numb_dos), dtype=DEFAULT_PRECISION) + super().__init__( + var_name="dos", + ntypes=ntypes, + dim_descrpt=dim_descrpt, + dim_out=numb_dos, + neuron=neuron, + resnet_dt=resnet_dt, + bias_atom=bias_dos, + numb_fparam=numb_fparam, + numb_aparam=numb_aparam, + rcond=rcond, + trainable=trainable, + activation_function=activation_function, + precision=precision, + mixed_types=mixed_types, + exclude_types=exclude_types, + ) + + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + data["numb_dos"] = data.pop("dim_out") + data.pop("tot_ener_zero", None) + data.pop("var_name", None) + data.pop("layer_name", None) + data.pop("use_aparam_as_mask", None) + data.pop("spin", None) + data.pop("atom_ener", None) + return super().deserialize(data) + + def serialize(self) -> dict: + """Serialize the fitting to dict.""" + dd = { + **super().serialize(), + "type": "dos", + } + dd["@variables"]["bias_atom_e"] = self.bias_atom_e + + return dd diff --git a/deepmd/dpmodel/fitting/ener_fitting.py b/deepmd/dpmodel/fitting/ener_fitting.py new file mode 100644 index 0000000000..7f83f1e886 --- /dev/null +++ b/deepmd/dpmodel/fitting/ener_fitting.py @@ -0,0 +1,85 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +from typing import ( + TYPE_CHECKING, + Any, + List, + Optional, +) + +from deepmd.dpmodel.common import ( + DEFAULT_PRECISION, +) +from deepmd.dpmodel.fitting.invar_fitting import ( + InvarFitting, +) + +if TYPE_CHECKING: + from deepmd.dpmodel.fitting.general_fitting import ( + GeneralFitting, + ) +from deepmd.utils.version import ( + check_version_compatibility, +) + + +@InvarFitting.register("ener") +class EnergyFittingNet(InvarFitting): + def __init__( + self, + ntypes: int, + dim_descrpt: int, + neuron: List[int] = [120, 
120, 120], + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + rcond: Optional[float] = None, + tot_ener_zero: bool = False, + trainable: Optional[List[bool]] = None, + atom_ener: Optional[List[float]] = None, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + layer_name: Optional[List[Optional[str]]] = None, + use_aparam_as_mask: bool = False, + spin: Any = None, + mixed_types: bool = False, + exclude_types: List[int] = [], + # not used + seed: Optional[int] = None, + ): + super().__init__( + var_name="energy", + ntypes=ntypes, + dim_descrpt=dim_descrpt, + dim_out=1, + neuron=neuron, + resnet_dt=resnet_dt, + numb_fparam=numb_fparam, + numb_aparam=numb_aparam, + rcond=rcond, + tot_ener_zero=tot_ener_zero, + trainable=trainable, + atom_ener=atom_ener, + activation_function=activation_function, + precision=precision, + layer_name=layer_name, + use_aparam_as_mask=use_aparam_as_mask, + spin=spin, + mixed_types=mixed_types, + exclude_types=exclude_types, + ) + + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("var_name") + data.pop("dim_out") + return super().deserialize(data) + + def serialize(self) -> dict: + """Serialize the fitting to dict.""" + return { + **super().serialize(), + "type": "ener", + } diff --git a/deepmd/dpmodel/fitting/general_fitting.py b/deepmd/dpmodel/fitting/general_fitting.py new file mode 100644 index 0000000000..5681f5bf0c --- /dev/null +++ b/deepmd/dpmodel/fitting/general_fitting.py @@ -0,0 +1,388 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +from abc import ( + abstractmethod, +) +from typing import ( + Any, + Dict, + List, + Optional, +) + +import numpy as np + +from deepmd.dpmodel import ( + DEFAULT_PRECISION, + NativeOP, +) +from deepmd.dpmodel.utils import ( + AtomExcludeMask, + FittingNet, + NetworkCollection, +) + +from .base_fitting import ( + 
BaseFitting, +) + + +class GeneralFitting(NativeOP, BaseFitting): + r"""General fitting class. + + Parameters + ---------- + var_name + The name of the output variable. + ntypes + The number of atom types. + dim_descrpt + The dimension of the input descriptor. + neuron + Number of neurons :math:`N` in each hidden layer of the fitting net + bias_atom_e + Average enery per atom for each element. + resnet_dt + Time-step `dt` in the resnet construction: + :math:`y = x + dt * \phi (Wx + b)` + numb_fparam + Number of frame parameter + numb_aparam + Number of atomic parameter + rcond + The condition number for the regression of atomic energy. + tot_ener_zero + Force the total energy to zero. Useful for the charge fitting. + trainable + If the weights of fitting net are trainable. + Suppose that we have :math:`N_l` hidden layers in the fitting net, + this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable. + activation_function + The activation function :math:`\boldsymbol{\phi}` in the embedding net. Supported options are |ACTIVATION_FN| + precision + The precision of the embedding net parameters. Supported options are |PRECISION| + layer_name : list[Optional[str]], optional + The name of the each layer. If two layers, either in the same fitting or different fittings, + have the same name, they will share the same neural network parameters. + use_aparam_as_mask: bool, optional + If True, the atomic parameters will be used as a mask that determines the atom is real/virtual. + And the aparam will not be used as the atomic parameters for embedding. + mixed_types + If true, use a uniform fitting net for all atom types, otherwise use + different fitting nets for different atom types. + exclude_types: List[int] + Atomic contributions of the excluded atom types are set zero. + remove_vaccum_contribution: List[bool], optional + Remove vaccum contribution before the bias is added. The list assigned each + type. 
For `mixed_types` provide `[True]`, otherwise it should be a list of the same + length as `ntypes` signaling if or not removing the vaccum contribution for the atom types in the list. + """ + + def __init__( + self, + var_name: str, + ntypes: int, + dim_descrpt: int, + neuron: List[int] = [120, 120, 120], + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + bias_atom_e: Optional[np.ndarray] = None, + rcond: Optional[float] = None, + tot_ener_zero: bool = False, + trainable: Optional[List[bool]] = None, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + layer_name: Optional[List[Optional[str]]] = None, + use_aparam_as_mask: bool = False, + spin: Any = None, + mixed_types: bool = True, + exclude_types: List[int] = [], + remove_vaccum_contribution: Optional[List[bool]] = None, + ): + self.var_name = var_name + self.ntypes = ntypes + self.dim_descrpt = dim_descrpt + self.neuron = neuron + self.resnet_dt = resnet_dt + self.numb_fparam = numb_fparam + self.numb_aparam = numb_aparam + self.rcond = rcond + self.tot_ener_zero = tot_ener_zero + self.trainable = trainable + if self.trainable is None: + self.trainable = [True for ii in range(len(self.neuron) + 1)] + if isinstance(self.trainable, bool): + self.trainable = [self.trainable] * (len(self.neuron) + 1) + self.activation_function = activation_function + self.precision = precision + self.layer_name = layer_name + self.use_aparam_as_mask = use_aparam_as_mask + self.spin = spin + self.mixed_types = mixed_types + # order matters, should be place after the assignment of ntypes + self.reinit_exclude(exclude_types) + if self.spin is not None: + raise NotImplementedError("spin is not supported") + self.remove_vaccum_contribution = remove_vaccum_contribution + + net_dim_out = self._net_out_dim() + # init constants + if bias_atom_e is None: + self.bias_atom_e = np.zeros([self.ntypes, net_dim_out]) + else: + assert bias_atom_e.shape == (self.ntypes, net_dim_out) + self.bias_atom_e 
= bias_atom_e + if self.numb_fparam > 0: + self.fparam_avg = np.zeros(self.numb_fparam) + self.fparam_inv_std = np.ones(self.numb_fparam) + else: + self.fparam_avg, self.fparam_inv_std = None, None + if self.numb_aparam > 0: + self.aparam_avg = np.zeros(self.numb_aparam) + self.aparam_inv_std = np.ones(self.numb_aparam) + else: + self.aparam_avg, self.aparam_inv_std = None, None + # init networks + in_dim = self.dim_descrpt + self.numb_fparam + self.numb_aparam + self.nets = NetworkCollection( + 1 if not self.mixed_types else 0, + self.ntypes, + network_type="fitting_network", + networks=[ + FittingNet( + in_dim, + net_dim_out, + self.neuron, + self.activation_function, + self.resnet_dt, + self.precision, + bias_out=True, + ) + for ii in range(self.ntypes if not self.mixed_types else 1) + ], + ) + + @abstractmethod + def _net_out_dim(self): + """Set the FittingNet output dim.""" + pass + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + return self.numb_fparam + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return self.numb_aparam + + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. 
+ """ + return [ii for ii in range(self.ntypes) if ii not in self.exclude_types] + + def __setitem__(self, key, value): + if key in ["bias_atom_e"]: + self.bias_atom_e = value + elif key in ["fparam_avg"]: + self.fparam_avg = value + elif key in ["fparam_inv_std"]: + self.fparam_inv_std = value + elif key in ["aparam_avg"]: + self.aparam_avg = value + elif key in ["aparam_inv_std"]: + self.aparam_inv_std = value + elif key in ["scale"]: + self.scale = value + else: + raise KeyError(key) + + def __getitem__(self, key): + if key in ["bias_atom_e"]: + return self.bias_atom_e + elif key in ["fparam_avg"]: + return self.fparam_avg + elif key in ["fparam_inv_std"]: + return self.fparam_inv_std + elif key in ["aparam_avg"]: + return self.aparam_avg + elif key in ["aparam_inv_std"]: + return self.aparam_inv_std + elif key in ["scale"]: + return self.scale + else: + raise KeyError(key) + + def reinit_exclude( + self, + exclude_types: List[int] = [], + ): + self.exclude_types = exclude_types + self.emask = AtomExcludeMask(self.ntypes, self.exclude_types) + + def serialize(self) -> dict: + """Serialize the fitting to dict.""" + return { + "@class": "Fitting", + "@version": 1, + "var_name": self.var_name, + "ntypes": self.ntypes, + "dim_descrpt": self.dim_descrpt, + "neuron": self.neuron, + "resnet_dt": self.resnet_dt, + "numb_fparam": self.numb_fparam, + "numb_aparam": self.numb_aparam, + "rcond": self.rcond, + "activation_function": self.activation_function, + "precision": self.precision, + "mixed_types": self.mixed_types, + "exclude_types": self.exclude_types, + "nets": self.nets.serialize(), + "@variables": { + "bias_atom_e": self.bias_atom_e, + "fparam_avg": self.fparam_avg, + "fparam_inv_std": self.fparam_inv_std, + "aparam_avg": self.aparam_avg, + "aparam_inv_std": self.aparam_inv_std, + }, + # not supported + "tot_ener_zero": self.tot_ener_zero, + "trainable": self.trainable, + "layer_name": self.layer_name, + "use_aparam_as_mask": self.use_aparam_as_mask, + "spin": 
self.spin, + } + + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + data.pop("@class") + data.pop("type") + variables = data.pop("@variables") + nets = data.pop("nets") + obj = cls(**data) + for kk in variables.keys(): + obj[kk] = variables[kk] + obj.nets = NetworkCollection.deserialize(nets) + return obj + + def _call_common( + self, + descriptor: np.ndarray, + atype: np.ndarray, + gr: Optional[np.ndarray] = None, + g2: Optional[np.ndarray] = None, + h2: Optional[np.ndarray] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + ) -> Dict[str, np.ndarray]: + """Calculate the fitting. + + Parameters + ---------- + descriptor + input descriptor. shape: nf x nloc x nd + atype + the atom type. shape: nf x nloc + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3 + g2 + The rotationally invariant pair-partical representation. + shape: nf x nloc x nnei x ng + h2 + The rotationally equivariant pair-partical representation. + shape: nf x nloc x nnei x 3 + fparam + The frame parameter. shape: nf x nfp. nfp being `numb_fparam` + aparam + The atomic parameter. shape: nf x nloc x nap. nap being `numb_aparam` + + """ + nf, nloc, nd = descriptor.shape + net_dim_out = self._net_out_dim() + # check input dim + if nd != self.dim_descrpt: + raise ValueError( + "get an input descriptor of dim {nd}," + "which is not consistent with {self.dim_descrpt}." + ) + xx = descriptor + if self.remove_vaccum_contribution is not None: + # TODO: comput the input for vaccum when setting remove_vaccum_contribution + # Idealy, the input for vaccum should be computed; + # we consider it as always zero for convenience. + # Needs a compute_input_stats for vaccum passed from the + # descriptor. 
+ xx_zeros = np.zeros_like(xx) + else: + xx_zeros = None + # check fparam dim, concate to input descriptor + if self.numb_fparam > 0: + assert fparam is not None, "fparam should not be None" + if fparam.shape[-1] != self.numb_fparam: + raise ValueError( + "get an input fparam of dim {fparam.shape[-1]}, ", + "which is not consistent with {self.numb_fparam}.", + ) + fparam = (fparam - self.fparam_avg) * self.fparam_inv_std + fparam = np.tile(fparam.reshape([nf, 1, self.numb_fparam]), [1, nloc, 1]) + xx = np.concatenate( + [xx, fparam], + axis=-1, + ) + if xx_zeros is not None: + xx_zeros = np.concatenate( + [xx_zeros, fparam], + axis=-1, + ) + # check aparam dim, concate to input descriptor + if self.numb_aparam > 0: + assert aparam is not None, "aparam should not be None" + if aparam.shape[-1] != self.numb_aparam: + raise ValueError( + "get an input aparam of dim {aparam.shape[-1]}, ", + "which is not consistent with {self.numb_aparam}.", + ) + aparam = aparam.reshape([nf, nloc, self.numb_aparam]) + aparam = (aparam - self.aparam_avg) * self.aparam_inv_std + xx = np.concatenate( + [xx, aparam], + axis=-1, + ) + if xx_zeros is not None: + xx_zeros = np.concatenate( + [xx_zeros, aparam], + axis=-1, + ) + + # calcualte the prediction + if not self.mixed_types: + outs = np.zeros([nf, nloc, net_dim_out]) + for type_i in range(self.ntypes): + mask = np.tile( + (atype == type_i).reshape([nf, nloc, 1]), [1, 1, net_dim_out] + ) + atom_property = self.nets[(type_i,)](xx) + if self.remove_vaccum_contribution is not None and not ( + len(self.remove_vaccum_contribution) > type_i + and not self.remove_vaccum_contribution[type_i] + ): + assert xx_zeros is not None + atom_property -= self.nets[(type_i,)](xx_zeros) + atom_property = atom_property + self.bias_atom_e[type_i] + atom_property = atom_property * mask + outs = outs + atom_property # Shape is [nframes, natoms[0], 1] + else: + outs = self.nets[()](xx) + self.bias_atom_e[atype] + if xx_zeros is not None: + outs -= 
self.nets[()](xx_zeros) + # nf x nloc + exclude_mask = self.emask.build_type_exclude_mask(atype) + # nf x nloc x nod + outs = outs * exclude_mask[:, :, None] + return {self.var_name: outs} diff --git a/deepmd/dpmodel/fitting/invar_fitting.py b/deepmd/dpmodel/fitting/invar_fitting.py new file mode 100644 index 0000000000..9bf1731830 --- /dev/null +++ b/deepmd/dpmodel/fitting/invar_fitting.py @@ -0,0 +1,240 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +from typing import ( + Any, + Dict, + List, + Optional, +) + +import numpy as np + +from deepmd.dpmodel import ( + DEFAULT_PRECISION, +) +from deepmd.dpmodel.output_def import ( + FittingOutputDef, + OutputVariableDef, + fitting_check_output, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .general_fitting import ( + GeneralFitting, +) + + +@GeneralFitting.register("invar") +@fitting_check_output +class InvarFitting(GeneralFitting): + r"""Fitting the energy (or a rotationally invariant porperty of `dim_out`) of the system. The force and the virial can also be trained. + + Lets take the energy fitting task as an example. + The potential energy :math:`E` is a fitting network function of the descriptor :math:`\mathcal{D}`: + + .. math:: + E(\mathcal{D}) = \mathcal{L}^{(n)} \circ \mathcal{L}^{(n-1)} + \circ \cdots \circ \mathcal{L}^{(1)} \circ \mathcal{L}^{(0)} + + The first :math:`n` hidden layers :math:`\mathcal{L}^{(0)}, \cdots, \mathcal{L}^{(n-1)}` are given by + + .. math:: + \mathbf{y}=\mathcal{L}(\mathbf{x};\mathbf{w},\mathbf{b})= + \boldsymbol{\phi}(\mathbf{x}^T\mathbf{w}+\mathbf{b}) + + where :math:`\mathbf{x} \in \mathbb{R}^{N_1}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}^{N_2}` + is the output vector. :math:`\mathbf{w} \in \mathbb{R}^{N_1 \times N_2}` and + :math:`\mathbf{b} \in \mathbb{R}^{N_2}` are weights and biases, respectively, + both of which are trainable if `trainable[i]` is `True`. :math:`\boldsymbol{\phi}` + is the activation function. 
+ + The output layer :math:`\mathcal{L}^{(n)}` is given by + + .. math:: + \mathbf{y}=\mathcal{L}^{(n)}(\mathbf{x};\mathbf{w},\mathbf{b})= + \mathbf{x}^T\mathbf{w}+\mathbf{b} + + where :math:`\mathbf{x} \in \mathbb{R}^{N_{n-1}}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}` + is the output scalar. :math:`\mathbf{w} \in \mathbb{R}^{N_{n-1}}` and + :math:`\mathbf{b} \in \mathbb{R}` are weights and bias, respectively, + both of which are trainable if `trainable[n]` is `True`. + + Parameters + ---------- + var_name + The name of the output variable. + ntypes + The number of atom types. + dim_descrpt + The dimension of the input descriptor. + dim_out + The dimension of the output fit property. + neuron + Number of neurons :math:`N` in each hidden layer of the fitting net + resnet_dt + Time-step `dt` in the resnet construction: + :math:`y = x + dt * \phi (Wx + b)` + numb_fparam + Number of frame parameter + numb_aparam + Number of atomic parameter + rcond + The condition number for the regression of atomic energy. + bias_atom + Bias for each element. + tot_ener_zero + Force the total energy to zero. Useful for the charge fitting. + trainable + If the weights of fitting net are trainable. + Suppose that we have :math:`N_l` hidden layers in the fitting net, + this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable. + atom_ener + Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descrptor should be set. + activation_function + The activation function :math:`\boldsymbol{\phi}` in the embedding net. Supported options are |ACTIVATION_FN| + precision + The precision of the embedding net parameters. Supported options are |PRECISION| + layer_name : list[Optional[str]], optional + The name of the each layer. If two layers, either in the same fitting or different fittings, + have the same name, they will share the same neural network parameters. 
+ use_aparam_as_mask: bool, optional + If True, the atomic parameters will be used as a mask that determines the atom is real/virtual. + And the aparam will not be used as the atomic parameters for embedding. + mixed_types + If false, different atomic types uses different fitting net, otherwise different atom types share the same fitting net. + exclude_types: List[int] + Atomic contributions of the excluded atom types are set zero. + + """ + + def __init__( + self, + var_name: str, + ntypes: int, + dim_descrpt: int, + dim_out: int, + neuron: List[int] = [120, 120, 120], + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + bias_atom: Optional[np.ndarray] = None, + rcond: Optional[float] = None, + tot_ener_zero: bool = False, + trainable: Optional[List[bool]] = None, + atom_ener: Optional[List[float]] = None, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + layer_name: Optional[List[Optional[str]]] = None, + use_aparam_as_mask: bool = False, + spin: Any = None, + mixed_types: bool = True, + exclude_types: List[int] = [], + ): + # seed, uniform_seed are not included + if tot_ener_zero: + raise NotImplementedError("tot_ener_zero is not implemented") + if spin is not None: + raise NotImplementedError("spin is not implemented") + if use_aparam_as_mask: + raise NotImplementedError("use_aparam_as_mask is not implemented") + if use_aparam_as_mask: + raise NotImplementedError("use_aparam_as_mask is not implemented") + if layer_name is not None: + raise NotImplementedError("layer_name is not implemented") + + self.dim_out = dim_out + self.atom_ener = atom_ener + super().__init__( + var_name=var_name, + ntypes=ntypes, + dim_descrpt=dim_descrpt, + neuron=neuron, + resnet_dt=resnet_dt, + numb_fparam=numb_fparam, + numb_aparam=numb_aparam, + rcond=rcond, + bias_atom_e=bias_atom, + tot_ener_zero=tot_ener_zero, + trainable=trainable, + activation_function=activation_function, + precision=precision, + layer_name=layer_name, + 
use_aparam_as_mask=use_aparam_as_mask, + spin=spin, + mixed_types=mixed_types, + exclude_types=exclude_types, + remove_vaccum_contribution=None + if atom_ener is None or len([x for x in atom_ener if x is not None]) == 0 + else [x is not None for x in atom_ener], + ) + + def serialize(self) -> dict: + data = super().serialize() + data["type"] = "invar" + data["dim_out"] = self.dim_out + data["atom_ener"] = self.atom_ener + return data + + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + return super().deserialize(data) + + def _net_out_dim(self): + """Set the FittingNet output dim.""" + return self.dim_out + + def compute_output_stats(self, merged): + """Update the output bias for fitting net.""" + raise NotImplementedError + + def output_def(self): + return FittingOutputDef( + [ + OutputVariableDef( + self.var_name, + [self.dim_out], + reduciable=True, + r_differentiable=True, + c_differentiable=True, + ), + ] + ) + + def call( + self, + descriptor: np.ndarray, + atype: np.ndarray, + gr: Optional[np.ndarray] = None, + g2: Optional[np.ndarray] = None, + h2: Optional[np.ndarray] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + ) -> Dict[str, np.ndarray]: + """Calculate the fitting. + + Parameters + ---------- + descriptor + input descriptor. shape: nf x nloc x nd + atype + the atom type. shape: nf x nloc + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3 + g2 + The rotationally invariant pair-partical representation. + shape: nf x nloc x nnei x ng + h2 + The rotationally equivariant pair-partical representation. + shape: nf x nloc x nnei x 3 + fparam + The frame parameter. shape: nf x nfp. nfp being `numb_fparam` + aparam + The atomic parameter. shape: nf x nloc x nap. 
nap being `numb_aparam` + + """ + return self._call_common(descriptor, atype, gr, g2, h2, fparam, aparam) diff --git a/deepmd/dpmodel/fitting/make_base_fitting.py b/deepmd/dpmodel/fitting/make_base_fitting.py new file mode 100644 index 0000000000..c7341798c3 --- /dev/null +++ b/deepmd/dpmodel/fitting/make_base_fitting.py @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + Dict, + Optional, +) + +from deepmd.common import ( + j_get_type, +) +from deepmd.dpmodel.output_def import ( + FittingOutputDef, +) +from deepmd.utils.plugin import ( + PluginVariant, + make_plugin_registry, +) + + +def make_base_fitting( + t_tensor, + fwd_method_name: str = "forward", +): + """Make the base class for the fitting. + + Parameters + ---------- + t_tensor + The type of the tensor. used in the type hint. + fwd_method_name + Name of the forward method. For dpmodels, it should be "call". + For torch models, it should be "forward". 
+ + """ + + class BF(ABC, PluginVariant, make_plugin_registry("fitting")): + """Base fitting provides the interfaces of fitting net.""" + + def __new__(cls, *args, **kwargs): + if cls is BF: + cls = cls.get_class_by_type(j_get_type(kwargs, cls.__name__)) + return super().__new__(cls) + + @abstractmethod + def output_def(self) -> FittingOutputDef: + """Returns the output def of the fitting net.""" + pass + + @abstractmethod + def fwd( + self, + descriptor: t_tensor, + atype: t_tensor, + gr: Optional[t_tensor] = None, + g2: Optional[t_tensor] = None, + h2: Optional[t_tensor] = None, + fparam: Optional[t_tensor] = None, + aparam: Optional[t_tensor] = None, + ) -> Dict[str, t_tensor]: + """Calculate fitting.""" + pass + + def compute_output_stats(self, merged): + """Update the output bias for fitting net.""" + raise NotImplementedError + + @abstractmethod + def serialize(self) -> dict: + """Serialize the obj to dict.""" + pass + + @classmethod + def deserialize(cls, data: dict) -> "BF": + """Deserialize the fitting. 
+ + Parameters + ---------- + data : dict + The serialized data + + Returns + ------- + BF + The deserialized fitting + """ + if cls is BF: + return BF.get_class_by_type(data["type"]).deserialize(data) + raise NotImplementedError("Not implemented in class %s" % cls.__name__) + + setattr(BF, fwd_method_name, BF.fwd) + delattr(BF, "fwd") + + return BF diff --git a/deepmd/dpmodel/fitting/polarizability_fitting.py b/deepmd/dpmodel/fitting/polarizability_fitting.py new file mode 100644 index 0000000000..5d75037137 --- /dev/null +++ b/deepmd/dpmodel/fitting/polarizability_fitting.py @@ -0,0 +1,284 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +from typing import ( + Any, + Dict, + List, + Optional, +) + +import numpy as np + +from deepmd.common import ( + GLOBAL_NP_FLOAT_PRECISION, +) +from deepmd.dpmodel import ( + DEFAULT_PRECISION, +) +from deepmd.dpmodel.fitting.base_fitting import ( + BaseFitting, +) +from deepmd.dpmodel.output_def import ( + FittingOutputDef, + OutputVariableDef, + fitting_check_output, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .general_fitting import ( + GeneralFitting, +) + + +@BaseFitting.register("polar") +@fitting_check_output +class PolarFitting(GeneralFitting): + r"""Fitting rotationally equivariant polarizability of the system. + + Parameters + ---------- + var_name + The name of the output variable. + ntypes + The number of atom types. + dim_descrpt + The dimension of the input descriptor. + embedding_width : int + The dimension of rotation matrix, m1. + neuron + Number of neurons :math:`N` in each hidden layer of the fitting net + resnet_dt + Time-step `dt` in the resnet construction: + :math:`y = x + dt * \phi (Wx + b)` + numb_fparam + Number of frame parameter + numb_aparam + Number of atomic parameter + rcond + The condition number for the regression of atomic energy. + tot_ener_zero + Force the total energy to zero. Useful for the charge fitting. 
+ trainable + If the weights of fitting net are trainable. + Suppose that we have :math:`N_l` hidden layers in the fitting net, + this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable. + activation_function + The activation function :math:`\boldsymbol{\phi}` in the embedding net. Supported options are |ACTIVATION_FN| + precision + The precision of the embedding net parameters. Supported options are |PRECISION| + layer_name : list[Optional[str]], optional + The name of the each layer. If two layers, either in the same fitting or different fittings, + have the same name, they will share the same neural network parameters. + use_aparam_as_mask: bool, optional + If True, the atomic parameters will be used as a mask that determines the atom is real/virtual. + And the aparam will not be used as the atomic parameters for embedding. + mixed_types + If true, use a uniform fitting net for all atom types, otherwise use + different fitting nets for different atom types. + fit_diag : bool + Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to + normal polarizability matrix by contracting with the rotation matrix. + scale : List[float] + The output of the fitting net (polarizability matrix) for type i atom will be scaled by scale[i] + shift_diag : bool + Whether to shift the diagonal part of the polarizability matrix. The shift operation is carried out after scale. 
+ """ + + def __init__( + self, + var_name: str, + ntypes: int, + dim_descrpt: int, + embedding_width: int, + neuron: List[int] = [120, 120, 120], + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + rcond: Optional[float] = None, + tot_ener_zero: bool = False, + trainable: Optional[List[bool]] = None, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + layer_name: Optional[List[Optional[str]]] = None, + use_aparam_as_mask: bool = False, + spin: Any = None, + mixed_types: bool = False, + exclude_types: List[int] = [], + old_impl: bool = False, + fit_diag: bool = True, + scale: Optional[List[float]] = None, + shift_diag: bool = True, + # not used + seed: Optional[int] = None, + ): + # seed, uniform_seed are not included + if tot_ener_zero: + raise NotImplementedError("tot_ener_zero is not implemented") + if spin is not None: + raise NotImplementedError("spin is not implemented") + if use_aparam_as_mask: + raise NotImplementedError("use_aparam_as_mask is not implemented") + if layer_name is not None: + raise NotImplementedError("layer_name is not implemented") + + self.embedding_width = embedding_width + self.fit_diag = fit_diag + self.scale = scale + if self.scale is None: + self.scale = [1.0 for _ in range(ntypes)] + else: + if isinstance(self.scale, list): + assert ( + len(self.scale) == ntypes + ), "Scale should be a list of length ntypes." + elif isinstance(self.scale, float): + self.scale = [self.scale for _ in range(ntypes)] + else: + raise ValueError( + "Scale must be a list of float of length ntypes or a float." 
+ ) + self.scale = np.array(self.scale, dtype=GLOBAL_NP_FLOAT_PRECISION).reshape( + ntypes, 1 + ) + self.shift_diag = shift_diag + self.constant_matrix = np.zeros(ntypes, dtype=GLOBAL_NP_FLOAT_PRECISION) + super().__init__( + var_name=var_name, + ntypes=ntypes, + dim_descrpt=dim_descrpt, + neuron=neuron, + resnet_dt=resnet_dt, + numb_fparam=numb_fparam, + numb_aparam=numb_aparam, + rcond=rcond, + tot_ener_zero=tot_ener_zero, + trainable=trainable, + activation_function=activation_function, + precision=precision, + layer_name=layer_name, + use_aparam_as_mask=use_aparam_as_mask, + spin=spin, + mixed_types=mixed_types, + exclude_types=exclude_types, + ) + self.old_impl = False + + def _net_out_dim(self): + """Set the FittingNet output dim.""" + return ( + self.embedding_width + if self.fit_diag + else self.embedding_width * self.embedding_width + ) + + def __setitem__(self, key, value): + if key in ["constant_matrix"]: + self.constant_matrix = value + else: + super().__setitem__(key, value) + + def __getitem__(self, key): + if key in ["constant_matrix"]: + return self.constant_matrix + else: + return super().__getitem__(key) + + def serialize(self) -> dict: + data = super().serialize() + data["type"] = "polar" + data["@version"] = 2 + data["embedding_width"] = self.embedding_width + data["old_impl"] = self.old_impl + data["fit_diag"] = self.fit_diag + data["shift_diag"] = self.shift_diag + data["@variables"]["scale"] = self.scale + data["@variables"]["constant_matrix"] = self.constant_matrix + return data + + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 2, 1) + return super().deserialize(data) + + def output_def(self): + return FittingOutputDef( + [ + OutputVariableDef( + self.var_name, + [3, 3], + reduciable=True, + r_differentiable=False, + c_differentiable=False, + ), + ] + ) + + def call( + self, + descriptor: np.ndarray, + atype: np.ndarray, + gr: 
Optional[np.ndarray] = None, + g2: Optional[np.ndarray] = None, + h2: Optional[np.ndarray] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + ) -> Dict[str, np.ndarray]: + """Calculate the fitting. + + Parameters + ---------- + descriptor + input descriptor. shape: nf x nloc x nd + atype + the atom type. shape: nf x nloc + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3 + g2 + The rotationally invariant pair-partical representation. + shape: nf x nloc x nnei x ng + h2 + The rotationally equivariant pair-partical representation. + shape: nf x nloc x nnei x 3 + fparam + The frame parameter. shape: nf x nfp. nfp being `numb_fparam` + aparam + The atomic parameter. shape: nf x nloc x nap. nap being `numb_aparam` + + """ + nframes, nloc, _ = descriptor.shape + assert ( + gr is not None + ), "Must provide the rotation matrix for polarizability fitting." + # (nframes, nloc, _net_out_dim) + out = self._call_common(descriptor, atype, gr, g2, h2, fparam, aparam)[ + self.var_name + ] + out = out * self.scale[atype] + # (nframes * nloc, m1, 3) + gr = gr.reshape(nframes * nloc, -1, 3) + + if self.fit_diag: + out = out.reshape(-1, self.embedding_width) + out = np.einsum("ij,ijk->ijk", out, gr) + else: + out = out.reshape(-1, self.embedding_width, self.embedding_width) + out = (out + np.transpose(out, axes=(0, 2, 1))) / 2 + out = np.einsum("bim,bmj->bij", out, gr) # (nframes * nloc, m1, 3) + out = np.einsum( + "bim,bmj->bij", np.transpose(gr, axes=(0, 2, 1)), out + ) # (nframes * nloc, 3, 3) + out = out.reshape(nframes, nloc, 3, 3) + if self.shift_diag: + bias = self.constant_matrix[atype] + # (nframes, nloc, 1) + bias = np.expand_dims(bias, axis=-1) * self.scale[atype] + eye = np.eye(3) + eye = np.tile(eye, (nframes, nloc, 1, 1)) + # (nframes, nloc, 3, 3) + bias = np.expand_dims(bias, axis=-1) * eye + out = out + bias + return {self.var_name: out} diff --git 
a/deepmd/train/__init__.py b/deepmd/dpmodel/infer/__init__.py similarity index 100% rename from deepmd/train/__init__.py rename to deepmd/dpmodel/infer/__init__.py diff --git a/deepmd/dpmodel/infer/deep_eval.py b/deepmd/dpmodel/infer/deep_eval.py new file mode 100644 index 0000000000..22267c895a --- /dev/null +++ b/deepmd/dpmodel/infer/deep_eval.py @@ -0,0 +1,372 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + List, + Optional, + Tuple, + Type, + Union, +) + +import numpy as np + +from deepmd.dpmodel.model.base_model import ( + BaseModel, +) +from deepmd.dpmodel.output_def import ( + ModelOutputDef, + OutputVariableCategory, + OutputVariableDef, +) +from deepmd.dpmodel.utils.batch_size import ( + AutoBatchSize, +) +from deepmd.dpmodel.utils.network import ( + load_dp_model, +) +from deepmd.infer.deep_dipole import ( + DeepDipole, +) +from deepmd.infer.deep_dos import ( + DeepDOS, +) +from deepmd.infer.deep_eval import DeepEval as DeepEvalWrapper +from deepmd.infer.deep_eval import ( + DeepEvalBackend, +) +from deepmd.infer.deep_polar import ( + DeepPolar, +) +from deepmd.infer.deep_pot import ( + DeepPot, +) +from deepmd.infer.deep_wfc import ( + DeepWFC, +) + +if TYPE_CHECKING: + import ase.neighborlist + + +class DeepEval(DeepEvalBackend): + """NumPy backend implementaion of DeepEval. + + Parameters + ---------- + model_file : Path + The name of the frozen model file. + output_def : ModelOutputDef + The output definition of the model. + *args : list + Positional arguments. + auto_batch_size : bool or int or AutomaticBatchSize, default: False + If True, automatic batch size will be used. If int, it will be used + as the initial batch size. + neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional + The ASE neighbor list class to produce the neighbor list. If None, the + neighbor list will be built natively in the model. + **kwargs : dict + Keyword arguments. 
+ """ + + def __init__( + self, + model_file: str, + output_def: ModelOutputDef, + *args: List[Any], + auto_batch_size: Union[bool, int, AutoBatchSize] = True, + neighbor_list: Optional["ase.neighborlist.NewPrimitiveNeighborList"] = None, + **kwargs: Dict[str, Any], + ): + self.output_def = output_def + self.model_path = model_file + + model_data = load_dp_model(model_file) + self.dp = BaseModel.deserialize(model_data["model"]) + self.rcut = self.dp.get_rcut() + self.type_map = self.dp.get_type_map() + if isinstance(auto_batch_size, bool): + if auto_batch_size: + self.auto_batch_size = AutoBatchSize() + else: + self.auto_batch_size = None + elif isinstance(auto_batch_size, int): + self.auto_batch_size = AutoBatchSize(auto_batch_size) + elif isinstance(auto_batch_size, AutoBatchSize): + self.auto_batch_size = auto_batch_size + else: + raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize") + + def get_rcut(self) -> float: + """Get the cutoff radius of this model.""" + return self.rcut + + def get_ntypes(self) -> int: + """Get the number of atom types of this model.""" + return len(self.type_map) + + def get_type_map(self) -> List[str]: + """Get the type map (element name of the atom types) of this model.""" + return self.type_map + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this DP.""" + return self.dp.get_dim_fparam() + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this DP.""" + return self.dp.get_dim_aparam() + + @property + def model_type(self) -> Type["DeepEvalWrapper"]: + """The evaluator of the model type.""" + model_output_type = self.dp.model_output_type() + if "energy" in model_output_type: + return DeepPot + elif "dos" in model_output_type: + return DeepDOS + elif "dipole" in model_output_type: + return DeepDipole + elif "polar" in model_output_type: + return DeepPolar + elif "wfc" in model_output_type: + return DeepWFC + else: + raise
RuntimeError("Unknown model type") + + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + return self.dp.get_sel_type() + + def get_numb_dos(self) -> int: + """Get the number of DOS.""" + return 0 + + def get_has_efield(self): + """Check if the model has efield.""" + return False + + def get_ntypes_spin(self): + """Get the number of spin atom types of this model.""" + return 0 + + def eval( + self, + coords: np.ndarray, + cells: np.ndarray, + atom_types: np.ndarray, + atomic: bool = False, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + **kwargs: Dict[str, Any], + ) -> Dict[str, np.ndarray]: + """Evaluate the energy, force and virial by using this DP. + + Parameters + ---------- + coords + The coordinates of atoms. + The array should be of size nframes x natoms x 3 + cells + The cell of the region. + If None then non-PBC is assumed, otherwise using PBC. + The array should be of size nframes x 9 + atom_types + The atom types + The list should contain natoms ints + atomic + Calculate the atomic energy and virial + fparam + The frame parameter. + The array can be of size : + - nframes x dim_fparam. + - dim_fparam. Then all frames are assumed to be provided with the same fparam. + aparam + The atomic parameter + The array can be of size : + - nframes x natoms x dim_aparam. + - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam. + - dim_aparam. Then all frames and atoms are provided with the same aparam. + **kwargs + Other parameters + + Returns + ------- + output_dict : dict + The output of the evaluation. The keys are the names of the output + variables, and the values are the corresponding output arrays. 
+ """ + if fparam is not None or aparam is not None: + raise NotImplementedError + # convert all of the input to numpy array + atom_types = np.array(atom_types, dtype=np.int32) + coords = np.array(coords) + if cells is not None: + cells = np.array(cells) + natoms, numb_test = self._get_natoms_and_nframes( + coords, atom_types, len(atom_types.shape) > 1 + ) + request_defs = self._get_request_defs(atomic) + out = self._eval_func(self._eval_model, numb_test, natoms)( + coords, cells, atom_types, request_defs + ) + return dict( + zip( + [x.name for x in request_defs], + out, + ) + ) + + def _get_request_defs(self, atomic: bool) -> List[OutputVariableDef]: + """Get the requested output definitions. + + When atomic is True, all output_def are requested. + When atomic is False, only energy (tensor), force, and virial + are requested. + + Parameters + ---------- + atomic : bool + Whether to request the atomic output. + + Returns + ------- + list[OutputVariableDef] + The requested output definitions. + """ + if atomic: + return list(self.output_def.var_defs.values()) + else: + return [ + x + for x in self.output_def.var_defs.values() + if x.category + in ( + OutputVariableCategory.REDU, + OutputVariableCategory.DERV_R, + OutputVariableCategory.DERV_C_REDU, + ) + ] + + def _eval_func(self, inner_func: Callable, numb_test: int, natoms: int) -> Callable: + """Wrapper method with auto batch size. 
+ + Parameters + ---------- + inner_func : Callable + the method to be wrapped + numb_test : int + number of tests + natoms : int + number of atoms + + Returns + ------- + Callable + the wrapper + """ + if self.auto_batch_size is not None: + + def eval_func(*args, **kwargs): + return self.auto_batch_size.execute_all( + inner_func, numb_test, natoms, *args, **kwargs + ) + + else: + eval_func = inner_func + return eval_func + + def _get_natoms_and_nframes( + self, + coords: np.ndarray, + atom_types: np.ndarray, + mixed_type: bool = False, + ) -> Tuple[int, int]: + if mixed_type: + natoms = len(atom_types[0]) + else: + natoms = len(atom_types) + if natoms == 0: + assert coords.size == 0 + else: + coords = np.reshape(np.array(coords), [-1, natoms * 3]) + nframes = coords.shape[0] + return natoms, nframes + + def _eval_model( + self, + coords: np.ndarray, + cells: Optional[np.ndarray], + atom_types: np.ndarray, + request_defs: List[OutputVariableDef], + ): + model = self.dp + + nframes = coords.shape[0] + if len(atom_types.shape) == 1: + natoms = len(atom_types) + atom_types = np.tile(atom_types, nframes).reshape(nframes, -1) + else: + natoms = len(atom_types[0]) + + coord_input = coords.reshape([-1, natoms, 3]) + type_input = atom_types + if cells is not None: + box_input = cells.reshape([-1, 3, 3]) + else: + box_input = None + + do_atomic_virial = any( + x.category == OutputVariableCategory.DERV_C_REDU for x in request_defs + ) + batch_output = model( + coord_input, type_input, box=box_input, do_atomic_virial=do_atomic_virial + ) + if isinstance(batch_output, tuple): + batch_output = batch_output[0] + + results = [] + for odef in request_defs: + # it seems not doing conversion + # dp_name = self._OUTDEF_DP2BACKEND[odef.name] + dp_name = odef.name + if dp_name in batch_output: + shape = self._get_output_shape(odef, nframes, natoms) + if batch_output[dp_name] is not None: + out = batch_output[dp_name].reshape(shape) + else: + out = np.full(shape, np.nan) + 
results.append(out) + else: + shape = self._get_output_shape(odef, nframes, natoms) + results.append(np.full(np.abs(shape), np.nan)) # this is kinda hacky + return tuple(results) + + def _get_output_shape(self, odef, nframes, natoms): + if odef.category == OutputVariableCategory.DERV_C_REDU: + # virial + return [nframes, *odef.shape[:-1], 9] + elif odef.category == OutputVariableCategory.REDU: + # energy + return [nframes, *odef.shape, 1] + elif odef.category == OutputVariableCategory.DERV_C: + # atom_virial + return [nframes, *odef.shape[:-1], natoms, 9] + elif odef.category == OutputVariableCategory.DERV_R: + # force + return [nframes, *odef.shape[:-1], natoms, 3] + elif odef.category == OutputVariableCategory.OUT: + # atom_energy, atom_tensor + # Something wrong here? + # return [nframes, *shape, natoms, 1] + return [nframes, natoms, *odef.shape, 1] + else: + raise RuntimeError("unknown category") diff --git a/deepmd/dpmodel/model/__init__.py b/deepmd/dpmodel/model/__init__.py new file mode 100644 index 0000000000..c1ff15ab0d --- /dev/null +++ b/deepmd/dpmodel/model/__init__.py @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""The model that takes the coordinates, cell and atom types as input +and predicts some property. The models are automatically generated from +atomic models by the `deepmd.dpmodel.make_model` method. + +The `make_model` method does the reduction, auto-differentiation +(dummy for dpmodels) and communication of the atomic properties +according to output variable definition +`deepmd.dpmodel.OutputVariableDef`. + +All models should be inherited from :class:`deepmd.dpmodel.model.base_model.BaseModel`. +Models generated by `make_model` have already done it. 
+""" + +from .dp_model import ( + DPModel, +) +from .make_model import ( + make_model, +) +from .spin_model import ( + SpinModel, +) + +__all__ = [ + "DPModel", + "SpinModel", + "make_model", +] diff --git a/deepmd/dpmodel/model/base_model.py b/deepmd/dpmodel/model/base_model.py new file mode 100644 index 0000000000..5169d1b5fe --- /dev/null +++ b/deepmd/dpmodel/model/base_model.py @@ -0,0 +1,181 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import inspect +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + Any, + List, + Type, +) + +from deepmd.utils.plugin import ( + PluginVariant, + make_plugin_registry, +) + + +def make_base_model() -> Type[object]: + class BaseBaseModel(ABC, PluginVariant, make_plugin_registry("model")): + """Base class for final exported model that will be directly used for inference. + + The class defines some abstractmethods that will be directly called by the + inference interface. If the final model class inherits some of those methods + from other classes, `BaseModel` should be inherited as the last class to ensure + the correct method resolution order. + + This class is backend-independent. + + See Also + -------- + deepmd.dpmodel.model.base_model.BaseModel + BaseModel class for DPModel backend. + """ + + def __new__(cls, *args, **kwargs): + if inspect.isabstract(cls): + cls = cls.get_class_by_type(kwargs.get("type", "standard")) + return super().__new__(cls) + + @abstractmethod + def __call__(self, *args: Any, **kwds: Any) -> Any: + """Inference method. + + Parameters + ---------- + *args : Any + The input data for inference. + **kwds : Any + The input data for inference. + + Returns + ------- + Any + The output of the inference.
+ """ + pass + + @abstractmethod + def get_type_map(self) -> List[str]: + """Get the type map.""" + + @abstractmethod + def get_rcut(self): + """Get the cut-off radius.""" + + @abstractmethod + def get_dim_fparam(self): + """Get the number (dimension) of frame parameters of this atomic model.""" + + @abstractmethod + def get_dim_aparam(self): + """Get the number (dimension) of atomic parameters of this atomic model.""" + + @abstractmethod + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + + @abstractmethod + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + + @abstractmethod + def model_output_type(self) -> List[str]: + """Get the output type for the model.""" + + @abstractmethod + def serialize(self) -> dict: + """Serialize the model. + + Returns + ------- + dict + The serialized data + """ + pass + + @classmethod + def deserialize(cls, data: dict) -> "BaseBaseModel": + """Deserialize the model. 
+ + Parameters + ---------- + data : dict + The serialized data + + Returns + ------- + BaseModel + The deserialized model + """ + if inspect.isabstract(cls): + return cls.get_class_by_type(data["type"]).deserialize(data) + raise NotImplementedError("Not implemented in class %s" % cls.__name__) + + model_def_script: str + + @abstractmethod + def get_model_def_script(self) -> str: + """Get the model definition script.""" + pass + + @abstractmethod + def get_nnei(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + # for C++ interface + pass + + @abstractmethod + def get_nsel(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + pass + + @classmethod + @abstractmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. + + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + cls = cls.get_class_by_type(local_jdata.get("type", "standard")) + return cls.update_sel(global_jdata, local_jdata) + + return BaseBaseModel + + +class BaseModel(make_base_model()): + """Base class for final exported model that will be directly used for inference. + + The class defines some abstractmethods that will be directly called by the + inference interface. If the final model class inherits some of those methods + from other classes, `BaseModel` should be inherited as the last class to ensure + the correct method resolution order. + + This class is for the DPModel backend. + + See Also + -------- + deepmd.dpmodel.model.base_model.BaseBaseModel + Backend-independent BaseModel class.
+ """ + + def __init__(self) -> None: + self.model_def_script = "" + + def get_model_def_script(self) -> str: + """Get the model definition script.""" + return self.model_def_script diff --git a/deepmd/dpmodel/model/dp_model.py b/deepmd/dpmodel/model/dp_model.py new file mode 100644 index 0000000000..8d84c435b4 --- /dev/null +++ b/deepmd/dpmodel/model/dp_model.py @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + + +from deepmd.dpmodel.atomic_model import ( + DPAtomicModel, +) +from deepmd.dpmodel.descriptor.base_descriptor import ( + BaseDescriptor, +) +from deepmd.dpmodel.model.base_model import ( + BaseModel, +) + +from .make_model import ( + make_model, +) + + +# use "class" to resolve "Variable not allowed in type expression" +@BaseModel.register("standard") +class DPModel(make_model(DPAtomicModel)): + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. + + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + local_jdata_cpy = local_jdata.copy() + local_jdata_cpy["descriptor"] = BaseDescriptor.update_sel( + global_jdata, local_jdata["descriptor"] + ) + return local_jdata_cpy diff --git a/deepmd/dpmodel/model/make_model.py b/deepmd/dpmodel/model/make_model.py new file mode 100644 index 0000000000..68889ad331 --- /dev/null +++ b/deepmd/dpmodel/model/make_model.py @@ -0,0 +1,476 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Dict, + List, + Optional, + Tuple, + Type, +) + +import numpy as np + +from deepmd.dpmodel.atomic_model.base_atomic_model import ( + BaseAtomicModel, +) +from deepmd.dpmodel.common import ( + GLOBAL_ENER_FLOAT_PRECISION, + GLOBAL_NP_FLOAT_PRECISION, + PRECISION_DICT, + RESERVED_PRECISON_DICT, + NativeOP, +) +from deepmd.dpmodel.model.base_model import ( + BaseModel, +) +from deepmd.dpmodel.output_def import 
( + FittingOutputDef, + ModelOutputDef, + OutputVariableCategory, + OutputVariableOperation, + check_operation_applied, +) +from deepmd.dpmodel.utils import ( + build_neighbor_list, + extend_coord_with_ghosts, + nlist_distinguish_types, + normalize_coord, +) + +from .transform_output import ( + communicate_extended_output, + fit_output_to_model_output, +) + + +def make_model(T_AtomicModel: Type[BaseAtomicModel]): + """Make a model as a derived class of an atomic model. + + The model provides two interfaces. + + 1. the `call_lower`, that takes extended coordinates, atypes and neighbor list, + and outputs the atomic property and derivatives (if required) on the extended region. + + 2. the `call`, that takes coordinates, atypes and cell and predicts + the atomic and reduced property, and derivatives (if required) on the local region. + + Parameters + ---------- + T_AtomicModel + The atomic model. + + Returns + ------- + CM + The model. + + """ + + class CM(NativeOP, BaseModel): + def __init__( + self, + *args, + # underscore to prevent conflict with normal inputs + atomic_model_: Optional[T_AtomicModel] = None, + **kwargs, + ): + BaseModel.__init__(self) + if atomic_model_ is not None: + self.atomic_model: T_AtomicModel = atomic_model_ + else: + self.atomic_model: T_AtomicModel = T_AtomicModel(*args, **kwargs) + self.precision_dict = PRECISION_DICT + self.reverse_precision_dict = RESERVED_PRECISON_DICT + self.global_np_float_precision = GLOBAL_NP_FLOAT_PRECISION + self.global_ener_float_precision = GLOBAL_ENER_FLOAT_PRECISION + + def model_output_def(self): + """Get the output def for the model.""" + return ModelOutputDef(self.atomic_output_def()) + + def model_output_type(self) -> List[str]: + """Get the output type for the model.""" + output_def = self.model_output_def() + var_defs = output_def.var_defs + vars = [ + kk + for kk, vv in var_defs.items() + if vv.category == OutputVariableCategory.OUT + ] + return vars + + def call( + self, + coord, + atype, + box:
Optional[np.ndarray] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + do_atomic_virial: bool = False, + ) -> Dict[str, np.ndarray]: + """Return model prediction. + + Parameters + ---------- + coord + The coordinates of the atoms. + shape: nf x (nloc x 3) + atype + The type of atoms. shape: nf x nloc + box + The simulation box. shape: nf x 9 + fparam + frame parameter. nf x ndf + aparam + atomic parameter. nf x nloc x nda + do_atomic_virial + If calculate the atomic virial. + + Returns + ------- + ret_dict + The result dict of type Dict[str,np.ndarray]. + The keys are defined by the `ModelOutputDef`. + + """ + nframes, nloc = atype.shape[:2] + cc, bb, fp, ap, input_prec = self.input_type_cast( + coord, box=box, fparam=fparam, aparam=aparam + ) + del coord, box, fparam, aparam + if bb is not None: + coord_normalized = normalize_coord( + cc.reshape(nframes, nloc, 3), + bb.reshape(nframes, 3, 3), + ) + else: + coord_normalized = cc.copy() + extended_coord, extended_atype, mapping = extend_coord_with_ghosts( + coord_normalized, atype, bb, self.get_rcut() + ) + nlist = build_neighbor_list( + extended_coord, + extended_atype, + nloc, + self.get_rcut(), + self.get_sel(), + distinguish_types=not self.mixed_types(), + ) + extended_coord = extended_coord.reshape(nframes, -1, 3) + model_predict_lower = self.call_lower( + extended_coord, + extended_atype, + nlist, + mapping, + fparam=fp, + aparam=ap, + do_atomic_virial=do_atomic_virial, + ) + model_predict = communicate_extended_output( + model_predict_lower, + self.model_output_def(), + mapping, + do_atomic_virial=do_atomic_virial, + ) + model_predict = self.output_type_cast(model_predict, input_prec) + return model_predict + + def call_lower( + self, + extended_coord: np.ndarray, + extended_atype: np.ndarray, + nlist: np.ndarray, + mapping: Optional[np.ndarray] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + do_atomic_virial: bool = False, + ): + 
"""Return model prediction. Lower interface that takes + extended atomic coordinates and types, nlist, and mapping + as input, and returns the predictions on the extended region. + The predictions are not reduced. + + Parameters + ---------- + extended_coord + coodinates in extended region. nf x (nall x 3). + extended_atype + atomic type in extended region. nf x nall. + nlist + neighbor list. nf x nloc x nsel. + mapping + mapps the extended indices to local indices. nf x nall. + fparam + frame parameter. nf x ndf + aparam + atomic parameter. nf x nloc x nda + do_atomic_virial + whether calculate atomic virial + + Returns + ------- + result_dict + the result dict, defined by the `FittingOutputDef`. + + """ + nframes, nall = extended_atype.shape[:2] + extended_coord = extended_coord.reshape(nframes, -1, 3) + nlist = self.format_nlist(extended_coord, extended_atype, nlist) + cc_ext, _, fp, ap, input_prec = self.input_type_cast( + extended_coord, fparam=fparam, aparam=aparam + ) + del extended_coord, fparam, aparam + atomic_ret = self.atomic_model.forward_common_atomic( + cc_ext, + extended_atype, + nlist, + mapping=mapping, + fparam=fp, + aparam=ap, + ) + model_predict = fit_output_to_model_output( + atomic_ret, + self.atomic_output_def(), + cc_ext, + do_atomic_virial=do_atomic_virial, + ) + model_predict = self.output_type_cast(model_predict, input_prec) + return model_predict + + def input_type_cast( + self, + coord: np.ndarray, + box: Optional[np.ndarray] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + ) -> Tuple[ + np.ndarray, + Optional[np.ndarray], + Optional[np.ndarray], + Optional[np.ndarray], + str, + ]: + """Cast the input data to global float type.""" + input_prec = self.reverse_precision_dict[ + self.precision_dict[coord.dtype.name] + ] + ### + ### type checking would not pass jit, convert to coord prec anyway + ### + _lst: List[Optional[np.ndarray]] = [ + vv.astype(coord.dtype) if vv is not None else None + for vv 
in [box, fparam, aparam] + ] + box, fparam, aparam = _lst + if ( + input_prec + == self.reverse_precision_dict[self.global_np_float_precision] + ): + return coord, box, fparam, aparam, input_prec + else: + pp = self.global_np_float_precision + return ( + coord.astype(pp), + box.astype(pp) if box is not None else None, + fparam.astype(pp) if fparam is not None else None, + aparam.astype(pp) if aparam is not None else None, + input_prec, + ) + + def output_type_cast( + self, + model_ret: Dict[str, np.ndarray], + input_prec: str, + ) -> Dict[str, np.ndarray]: + """Convert the model output to the input prec.""" + do_cast = ( + input_prec + != self.reverse_precision_dict[self.global_np_float_precision] + ) + pp = self.precision_dict[input_prec] + odef = self.model_output_def() + for kk in odef.keys(): + if kk not in model_ret.keys(): + # do not return energy_derv_c if not do_atomic_virial + continue + if check_operation_applied(odef[kk], OutputVariableOperation.REDU): + model_ret[kk] = ( + model_ret[kk].astype(self.global_ener_float_precision) + if model_ret[kk] is not None + else None + ) + elif do_cast: + model_ret[kk] = ( + model_ret[kk].astype(pp) if model_ret[kk] is not None else None + ) + return model_ret + + def format_nlist( + self, + extended_coord: np.ndarray, + extended_atype: np.ndarray, + nlist: np.ndarray, + ): + """Format the neighbor list. + + 1. If the number of neighbors in the `nlist` is equal to sum(self.sel), + it does nothing. + + 2. If the number of neighbors in the `nlist` is smaller than sum(self.sel), + the `nlist` is padded with -1. + + 3. If the number of neighbors in the `nlist` is larger than sum(self.sel), + the nearest sum(sel) neighbors will be preserved. + + Known limitations: + + In the case of not self.mixed_types, the nlist is always formatted. + May have side effect on the efficiency. + + Parameters + ---------- + extended_coord + coordinates in extended region. nf x nall x 3 + extended_atype + atomic type in extended region.
nf x nall + nlist + neighbor list. nf x nloc x nsel + + Returns + ------- + formated_nlist + the formated nlist. + + """ + n_nf, n_nloc, n_nnei = nlist.shape + mixed_types = self.mixed_types() + ret = self._format_nlist(extended_coord, nlist, sum(self.get_sel())) + if not mixed_types: + ret = nlist_distinguish_types(ret, extended_atype, self.get_sel()) + return ret + + def _format_nlist( + self, + extended_coord: np.ndarray, + nlist: np.ndarray, + nnei: int, + ): + n_nf, n_nloc, n_nnei = nlist.shape + extended_coord = extended_coord.reshape([n_nf, -1, 3]) + nall = extended_coord.shape[1] + rcut = self.get_rcut() + + if n_nnei < nnei: + # make a copy before revise + ret = np.concatenate( + [ + nlist, + -1 * np.ones([n_nf, n_nloc, nnei - n_nnei], dtype=nlist.dtype), + ], + axis=-1, + ) + elif n_nnei > nnei: + # make a copy before revise + m_real_nei = nlist >= 0 + ret = np.where(m_real_nei, nlist, 0) + coord0 = extended_coord[:, :n_nloc, :] + index = ret.reshape(n_nf, n_nloc * n_nnei, 1).repeat(3, axis=2) + coord1 = np.take_along_axis(extended_coord, index, axis=1) + coord1 = coord1.reshape(n_nf, n_nloc, n_nnei, 3) + rr = np.linalg.norm(coord0[:, :, None, :] - coord1, axis=-1) + rr = np.where(m_real_nei, rr, float("inf")) + rr, ret_mapping = np.sort(rr, axis=-1), np.argsort(rr, axis=-1) + ret = np.take_along_axis(ret, ret_mapping, axis=2) + ret = np.where(rr > rcut, -1, ret) + ret = ret[..., :nnei] + else: # n_nnei == nnei: + # copy anyway... + ret = nlist + assert ret.shape[-1] == nnei + return ret + + def do_grad_r( + self, + var_name: Optional[str] = None, + ) -> bool: + """Tell if the output variable `var_name` is r_differentiable. + if var_name is None, returns if any of the variable is r_differentiable. + """ + return self.atomic_model.do_grad_r(var_name) + + def do_grad_c( + self, + var_name: Optional[str] = None, + ) -> bool: + """Tell if the output variable `var_name` is c_differentiable. 
+ if var_name is None, returns if any of the variable is c_differentiable. + """ + return self.atomic_model.do_grad_c(var_name) + + def serialize(self) -> dict: + return self.atomic_model.serialize() + + @classmethod + def deserialize(cls, data) -> "CM": + return cls(atomic_model_=T_AtomicModel.deserialize(data)) + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + return self.atomic_model.get_dim_fparam() + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return self.atomic_model.get_dim_aparam() + + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + return self.atomic_model.get_sel_type() + + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + return self.atomic_model.is_aparam_nall() + + def get_rcut(self) -> float: + """Get the cut-off radius.""" + return self.atomic_model.get_rcut() + + def get_type_map(self) -> List[str]: + """Get the type map.""" + return self.atomic_model.get_type_map() + + def get_nsel(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return self.atomic_model.get_nsel() + + def get_nnei(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return self.atomic_model.get_nnei() + + def get_sel(self) -> List[int]: + """Returns the number of selected atoms for each type.""" + return self.atomic_model.get_sel() + + def mixed_types(self) -> bool: + """If true, the model + 1. assumes total number of atoms aligned across frames; + 2. uses a neighbor list that does not distinguish different atomic types. 
+ + If false, the model + 1. assumes total number of atoms of each atom type aligned across frames; + 2. uses a neighbor list that distinguishes different atomic types. + + """ + return self.atomic_model.mixed_types() + + def atomic_output_def(self) -> FittingOutputDef: + """Get the output def of the atomic model.""" + return self.atomic_model.atomic_output_def() + + return CM diff --git a/deepmd/dpmodel/model/model.py b/deepmd/dpmodel/model/model.py new file mode 100644 index 0000000000..3fdf5b802b --- /dev/null +++ b/deepmd/dpmodel/model/model.py @@ -0,0 +1,97 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.dpmodel.descriptor.se_e2_a import ( + DescrptSeA, +) +from deepmd.dpmodel.fitting.ener_fitting import ( + EnergyFittingNet, +) +from deepmd.dpmodel.model.dp_model import ( + DPModel, +) +from deepmd.dpmodel.model.spin_model import ( + SpinModel, +) +from deepmd.utils.spin import ( + Spin, +) + + +def get_standard_model(data: dict) -> DPModel: + """Get a standard DPModel from a dictionary. + + Parameters + ---------- + data : dict + The data to construct the model. + """ + descriptor_type = data["descriptor"].pop("type") + fitting_type = data["fitting_net"].pop("type") + if descriptor_type == "se_e2_a": + descriptor = DescrptSeA( + **data["descriptor"], + ) + else: + raise ValueError(f"Unknown descriptor type {descriptor_type}") + if fitting_type == "ener": + fitting = EnergyFittingNet( + ntypes=descriptor.get_ntypes(), + dim_descrpt=descriptor.get_dim_out(), + mixed_types=descriptor.mixed_types(), + **data["fitting_net"], + ) + else: + raise ValueError(f"Unknown fitting type {fitting_type}") + return DPModel( + descriptor=descriptor, + fitting=fitting, + type_map=data["type_map"], + atom_exclude_types=data.get("atom_exclude_types", []), + pair_exclude_types=data.get("pair_exclude_types", []), + ) + + +def get_spin_model(data: dict) -> SpinModel: + """Get a spin model from a dictionary. 
+ + Parameters + ---------- + data : dict + The data to construct the model. + """ + # include virtual spin and placeholder types + data["type_map"] += [item + "_spin" for item in data["type_map"]] + spin = Spin( + use_spin=data["spin"]["use_spin"], + virtual_scale=data["spin"]["virtual_scale"], + ) + pair_exclude_types = spin.get_pair_exclude_types( + exclude_types=data.get("pair_exclude_types", None) + ) + data["pair_exclude_types"] = pair_exclude_types + # for descriptor data stat + data["descriptor"]["exclude_types"] = pair_exclude_types + atom_exclude_types = spin.get_atom_exclude_types( + exclude_types=data.get("atom_exclude_types", None) + ) + data["atom_exclude_types"] = atom_exclude_types + if "env_protection" not in data["descriptor"]: + data["descriptor"]["env_protection"] = 1e-6 + if data["descriptor"]["type"] in ["se_e2_a"]: + # only expand sel for se_e2_a + data["descriptor"]["sel"] += data["descriptor"]["sel"] + backbone_model = get_standard_model(data) + return SpinModel(backbone_model=backbone_model, spin=spin) + + +def get_model(data: dict): + """Get a model from a dictionary. + + Parameters + ---------- + data : dict + The data to construct the model. 
+ """ + if "spin" in data: + return get_spin_model(data) + else: + return get_standard_model(data) diff --git a/deepmd/dpmodel/model/spin_model.py b/deepmd/dpmodel/model/spin_model.py new file mode 100644 index 0000000000..5b31b64fdf --- /dev/null +++ b/deepmd/dpmodel/model/spin_model.py @@ -0,0 +1,394 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Dict, + List, + Optional, +) + +import numpy as np + +from deepmd.dpmodel.model.dp_model import ( + DPModel, +) +from deepmd.utils.spin import ( + Spin, +) + + +class SpinModel: + """A spin model wrapper, with spin input preprocess and output split.""" + + def __init__( + self, + backbone_model, + spin: Spin, + ): + super().__init__() + self.backbone_model = backbone_model + self.spin = spin + self.ntypes_real = self.spin.ntypes_real + self.virtual_scale_mask = self.spin.get_virtual_scale_mask() + self.spin_mask = self.spin.get_spin_mask() + + def process_spin_input(self, coord, atype, spin): + """Generate virtual coordinates and types, concat into the input.""" + nframes, nloc = coord.shape[:-1] + atype_spin = np.concatenate([atype, atype + self.ntypes_real], axis=-1) + virtual_coord = coord + spin * self.virtual_scale_mask[atype].reshape( + [nframes, nloc, 1] + ) + coord_spin = np.concatenate([coord, virtual_coord], axis=-2) + return coord_spin, atype_spin + + def process_spin_input_lower( + self, + extended_coord: np.ndarray, + extended_atype: np.ndarray, + extended_spin: np.ndarray, + nlist: np.ndarray, + mapping: Optional[np.ndarray] = None, + ): + """ + Add `extended_spin` into `extended_coord` to generate virtual atoms, and extend `nlist` and `mapping`. + Note that the final `extended_coord_updated` with shape [nframes, nall + nall, 3] has the following order: + - [:, :nloc]: original nloc real atoms. + - [:, nloc: nloc + nloc]: virtual atoms corresponding to nloc real atoms. + - [:, nloc + nloc: nloc + nall]: ghost real atoms. 
def process_spin_output(
    self, atype, out_tensor, add_mag: bool = True, virtual_scale: bool = True
):
    """Separate a backbone output into its real and virtual (magnetic) halves.

    The second axis of ``out_tensor`` holds the real atoms first and the
    virtual atoms second. The virtual half is multiplied by a per-type
    factor taken from ``self.virtual_scale_mask`` (``virtual_scale=True``)
    or ``self.spin_mask`` (``virtual_scale=False``).

    Parameters
    ----------
    atype
        Types of the real atoms, used to look up the per-type factor.
    out_tensor
        Output covering real and virtual atoms along axis 1.
    add_mag
        If True, the unscaled virtual half is added onto the real half.
    virtual_scale
        Selects which per-type table scales the virtual half (see above).

    Returns
    -------
    out_real
        The real half (plus the virtual half when ``add_mag`` is set).
    out_mag
        The scaled virtual half, in the shape of the unscaled virtual half.
    mask
        Boolean array, True where the applied per-atom factor is positive.
    """
    n_frames, n_doubled = out_tensor.shape[:2]
    n_local = n_doubled // 2
    per_type_factor = self.virtual_scale_mask if virtual_scale else self.spin_mask
    atomic_mask = per_type_factor[atype].reshape([n_frames, n_local, 1])
    real_half = out_tensor[:, :n_local]
    mag_half = out_tensor[:, n_local:]
    if add_mag:
        real_half = real_half + mag_half
    mag_shape = mag_half.shape
    # flatten trailing dims so the (nf, nloc, 1) factor broadcasts, then restore
    scaled_mag = mag_half.reshape([n_frames, n_local, -1]) * atomic_mask
    mag_half = scaled_mag.reshape(mag_shape)
    return real_half, mag_half, atomic_mask > 0.0
self.virtual_scale_mask + else: + virtual_scale_mask = self.spin_mask + atomic_mask = virtual_scale_mask[extended_atype].reshape([nframes, nall, 1]) + extended_out_real = np.concatenate( + [ + extended_out_tensor[:, :nloc], + extended_out_tensor[:, nloc + nloc : nloc + nall], + ], + axis=1, + ) + extended_out_mag = np.concatenate( + [ + extended_out_tensor[:, nloc : nloc + nloc], + extended_out_tensor[:, nloc + nall :], + ], + axis=1, + ) + if add_mag: + extended_out_real = extended_out_real + extended_out_mag + extended_out_mag = ( + extended_out_mag.reshape([nframes, nall, -1]) * atomic_mask + ).reshape(extended_out_mag.shape) + return extended_out_real, extended_out_mag, atomic_mask > 0.0 + + @staticmethod + def extend_nlist(extended_atype, nlist): + nframes, nloc, nnei = nlist.shape + nall = extended_atype.shape[1] + nlist_mask = nlist != -1 + nlist[nlist == -1] = 0 + nlist_shift = nlist + nall + nlist[~nlist_mask] = -1 + nlist_shift[~nlist_mask] = -1 + self_spin = np.arange(0, nloc, dtype=nlist.dtype) + nall + self_spin = self_spin.reshape(1, -1, 1).repeat(nframes, axis=0) + # self spin + real neighbor + virtual neighbor + # nf x nloc x (1 + nnei + nnei) + extended_nlist = np.concatenate([self_spin, nlist, nlist_shift], axis=-1) + # nf x (nloc + nloc) x (1 + nnei + nnei) + extended_nlist = np.concatenate( + [extended_nlist, -1 * np.ones_like(extended_nlist)], axis=-2 + ) + # update the index for switch + first_part_index = (nloc <= extended_nlist) & (extended_nlist < nall) + second_part_index = (nall <= extended_nlist) & (extended_nlist < (nall + nloc)) + extended_nlist[first_part_index] += nloc + extended_nlist[second_part_index] -= nall - nloc + return extended_nlist + + @staticmethod + def concat_switch_virtual(extended_tensor, extended_tensor_virtual, nloc: int): + nframes, nall = extended_tensor.shape[:2] + out_shape = list(extended_tensor.shape) + out_shape[1] *= 2 + extended_tensor_updated = np.zeros( + out_shape, + dtype=extended_tensor.dtype, + ) + 
def get_type_map(self) -> List[str]:
    """Return the first half of the backbone model's type map.

    The second half is dropped here (the virtual types, per the original
    inline note).
    """
    full_map = self.backbone_model.get_type_map()
    # ignore the virtual type
    return full_map[: len(full_map) // 2]
+ """ + return self.backbone_model.is_aparam_nall() + + def model_output_type(self) -> List[str]: + """Get the output type for the model.""" + return self.backbone_model.model_output_type() + + def get_model_def_script(self) -> str: + """Get the model definition script.""" + return self.backbone_model.get_model_def_script() + + def get_nnei(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + # for C++ interface + if not self.backbone_model.mixed_types(): + return self.backbone_model.get_nnei() // 2 # ignore the virtual selected + else: + return self.backbone_model.get_nnei() + + def get_nsel(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + if not self.backbone_model.mixed_types(): + return self.backbone_model.get_nsel() // 2 # ignore the virtual selected + else: + return self.backbone_model.get_nsel() + + @staticmethod + def has_spin() -> bool: + """Returns whether it has spin input and output.""" + return True + + def __getattr__(self, name): + """Get attribute from the wrapped model.""" + if name in self.__dict__: + return self.__dict__[name] + else: + return getattr(self.backbone_model, name) + + def serialize(self) -> dict: + return { + "backbone_model": self.backbone_model.serialize(), + "spin": self.spin.serialize(), + } + + @classmethod + def deserialize(cls, data) -> "SpinModel": + backbone_model_obj = DPModel.deserialize(data["backbone_model"]) + spin = Spin.deserialize(data["spin"]) + return cls( + backbone_model=backbone_model_obj, + spin=spin, + ) + + def call( + self, + coord, + atype, + spin, + box: Optional[np.ndarray] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + do_atomic_virial: bool = False, + ) -> Dict[str, np.ndarray]: + """Return model prediction. + + Parameters + ---------- + coord + The coordinates of the atoms. + shape: nf x (nloc x 3) + atype + The type of atoms. 
def call_lower(
    self,
    extended_coord: np.ndarray,
    extended_atype: np.ndarray,
    extended_spin: np.ndarray,
    nlist: np.ndarray,
    mapping: Optional[np.ndarray] = None,
    fparam: Optional[np.ndarray] = None,
    aparam: Optional[np.ndarray] = None,
    do_atomic_virial: bool = False,
):
    """Return model prediction. Lower interface that takes
    extended atomic coordinates, types and spins, nlist, and mapping
    as input, and returns the predictions on the extended region.
    The predictions are not reduced.

    Parameters
    ----------
    extended_coord
        coordinates in extended region. nf x (nall x 3).
    extended_atype
        atomic type in extended region. nf x nall.
    extended_spin
        spins in extended region. nf x (nall x 3).
    nlist
        neighbor list. nf x nloc x nsel.
    mapping
        maps the extended indices to local indices. nf x nall.
    fparam
        frame parameter. nf x ndf
    aparam
        atomic parameter. nf x nloc x nda
    do_atomic_virial
        whether calculate atomic virial

    Returns
    -------
    result_dict
        The dict returned by the backbone model's ``call_lower``, with the
        first non-"mask" output variable truncated to its first ``nloc``
        entries along axis 1.

    """
    nloc = nlist.shape[1]
    (
        extended_coord_updated,
        extended_atype_updated,
        nlist_updated,
        mapping_updated,
    ) = self.process_spin_input_lower(
        extended_coord, extended_atype, extended_spin, nlist, mapping=mapping
    )
    model_predict = self.backbone_model.call_lower(
        extended_coord_updated,
        extended_atype_updated,
        nlist_updated,
        mapping=mapping_updated,
        fparam=fparam,
        aparam=aparam,
        do_atomic_virial=do_atomic_virial,
    )
    # Work on a copy: the list belongs to the backbone model and must not be
    # altered here (it would lose "mask" if the backbone hands out a shared list).
    output_names = list(self.backbone_model.model_output_type())
    if "mask" in output_names:
        output_names.remove("mask")
    var_name = output_names[0]
    # keep only the part in front of index nloc on the atom axis
    model_predict[var_name] = np.split(model_predict[var_name], [nloc], axis=1)[0]
    # for now omit the grad output
    return model_predict
+ + """ + model_ret = dict(fit_ret.items()) + for kk, vv in fit_ret.items(): + vdef = fit_output_def[kk] + shap = vdef.shape + atom_axis = -(len(shap) + 1) + if vdef.reduciable: + kk_redu = get_reduce_name(kk) + # cast to energy prec brefore reduction + model_ret[kk_redu] = np.sum( + vv.astype(GLOBAL_ENER_FLOAT_PRECISION), axis=atom_axis + ) + if vdef.r_differentiable: + kk_derv_r, kk_derv_c = get_deriv_name(kk) + # name-holders + model_ret[kk_derv_r] = None + if vdef.c_differentiable: + assert vdef.r_differentiable + kk_derv_r, kk_derv_c = get_deriv_name(kk) + model_ret[kk_derv_c] = None + return model_ret + + +def communicate_extended_output( + model_ret: Dict[str, np.ndarray], + model_output_def: ModelOutputDef, + mapping: np.ndarray, # nf x nloc + do_atomic_virial: bool = False, +) -> Dict[str, np.ndarray]: + """Transform the output of the model network defined on + local and ghost (extended) atoms to local atoms. + + """ + new_ret = {} + for kk in model_output_def.keys_outp(): + vv = model_ret[kk] + vdef = model_output_def[kk] + new_ret[kk] = vv + if vdef.reduciable: + kk_redu = get_reduce_name(kk) + new_ret[kk_redu] = model_ret[kk_redu] + if vdef.r_differentiable: + kk_derv_r, kk_derv_c = get_deriv_name(kk) + # name holders + new_ret[kk_derv_r] = None + if vdef.c_differentiable: + assert vdef.r_differentiable + kk_derv_r, kk_derv_c = get_deriv_name(kk) + new_ret[kk_derv_c] = None + new_ret[kk_derv_c + "_redu"] = None + if not do_atomic_virial: + # pop atomic virial, because it is not correctly calculated. 
def check_shape(
    shape: List[int],
    def_shape: List[int],
):
    """Check if the shape satisfies the defined shape.

    A trailing ``-1`` in ``def_shape`` is a wildcard: only the leading
    dimensions are compared. An empty ``def_shape`` (scalar variable) is
    handled and matches only an empty ``shape``. Both arguments may be any
    sequence; they are compared as lists.

    Raises
    ------
    AssertionError
        If the two shapes have different lengths.
    ValueError
        If the compared dimensions differ.
    """
    assert len(shape) == len(def_shape)
    actual = list(shape)
    expected = list(def_shape)
    # ``expected and ...``: an empty definition has no last element to inspect
    if expected and expected[-1] == -1:
        if actual[:-1] != expected[:-1]:
            raise ValueError(f"{shape[:-1]} shape not matching def {def_shape[:-1]}")
    else:
        if actual != expected:
            raise ValueError(f"{shape} shape not matching def {def_shape}")
def fitting_check_output(cls):
    """Class decorator validating a Fitting's outputs against its definition.

    The decorated class must provide:

    1. ``output_def()`` returning the output definition; it is called once,
       right after construction, and the result is stored in ``self.md``.
    2. ``__call__`` implementing the forward path; its result must be
       indexable by every key of ``self.md``.

    After each call, every defined variable is passed through ``check_var``.
    The returned subclass carries the decorated class's name, qualified
    name, module and docstring (via ``functools.wraps``).
    """

    class _CheckedFitting(cls):
        def __init__(
            self,
            *args,
            **kwargs,
        ):
            super().__init__(*args, **kwargs)
            self.md = self.output_def()

        def __call__(
            self,
            *args,
            **kwargs,
        ):
            result = cls.__call__(self, *args, **kwargs)
            for key in self.md.keys():
                check_var(result[key], self.md[key])
            return result

    # updated=() : copy the metadata only, do not merge the class __dict__
    return functools.wraps(cls, updated=())(_CheckedFitting)
class OutputVariableDef:
    """Defines the shape and other properties of one output variable.

    It is assumed that the fitting network outputs variables for each
    local atom. This class defines one output variable, including its
    name, shape, reducibility and differentiability.

    Parameters
    ----------
    name
        Name of the output variable. Notice that the xxxx_redu,
        xxxx_derv_c, xxxx_derv_r are reserved names that should
        not be used to define variables.
    shape
        The shape of the variable. e.g. energy should be [1],
        dipole should be [3], polarizability should be [3,3].
    reduciable
        If the variable is reduced.
    r_differentiable
        If the variable is differentiated with respect to coordinates
        of atoms. Only reduciable variable are differentiable.
        Negative derivative w.r.t. coordinates will be calculated. (e.g. force)
    c_differentiable
        If the variable is differentiated with respect to the
        cell tensor (pbc case). Only reduciable variable
        are differentiable.
        Virial, the transposed negative gradient with cell tensor times
        cell tensor, will be calculated, see eq 40 JCP 159, 054801 (2023).
    atomic : bool
        If the variable is defined for each atom.
    category : int
        The category of the output variable.
    r_hessian : bool
        If hessian is required
    magnetic : bool
        If the derivatives of variable have magnetic parts.

    Raises
    ------
    ValueError
        If ``c_differentiable`` is set without ``r_differentiable``, if a
        reduciable variable is not atomic, or if ``r_hessian`` is requested
        for a variable that is not both reduciable and r_differentiable.
    """

    def __init__(
        self,
        name: str,
        shape: List[int],
        reduciable: bool = False,
        r_differentiable: bool = False,
        c_differentiable: bool = False,
        atomic: bool = True,
        category: int = OutputVariableCategory.OUT.value,
        r_hessian: bool = False,
        magnetic: bool = False,
    ):
        self.name = name
        self.shape = list(shape)
        # jit doesn't support math.prod(self.shape)
        n_elements = 1
        for dim in self.shape:
            n_elements *= dim
        self.output_size = n_elements
        self.atomic = atomic
        self.reduciable = reduciable
        self.r_differentiable = r_differentiable
        self.c_differentiable = c_differentiable
        if c_differentiable and not r_differentiable:
            raise ValueError("c differentiable requires r_differentiable")
        if reduciable and not atomic:
            raise ValueError("a reduciable variable should be atomic")
        self.category = category
        self.r_hessian = r_hessian
        self.magnetic = magnetic
        if not r_hessian:
            return
        # a hessian needs a reduced, coordinate-differentiable variable
        if not reduciable:
            raise ValueError("only reduciable variable can calculate hessian")
        if not r_differentiable:
            raise ValueError("only r_differentiable variable can calculate hessian")
class ModelOutputDef:
    """Defines the shapes and other properties of the model outputs.

    The model reduce and differentiate fitting outputs if applicable.
    If a variable is named by foo, then the reduced variable is called
    foo_redu, the derivative w.r.t. coordinates is called foo_derv_r
    and the derivative w.r.t. cell is called foo_derv_c.

    All derived definitions (reduced, derivatives, reduced cell derivative,
    hessian, masks) are merged with the fitting definitions into
    ``var_defs``, keyed by variable name.

    Parameters
    ----------
    fit_defs
        Definition for the fitting net output

    """

    def __init__(
        self,
        fit_defs: FittingOutputDef,
    ):
        self.def_outp = fit_defs
        self.def_redu = do_reduce(self.def_outp.get_data())
        self.def_derv_r, self.def_derv_c = do_derivative(self.def_outp.get_data())
        # differentiate the r-derivatives once more; the cell part is discarded
        self.def_hess_r, _ = do_derivative(self.def_derv_r)
        self.def_derv_c_redu = do_reduce(self.def_derv_c)
        self.def_mask = do_mask(self.def_outp.get_data())
        self.var_defs: Dict[str, OutputVariableDef] = {}
        for ii in [
            self.def_outp.get_data(),
            self.def_redu,
            self.def_derv_c,
            self.def_derv_r,
            self.def_derv_c_redu,
            self.def_hess_r,
            self.def_mask,
        ]:
            self.var_defs.update(ii)

    def __getitem__(
        self,
        key: str,
    ) -> OutputVariableDef:
        return self.var_defs[key]

    def get_data(
        self,
        key: str = "",
    ) -> Dict[str, OutputVariableDef]:
        """Return all variable definitions, keyed by name.

        ``key`` is ignored; it is kept (now optional) so existing callers
        that pass it keep working, while ``get_data()`` can be called the
        same way as ``FittingOutputDef.get_data()``.
        """
        return self.var_defs

    def keys(self):
        return self.var_defs.keys()

    def keys_outp(self):
        return self.def_outp.keys()

    def keys_redu(self):
        return self.def_redu.keys()

    def keys_derv_r(self):
        return self.def_derv_r.keys()

    def keys_hess_r(self):
        return self.def_hess_r.keys()

    def keys_derv_c(self):
        return self.def_derv_c.keys()

    def keys_derv_c_redu(self):
        return self.def_derv_c_redu.keys()
def do_derivative(
    def_outp_data: Dict[str, OutputVariableDef],
) -> Tuple[Dict[str, OutputVariableDef], Dict[str, OutputVariableDef]]:
    """Build the definitions of the coordinate and cell derivatives.

    For every ``r_differentiable`` variable a ``<name>_derv_r`` definition
    (shape extended by 3) is created, plus ``<name>_derv_r_mag`` when the
    variable is magnetic. For every ``c_differentiable`` variable a
    ``<name>_derv_c`` definition (shape extended by 9) is created, plus
    ``<name>_derv_c_mag`` when the variable is magnetic.

    Parameters
    ----------
    def_outp_data
        Variable definitions to differentiate, keyed by name.

    Returns
    -------
    def_derv_r
        Definitions of the derivatives w.r.t. coordinates.
    def_derv_c
        Definitions of the derivatives w.r.t. cell, including the magnetic
        cell derivatives.
    """
    def_derv_r: Dict[str, OutputVariableDef] = {}
    def_derv_c: Dict[str, OutputVariableDef] = {}
    for kk, vv in def_outp_data.items():
        rkr, rkc = get_deriv_name(kk)
        rkrm, rkcm = get_deriv_name_mag(kk)
        if vv.r_differentiable:
            def_derv_r[rkr] = OutputVariableDef(
                rkr,
                vv.shape + [3],  # noqa: RUF005
                reduciable=False,
                # differentiable again only if a hessian was requested on an
                # original (OUT) variable
                r_differentiable=(
                    vv.r_hessian and vv.category == OutputVariableCategory.OUT.value
                ),
                c_differentiable=False,
                atomic=True,
                category=apply_operation(vv, OutputVariableOperation.DERV_R),
            )
            if vv.magnetic:
                def_derv_r[rkrm] = OutputVariableDef(
                    rkrm,
                    vv.shape + [3],  # noqa: RUF005
                    reduciable=False,
                    r_differentiable=(
                        vv.r_hessian and vv.category == OutputVariableCategory.OUT.value
                    ),
                    c_differentiable=False,
                    atomic=True,
                    category=apply_operation(vv, OutputVariableOperation.DERV_R),
                    magnetic=True,
                )

        if vv.c_differentiable:
            assert vv.r_differentiable
            def_derv_c[rkc] = OutputVariableDef(
                rkc,
                vv.shape + [9],  # noqa: RUF005
                reduciable=True,
                r_differentiable=False,
                c_differentiable=False,
                atomic=True,
                category=apply_operation(vv, OutputVariableOperation.DERV_C),
            )
            if vv.magnetic:
                # Cell derivative: belongs with the other *_derv_c entries
                # (it was previously filed under def_derv_r).
                # NOTE(review): confirm no consumer relied on finding
                # "<name>_derv_c_mag" among the r-derivative keys.
                def_derv_c[rkcm] = OutputVariableDef(
                    rkcm,
                    vv.shape + [9],  # noqa: RUF005
                    reduciable=True,
                    r_differentiable=False,
                    c_differentiable=False,
                    atomic=True,
                    category=apply_operation(vv, OutputVariableOperation.DERV_C),
                    magnetic=True,
                )
    return def_derv_r, def_derv_c
class AutoBatchSize(AutoBatchSizeBase):
    """Automatic batch size for NumPy."""

    def is_gpu_available(self) -> bool:
        """Report whether a GPU is available.

        Returns
        -------
        bool
            Always False in this implementation.
        """
        return False

    def is_oom_error(self, e: Exception) -> bool:
        """Tell whether ``e`` is an out-of-memory error.

        Parameters
        ----------
        e : Exception
            The exception to classify; it is not inspected.

        Returns
        -------
        bool
            Always False in this implementation.
        """
        # NumPy never exports numpy.core._exceptions.MemoryError
        return False
class AtomExcludeMask:
    """Computes the type exclusion mask for atoms.

    Parameters
    ----------
    ntypes
        Number of atom types.
    exclude_types
        Atom types to exclude. The sequence is copied, so neither the
        caller's list nor the shared default is aliased by the instance.
    """

    def __init__(
        self,
        ntypes: int,
        exclude_types: List[int] = [],
    ):
        self.ntypes = ntypes
        # Copy: storing the argument itself would hand the shared default
        # list to every caller of get_exclude_types().
        self.exclude_types = list(exclude_types)
        # (ntypes), 0 for excluded types and 1 otherwise
        self.type_mask = np.array(
            [0 if tt_i in self.exclude_types else 1 for tt_i in range(ntypes)],
            dtype=np.int32,
        ).reshape([-1])

    def get_exclude_types(self):
        """Return the list of excluded atom types held by this instance."""
        return self.exclude_types

    def get_type_mask(self):
        """Return the per-type mask, an int32 array of length ntypes."""
        return self.type_mask

    def build_type_exclude_mask(
        self,
        atype: np.ndarray,
    ):
        """Compute type exclusion mask for atoms.

        Parameters
        ----------
        atype
            The extended atom types. shape: nf x natom

        Returns
        -------
        mask
            The type exclusion mask for atoms. shape: nf x natom
            Element [ff,ii] being 0 if type(ii) is excluded,
            otherwise being 1.

        """
        nf, natom = atype.shape
        return self.type_mask[atype].reshape(nf, natom)
+ + """ + if len(self.exclude_types) == 0: + # safely return 1 if nothing is excluded. + return np.ones_like(nlist, dtype=np.int32) + nf, nloc, nnei = nlist.shape + nall = atype_ext.shape[1] + # add virtual atom of type ntypes. nf x nall+1 + ae = np.concatenate( + [atype_ext, self.ntypes * np.ones([nf, 1], dtype=atype_ext.dtype)], axis=-1 + ) + type_i = atype_ext[:, :nloc].reshape(nf, nloc) * (self.ntypes + 1) + # nf x nloc x nnei + index = np.where(nlist == -1, nall, nlist).reshape(nf, nloc * nnei) + type_j = np.take_along_axis(ae, index, axis=1).reshape(nf, nloc, nnei) + type_ij = type_i[:, :, None] + type_j + # nf x (nloc x nnei) + type_ij = type_ij.reshape(nf, nloc * nnei) + mask = self.type_mask[type_ij].reshape(nf, nloc, nnei) + return mask + + def __contains__(self, item): + return item in self.exclude_types diff --git a/deepmd/dpmodel/utils/neighbor_stat.py b/deepmd/dpmodel/utils/neighbor_stat.py new file mode 100644 index 0000000000..96b39d20ad --- /dev/null +++ b/deepmd/dpmodel/utils/neighbor_stat.py @@ -0,0 +1,154 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Iterator, + Optional, + Tuple, +) + +import numpy as np + +from deepmd.dpmodel.common import ( + NativeOP, +) +from deepmd.dpmodel.utils.nlist import ( + extend_coord_with_ghosts, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.neighbor_stat import NeighborStat as BaseNeighborStat + + +class NeighborStatOP(NativeOP): + """Class for getting neighbor statics data information. + + Parameters + ---------- + ntypes + The num of atom types + rcut + The cut-off radius + mixed_types : bool, optional + If True, treat all types as a single type. 
+ """ + + def __init__( + self, + ntypes: int, + rcut: float, + mixed_types: bool, + ) -> None: + self.rcut = rcut + self.ntypes = ntypes + self.mixed_types = mixed_types + + def call( + self, + coord: np.ndarray, + atype: np.ndarray, + cell: Optional[np.ndarray], + ) -> Tuple[float, np.ndarray]: + """Calculate the neareest neighbor distance between atoms, maximum nbor size of + atoms and the output data range of the environment matrix. + + Parameters + ---------- + coord + The coordinates of atoms. + atype + The atom types. + cell + The cell. + + Returns + ------- + float + The minimal squared distance between two atoms + np.ndarray + The maximal number of neighbors + """ + nframes = coord.shape[0] + coord = coord.reshape(nframes, -1, 3) + nloc = coord.shape[1] + coord = coord.reshape(nframes, nloc * 3) + extend_coord, extend_atype, _ = extend_coord_with_ghosts( + coord, atype, cell, self.rcut + ) + + coord1 = extend_coord.reshape(nframes, -1) + nall = coord1.shape[1] // 3 + coord0 = coord1[:, : nloc * 3] + diff = ( + coord1.reshape([nframes, -1, 3])[:, None, :, :] + - coord0.reshape([nframes, -1, 3])[:, :, None, :] + ) + assert list(diff.shape) == [nframes, nloc, nall, 3] + # remove the diagonal elements + mask = np.eye(nloc, nall, dtype=bool) + diff[:, mask] = np.inf + rr2 = np.sum(np.square(diff), axis=-1) + min_rr2 = np.min(rr2, axis=-1) + # count the number of neighbors + if not self.mixed_types: + mask = rr2 < self.rcut**2 + nnei = np.zeros((nframes, nloc, self.ntypes), dtype=int) + for ii in range(self.ntypes): + nnei[:, :, ii] = np.sum( + mask & (extend_atype == ii)[:, None, :], axis=-1 + ) + else: + mask = rr2 < self.rcut**2 + # virtual type (<0) are not counted + nnei = np.sum(mask & (extend_atype >= 0)[:, None, :], axis=-1).reshape( + nframes, nloc, 1 + ) + max_nnei = np.max(nnei, axis=1) + return min_rr2, max_nnei + + +class NeighborStat(BaseNeighborStat): + """Neighbor statistics using pure NumPy. 
+ + Parameters + ---------- + ntypes : int + The num of atom types + rcut : float + The cut-off radius + mixed_type : bool, optional, default=False + Treat all types as a single type. + """ + + def __init__( + self, + ntypes: int, + rcut: float, + mixed_type: bool = False, + ) -> None: + super().__init__(ntypes, rcut, mixed_type) + self.op = NeighborStatOP(ntypes, rcut, mixed_type) + + def iterator( + self, data: DeepmdDataSystem + ) -> Iterator[Tuple[np.ndarray, float, str]]: + """Abstract method for producing data. + + Yields + ------ + np.ndarray + The maximal number of neighbors + float + The squared minimal distance between two atoms + str + The directory of the data system + """ + for ii in range(len(data.system_dirs)): + for jj in data.data_systems[ii].dirs: + data_set = data.data_systems[ii] + data_set_data = data_set._load_set(jj) + minrr2, max_nnei = self.op( + data_set_data["coord"], + data_set_data["type"], + data_set_data["box"] if data_set.pbc else None, + ) + yield np.max(max_nnei, axis=0), np.min(minrr2), jj diff --git a/deepmd_utils/model_format/network.py b/deepmd/dpmodel/utils/network.py similarity index 83% rename from deepmd_utils/model_format/network.py rename to deepmd/dpmodel/utils/network.py index 71ed659787..661358ed70 100644 --- a/deepmd_utils/model_format/network.py +++ b/deepmd/dpmodel/utils/network.py @@ -3,10 +3,15 @@ See issue #2982 for more information. 
""" + import copy import itertools import json +from datetime import ( + datetime, +) from typing import ( + Callable, ClassVar, Dict, List, @@ -17,12 +22,16 @@ import h5py import numpy as np +from deepmd.utils.version import ( + check_version_compatibility, +) + try: - from deepmd_utils._version import version as __version__ + from deepmd._version import version as __version__ except ImportError: __version__ = "unknown" -from .common import ( +from deepmd.dpmodel import ( DEFAULT_PRECISION, PRECISION_DICT, NativeOP, @@ -54,6 +63,8 @@ def traverse_model_dict(model_obj, callback: callable, is_variable: bool = False elif isinstance(model_obj, list): for ii, vv in enumerate(model_obj): model_obj[ii] = traverse_model_dict(vv, callback, is_variable=is_variable) + elif model_obj is None: + return model_obj elif is_variable: model_obj = callback(model_obj) return model_obj @@ -79,7 +90,9 @@ def __call__(self): return self.count -def save_dp_model(filename: str, model_dict: dict, extra_info: Optional[dict] = None): +# TODO: move save_dp_model and load_dp_model to a seperated module +# should be moved to otherwhere... +def save_dp_model(filename: str, model_dict: dict) -> None: """Save a DP model to a file in the native format. Parameters @@ -88,15 +101,9 @@ def save_dp_model(filename: str, model_dict: dict, extra_info: Optional[dict] = The filename to save to. model_dict : dict The model dict to save. - extra_info : dict, optional - Extra meta information to save. 
""" model_dict = model_dict.copy() variable_counter = Counter() - if extra_info is not None: - extra_info = extra_info.copy() - else: - extra_info = {} with h5py.File(filename, "w") as f: model_dict = traverse_model_dict( model_dict, @@ -105,10 +112,11 @@ def save_dp_model(filename: str, model_dict: dict, extra_info: Optional[dict] = ).name, ) save_dict = { - "model": model_dict, "software": "deepmd-kit", "version": __version__, - **extra_info, + # use UTC+0 time + "time": str(datetime.utcnow()), + **model_dict, } f.attrs["json"] = json.dumps(save_dict, separators=(",", ":")) @@ -161,6 +169,8 @@ def __init__( ) -> None: prec = PRECISION_DICT[precision.lower()] self.precision = precision + # only use_timestep when skip connection is established. + use_timestep = use_timestep and (num_out == num_in or num_out == num_in * 2) rng = np.random.default_rng() self.w = rng.normal(size=(num_in, num_out)).astype(prec) self.b = rng.normal(size=(num_out,)).astype(prec) if bias else None @@ -186,11 +196,14 @@ def serialize(self) -> dict: "idt": self.idt, } return { + "@class": "Layer", + "@version": 1, "bias": self.b is not None, "use_timestep": self.idt is not None, "activation_function": self.activation_function, "resnet": self.resnet, - "precision": self.precision, + # make deterministic + "precision": np.dtype(PRECISION_DICT[self.precision]).name, "@variables": data, } @@ -204,6 +217,8 @@ def deserialize(cls, data: dict) -> "NativeLayer": The dict to deserialize from. 
""" data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("@class", None) variables = data.pop("@variables") assert variables["w"] is not None and len(variables["w"].shape) == 2 num_in, num_out = variables["w"].shape @@ -217,6 +232,10 @@ def deserialize(cls, data: dict) -> "NativeLayer": variables.get("b", None), variables.get("idt", None), ) + if obj.b is not None: + obj.b = obj.b.ravel() + if obj.idt is not None: + obj.idt = obj.idt.ravel() obj.check_shape_consistency() return obj @@ -297,14 +316,7 @@ def call(self, x: np.ndarray) -> np.ndarray: """ if self.w is None or self.activation_function is None: raise ValueError("w, b, and activation_function must be set") - if self.activation_function == "tanh": - fn = np.tanh - elif self.activation_function.lower() == "none": - - def fn(x): - return x - else: - raise NotImplementedError(self.activation_function) + fn = get_activation_fn(self.activation_function) y = ( np.matmul(x, self.w) + self.b if self.b is not None @@ -320,6 +332,55 @@ def fn(x): return y +def get_activation_fn(activation_function: str) -> Callable[[np.ndarray], np.ndarray]: + activation_function = activation_function.lower() + if activation_function == "tanh": + return np.tanh + elif activation_function == "relu": + + def fn(x): + # https://stackoverflow.com/a/47936476/9567349 + return x * (x > 0) + + return fn + elif activation_function in ("gelu", "gelu_tf"): + + def fn(x): + # generated by GitHub Copilot + return 0.5 * x * (1 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x**3))) + + return fn + elif activation_function == "relu6": + + def fn(x): + # generated by GitHub Copilot + return np.minimum(np.maximum(x, 0), 6) + + return fn + elif activation_function == "softplus": + + def fn(x): + # generated by GitHub Copilot + return np.log(1 + np.exp(x)) + + return fn + elif activation_function == "sigmoid": + + def fn(x): + # generated by GitHub Copilot + return 1 / (1 + np.exp(-x)) + + return fn + elif 
activation_function.lower() in ("none", "linear"): + + def fn(x): + return x + + return fn + else: + raise NotImplementedError(activation_function) + + def make_multilayer_network(T_NetworkLayer, ModuleBase): class NN(ModuleBase): """Native representation of a neural network. @@ -345,7 +406,11 @@ def serialize(self) -> dict: dict The serialized network. """ - return {"layers": [layer.serialize() for layer in self.layers]} + return { + "@class": "NN", + "@version": 1, + "layers": [layer.serialize() for layer in self.layers], + } @classmethod def deserialize(cls, data: dict) -> "NN": @@ -356,6 +421,9 @@ def deserialize(cls, data: dict) -> "NN": data : dict The dict to deserialize from. """ + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("@class", None) return cls(data["layers"]) def __getitem__(self, key): @@ -392,6 +460,15 @@ def call(self, x): x = layer(x) return x + def clear(self): + """Clear the network parameters to zero.""" + for layer in self.layers: + layer.w.fill(0.0) + if layer.b is not None: + layer.b.fill(0.0) + if layer.idt is not None: + layer.idt.fill(0.0) + return NN @@ -458,11 +535,14 @@ def serialize(self) -> dict: The serialized network. """ return { + "@class": "EmbeddingNetwork", + "@version": 1, "in_dim": self.in_dim, "neuron": self.neuron.copy(), "activation_function": self.activation_function, "resnet_dt": self.resnet_dt, - "precision": self.precision, + # make deterministic + "precision": np.dtype(PRECISION_DICT[self.precision]).name, "layers": [layer.serialize() for layer in self.layers], } @@ -476,6 +556,8 @@ def deserialize(cls, data: dict) -> "EmbeddingNet": The dict to deserialize from. 
""" data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("@class", None) layers = data.pop("layers") obj = cls(**data) super(EN, obj).__init__(layers) @@ -528,7 +610,8 @@ def __init__( resnet_dt=resnet_dt, precision=precision, ) - i_in, i_ot = neuron[-1], out_dim + i_in = neuron[-1] if len(neuron) > 0 else in_dim + i_ot = out_dim self.layers.append( T_NetworkLayer( i_in, @@ -552,6 +635,8 @@ def serialize(self) -> dict: The serialized network. """ return { + "@class": "FittingNetwork", + "@version": 1, "in_dim": self.in_dim, "out_dim": self.out_dim, "neuron": self.neuron.copy(), @@ -572,6 +657,8 @@ def deserialize(cls, data: dict) -> "FittingNet": The dict to deserialize from. """ data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("@class", None) layers = data.pop("layers") obj = cls(**data) T_Network.__init__(obj, layers) @@ -674,6 +761,8 @@ def serialize(self) -> dict: network_type_map_inv = {v: k for k, v in self.NETWORK_TYPE_MAP.items()} network_type_name = network_type_map_inv[self.network_type] return { + "@class": "NetworkCollection", + "@version": 1, "ndim": self.ndim, "ntypes": self.ntypes, "network_type": network_type_name, @@ -689,4 +778,7 @@ def deserialize(cls, data: dict) -> "NetworkCollection": data : dict The dict to deserialize from. 
""" + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("@class", None) return cls(**data) diff --git a/deepmd/dpmodel/utils/nlist.py b/deepmd/dpmodel/utils/nlist.py new file mode 100644 index 0000000000..ca8b18023b --- /dev/null +++ b/deepmd/dpmodel/utils/nlist.py @@ -0,0 +1,264 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Dict, + List, + Optional, + Union, +) + +import numpy as np + +from .region import ( + to_face_distance, +) + + +## translated from torch implemantation by chatgpt +def build_neighbor_list( + coord: np.ndarray, + atype: np.ndarray, + nloc: int, + rcut: float, + sel: Union[int, List[int]], + distinguish_types: bool = True, +) -> np.ndarray: + """Build neightbor list for a single frame. keeps nsel neighbors. + + Parameters + ---------- + coord : np.ndarray + exptended coordinates of shape [batch_size, nall x 3] + atype : np.ndarray + extended atomic types of shape [batch_size, nall] + type < 0 the atom is treat as virtual atoms. + nloc : int + number of local atoms. + rcut : float + cut-off radius + sel : int or List[int] + maximal number of neighbors (of each type). + if distinguish_types==True, nsel should be list and + the length of nsel should be equal to number of + types. + distinguish_types : bool + distinguish different types. + + Returns + ------- + neighbor_list : np.ndarray + Neighbor list of shape [batch_size, nloc, nsel], the neighbors + are stored in an ascending order. If the number of + neighbors is less than nsel, the positions are masked + with -1. The neighbor list of an atom looks like + |------ nsel ------| + xx xx xx xx -1 -1 -1 + if distinguish_types==True and we have two types + |---- nsel[0] -----| |---- nsel[1] -----| + xx xx xx xx -1 -1 -1 xx xx xx -1 -1 -1 -1 + For virtual atoms all neighboring positions are filled with -1. 
+ + """ + batch_size = coord.shape[0] + coord = coord.reshape(batch_size, -1) + nall = coord.shape[1] // 3 + # fill virtual atoms with large coords so they are not neighbors of any + # real atom. + xmax = np.max(coord) + 2.0 * rcut + # nf x nall + is_vir = atype < 0 + coord1 = np.where(is_vir[:, :, None], xmax, coord.reshape(-1, nall, 3)).reshape( + -1, nall * 3 + ) + if isinstance(sel, int): + sel = [sel] + nsel = sum(sel) + coord0 = coord1[:, : nloc * 3] + diff = ( + coord1.reshape([batch_size, -1, 3])[:, None, :, :] + - coord0.reshape([batch_size, -1, 3])[:, :, None, :] + ) + assert list(diff.shape) == [batch_size, nloc, nall, 3] + rr = np.linalg.norm(diff, axis=-1) + # if central atom has two zero distances, sorting sometimes can not exclude itself + rr -= np.eye(nloc, nall, dtype=diff.dtype)[np.newaxis, :, :] + nlist = np.argsort(rr, axis=-1) + rr = np.sort(rr, axis=-1) + rr = rr[:, :, 1:] + nlist = nlist[:, :, 1:] + nnei = rr.shape[2] + if nsel <= nnei: + rr = rr[:, :, :nsel] + nlist = nlist[:, :, :nsel] + else: + rr = np.concatenate( + [rr, np.ones([batch_size, nloc, nsel - nnei]) + rcut], axis=-1 + ) + nlist = np.concatenate( + [nlist, np.ones([batch_size, nloc, nsel - nnei], dtype=nlist.dtype)], + axis=-1, + ) + assert list(nlist.shape) == [batch_size, nloc, nsel] + nlist = np.where(np.logical_or((rr > rcut), is_vir[:, :nloc, None]), -1, nlist) + + if distinguish_types: + return nlist_distinguish_types(nlist, atype, sel) + else: + return nlist + + +def nlist_distinguish_types( + nlist: np.ndarray, + atype: np.ndarray, + sel: List[int], +): + """Given a nlist that does not distinguish atom types, return a nlist that + distinguish atom types. 
+ + """ + nf, nloc, _ = nlist.shape + ret_nlist = [] + tmp_atype = np.tile(atype[:, None], [1, nloc, 1]) + mask = nlist == -1 + tnlist_0 = nlist.copy() + tnlist_0[mask] = 0 + tnlist = np.take_along_axis(tmp_atype, tnlist_0, axis=2).squeeze() + tnlist = np.where(mask, -1, tnlist) + snsel = tnlist.shape[2] + for ii, ss in enumerate(sel): + pick_mask = (tnlist == ii).astype(np.int32) + sorted_indices = np.argsort(-pick_mask, kind="stable", axis=-1) + pick_mask_sorted = -np.sort(-pick_mask, axis=-1) + inlist = np.take_along_axis(nlist, sorted_indices, axis=2) + inlist = np.where(~pick_mask_sorted.astype(bool), -1, inlist) + ret_nlist.append(np.split(inlist, [ss, snsel - ss], axis=-1)[0]) + ret = np.concatenate(ret_nlist, axis=-1) + return ret + + +def get_multiple_nlist_key(rcut: float, nsel: int) -> str: + return str(rcut) + "_" + str(nsel) + + +## translated from torch implemantation by chatgpt +def build_multiple_neighbor_list( + coord: np.ndarray, + nlist: np.ndarray, + rcuts: List[float], + nsels: List[int], +) -> Dict[str, np.ndarray]: + """Input one neighbor list, and produce multiple neighbor lists with + different cutoff radius and numbers of selection out of it. The + required rcuts and nsels should be smaller or equal to the input nlist. + + Parameters + ---------- + coord : np.ndarray + exptended coordinates of shape [batch_size, nall x 3] + nlist : np.ndarray + Neighbor list of shape [batch_size, nloc, nsel], the neighbors + should be stored in an ascending order. + rcuts : List[float] + list of cut-off radius in ascending order. + nsels : List[int] + maximal number of neighbors in ascending order. + + Returns + ------- + nlist_dict : Dict[str, np.ndarray] + A dict of nlists, key given by get_multiple_nlist_key(rc, nsel) + value being the corresponding nlist. 
+ + """ + assert len(rcuts) == len(nsels) + if len(rcuts) == 0: + return {} + nb, nloc, nsel = nlist.shape + if nsel < nsels[-1]: + pad = -1 * np.ones((nb, nloc, nsels[-1] - nsel), dtype=nlist.dtype) + nlist = np.concatenate([nlist, pad], axis=-1) + nsel = nsels[-1] + coord1 = coord.reshape(nb, -1, 3) + nall = coord1.shape[1] + coord0 = coord1[:, :nloc, :] + nlist_mask = nlist == -1 + tnlist_0 = nlist.copy() + tnlist_0[nlist_mask] = 0 + index = np.tile(tnlist_0.reshape(nb, nloc * nsel, 1), [1, 1, 3]) + coord2 = np.take_along_axis(coord1, index, axis=1).reshape(nb, nloc, nsel, 3) + diff = coord2 - coord0[:, :, None, :] + rr = np.linalg.norm(diff, axis=-1) + rr = np.where(nlist_mask, float("inf"), rr) + nlist0 = nlist + ret = {} + for rc, ns in zip(rcuts[::-1], nsels[::-1]): + tnlist_1 = np.copy(nlist0[:, :, :ns]) + tnlist_1[rr[:, :, :ns] > rc] = -1 + ret[get_multiple_nlist_key(rc, ns)] = tnlist_1 + return ret + + +## translated from torch implemantation by chatgpt +def extend_coord_with_ghosts( + coord: np.ndarray, + atype: np.ndarray, + cell: Optional[np.ndarray], + rcut: float, +): + """Extend the coordinates of the atoms by appending peridoc images. + The number of images is large enough to ensure all the neighbors + within rcut are appended. + + Parameters + ---------- + coord : np.ndarray + original coordinates of shape [-1, nloc*3]. + atype : np.ndarray + atom type of shape [-1, nloc]. + cell : np.ndarray + simulation cell tensor of shape [-1, 9]. + rcut : float + the cutoff radius + + Returns + ------- + extended_coord: np.ndarray + extended coordinates of shape [-1, nall*3]. + extended_atype: np.ndarray + extended atom type of shape [-1, nall]. 
+ index_mapping: np.ndarray + maping extended index to the local index + + """ + nf, nloc = atype.shape + aidx = np.tile(np.arange(nloc)[np.newaxis, :], (nf, 1)) + if cell is None: + nall = nloc + extend_coord = coord.copy() + extend_atype = atype.copy() + extend_aidx = aidx.copy() + else: + coord = coord.reshape((nf, nloc, 3)) + cell = cell.reshape((nf, 3, 3)) + to_face = to_face_distance(cell) + nbuff = np.ceil(rcut / to_face).astype(int) + nbuff = np.max(nbuff, axis=0) + xi = np.arange(-nbuff[0], nbuff[0] + 1, 1) + yi = np.arange(-nbuff[1], nbuff[1] + 1, 1) + zi = np.arange(-nbuff[2], nbuff[2] + 1, 1) + xyz = np.outer(xi, np.array([1, 0, 0]))[:, np.newaxis, np.newaxis, :] + xyz = xyz + np.outer(yi, np.array([0, 1, 0]))[np.newaxis, :, np.newaxis, :] + xyz = xyz + np.outer(zi, np.array([0, 0, 1]))[np.newaxis, np.newaxis, :, :] + xyz = xyz.reshape(-1, 3) + shift_idx = xyz[np.argsort(np.linalg.norm(xyz, axis=1))] + ns, _ = shift_idx.shape + nall = ns * nloc + shift_vec = np.einsum("sd,fdk->fsk", shift_idx, cell) + extend_coord = coord[:, None, :, :] + shift_vec[:, :, None, :] + extend_atype = np.tile(atype[:, :, np.newaxis], (1, ns, 1)) + extend_aidx = np.tile(aidx[:, :, np.newaxis], (1, ns, 1)) + + return ( + extend_coord.reshape((nf, nall * 3)), + extend_atype.reshape((nf, nall)), + extend_aidx.reshape((nf, nall)), + ) diff --git a/deepmd/dpmodel/utils/region.py b/deepmd/dpmodel/utils/region.py new file mode 100644 index 0000000000..ddbc4b29b8 --- /dev/null +++ b/deepmd/dpmodel/utils/region.py @@ -0,0 +1,103 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import numpy as np + + +def phys2inter( + coord: np.ndarray, + cell: np.ndarray, +) -> np.ndarray: + """Convert physical coordinates to internal(direct) coordinates. + + Parameters + ---------- + coord : np.ndarray + physical coordinates of shape [*, na, 3]. + cell : np.ndarray + simulation cell tensor of shape [*, 3, 3]. 
+ + Returns + ------- + inter_coord: np.ndarray + the internal coordinates + + """ + rec_cell = np.linalg.inv(cell) + return np.matmul(coord, rec_cell) + + +def inter2phys( + coord: np.ndarray, + cell: np.ndarray, +) -> np.ndarray: + """Convert internal(direct) coordinates to physical coordinates. + + Parameters + ---------- + coord : np.ndarray + internal coordinates of shape [*, na, 3]. + cell : np.ndarray + simulation cell tensor of shape [*, 3, 3]. + + Returns + ------- + phys_coord: np.ndarray + the physical coordinates + + """ + return np.matmul(coord, cell) + + +def normalize_coord( + coord: np.ndarray, + cell: np.ndarray, +) -> np.ndarray: + """Apply PBC according to the atomic coordinates. + + Parameters + ---------- + coord : np.ndarray + orignal coordinates of shape [*, na, 3]. + cell : np.ndarray + simulation cell shape [*, 3, 3]. + + Returns + ------- + wrapped_coord: np.ndarray + wrapped coordinates of shape [*, na, 3]. + + """ + icoord = phys2inter(coord, cell) + icoord = np.remainder(icoord, 1.0) + return inter2phys(icoord, cell) + + +def to_face_distance( + cell: np.ndarray, +) -> np.ndarray: + """Compute the to-face-distance of the simulation cell. + + Parameters + ---------- + cell : np.ndarray + simulation cell tensor of shape [*, 3, 3]. 
+ + Returns + ------- + dist: np.ndarray + the to face distances of shape [*, 3] + + """ + cshape = cell.shape + dist = b_to_face_distance(cell.reshape([-1, 3, 3])) + return dist.reshape(list(cshape[:-2]) + [3]) # noqa:RUF005 + + +def b_to_face_distance(cell): + volume = np.linalg.det(cell) + c_yz = np.cross(cell[:, 1], cell[:, 2], axis=-1) + _h2yz = volume / np.linalg.norm(c_yz, axis=-1) + c_zx = np.cross(cell[:, 2], cell[:, 0], axis=-1) + _h2zx = volume / np.linalg.norm(c_zx, axis=-1) + c_xy = np.cross(cell[:, 0], cell[:, 1], axis=-1) + _h2xy = volume / np.linalg.norm(c_xy, axis=-1) + return np.stack([_h2yz, _h2zx, _h2xy], axis=1) diff --git a/deepmd/dpmodel/utils/type_embed.py b/deepmd/dpmodel/utils/type_embed.py new file mode 100644 index 0000000000..7527c122f3 --- /dev/null +++ b/deepmd/dpmodel/utils/type_embed.py @@ -0,0 +1,124 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + List, + Optional, +) + +import numpy as np + +from deepmd.dpmodel.common import ( + PRECISION_DICT, + NativeOP, +) +from deepmd.dpmodel.utils.network import ( + EmbeddingNet, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + + +class TypeEmbedNet(NativeOP): + r"""Type embedding network. + + Parameters + ---------- + ntypes : int + Number of atom types + neuron : list[int] + Number of neurons in each hidden layers of the embedding net + resnet_dt + Time-step `dt` in the resnet construction: y = x + dt * \phi (Wx + b) + activation_function + The activation function in the embedding net. Supported options are |ACTIVATION_FN| + precision + The precision of the embedding net parameters. Supported options are |PRECISION| + trainable + If the weights of embedding net are trainable. + seed + Random seed for initializing the network parameters. + padding + Concat the zero padding to the output, as the default embedding of empty type. 
+ """ + + def __init__( + self, + *, + ntypes: int, + neuron: List[int], + resnet_dt: bool = False, + activation_function: str = "tanh", + precision: str = "default", + trainable: bool = True, + seed: Optional[int] = None, + padding: bool = False, + ) -> None: + self.ntypes = ntypes + self.neuron = neuron + self.seed = seed + self.resnet_dt = resnet_dt + self.precision = precision + self.activation_function = str(activation_function) + self.trainable = trainable + self.padding = padding + self.embedding_net = EmbeddingNet( + ntypes, + self.neuron, + self.activation_function, + self.resnet_dt, + self.precision, + ) + + def call(self) -> np.ndarray: + """Compute the type embedding network.""" + embed = self.embedding_net( + np.eye(self.ntypes, dtype=PRECISION_DICT[self.precision]) + ) + if self.padding: + embed = np.pad(embed, ((0, 1), (0, 0)), mode="constant") + return embed + + @classmethod + def deserialize(cls, data: dict): + """Deserialize the model. + + Parameters + ---------- + data : dict + The serialized data + + Returns + ------- + Model + The deserialized model + """ + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 1, 1) + data_cls = data.pop("@class") + assert data_cls == "TypeEmbedNet", f"Invalid class {data_cls}" + + embedding_net = EmbeddingNet.deserialize(data.pop("embedding")) + type_embedding_net = cls(**data) + type_embedding_net.embedding_net = embedding_net + return type_embedding_net + + def serialize(self) -> dict: + """Serialize the model. 
+ + Returns + ------- + dict + The serialized data + """ + return { + "@class": "TypeEmbedNet", + "@version": 1, + "ntypes": self.ntypes, + "neuron": self.neuron, + "resnet_dt": self.resnet_dt, + "precision": self.precision, + "activation_function": self.activation_function, + "trainable": self.trainable, + "padding": self.padding, + "embedding": self.embedding_net.serialize(), + } diff --git a/deepmd/dpmodel/utils/update_sel.py b/deepmd/dpmodel/utils/update_sel.py new file mode 100644 index 0000000000..48463b5743 --- /dev/null +++ b/deepmd/dpmodel/utils/update_sel.py @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Type, +) + +from deepmd.dpmodel.utils.neighbor_stat import ( + NeighborStat, +) +from deepmd.utils.update_sel import ( + BaseUpdateSel, +) + + +class UpdateSel(BaseUpdateSel): + @property + def neighbor_stat(self) -> Type[NeighborStat]: + return NeighborStat + + def hook(self, min_nbor_dist, max_nbor_size): + # TODO: save to the model in UpdateSel.hook + pass diff --git a/deepmd/driver.py b/deepmd/driver.py new file mode 100644 index 0000000000..0b48f2ac84 --- /dev/null +++ b/deepmd/driver.py @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""dpdata driver.""" + +# Derived from https://github.com/deepmodeling/dpdata/blob/18a0ed5ebced8b1f6887038883d46f31ae9990a4/dpdata/plugins/deepmd.py#L361-L443 +# under LGPL-3.0-or-later license. +# The original deepmd driver maintained in the dpdata package will be overriden. +# The class in the dpdata package needs to handle different situations for v1 and v2 interface, +# which is too complex with the development of deepmd-kit. +# So, it will be a good idea to ship it with DeePMD-kit itself. +import dpdata +from dpdata.utils import ( + sort_atom_names, +) + + +@dpdata.driver.Driver.register("dp") +@dpdata.driver.Driver.register("deepmd") +@dpdata.driver.Driver.register("deepmd-kit") +class DPDriver(dpdata.driver.Driver): + """DeePMD-kit driver. 
+ + Parameters + ---------- + dp : deepmd.DeepPot or str + The deepmd-kit potential class or the filename of the model. + + Examples + -------- + >>> DPDriver("frozen_model.pb") + """ + + def __init__(self, dp: str) -> None: + from deepmd.infer.deep_pot import ( + DeepPot, + ) + + if not isinstance(dp, DeepPot): + self.dp = DeepPot(dp, auto_batch_size=True) + else: + self.dp = dp + + def label(self, data: dict) -> dict: + """Label a system data by deepmd-kit. Returns new data with energy, forces, and virials. + + Parameters + ---------- + data : dict + data with coordinates and atom types + + Returns + ------- + dict + labeled data with energies and forces + """ + nframes = data["coords"].shape[0] + natoms = data["coords"].shape[1] + type_map = self.dp.get_type_map() + # important: dpdata type_map may not be the same as the model type_map + # note: while we want to change the type_map when feeding to DeepPot, + # we don't want to change the type_map in the returned data + sorted_data = sort_atom_names(data.copy(), type_map=type_map) + atype = sorted_data["atom_types"] + + coord = data["coords"].reshape((nframes, natoms * 3)) + if "nopbc" not in data: + cell = data["cells"].reshape((nframes, 9)) + else: + cell = None + e, f, v = self.dp.eval(coord, cell, atype) + data = data.copy() + data["energies"] = e.reshape((nframes,)) + data["forces"] = f.reshape((nframes, natoms, 3)) + data["virials"] = v.reshape((nframes, 3, 3)) + return data diff --git a/deepmd/entrypoints/__init__.py b/deepmd/entrypoints/__init__.py index 9c3a8b31e1..6ceb116d85 100644 --- a/deepmd/entrypoints/__init__.py +++ b/deepmd/entrypoints/__init__.py @@ -1,48 +1 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -"""Submodule that contains all the DeePMD-Kit entry point scripts.""" - -from ..infer.model_devi import ( - make_model_devi, -) -from .compress import ( - compress, -) -from .convert import ( - convert, -) -from .doc import ( - doc_train_input, -) -from .freeze import ( - freeze, -) -from .gui 
import ( - start_dpgui, -) -from .neighbor_stat import ( - neighbor_stat, -) -from .test import ( - test, -) - -# import `train` as `train_dp` to avoid the conflict of the -# module name `train` and the function name `train` -from .train import train as train_dp -from .transfer import ( - transfer, -) - -__all__ = [ - "doc_train_input", - "freeze", - "test", - "train_dp", - "transfer", - "compress", - "doc_train_input", - "make_model_devi", - "convert", - "neighbor_stat", - "start_dpgui", -] diff --git a/deepmd/entrypoints/convert_backend.py b/deepmd/entrypoints/convert_backend.py new file mode 100644 index 0000000000..39967d565c --- /dev/null +++ b/deepmd/entrypoints/convert_backend.py @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.backend.backend import ( + Backend, +) + + +def convert_backend( + *, # Enforce keyword-only arguments + INPUT: str, + OUTPUT: str, + **kwargs, +) -> None: + """Convert a model file from one backend to another. + + Parameters + ---------- + INPUT : str + The input model file. + OUTPUT : str + The output model file.
+ """ + inp_backend: Backend = Backend.detect_backend_by_model(INPUT)() + out_backend: Backend = Backend.detect_backend_by_model(OUTPUT)() + inp_hook = inp_backend.serialize_hook + out_hook = out_backend.deserialize_hook + data = inp_hook(INPUT) + out_hook(OUTPUT, data) diff --git a/deepmd/entrypoints/doc.py b/deepmd/entrypoints/doc.py index cc28e52930..e55e84f9d3 100644 --- a/deepmd/entrypoints/doc.py +++ b/deepmd/entrypoints/doc.py @@ -1,6 +1,20 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from deepmd_utils.entrypoints.doc import ( - doc_train_input, +"""Module that prints train input arguments docstrings.""" + +from deepmd.utils.argcheck import ( + gen_doc, + gen_json, ) __all__ = ["doc_train_input"] + + +def doc_train_input(*, out_type: str = "rst", **kwargs): + """Print out training input arguments to console.""" + if out_type == "rst": + doc_str = gen_doc(make_anchor=True) + elif out_type == "json": + doc_str = gen_json() + else: + raise RuntimeError("Unsupported out type %s" % out_type) + print(doc_str) # noqa: T201 diff --git a/deepmd/entrypoints/gui.py b/deepmd/entrypoints/gui.py index 72de65f1c2..8b6b9e0a09 100644 --- a/deepmd/entrypoints/gui.py +++ b/deepmd/entrypoints/gui.py @@ -1,6 +1,31 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from deepmd_utils.entrypoints.gui import ( - start_dpgui, -) +"""DP-GUI entrypoint.""" -__all__ = ["start_dpgui"] + + +def start_dpgui(*, port: int, bind_all: bool, **kwargs): + """Host DP-GUI server. + + Parameters + ---------- + port : int + The port to serve DP-GUI on. + bind_all : bool + Serve on all public interfaces. This will expose your DP-GUI instance + to the network on both IPv4 and IPv6 (where available).
+ **kwargs + additional arguments + + Raises + ------ + ModuleNotFoundError + The dpgui package is not installed + """ + try: + from dpgui import ( + start_dpgui, + ) + except ModuleNotFoundError as e: + raise ModuleNotFoundError( + "To use DP-GUI, please install the dpgui package:\npip install dpgui" + ) from e + start_dpgui(port=port, bind_all=bind_all) diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py index 2c6ac26a7f..9f05b9a530 100644 --- a/deepmd/entrypoints/main.py +++ b/deepmd/entrypoints/main.py @@ -1,47 +1,41 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -"""DeePMD-Kit entry point module.""" +"""Common entrypoints.""" import argparse from pathlib import ( Path, ) -from typing import ( - List, - Optional, - Union, -) -from deepmd.common import ( - clear_session, +from deepmd.backend.backend import ( + Backend, +) +from deepmd.backend.suffix import ( + format_model_suffix, +) +from deepmd.entrypoints.convert_backend import ( + convert_backend, ) -from deepmd.entrypoints import ( - compress, - convert, +from deepmd.entrypoints.doc import ( doc_train_input, - freeze, - make_model_devi, - neighbor_stat, +) +from deepmd.entrypoints.gui import ( start_dpgui, - test, - train_dp, - transfer, ) -from deepmd.loggers import ( - set_log_handles, +from deepmd.entrypoints.neighbor_stat import ( + neighbor_stat, ) -from deepmd.nvnmd.entrypoints.train import ( - train_nvnmd, +from deepmd.entrypoints.test import ( + test, ) -from deepmd_utils.main import ( - get_ll, - main_parser, - parse_args, +from deepmd.infer.model_devi import ( + make_model_devi, +) +from deepmd.loggers.loggers import ( + set_log_handles, ) - -__all__ = ["main", "parse_args", "get_ll", "main_parser"] -def main(args: Optional[Union[List[str], argparse.Namespace]] = None): +def main(args: argparse.Namespace): """DeePMD-Kit entry point. 
Parameters @@ -56,46 +50,36 @@ def main(args: Optional[Union[List[str], argparse.Namespace]] = None): RuntimeError if no command was input """ - if args is not None: - clear_session() - - if not isinstance(args, argparse.Namespace): - args = parse_args(args=args) - - # do not set log handles for None, it is useless - # log handles for train will be set separatelly - # when the use of MPI will be determined in `RunOptions` - if args.command not in (None, "train"): - set_log_handles(args.log_level, Path(args.log_path) if args.log_path else None) + set_log_handles(args.log_level, Path(args.log_path) if args.log_path else None) dict_args = vars(args) - if args.command == "train": - train_dp(**dict_args) - elif args.command == "freeze": - freeze(**dict_args) - elif args.command == "test": + if args.command == "test": + dict_args["model"] = format_model_suffix( + dict_args["model"], + feature=Backend.Feature.DEEP_EVAL, + preferred_backend=args.backend, + strict_prefer=False, + ) test(**dict_args) - elif args.command == "transfer": - transfer(**dict_args) - elif args.command == "compress": - compress(**dict_args) elif args.command == "doc-train-input": doc_train_input(**dict_args) elif args.command == "model-devi": + dict_args["models"] = [ + format_model_suffix( + mm, + feature=Backend.Feature.DEEP_EVAL, + preferred_backend=args.backend, + strict_prefer=False, + ) + for mm in dict_args["models"] + ] make_model_devi(**dict_args) - elif args.command == "convert-from": - convert(**dict_args) elif args.command == "neighbor-stat": neighbor_stat(**dict_args) - elif args.command == "train-nvnmd": # nvnmd - train_nvnmd(**dict_args) elif args.command == "gui": start_dpgui(**dict_args) - elif args.command is None: - pass + elif args.command == "convert-backend": + convert_backend(**dict_args) else: - raise RuntimeError(f"unknown command {args.command}") - - if args is not None: - clear_session() + raise ValueError(f"Unknown command: {args.command}") diff --git 
a/deepmd/entrypoints/neighbor_stat.py b/deepmd/entrypoints/neighbor_stat.py index 28cab00ad2..a68a3fd3bb 100644 --- a/deepmd/entrypoints/neighbor_stat.py +++ b/deepmd/entrypoints/neighbor_stat.py @@ -4,15 +4,15 @@ List, ) +from deepmd.backend.backend import ( + Backend, +) from deepmd.common import ( expand_sys_str, ) from deepmd.utils.data_system import ( DeepmdDataSystem, ) -from deepmd.utils.neighbor_stat import ( - NeighborStat, -) log = logging.getLogger(__name__) @@ -22,7 +22,8 @@ def neighbor_stat( system: str, rcut: float, type_map: List[str], - one_type: bool = False, + mixed_type: bool = False, + backend: str = "tensorflow", **kwargs, ): """Calculate neighbor statistics. @@ -35,17 +36,48 @@ def neighbor_stat( cutoff radius type_map : list[str] type map - one_type : bool, optional, default=False + mixed_type : bool, optional, default=False treat all types as a single type + backend : str, optional, default="tensorflow" + backend to use **kwargs additional arguments Examples -------- - >>> neighbor_stat(system='.', rcut=6., type_map=["C", "H", "O", "N", "P", "S", "Mg", "Na", "HW", "OW", "mNa", "mCl", "mC", "mH", "mMg", "mN", "mO", "mP"]) + >>> neighbor_stat( + ... system=".", + ... rcut=6.0, + ... type_map=[ + ... "C", + ... "H", + ... "O", + ... "N", + ... "P", + ... "S", + ... "Mg", + ... "Na", + ... "HW", + ... "OW", + ... "mNa", + ... "mCl", + ... "mC", + ... "mH", + ... "mMg", + ... "mN", + ... "mO", + ... "mP", + ... ], + ... 
) min_nbor_dist: 0.6599510670195264 max_nbor_size: [23, 26, 19, 16, 2, 2, 1, 1, 72, 37, 5, 0, 31, 29, 1, 21, 20, 5] """ + backends = Backend.get_backends_by_feature(Backend.Feature.NEIGHBOR_STAT) + try: + backend_obj = backends[backend]() + except KeyError: + raise ValueError(f"Invalid backend {backend}") + NeighborStat = backend_obj.neighbor_stat all_sys = expand_sys_str(system) if not len(all_sys): raise RuntimeError("Did not find valid system") @@ -57,7 +89,7 @@ def neighbor_stat( type_map=type_map, ) data.get_batch() - nei = NeighborStat(data.get_ntypes(), rcut, one_type=one_type) + nei = NeighborStat(data.get_ntypes(), rcut, mixed_type=mixed_type) min_nbor_dist, max_nbor_size = nei.get_stat(data) log.info("min_nbor_dist: %f" % min_nbor_dist) log.info("max_nbor_size: %s" % str(max_nbor_size)) diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py index 4658b16e7c..cad6e12d2b 100644 --- a/deepmd/entrypoints/test.py +++ b/deepmd/entrypoints/test.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Test trained DeePMD model.""" + import logging from pathlib import ( Path, @@ -14,12 +15,28 @@ import numpy as np -from deepmd import ( - DeepPotential, -) from deepmd.common import ( expand_sys_str, ) +from deepmd.infer.deep_dipole import ( + DeepDipole, +) +from deepmd.infer.deep_dos import ( + DeepDOS, +) +from deepmd.infer.deep_eval import ( + DeepEval, +) +from deepmd.infer.deep_polar import ( + DeepGlobalPolar, + DeepPolar, +) +from deepmd.infer.deep_pot import ( + DeepPot, +) +from deepmd.infer.deep_wfc import ( + DeepWFC, +) from deepmd.utils import random as dp_random from deepmd.utils.data import ( DeepmdData, @@ -29,14 +46,14 @@ ) if TYPE_CHECKING: - from deepmd.infer import ( + from deepmd.tf.infer import ( DeepDipole, DeepDOS, DeepPolar, DeepPot, DeepWFC, ) - from deepmd.infer.deep_tensor import ( + from deepmd.tf.infer.deep_tensor import ( DeepTensor, ) @@ -56,6 +73,7 @@ def test( shuffle_test: bool, detail_file: str, atomic: 
bool, + head: Optional[str] = None, **kwargs, ): """Test model predictions. @@ -80,6 +98,8 @@ def test( file where test details will be output atomic : bool whether per atom quantities should be computed + head : Optional[str], optional + (Supported backend: PyTorch) Task head to test if in multi-task mode. **kwargs additional arguments @@ -92,9 +112,8 @@ def test( # only float has inf, but should work for min numb_test = float("inf") if datafile is not None: - datalist = open(datafile) - all_sys = datalist.read().splitlines() - datalist.close() + with open(datafile) as datalist: + all_sys = datalist.read().splitlines() else: all_sys = expand_sys_str(system) @@ -108,14 +127,14 @@ def test( dp_random.seed(rand_seed % (2**32)) # init model - dp = DeepPotential(model) + dp = DeepEval(model, head=head) for cc, system in enumerate(all_sys): log.info("# ---------------output of dp test--------------- ") log.info(f"# testing system : {system}") # create data class - tmap = dp.get_type_map() if dp.model_type == "ener" else None + tmap = dp.get_type_map() if isinstance(dp, DeepPot) else None data = DeepmdData( system, set_prefix, @@ -124,7 +143,7 @@ def test( sort_atoms=False, ) - if dp.model_type == "ener": + if isinstance(dp, DeepPot): err = test_ener( dp, data, @@ -134,7 +153,7 @@ def test( atomic, append_detail=(cc != 0), ) - elif dp.model_type == "dos": + elif isinstance(dp, DeepDOS): err = test_dos( dp, data, @@ -144,11 +163,11 @@ def test( atomic, append_detail=(cc != 0), ) - elif dp.model_type == "dipole": + elif isinstance(dp, DeepDipole): err = test_dipole(dp, data, numb_test, detail_file, atomic) - elif dp.model_type == "polar": + elif isinstance(dp, DeepPolar): err = test_polar(dp, data, numb_test, detail_file, atomic=atomic) - elif dp.model_type == "global_polar": # should not appear in this new version + elif isinstance(dp, DeepGlobalPolar): # should not appear in this new version log.warning( "Global polar model is not currently supported. 
Please directly use the polar mode and change loss parameters." ) @@ -166,17 +185,17 @@ def test( if len(all_sys) > 1: log.info("# ----------weighted average of errors----------- ") log.info(f"# number of systems : {len(all_sys)}") - if dp.model_type == "ener": + if isinstance(dp, DeepPot): print_ener_sys_avg(avg_err) - elif dp.model_type == "dos": + elif isinstance(dp, DeepDOS): print_dos_sys_avg(avg_err) - elif dp.model_type == "dipole": + elif isinstance(dp, DeepDipole): print_dipole_sys_avg(avg_err) - elif dp.model_type == "polar": + elif isinstance(dp, DeepPolar): print_polar_sys_avg(avg_err) - elif dp.model_type == "global_polar": + elif isinstance(dp, DeepGlobalPolar): print_polar_sys_avg(avg_err) - elif dp.model_type == "wfc": + elif isinstance(dp, DeepWFC): print_wfc_sys_avg(avg_err) log.info("# ----------------------------------------------- ") @@ -280,6 +299,9 @@ def test_ener( ) if dp.get_dim_aparam() > 0: data.add("aparam", dp.get_dim_aparam(), atomic=True, must=True, high_prec=False) + if dp.has_spin: + data.add("spin", 3, atomic=True, must=True, high_prec=False) + data.add("force_mag", 3, atomic=True, must=False, high_prec=False) test_data = data.get_test() mixed_type = data.mixed_type @@ -293,6 +315,10 @@ def test_ener( efield = test_data["efield"][:numb_test].reshape([numb_test, -1]) else: efield = None + if dp.has_spin: + spin = test_data["spin"][:numb_test].reshape([numb_test, -1]) + else: + spin = None if not data.pbc: box = None if mixed_type: @@ -317,6 +343,7 @@ def test_ener( atomic=has_atom_ener, efield=efield, mixed_type=mixed_type, + spin=spin, ) energy = ret[0] force = ret[1] @@ -329,26 +356,50 @@ def test_ener( av = ret[4] ae = ae.reshape([numb_test, -1]) av = av.reshape([numb_test, -1]) - if dp.get_ntypes_spin() != 0: - ntypes_real = dp.get_ntypes() - dp.get_ntypes_spin() - nloc = natoms - nloc_real = sum([np.count_nonzero(atype == ii) for ii in range(ntypes_real)]) - force_r = np.split( - force, indices_or_sections=[nloc_real *
3, nloc * 3], axis=1 - )[0] - force_m = np.split( - force, indices_or_sections=[nloc_real * 3, nloc * 3], axis=1 - )[1] - test_force_r = np.split( - test_data["force"][:numb_test], - indices_or_sections=[nloc_real * 3, nloc * 3], - axis=1, - )[0] - test_force_m = np.split( - test_data["force"][:numb_test], - indices_or_sections=[nloc_real * 3, nloc * 3], - axis=1, - )[1] + if dp.has_spin: + force_m = ret[5] + force_m = force_m.reshape([numb_test, -1]) + mask_mag = ret[6] + mask_mag = mask_mag.reshape([numb_test, -1]) + else: + if dp.has_spin: + force_m = ret[3] + force_m = force_m.reshape([numb_test, -1]) + mask_mag = ret[4] + mask_mag = mask_mag.reshape([numb_test, -1]) + out_put_spin = dp.get_ntypes_spin() != 0 or dp.has_spin + if out_put_spin: + if dp.get_ntypes_spin() != 0: # old tf support for spin + ntypes_real = dp.get_ntypes() - dp.get_ntypes_spin() + nloc = natoms + nloc_real = sum( + [np.count_nonzero(atype == ii) for ii in range(ntypes_real)] + ) + force_r = np.split( + force, indices_or_sections=[nloc_real * 3, nloc * 3], axis=1 + )[0] + force_m = np.split( + force, indices_or_sections=[nloc_real * 3, nloc * 3], axis=1 + )[1] + test_force_r = np.split( + test_data["force"][:numb_test], + indices_or_sections=[nloc_real * 3, nloc * 3], + axis=1, + )[0] + test_force_m = np.split( + test_data["force"][:numb_test], + indices_or_sections=[nloc_real * 3, nloc * 3], + axis=1, + )[1] + else: # pt support for spin + force_r = force + test_force_r = test_data["force"][:numb_test] + # The shape of force_m and test_force_m are [-1, 3], + # which is designed for mixed_type cases + force_m = force_m.reshape(-1, 3)[mask_mag.reshape(-1)] + test_force_m = test_data["force_mag"][:numb_test].reshape(-1, 3)[ + mask_mag.reshape(-1) + ] diff_e = energy - test_data["energy"][:numb_test].reshape([-1, 1]) mae_e = mae(diff_e) @@ -367,7 +418,7 @@ def test_ener( diff_ae = test_data["atom_ener"][:numb_test].reshape([-1]) - ae.reshape([-1]) mae_ae = mae(diff_ae) rmse_ae = 
rmse(diff_ae) - if dp.get_ntypes_spin() != 0: + if out_put_spin: mae_fr = mae(force_r - test_force_r) mae_fm = mae(force_m - test_force_m) rmse_fr = rmse(force_r - test_force_r) @@ -378,16 +429,16 @@ def test_ener( log.info(f"Energy RMSE : {rmse_e:e} eV") log.info(f"Energy MAE/Natoms : {mae_ea:e} eV") log.info(f"Energy RMSE/Natoms : {rmse_ea:e} eV") - if dp.get_ntypes_spin() == 0: + if not out_put_spin: log.info(f"Force MAE : {mae_f:e} eV/A") log.info(f"Force RMSE : {rmse_f:e} eV/A") else: log.info(f"Force atom MAE : {mae_fr:e} eV/A") - log.info(f"Force spin MAE : {mae_fm:e} eV/uB") log.info(f"Force atom RMSE : {rmse_fr:e} eV/A") + log.info(f"Force spin MAE : {mae_fm:e} eV/uB") log.info(f"Force spin RMSE : {rmse_fm:e} eV/uB") - if data.pbc: + if data.pbc and not out_put_spin: log.info(f"Virial MAE : {mae_v:e} eV") log.info(f"Virial RMSE : {rmse_v:e} eV") log.info(f"Virial MAE/Natoms : {mae_va:e} eV") @@ -419,7 +470,7 @@ def test_ener( header="%s: data_e pred_e" % system, append=append_detail, ) - if dp.get_ntypes_spin() == 0: + if not out_put_spin: pf = np.concatenate( ( np.reshape(test_data["force"][:numb_test], [-1, 3]), @@ -479,7 +530,7 @@ def test_ener( "pred_vyy pred_vyz pred_vzx pred_vzy pred_vzz", append=append_detail, ) - if dp.get_ntypes_spin() == 0: + if not out_put_spin: return { "mae_e": (mae_e, energy.size), "mae_ea": (mae_ea, energy.size), @@ -842,6 +893,10 @@ def test_polar( rmse_fs = rmse_f / np.sqrt(sel_natoms) rmse_fa = rmse_f / sel_natoms else: + sel_mask = np.isin(atype, sel_type) + polar = polar.reshape((polar.shape[0], -1, 9))[:, sel_mask, :].reshape( + (polar.shape[0], -1) + ) rmse_f = rmse(polar - test_data["atomic_polarizability"][:numb_test]) log.info(f"# number of test data : {numb_test:d} ") @@ -978,6 +1033,10 @@ def test_dipole( rmse_fs = rmse_f / np.sqrt(sel_natoms) rmse_fa = rmse_f / sel_natoms else: + sel_mask = np.isin(atype, sel_type) + dipole = dipole.reshape((dipole.shape[0], -1, 3))[:, sel_mask, :].reshape( + (dipole.shape[0], 
-1) + ) rmse_f = rmse(dipole - test_data["atomic_dipole"][:numb_test]) log.info(f"# number of test data : {numb_test:d}") diff --git a/deepmd/env.py b/deepmd/env.py index f290dc0a90..8215de39ac 100644 --- a/deepmd/env.py +++ b/deepmd/env.py @@ -1,219 +1,55 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -"""Module that sets tensorflow working environment and exports inportant constants.""" - -import ctypes import logging import os -import platform from configparser import ( ConfigParser, ) -from importlib import ( - import_module, - reload, -) from pathlib import ( Path, ) from typing import ( - TYPE_CHECKING, - Any, Dict, Tuple, ) import numpy as np -from packaging.version import ( - Version, -) import deepmd.lib -from deepmd_utils.env import ( - GLOBAL_ENER_FLOAT_PRECISION, - GLOBAL_NP_FLOAT_PRECISION, - global_float_prec, -) - -if TYPE_CHECKING: - from types import ( - ModuleType, - ) - - -def dlopen_library(module: str, filename: str): - """Dlopen a library from a module. - - Parameters - ---------- - module : str - The module name. - filename : str - The library filename pattern. - """ - try: - m = import_module(module) - except ModuleNotFoundError: - pass - else: - libs = sorted(Path(m.__path__[0]).glob(filename)) - # hope that there is only one version installed... 
- if len(libs): - ctypes.CDLL(str(libs[0].absolute())) - - -# dlopen pip cuda library before tensorflow -if platform.system() == "Linux": - dlopen_library("nvidia.cuda_runtime.lib", "libcudart.so*") - dlopen_library("nvidia.cublas.lib", "libcublasLt.so*") - dlopen_library("nvidia.cublas.lib", "libcublas.so*") - dlopen_library("nvidia.cufft.lib", "libcufft.so*") - dlopen_library("nvidia.curand.lib", "libcurand.so*") - dlopen_library("nvidia.cusolver.lib", "libcusolver.so*") - dlopen_library("nvidia.cusparse.lib", "libcusparse.so*") - dlopen_library("nvidia.cudnn.lib", "libcudnn.so*") - - -# import tensorflow v1 compatability -try: - import tensorflow.compat.v1 as tf - - tf.disable_v2_behavior() -except ImportError: - import tensorflow as tf -try: - import tensorflow.compat.v2 as tfv2 -except ImportError: - tfv2 = None __all__ = [ - "GLOBAL_CONFIG", - "GLOBAL_TF_FLOAT_PRECISION", "GLOBAL_NP_FLOAT_PRECISION", "GLOBAL_ENER_FLOAT_PRECISION", "global_float_prec", - "global_cvt_2_tf_float", - "global_cvt_2_ener_float", - "MODEL_VERSION", - "SHARED_LIB_DIR", + "GLOBAL_CONFIG", "SHARED_LIB_MODULE", - "default_tf_session_config", - "reset_default_tf_session_config", - "op_module", - "op_grads_module", - "TRANSFER_PATTERN", - "FITTING_NET_PATTERN", - "EMBEDDING_NET_PATTERN", - "TYPE_EMBEDDING_PATTERN", - "ATTENTION_LAYER_PATTERN", - "REMOVE_SUFFIX_DICT", - "TF_VERSION", + "SHARED_LIB_DIR", ] +log = logging.getLogger(__name__) + + SHARED_LIB_MODULE = "lib" SHARED_LIB_DIR = Path(deepmd.lib.__path__[0]) CONFIG_FILE = SHARED_LIB_DIR / "run_config.ini" -# Python library version -try: - tf_py_version = tf.version.VERSION -except AttributeError: - tf_py_version = tf.__version__ - -EMBEDDING_NET_PATTERN = str( - r"filter_type_\d+/matrix_\d+_\d+|" - r"filter_type_\d+/bias_\d+_\d+|" - r"filter_type_\d+/idt_\d+_\d+|" - r"filter_type_all/matrix_\d+|" - r"filter_type_all/matrix_\d+_\d+|" - r"filter_type_all/matrix_\d+_\d+_\d+|" - r"filter_type_all/bias_\d+|" - 
r"filter_type_all/bias_\d+_\d+|" - r"filter_type_all/bias_\d+_\d+_\d+|" - r"filter_type_all/idt_\d+|" - r"filter_type_all/idt_\d+_\d+|" -) -FITTING_NET_PATTERN = str( - r"layer_\d+/matrix|" - r"layer_\d+_type_\d+/matrix|" - r"layer_\d+/bias|" - r"layer_\d+_type_\d+/bias|" - r"layer_\d+/idt|" - r"layer_\d+_type_\d+/idt|" - r"final_layer/matrix|" - r"final_layer_type_\d+/matrix|" - r"final_layer/bias|" - r"final_layer_type_\d+/bias|" - # layer_name - r"share_.+_type_\d/matrix|" - r"share_.+_type_\d/bias|" - r"share_.+_type_\d/idt|" - r"share_.+/matrix|" - r"share_.+/bias|" - r"share_.+/idt|" -) - -TYPE_EMBEDDING_PATTERN = str( - r"type_embed_net+/matrix_\d+|" - r"type_embed_net+/bias_\d+|" - r"type_embed_net+/idt_\d+|" -) - -ATTENTION_LAYER_PATTERN = str( - r"attention_layer_\d+/c_query/matrix|" - r"attention_layer_\d+/c_query/bias|" - r"attention_layer_\d+/c_key/matrix|" - r"attention_layer_\d+/c_key/bias|" - r"attention_layer_\d+/c_value/matrix|" - r"attention_layer_\d+/c_value/bias|" - r"attention_layer_\d+/c_out/matrix|" - r"attention_layer_\d+/c_out/bias|" - r"attention_layer_\d+/layer_normalization/beta|" - r"attention_layer_\d+/layer_normalization/gamma|" - r"attention_layer_\d+/layer_normalization_\d+/beta|" - r"attention_layer_\d+/layer_normalization_\d+/gamma|" -) - -TRANSFER_PATTERN = ( - EMBEDDING_NET_PATTERN - + FITTING_NET_PATTERN - + TYPE_EMBEDDING_PATTERN - + str( - r"descrpt_attr/t_avg|" - r"descrpt_attr/t_std|" - r"fitting_attr/t_fparam_avg|" - r"fitting_attr/t_fparam_istd|" - r"fitting_attr/t_aparam_avg|" - r"fitting_attr/t_aparam_istd|" - r"model_attr/t_tab_info|" - r"model_attr/t_tab_data|" +# FLOAT_PREC +dp_float_prec = os.environ.get("DP_INTERFACE_PREC", "high").lower() +if dp_float_prec in ("high", ""): + # default is high + GLOBAL_NP_FLOAT_PRECISION = np.float64 + GLOBAL_ENER_FLOAT_PRECISION = np.float64 + global_float_prec = "double" +elif dp_float_prec == "low": + GLOBAL_NP_FLOAT_PRECISION = np.float32 + GLOBAL_ENER_FLOAT_PRECISION = 
np.float64 + global_float_prec = "float" +else: + raise RuntimeError( + "Unsupported float precision option: %s. Supported: high," + "low. Please set precision with environmental variable " + "DP_INTERFACE_PREC." % dp_float_prec ) -) - -REMOVE_SUFFIX_DICT = { - "model_attr/sel_type_{}": "model_attr/sel_type", - "model_attr/output_dim_{}": "model_attr/output_dim", - "_{}/": "/", - # when atom_ener is set - "_{}_1/": "_1/", - "o_energy_{}": "o_energy", - "o_force_{}": "o_force", - "o_virial_{}": "o_virial", - "o_atom_energy_{}": "o_atom_energy", - "o_atom_virial_{}": "o_atom_virial", - "o_dipole_{}": "o_dipole", - "o_global_dipole_{}": "o_global_dipole", - "o_polar_{}": "o_polar", - "o_global_polar_{}": "o_global_polar", - "o_rmat_{}": "o_rmat", - "o_rmat_deriv_{}": "o_rmat_deriv", - "o_nlist_{}": "o_nlist", - "o_rij_{}": "o_rij", - "o_dm_force_{}": "o_dm_force", - "o_dm_virial_{}": "o_dm_virial", - "o_dm_av_{}": "o_dm_av", - "o_wfc_{}": "o_wfc", -} def set_env_if_empty(key: str, value: str, verbose: bool = True): @@ -231,224 +67,68 @@ def set_env_if_empty(key: str, value: str, verbose: bool = True): if os.environ.get(key) is None: os.environ[key] = value if verbose: - logging.warning( + log.warning( f"Environment variable {key} is empty. Use the default value {value}" ) -def set_mkl(): - """Tuning MKL for the best performance. 
- - References - ---------- - TF overview - https://www.tensorflow.org/guide/performance/overview - - Fixing an issue in numpy built by MKL - https://github.com/ContinuumIO/anaconda-issues/issues/11367 - https://github.com/numpy/numpy/issues/12374 - - check whether the numpy is built by mkl, see - https://github.com/numpy/numpy/issues/14751 - """ - try: - is_mkl = ( - np.show_config("dicts") - .get("Build Dependencies", {}) - .get("blas", {}) - .get("name", "") - .lower() - .startswith("mkl") - ) - except TypeError: - is_mkl = "mkl_rt" in np.__config__.get_info("blas_mkl_info").get( - "libraries", [] - ) - if is_mkl: - set_env_if_empty("KMP_BLOCKTIME", "0") - set_env_if_empty("KMP_AFFINITY", "granularity=fine,verbose,compact,1,0") - reload(np) - - -def set_tf_default_nthreads(): - """Set TF internal number of threads to default=automatic selection. +def set_default_nthreads(): + """Set internal number of threads to default=automatic selection. Notes ----- - `TF_INTRA_OP_PARALLELISM_THREADS` and `TF_INTER_OP_PARALLELISM_THREADS` - control TF configuration of multithreading. + `DP_INTRA_OP_PARALLELISM_THREADS` and `DP_INTER_OP_PARALLELISM_THREADS` + control configuration of multithreading. """ if ( "OMP_NUM_THREADS" not in os.environ - or "TF_INTRA_OP_PARALLELISM_THREADS" not in os.environ - or "TF_INTER_OP_PARALLELISM_THREADS" not in os.environ + # for backward compatibility + or ( + "DP_INTRA_OP_PARALLELISM_THREADS" not in os.environ + and "TF_INTRA_OP_PARALLELISM_THREADS" not in os.environ + ) + or ( + "DP_INTER_OP_PARALLELISM_THREADS" not in os.environ + and "TF_INTER_OP_PARALLELISM_THREADS" not in os.environ + ) ): - logging.warning( + log.warning( "To get the best performance, it is recommended to adjust " "the number of threads by setting the environment variables " - "OMP_NUM_THREADS, TF_INTRA_OP_PARALLELISM_THREADS, and " - "TF_INTER_OP_PARALLELISM_THREADS. See " + "OMP_NUM_THREADS, DP_INTRA_OP_PARALLELISM_THREADS, and " + "DP_INTER_OP_PARALLELISM_THREADS. 
See " "https://deepmd.rtfd.io/parallelism/ for more information." ) - set_env_if_empty("TF_INTRA_OP_PARALLELISM_THREADS", "0", verbose=False) - set_env_if_empty("TF_INTER_OP_PARALLELISM_THREADS", "0", verbose=False) + if "TF_INTRA_OP_PARALLELISM_THREADS" not in os.environ: + set_env_if_empty("DP_INTRA_OP_PARALLELISM_THREADS", "0", verbose=False) + if "TF_INTER_OP_PARALLELISM_THREADS" not in os.environ: + set_env_if_empty("DP_INTER_OP_PARALLELISM_THREADS", "0", verbose=False) -def get_tf_default_nthreads() -> Tuple[int, int]: - """Get TF paralellism settings. +def get_default_nthreads() -> Tuple[int, int]: + """Get parallelism settings. - Returns - ------- - Tuple[int, int] - number of `TF_INTRA_OP_PARALLELISM_THREADS` and - `TF_INTER_OP_PARALLELISM_THREADS` - """ - return int(os.environ.get("TF_INTRA_OP_PARALLELISM_THREADS", "0")), int( - os.environ.get("TF_INTER_OP_PARALLELISM_THREADS", "0") - ) - - -def get_tf_session_config() -> Any: - """Configure tensorflow session. + The method will first read the environment variables with the prefix `DP_`. + If not found, it will read the environment variables with the prefix `TF_` + for backward compatibility.
Returns ------- - Any - session configure object + Tuple[int, int] + number of `DP_INTRA_OP_PARALLELISM_THREADS` and + `DP_INTER_OP_PARALLELISM_THREADS` """ - set_tf_default_nthreads() - intra, inter = get_tf_default_nthreads() - if int(os.environ.get("DP_JIT", 0)): - set_env_if_empty("TF_XLA_FLAGS", "--tf_xla_auto_jit=2") - # pip cuda package - if platform.system() == "Linux": - try: - m = import_module("nvidia.cuda_nvcc") - except ModuleNotFoundError: - pass - else: - cuda_data_dir = str(Path(m.__file__).parent.absolute()) - set_env_if_empty( - "XLA_FLAGS", "--xla_gpu_cuda_data_dir=" + cuda_data_dir - ) - config = tf.ConfigProto( - gpu_options=tf.GPUOptions(allow_growth=True), - intra_op_parallelism_threads=intra, - inter_op_parallelism_threads=inter, + return int( + os.environ.get( + "DP_INTRA_OP_PARALLELISM_THREADS", + os.environ.get("TF_INTRA_OP_PARALLELISM_THREADS", "0"), + ) + ), int( + os.environ.get( + "DP_INTER_OP_PARALLELISM_THREADS", + os.environ.get("TF_INTER_OP_PARALLELISM_THREADS", "0"), + ) ) - if Version(tf_py_version) >= Version("1.15") and int( - os.environ.get("DP_AUTO_PARALLELIZATION", 0) - ): - config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel" - return config - - -default_tf_session_config = get_tf_session_config() - - -def reset_default_tf_session_config(cpu_only: bool): - """Limit tensorflow session to CPU or not. - - Parameters - ---------- - cpu_only : bool - If enabled, no GPU device is visible to the TensorFlow Session. - """ - global default_tf_session_config - if cpu_only: - default_tf_session_config.device_count["GPU"] = 0 - else: - if "GPU" in default_tf_session_config.device_count: - del default_tf_session_config.device_count["GPU"] - - -def get_module(module_name: str) -> "ModuleType": - """Load force module.
- - Returns - ------- - ModuleType - loaded force module - - Raises - ------ - FileNotFoundError - if module is not found in directory - """ - if platform.system() == "Windows": - ext = ".dll" - prefix = "" - # elif platform.system() == "Darwin": - # ext = ".dylib" - else: - ext = ".so" - prefix = "lib" - - module_file = (SHARED_LIB_DIR / (prefix + module_name)).with_suffix(ext).resolve() - - if not module_file.is_file(): - raise FileNotFoundError(f"module {module_name} does not exist") - else: - try: - module = tf.load_op_library(str(module_file)) - except tf.errors.NotFoundError as e: - # check CXX11_ABI_FLAG is compatiblity - # see https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html - # ABI should be the same - if "CXX11_ABI_FLAG" in tf.__dict__: - tf_cxx11_abi_flag = tf.CXX11_ABI_FLAG - else: - tf_cxx11_abi_flag = tf.sysconfig.CXX11_ABI_FLAG - if TF_CXX11_ABI_FLAG != tf_cxx11_abi_flag: - raise RuntimeError( - "This deepmd-kit package was compiled with " - "CXX11_ABI_FLAG=%d, but TensorFlow runtime was compiled " - "with CXX11_ABI_FLAG=%d. These two library ABIs are " - "incompatible and thus an error is raised when loading %s. " - "You need to rebuild deepmd-kit against this TensorFlow " - "runtime." - % ( - TF_CXX11_ABI_FLAG, - tf_cxx11_abi_flag, - module_name, - ) - ) from e - - # different versions may cause incompatibility - # see #406, #447, #557, #774, and #796 for example - # throw a message if versions are different - if TF_VERSION != tf_py_version: - raise RuntimeError( - "The version of TensorFlow used to compile this " - "deepmd-kit package is {}, but the version of TensorFlow " - "runtime you are using is {}. These two versions are " - "incompatible and thus an error is raised when loading {}. 
" - "You need to install TensorFlow {}, or rebuild deepmd-kit " - "against TensorFlow {}.\nIf you are using a wheel from " - "pypi, you may consider to install deepmd-kit execuating " - "`pip install deepmd-kit --no-binary deepmd-kit` " - "instead.".format( - TF_VERSION, - tf_py_version, - module_name, - TF_VERSION, - tf_py_version, - ) - ) from e - error_message = ( - "This deepmd-kit package is inconsitent with TensorFlow " - f"Runtime, thus an error is raised when loading {module_name}. " - "You need to rebuild deepmd-kit against this TensorFlow " - "runtime." - ) - if TF_CXX11_ABI_FLAG == 1: - # #1791 - error_message += ( - "\nWARNING: devtoolset on RHEL6 and RHEL7 does not support _GLIBCXX_USE_CXX11_ABI=1. " - "See https://bugzilla.redhat.com/show_bug.cgi?id=1546704" - ) - raise RuntimeError(error_message) from e - return module def _get_package_constants( @@ -466,50 +146,14 @@ def _get_package_constants( Dict[str, str] dictionary with package constants """ + if not config_file.is_file(): + raise FileNotFoundError( + f"CONFIG file not found at {config_file}. " + "Please check if the package is installed correctly." + ) config = ConfigParser() config.read(config_file) return dict(config.items("CONFIG")) GLOBAL_CONFIG = _get_package_constants() -MODEL_VERSION = GLOBAL_CONFIG["model_version"] -TF_VERSION = GLOBAL_CONFIG["tf_version"] -TF_CXX11_ABI_FLAG = int(GLOBAL_CONFIG["tf_cxx11_abi_flag"]) - -op_module = get_module("deepmd_op") -op_grads_module = get_module("op_grads") - -# FLOAT_PREC -GLOBAL_TF_FLOAT_PRECISION = tf.dtypes.as_dtype(GLOBAL_NP_FLOAT_PRECISION) - - -def global_cvt_2_tf_float(xx: tf.Tensor) -> tf.Tensor: - """Cast tensor to globally set TF precision. 
- - Parameters - ---------- - xx : tf.Tensor - input tensor - - Returns - ------- - tf.Tensor - output tensor cast to `GLOBAL_TF_FLOAT_PRECISION` - """ - return tf.cast(xx, GLOBAL_TF_FLOAT_PRECISION) - - -def global_cvt_2_ener_float(xx: tf.Tensor) -> tf.Tensor: - """Cast tensor to globally set energy precision. - - Parameters - ---------- - xx : tf.Tensor - input tensor - - Returns - ------- - tf.Tensor - output tensor cast to `GLOBAL_ENER_FLOAT_PRECISION` - """ - return tf.cast(xx, GLOBAL_ENER_FLOAT_PRECISION) diff --git a/deepmd/fit/fitting.py b/deepmd/fit/fitting.py deleted file mode 100644 index a467ec1201..0000000000 --- a/deepmd/fit/fitting.py +++ /dev/null @@ -1,104 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -from abc import ( - abstractmethod, -) -from typing import ( - Callable, -) - -from deepmd.env import ( - tf, -) -from deepmd.loss.loss import ( - Loss, -) -from deepmd.utils import ( - Plugin, - PluginVariant, -) - - -class Fitting(PluginVariant): - __plugins = Plugin() - - @staticmethod - def register(key: str) -> Callable: - """Register a Fitting plugin. 
- - Parameters - ---------- - key : str - the key of a Fitting - - Returns - ------- - Fitting - the registered Fitting - - Examples - -------- - >>> @Fitting.register("some_fitting") - class SomeFitting(Fitting): - pass - """ - return Fitting.__plugins.register(key) - - def __new__(cls, *args, **kwargs): - if cls is Fitting: - try: - fitting_type = kwargs["type"] - except KeyError: - raise KeyError("the type of fitting should be set by `type`") - if fitting_type in Fitting.__plugins.plugins: - cls = Fitting.__plugins.plugins[fitting_type] - else: - raise RuntimeError("Unknown descriptor type: " + fitting_type) - return super().__new__(cls) - - @property - def precision(self) -> tf.DType: - """Precision of fitting network.""" - return self.fitting_precision - - def init_variables( - self, - graph: tf.Graph, - graph_def: tf.GraphDef, - suffix: str = "", - ) -> None: - """Init the fitting net variables with the given dict. - - Parameters - ---------- - graph : tf.Graph - The input frozen model graph - graph_def : tf.GraphDef - The input frozen model graph_def - suffix : str - suffix to name scope - - Notes - ----- - This method is called by others when the fitting supported initialization from the given variables. - """ - raise NotImplementedError( - "Fitting %s doesn't support initialization from the given variables!" - % type(self).__name__ - ) - - @abstractmethod - def get_loss(self, loss: dict, lr) -> Loss: - """Get the loss function. 
- - Parameters - ---------- - loss : dict - the loss dict - lr : LearningRateExp - the learning rate - - Returns - ------- - Loss - the loss function - """ diff --git a/deepmd/infer/__init__.py b/deepmd/infer/__init__.py index c1071af35c..5678494023 100644 --- a/deepmd/infer/__init__.py +++ b/deepmd/infer/__init__.py @@ -1,146 +1,35 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -"""Submodule containing all the implemented potentials.""" - -from pathlib import ( - Path, -) -from typing import ( - Optional, - Union, -) - -from .data_modifier import ( - DipoleChargeModifier, -) -from .deep_dipole import ( - DeepDipole, -) -from .deep_dos import ( - DeepDOS, -) from .deep_eval import ( DeepEval, ) -from .deep_polar import ( - DeepGlobalPolar, - DeepPolar, -) from .deep_pot import ( DeepPot, ) -from .deep_wfc import ( - DeepWFC, -) -from .ewald_recp import ( - EwaldRecp, -) from .model_devi import ( calc_model_devi, ) __all__ = [ - "DeepPotential", - "DeepDipole", - "DeepEval", - "DeepGlobalPolar", - "DeepPolar", "DeepPot", - "DeepDOS", - "DeepWFC", - "DipoleChargeModifier", - "EwaldRecp", "calc_model_devi", + "DeepEval", + "DeepPotential", ] -def DeepPotential( - model_file: Union[str, Path], - load_prefix: str = "load", - default_tf_graph: bool = False, - input_map: Optional[dict] = None, - neighbor_list=None, -) -> Union[DeepDipole, DeepGlobalPolar, DeepPolar, DeepPot, DeepDOS, DeepWFC]: - """Factory function that will inialize appropriate potential read from `model_file`. +def DeepPotential(*args, **kwargs) -> "DeepEval": + """Factory function that forwards to DeepEval (for compatbility). Parameters ---------- - model_file : str - The name of the frozen model file. - load_prefix : str - The prefix in the load computational graph - default_tf_graph : bool - If uses the default tf graph, otherwise build a new tf graph for evaluation - input_map : dict, optional - The input map for tf.import_graph_def. 
Only work with default tf graph - neighbor_list : ase.neighborlist.NeighborList, optional - The neighbor list object. If None, then build the native neighbor list. + *args + positional arguments + **kwargs + keyword arguments Returns ------- - Union[DeepDipole, DeepGlobalPolar, DeepPolar, DeepPot, DeepWFC] - one of the available potentials - - Raises - ------ - RuntimeError - if model file does not correspond to any implementd potential + DeepEval + potentials """ - mf = Path(model_file) - - model_type = DeepEval( - mf, - load_prefix=load_prefix, - default_tf_graph=default_tf_graph, - input_map=input_map, - ).model_type - - if model_type == "ener": - dp = DeepPot( - mf, - load_prefix=load_prefix, - default_tf_graph=default_tf_graph, - input_map=input_map, - neighbor_list=neighbor_list, - ) - elif model_type == "dos": - dp = DeepDOS( - mf, - load_prefix=load_prefix, - default_tf_graph=default_tf_graph, - input_map=input_map, - ) - elif model_type == "dipole": - dp = DeepDipole( - mf, - load_prefix=load_prefix, - default_tf_graph=default_tf_graph, - input_map=input_map, - neighbor_list=neighbor_list, - ) - elif model_type == "polar": - dp = DeepPolar( - mf, - load_prefix=load_prefix, - default_tf_graph=default_tf_graph, - input_map=input_map, - neighbor_list=neighbor_list, - ) - elif model_type == "global_polar": - dp = DeepGlobalPolar( - mf, - load_prefix=load_prefix, - default_tf_graph=default_tf_graph, - input_map=input_map, - neighbor_list=neighbor_list, - ) - elif model_type == "wfc": - dp = DeepWFC( - mf, - load_prefix=load_prefix, - default_tf_graph=default_tf_graph, - input_map=input_map, - ) - else: - raise RuntimeError(f"unknown model type {model_type}") - - return dp + return DeepEval(*args, **kwargs) diff --git a/deepmd/infer/deep_dipole.py b/deepmd/infer/deep_dipole.py index aba098a9f3..b443b54417 100644 --- a/deepmd/infer/deep_dipole.py +++ b/deepmd/infer/deep_dipole.py @@ -1,73 +1,28 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( 
- TYPE_CHECKING, - Optional, -) - from deepmd.infer.deep_tensor import ( DeepTensor, ) -if TYPE_CHECKING: - from pathlib import ( - Path, - ) - class DeepDipole(DeepTensor): - """Constructor. + """Deep dipole model. Parameters ---------- model_file : Path The name of the frozen model file. - load_prefix: str - The prefix in the load computational graph - default_tf_graph : bool - If uses the default tf graph, otherwise build a new tf graph for evaluation - input_map : dict, optional - The input map for tf.import_graph_def. Only work with default tf graph - neighbor_list : ase.neighborlist.NeighborList, optional - The neighbor list object. If None, then build the native neighbor list. - - Warnings - -------- - For developers: `DeepTensor` initializer must be called at the end after - `self.tensors` are modified because it uses the data in `self.tensors` dict. - Do not chanage the order! + *args : list + Positional arguments. + auto_batch_size : bool or int or AutoBatchSize, default: True + If True, automatic batch size will be used. If int, it will be used + as the initial batch size. + neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional + The ASE neighbor list class to produce the neighbor list. If None, the + neighbor list will be built natively in the model. + **kwargs : dict + Keyword arguments. 
""" - def __init__( - self, - model_file: "Path", - load_prefix: str = "load", - default_tf_graph: bool = False, - input_map: Optional[dict] = None, - neighbor_list=None, - ) -> None: - # use this in favor of dict update to move attribute from class to - # instance namespace - self.tensors = dict( - { - # output tensor - "t_tensor": "o_dipole:0", - }, - **self.tensors, - ) - - DeepTensor.__init__( - self, - model_file, - load_prefix=load_prefix, - default_tf_graph=default_tf_graph, - input_map=input_map, - neighbor_list=neighbor_list, - ) - - def get_dim_fparam(self) -> int: - """Unsupported in this model.""" - raise NotImplementedError("This model type does not support this attribute") - - def get_dim_aparam(self) -> int: - """Unsupported in this model.""" - raise NotImplementedError("This model type does not support this attribute") + @property + def output_tensor_name(self) -> str: + return "dipole" diff --git a/deepmd/infer/deep_dos.py b/deepmd/infer/deep_dos.py index 5f181bd336..7823f02999 100644 --- a/deepmd/infer/deep_dos.py +++ b/deepmd/infer/deep_dos.py @@ -1,8 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -import logging from typing import ( - TYPE_CHECKING, - Callable, + Any, + Dict, List, Optional, Tuple, @@ -11,496 +10,140 @@ import numpy as np -from deepmd.common import ( - make_default_mesh, +from deepmd.dpmodel.output_def import ( + FittingOutputDef, + ModelOutputDef, + OutputVariableDef, ) -from deepmd.infer.deep_eval import ( + +from .deep_eval import ( DeepEval, ) -from deepmd.utils.batch_size import ( - AutoBatchSize, -) -from deepmd.utils.sess import ( - run_sess, -) - -if TYPE_CHECKING: - from pathlib import ( - Path, - ) - -log = logging.getLogger(__name__) class DeepDOS(DeepEval): - """Constructor. + """Deep density of states model. Parameters ---------- model_file : Path The name of the frozen model file. 
- load_prefix: str - The prefix in the load computational graph - default_tf_graph : bool - If uses the default tf graph, otherwise build a new tf graph for evaluation - auto_batch_size : bool or int or AutomaticBatchSize, default: True + *args : list + Positional arguments. + auto_batch_size : bool or int or AutoBatchSize, default: True If True, automatic batch size will be used. If int, it will be used as the initial batch size. - input_map : dict, optional - The input map for tf.import_graph_def. Only work with default tf graph - - Warnings - -------- - For developers: `DeepTensor` initializer must be called at the end after - `self.tensors` are modified because it uses the data in `self.tensors` dict. - Do not chanage the order! + neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional + The ASE neighbor list class to produce the neighbor list. If None, the + neighbor list will be built natively in the model. + **kwargs : dict + Keyword arguments. """ - def __init__( - self, - model_file: "Path", - load_prefix: str = "load", - default_tf_graph: bool = False, - auto_batch_size: Union[bool, int, AutoBatchSize] = True, - input_map: Optional[dict] = None, - ) -> None: - # add these tensors on top of what is defined by DeepTensor Class - # use this in favor of dict update to move attribute from class to - # instance namespace - self.tensors = { - # descrpt attrs - "t_ntypes": "descrpt_attr/ntypes:0", - "t_rcut": "descrpt_attr/rcut:0", - # fitting attrs - "t_dfparam": "fitting_attr/dfparam:0", - "t_daparam": "fitting_attr/daparam:0", - "t_numb_dos": "fitting_attr/numb_dos:0", - # model attrs - "t_tmap": "model_attr/tmap:0", - # inputs - "t_coord": "t_coord:0", - "t_type": "t_type:0", - "t_natoms": "t_natoms:0", - "t_box": "t_box:0", - "t_mesh": "t_mesh:0", - # add output tensors - "t_dos": "o_dos:0", - "t_atom_dos": "o_atom_dos:0", - "t_descriptor": "o_descriptor:0", - } - DeepEval.__init__( - self, - model_file, - load_prefix=load_prefix, - 
default_tf_graph=default_tf_graph, - auto_batch_size=auto_batch_size, - input_map=input_map, - ) - - # load optional tensors - operations = [op.name for op in self.graph.get_operations()] - # check if the graph has these operations: - # if yes add them - if "load/t_fparam" in operations: - self.tensors.update({"t_fparam": "t_fparam:0"}) - self.has_fparam = True - else: - log.debug("Could not get tensor 't_fparam:0'") - self.t_fparam = None - self.has_fparam = False - - if "load/t_aparam" in operations: - self.tensors.update({"t_aparam": "t_aparam:0"}) - self.has_aparam = True - else: - log.debug("Could not get tensor 't_aparam:0'") - self.t_aparam = None - self.has_aparam = False - - # now load tensors to object attributes - for attr_name, tensor_name in self.tensors.items(): - try: - self._get_tensor(tensor_name, attr_name) - except KeyError: - if attr_name != "t_descriptor": - raise - - self._run_default_sess() - self.tmap = self.tmap.decode("UTF-8").split() - - # setup modifier - try: - t_modifier_type = self._get_tensor("modifier_attr/type:0") - self.modifier_type = run_sess(self.sess, t_modifier_type).decode("UTF-8") - except (ValueError, KeyError): - self.modifier_type = None - - def _run_default_sess(self): - [ - self.ntypes, - self.rcut, - self.numb_dos, - self.dfparam, - self.daparam, - self.tmap, - ] = run_sess( - self.sess, - [ - self.t_ntypes, - self.t_rcut, - self.t_numb_dos, - self.t_dfparam, - self.t_daparam, - self.t_tmap, - ], + @property + def output_def(self) -> ModelOutputDef: + """Get the output definition of this model.""" + return ModelOutputDef( + FittingOutputDef( + [ + OutputVariableDef( + "dos", + shape=[-1], + reduciable=True, + atomic=True, + ), + ] + ) ) - def get_ntypes(self) -> int: - """Get the number of atom types of this model.""" - return self.ntypes - - def get_rcut(self) -> float: - """Get the cut-off radius of this model.""" - return self.rcut - - def get_numb_dos(self) -> int: - """Get the length of DOS output of this DP 
model.""" - return self.numb_dos - - def get_type_map(self) -> List[str]: - """Get the type map (element name of the atom types) of this model.""" - return self.tmap - - def get_sel_type(self) -> List[int]: - """Unsupported in this model.""" - raise NotImplementedError("This model type does not support this attribute") - - def get_dim_fparam(self) -> int: - """Get the number (dimension) of frame parameters of this DP.""" - return self.dfparam - - def get_dim_aparam(self) -> int: - """Get the number (dimension) of atomic parameters of this DP.""" - return self.daparam - - def _eval_func(self, inner_func: Callable, numb_test: int, natoms: int) -> Callable: - """Wrapper method with auto batch size. - - Parameters - ---------- - inner_func : Callable - the method to be wrapped - numb_test : int - number of tests - natoms : int - number of atoms - - Returns - ------- - Callable - the wrapper - """ - if self.auto_batch_size is not None: - - def eval_func(*args, **kwargs): - return self.auto_batch_size.execute_all( - inner_func, numb_test, natoms, *args, **kwargs - ) - - else: - eval_func = inner_func - return eval_func - - def _get_natoms_and_nframes( - self, - coords: np.ndarray, - atom_types: Union[List[int], np.ndarray], - mixed_type: bool = False, - ) -> Tuple[int, int]: - if mixed_type: - natoms = len(atom_types[0]) - else: - natoms = len(atom_types) - coords = np.reshape(np.array(coords), [-1, natoms * 3]) - nframes = coords.shape[0] - return natoms, nframes + @property + def numb_dos(self) -> int: + """Get the number of DOS.""" + return self.get_numb_dos() def eval( self, coords: np.ndarray, - cells: np.ndarray, - atom_types: List[int], + cells: Optional[np.ndarray], + atom_types: Union[List[int], np.ndarray], atomic: bool = False, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, mixed_type: bool = False, + **kwargs: Dict[str, Any], ) -> Tuple[np.ndarray, ...]: - """Evaluate the dos, atom_dos by using this model. 
+ """Evaluate energy, force, and virial. If atomic is True, + also return atomic energy and atomic virial. Parameters ---------- - coords - The coordinates of atoms. - The array should be of size nframes x natoms x 3 - cells - The cell of the region. - If None then non-PBC is assumed, otherwise using PBC. - The array should be of size nframes x 9 - atom_types - The atom types - The list should contain natoms ints - atomic - Calculate the atomic energy and virial - fparam - The frame parameter. - The array can be of size : - - nframes x dim_fparam. - - dim_fparam. Then all frames are assumed to be provided with the same fparam. - aparam - The atomic parameter - The array can be of size : - - nframes x natoms x dim_aparam. - - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam. - - dim_aparam. Then all frames and atoms are provided with the same aparam. - mixed_type - Whether to perform the mixed_type mode. - If True, the input data has the mixed_type format (see doc/model/train_se_atten.md), - in which frames in a system may have different natoms_vec(s), with the same nloc. + coords : np.ndarray + The coordinates of the atoms, in shape (nframes, natoms, 3). + cells : np.ndarray + The cell vectors of the system, in shape (nframes, 9). If the system + is not periodic, set it to None. + atom_types : List[int] or np.ndarray + The types of the atoms. If mixed_type is False, the shape is (natoms,); + otherwise, the shape is (nframes, natoms). + atomic : bool, optional + Whether to return atomic energy and atomic virial, by default False. + fparam : np.ndarray, optional + The frame parameters, by default None. + aparam : np.ndarray, optional + The atomic parameters, by default None. + mixed_type : bool, optional + Whether the atom_types is mixed type, by default False. + **kwargs : Dict[str, Any] + Keyword arguments. Returns ------- - dos - The electron density of state. - atom_dos - The atom-sited density of state. 
Only returned when atomic == True + energy + The energy of the system, in shape (nframes,). + force + The force of the system, in shape (nframes, natoms, 3). + virial + The virial of the system, in shape (nframes, 9). + atomic_energy + The atomic energy of the system, in shape (nframes, natoms). Only returned + when atomic is True. + atomic_virial + The atomic virial of the system, in shape (nframes, natoms, 9). Only returned + when atomic is True. """ - # reshape coords before getting shape - natoms, numb_test = self._get_natoms_and_nframes( - coords, atom_types, mixed_type=mixed_type - ) - output = self._eval_func(self._eval_inner, numb_test, natoms)( + ( coords, cells, atom_types, + fparam, + aparam, + nframes, + natoms, + ) = self._standard_input(coords, cells, atom_types, fparam, aparam, mixed_type) + results = self.deep_eval.eval( + coords, + cells, + atom_types, + atomic, fparam=fparam, aparam=aparam, - atomic=atomic, - mixed_type=mixed_type, - ) - - return output - - def _prepare_feed_dict( - self, - coords, - cells, - atom_types, - fparam=None, - aparam=None, - atomic=False, - mixed_type=False, - ): - # standarize the shape of inputs - natoms, nframes = self._get_natoms_and_nframes( - coords, atom_types, mixed_type=mixed_type - ) - if mixed_type: - atom_types = np.array(atom_types, dtype=int).reshape([-1, natoms]) - else: - atom_types = np.array(atom_types, dtype=int).reshape([-1]) - coords = np.reshape(np.array(coords), [-1, natoms * 3]) - if cells is None: - pbc = False - # make cells to work around the requirement of pbc - cells = np.tile(np.eye(3), [nframes, 1]).reshape([nframes, 9]) - else: - pbc = True - cells = np.array(cells).reshape([nframes, 9]) - - if self.has_fparam: - assert fparam is not None - fparam = np.array(fparam) - if self.has_aparam: - assert aparam is not None - aparam = np.array(aparam) - - # reshape the inputs - if self.has_fparam: - fdim = self.get_dim_fparam() - if fparam.size == nframes * fdim: - fparam = np.reshape(fparam, 
[nframes, fdim]) - elif fparam.size == fdim: - fparam = np.tile(fparam.reshape([-1]), [nframes, 1]) - else: - raise RuntimeError( - "got wrong size of frame param, should be either %d x %d or %d" - % (nframes, fdim, fdim) - ) - if self.has_aparam: - fdim = self.get_dim_aparam() - if aparam.size == nframes * natoms * fdim: - aparam = np.reshape(aparam, [nframes, natoms * fdim]) - elif aparam.size == natoms * fdim: - aparam = np.tile(aparam.reshape([-1]), [nframes, 1]) - elif aparam.size == fdim: - aparam = np.tile(aparam.reshape([-1]), [nframes, natoms]) - else: - raise RuntimeError( - "got wrong size of frame param, should be either %d x %d x %d or %d x %d or %d" - % (nframes, natoms, fdim, natoms, fdim, fdim) - ) - - # sort inputs - coords, atom_types, imap = self.sort_input( - coords, atom_types, mixed_type=mixed_type + **kwargs, ) + # energy = results["dos_redu"].reshape(nframes, self.get_numb_dos()) + atomic_energy = results["dos"].reshape(nframes, natoms, self.get_numb_dos()) + # not same as dos_redu... why? 
+ energy = np.sum(atomic_energy, axis=1) - # make natoms_vec and default_mesh - natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type) - assert natoms_vec[0] == natoms - - # evaluate - feed_dict_test = {} - feed_dict_test[self.t_natoms] = natoms_vec - if mixed_type: - feed_dict_test[self.t_type] = atom_types.reshape([-1]) - else: - feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape( - [-1] - ) - feed_dict_test[self.t_coord] = np.reshape(coords, [-1]) - - if len(self.t_box.shape) == 1: - feed_dict_test[self.t_box] = np.reshape(cells, [-1]) - elif len(self.t_box.shape) == 2: - feed_dict_test[self.t_box] = cells - else: - raise RuntimeError - feed_dict_test[self.t_mesh] = make_default_mesh(pbc, mixed_type) - if self.has_fparam: - feed_dict_test[self.t_fparam] = np.reshape(fparam, [-1]) - if self.has_aparam: - feed_dict_test[self.t_aparam] = np.reshape(aparam, [-1]) - return feed_dict_test, imap - - def _eval_inner( - self, - coords, - cells, - atom_types, - fparam=None, - aparam=None, - atomic=False, - mixed_type=False, - ): - natoms, nframes = self._get_natoms_and_nframes( - coords, atom_types, mixed_type=mixed_type - ) - feed_dict_test, imap = self._prepare_feed_dict( - coords, cells, atom_types, fparam, aparam, mixed_type=mixed_type - ) - t_out = [self.t_dos] - if atomic: - t_out += [self.t_atom_dos] - - v_out = run_sess(self.sess, t_out, feed_dict=feed_dict_test) - dos = v_out[0] - if atomic: - atom_dos = v_out[1] - - # reverse map of the outputs if atomic: - atom_dos = self.reverse_map( - np.reshape(atom_dos, [nframes, -1, self.numb_dos]), imap + return ( + energy, + atomic_energy, ) - dos = np.sum(atom_dos, axis=1) - - dos = np.reshape(dos, [nframes, self.numb_dos]) - if atomic: - atom_dos = np.reshape(atom_dos, [nframes, natoms, self.numb_dos]) - return dos, atom_dos else: - return dos + return (energy,) - def eval_descriptor( - self, - coords: np.ndarray, - cells: np.ndarray, - atom_types: List[int], - fparam: 
Optional[np.ndarray] = None, - aparam: Optional[np.ndarray] = None, - efield: Optional[np.ndarray] = None, - mixed_type: bool = False, - ) -> np.array: - """Evaluate descriptors by using this DP. - - Parameters - ---------- - coords - The coordinates of atoms. - The array should be of size nframes x natoms x 3 - cells - The cell of the region. - If None then non-PBC is assumed, otherwise using PBC. - The array should be of size nframes x 9 - atom_types - The atom types - The list should contain natoms ints - fparam - The frame parameter. - The array can be of size : - - nframes x dim_fparam. - - dim_fparam. Then all frames are assumed to be provided with the same fparam. - aparam - The atomic parameter - The array can be of size : - - nframes x natoms x dim_aparam. - - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam. - - dim_aparam. Then all frames and atoms are provided with the same aparam. - efield - The external field on atoms. - The array should be of size nframes x natoms x 3 - mixed_type - Whether to perform the mixed_type mode. - If True, the input data has the mixed_type format (see doc/model/train_se_atten.md), - in which frames in a system may have different natoms_vec(s), with the same nloc. + def get_numb_dos(self) -> int: + return self.deep_eval.get_numb_dos() - Returns - ------- - descriptor - Descriptors. 
- """ - natoms, numb_test = self._get_natoms_and_nframes( - coords, atom_types, mixed_type=mixed_type - ) - descriptor = self._eval_func(self._eval_descriptor_inner, numb_test, natoms)( - coords, - cells, - atom_types, - fparam=fparam, - aparam=aparam, - efield=efield, - mixed_type=mixed_type, - ) - return descriptor - def _eval_descriptor_inner( - self, - coords: np.ndarray, - cells: np.ndarray, - atom_types: List[int], - fparam: Optional[np.ndarray] = None, - aparam: Optional[np.ndarray] = None, - efield: Optional[np.ndarray] = None, - mixed_type: bool = False, - ) -> np.array: - natoms, nframes = self._get_natoms_and_nframes( - coords, atom_types, mixed_type=mixed_type - ) - feed_dict_test, imap = self._prepare_feed_dict( - coords, cells, atom_types, fparam, aparam, efield, mixed_type=mixed_type - ) - (descriptor,) = run_sess( - self.sess, [self.t_descriptor], feed_dict=feed_dict_test - ) - return self.reverse_map(np.reshape(descriptor, [nframes, natoms, -1]), imap) +__all__ = ["DeepDOS"] diff --git a/deepmd/infer/deep_eval.py b/deepmd/infer/deep_eval.py index 0ca9f21a77..aae2082e13 100644 --- a/deepmd/infer/deep_eval.py +++ b/deepmd/infer/deep_eval.py @@ -1,235 +1,207 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from functools import ( - lru_cache, +from abc import ( + ABC, + abstractmethod, ) from typing import ( TYPE_CHECKING, + Any, + ClassVar, + Dict, List, Optional, + Tuple, Union, ) import numpy as np -from deepmd.env import ( - MODEL_VERSION, - default_tf_session_config, - tf, +from deepmd.backend.backend import ( + Backend, +) +from deepmd.dpmodel.output_def import ( + FittingOutputDef, + ModelOutputDef, ) from deepmd.utils.batch_size import ( AutoBatchSize, ) -from deepmd.utils.sess import ( - run_sess, -) if TYPE_CHECKING: - from pathlib import ( - Path, - ) + import ase.neighborlist + +class DeepEvalBackend(ABC): + """Low-level Deep Evaluator interface. -class DeepEval: - """Common methods for DeepPot, DeepWFC, DeepPolar, ... 
+ Backends should inherbit implement this interface. High-level interface + will be built on top of this. Parameters ---------- model_file : Path The name of the frozen model file. - load_prefix: str - The prefix in the load computational graph - default_tf_graph : bool - If uses the default tf graph, otherwise build a new tf graph for evaluation - auto_batch_size : bool or int or AutomaticBatchSize, default: False + *args : list + Positional arguments. + auto_batch_size : bool or int or AutoBatchSize, default: True If True, automatic batch size will be used. If int, it will be used as the initial batch size. - input_map : dict, optional - The input map for tf.import_graph_def. Only work with default tf graph neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional The ASE neighbor list class to produce the neighbor list. If None, the neighbor list will be built natively in the model. + **kwargs : dict + Keyword arguments. """ - load_prefix: str # set by subclass - + _OUTDEF_DP2BACKEND: ClassVar[dict] = { + "energy": "atom_energy", + "energy_redu": "energy", + "energy_derv_r": "force", + "energy_derv_r_mag": "force_mag", + "energy_derv_c": "atom_virial", + "energy_derv_c_mag": "atom_virial_mag", + "energy_derv_c_redu": "virial", + "polar": "polar", + "polar_redu": "global_polar", + "polar_derv_r": "force", + "polar_derv_c": "atom_virial", + "polar_derv_c_redu": "virial", + "dipole": "dipole", + "dipole_redu": "global_dipole", + "dipole_derv_r": "force", + "dipole_derv_c": "atom_virial", + "dipole_derv_c_redu": "virial", + "dos": "atom_dos", + "dos_redu": "dos", + "mask_mag": "mask_mag", + "mask": "mask", + } + + @abstractmethod def __init__( self, - model_file: "Path", - load_prefix: str = "load", - default_tf_graph: bool = False, - auto_batch_size: Union[bool, int, AutoBatchSize] = False, - input_map: Optional[dict] = None, - neighbor_list=None, - ): - self.graph = self._load_graph( - model_file, - prefix=load_prefix, - 
default_tf_graph=default_tf_graph, - input_map=input_map, - ) - self.load_prefix = load_prefix - - # graph_compatable should be called after graph and prefix are set - if not self._graph_compatable(): - raise RuntimeError( - f"model in graph (version {self.model_version}) is incompatible" - f"with the model (version {MODEL_VERSION}) supported by the current code." - "See https://deepmd.rtfd.io/compatability/ for details." - ) - - # set default to False, as subclasses may not support - if isinstance(auto_batch_size, bool): - if auto_batch_size: - self.auto_batch_size = AutoBatchSize() - else: - self.auto_batch_size = None - elif isinstance(auto_batch_size, int): - self.auto_batch_size = AutoBatchSize(auto_batch_size) - elif isinstance(auto_batch_size, AutoBatchSize): - self.auto_batch_size = auto_batch_size - else: - raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize") - - self.neighbor_list = neighbor_list - - @property - @lru_cache(maxsize=None) - def model_type(self) -> str: - """Get type of model. - - :type:str - """ - t_mt = self._get_tensor("model_attr/model_type:0") - [mt] = run_sess(self.sess, [t_mt], feed_dict={}) - return mt.decode("utf-8") + model_file: str, + output_def: ModelOutputDef, + *args: List[Any], + auto_batch_size: Union[bool, int, AutoBatchSize] = True, + neighbor_list: Optional["ase.neighborlist.NewPrimitiveNeighborList"] = None, + **kwargs: Dict[str, Any], + ) -> None: + pass + + def __new__(cls, model_file: str, *args, **kwargs): + if cls is DeepEvalBackend: + backend = Backend.detect_backend_by_model(model_file) + return super().__new__(backend().deep_eval) + return super().__new__(cls) + + @abstractmethod + def eval( + self, + coords: np.ndarray, + cells: np.ndarray, + atom_types: np.ndarray, + atomic: bool = False, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + **kwargs: Dict[str, Any], + ) -> Dict[str, np.ndarray]: + """Evaluate the energy, force and virial by using this DP. 
- @property - @lru_cache(maxsize=None) - def model_version(self) -> str: - """Get version of model. + Parameters + ---------- + coords + The coordinates of atoms. + The array should be of size nframes x natoms x 3 + cells + The cell of the region. + If None then non-PBC is assumed, otherwise using PBC. + The array should be of size nframes x 9 + atom_types + The atom types + The list should contain natoms ints + atomic + Calculate the atomic energy and virial + fparam + The frame parameter. + The array can be of size : + - nframes x dim_fparam. + - dim_fparam. Then all frames are assumed to be provided with the same fparam. + aparam + The atomic parameter + The array can be of size : + - nframes x natoms x dim_aparam. + - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam. + - dim_aparam. Then all frames and atoms are provided with the same aparam. + **kwargs + Other parameters Returns ------- - str - version of model + output_dict : dict + The output of the evaluation. The keys are the names of the output + variables, and the values are the corresponding output arrays. """ - try: - t_mt = self._get_tensor("model_attr/model_version:0") - except KeyError: - # For deepmd-kit version 0.x - 1.x, set model version to 0.0 - return "0.0" - else: - [mt] = run_sess(self.sess, [t_mt], feed_dict={}) - return mt.decode("utf-8") - @property - @lru_cache(maxsize=None) - def sess(self) -> tf.Session: - """Get TF session.""" - # start a tf session associated to the graph - return tf.Session(graph=self.graph, config=default_tf_session_config) + @abstractmethod + def get_rcut(self) -> float: + """Get the cutoff radius of this model.""" - def _graph_compatable(self) -> bool: - """Check the model compatability. 
+ @abstractmethod + def get_ntypes(self) -> int: + """Get the number of atom types of this model.""" - Returns - ------- - bool - If the model stored in the graph file is compatable with the current code - """ - model_version_major = int(self.model_version.split(".")[0]) - model_version_minor = int(self.model_version.split(".")[1]) - MODEL_VERSION_MAJOR = int(MODEL_VERSION.split(".")[0]) - MODEL_VERSION_MINOR = int(MODEL_VERSION.split(".")[1]) - if (model_version_major != MODEL_VERSION_MAJOR) or ( - model_version_minor > MODEL_VERSION_MINOR - ): - return False - else: - return True + @abstractmethod + def get_type_map(self) -> List[str]: + """Get the type map (element name of the atom types) of this model.""" - def _get_tensor( - self, tensor_name: str, attr_name: Optional[str] = None - ) -> tf.Tensor: - """Get TF graph tensor and assign it to class namespace. + @abstractmethod + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this DP.""" - Parameters - ---------- - tensor_name : str - name of tensor to get - attr_name : Optional[str], optional - if specified, class attribute with this name will be created and tensor will - be assigned to it, by default None + @abstractmethod + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this DP.""" - Returns - ------- - tf.Tensor - loaded tensor - """ - # do not use os.path.join as it doesn't work on Windows - tensor_path = "/".join((self.load_prefix, tensor_name)) - tensor = self.graph.get_tensor_by_name(tensor_path) - if attr_name: - setattr(self, attr_name, tensor) - return tensor - else: - return tensor - - @staticmethod - def _load_graph( - frozen_graph_filename: "Path", - prefix: str = "load", - default_tf_graph: bool = False, - input_map: Optional[dict] = None, - ): - # We load the protobuf file from the disk and parse it to retrieve the - # unserialized graph_def - with tf.gfile.GFile(str(frozen_graph_filename), "rb") as f: - graph_def = 
tf.GraphDef() - graph_def.ParseFromString(f.read()) - - if default_tf_graph: - tf.import_graph_def( - graph_def, - input_map=input_map, - return_elements=None, - name=prefix, - producer_op_list=None, - ) - graph = tf.get_default_graph() - else: - # Then, we can use again a convenient built-in function to import - # a graph_def into the current default Graph - with tf.Graph().as_default() as graph: - tf.import_graph_def( - graph_def, - input_map=None, - return_elements=None, - name=prefix, - producer_op_list=None, - ) - - return graph - - @staticmethod - def sort_input( - coord: np.ndarray, - atom_type: np.ndarray, - sel_atoms: Optional[List[int]] = None, + def eval_descriptor( + self, + coords: np.ndarray, + cells: np.ndarray, + atom_types: np.ndarray, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + efield: Optional[np.ndarray] = None, mixed_type: bool = False, - ): - """Sort atoms in the system according their types. + **kwargs: Dict[str, Any], + ) -> np.ndarray: + """Evaluate descriptors by using this DP. Parameters ---------- - coord + coords The coordinates of atoms. - Should be of shape [nframes, natoms, 3] - atom_type - The type of atoms - Should be of shape [natoms] - sel_atoms - The selected atoms by type + The array should be of size nframes x natoms x 3 + cells + The cell of the region. + If None then non-PBC is assumed, otherwise using PBC. + The array should be of size nframes x 9 + atom_types + The atom types + The list should contain natoms ints + fparam + The frame parameter. + The array can be of size : + - nframes x dim_fparam. + - dim_fparam. Then all frames are assumed to be provided with the same fparam. + aparam + The atomic parameter + The array can be of size : + - nframes x natoms x dim_aparam. + - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam. + - dim_aparam. Then all frames and atoms are provided with the same aparam. + efield + The external field on atoms. 
+ The array should be of size nframes x natoms x 3 mixed_type Whether to perform the mixed_type mode. If True, the input data has the mixed_type format (see doc/model/train_se_atten.md), @@ -237,77 +209,214 @@ def sort_input( Returns ------- - coord_out - The coordinates after sorting - atom_type_out - The atom types after sorting - idx_map - The index mapping from the input to the output. - For example coord_out = coord[:,idx_map,:] - sel_atom_type - Only output if sel_atoms is not None - The sorted selected atom types - sel_idx_map - Only output if sel_atoms is not None - The index mapping from the selected atoms to sorted selected atoms. + descriptor + Descriptors. """ - if mixed_type: - # mixed_type need not to resort - natoms = atom_type[0].size - idx_map = np.arange(natoms) - return coord, atom_type, idx_map - if sel_atoms is not None: - selection = [False] * np.size(atom_type) - for ii in sel_atoms: - selection += atom_type == ii - sel_atom_type = atom_type[selection] - natoms = atom_type.size - idx = np.arange(natoms) - idx_map = np.lexsort((idx, atom_type)) - nframes = coord.shape[0] - coord = coord.reshape([nframes, -1, 3]) - coord = np.reshape(coord[:, idx_map, :], [nframes, -1]) - atom_type = atom_type[idx_map] - if sel_atoms is not None: - sel_natoms = np.size(sel_atom_type) - sel_idx = np.arange(sel_natoms) - sel_idx_map = np.lexsort((sel_idx, sel_atom_type)) - sel_atom_type = sel_atom_type[sel_idx_map] - return coord, atom_type, idx_map, sel_atom_type, sel_idx_map - else: - return coord, atom_type, idx_map + raise NotImplementedError + + def eval_typeebd(self) -> np.ndarray: + """Evaluate output of type embedding network by using this model. + + Returns + ------- + np.ndarray + The output of type embedding network. The shape is [ntypes, o_size], + where ntypes is the number of types, and o_size is the number of nodes + in the output layer. + + Raises + ------ + KeyError + If the model does not enable type embedding. 
+ """ + raise NotImplementedError + + def _check_mixed_types(self, atom_types: np.ndarray) -> bool: + """Check if atom types of all frames are the same. - @staticmethod - def reverse_map(vec: np.ndarray, imap: List[int]) -> np.ndarray: - """Reverse mapping of a vector according to the index map. + Traditional descriptors like se_e2_a requires all the frames to + have the same atom types. Parameters ---------- - vec - Input vector. Be of shape [nframes, natoms, -1] - imap - Index map. Be of shape [natoms] + atom_types : np.ndarray + The atom types of all frames, in shape nframes * natoms. + """ + if np.count_nonzero(atom_types[0] == -1) > 0: + # assume mixed_types if there are virtual types, even when + # the atom types of all frames are the same + return False + return np.all(np.equal(atom_types, atom_types[0])) - Returns - ------- - vec_out - Reverse mapped vector. + @property + @abstractmethod + def model_type(self) -> "DeepEval": + """The the evaluator of the model type.""" + + @abstractmethod + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. """ - ret = np.zeros(vec.shape) - # for idx,ii in enumerate(imap) : - # ret[:,ii,:] = vec[:,idx,:] - ret[:, imap, :] = vec - return ret - - def make_natoms_vec( - self, atom_types: np.ndarray, mixed_type: bool = False + + def get_numb_dos(self) -> int: + """Get the number of DOS.""" + raise NotImplementedError + + def get_has_efield(self): + """Check if the model has efield.""" + return False + + def get_has_spin(self): + """Check if the model has spin atom types.""" + return False + + @abstractmethod + def get_ntypes_spin(self) -> int: + """Get the number of spin atom types of this model. Only used in old implement.""" + + +class DeepEval(ABC): + """High-level Deep Evaluator interface. 
+ + The specific DeepEval, such as DeepPot and DeepTensor, should inherit + from this class. This class provides a high-level interface on the top + of the low-level interface. + + Parameters + ---------- + model_file : Path + The name of the frozen model file. + *args : list + Positional arguments. + auto_batch_size : bool or int or AutoBatchSize, default: True + If True, automatic batch size will be used. If int, it will be used + as the initial batch size. + neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional + The ASE neighbor list class to produce the neighbor list. If None, the + neighbor list will be built natively in the model. + **kwargs : dict + Keyword arguments. + """ + + def __new__(cls, model_file: str, *args, **kwargs): + if cls is DeepEval: + deep_eval = DeepEvalBackend( + model_file, + ModelOutputDef(FittingOutputDef([])), + *args, + **kwargs, + ) + return super().__new__(deep_eval.model_type) + return super().__new__(cls) + + def __init__( + self, + model_file: str, + *args: List[Any], + auto_batch_size: Union[bool, int, AutoBatchSize] = True, + neighbor_list: Optional["ase.neighborlist.NewPrimitiveNeighborList"] = None, + **kwargs: Dict[str, Any], + ) -> None: + self.deep_eval = DeepEvalBackend( + model_file, + self.output_def, + *args, + auto_batch_size=auto_batch_size, + neighbor_list=neighbor_list, + **kwargs, + ) + if self.deep_eval.get_has_spin() and hasattr(self, "output_def_mag"): + self.deep_eval.output_def = self.output_def_mag + + @property + @abstractmethod + def output_def(self) -> ModelOutputDef: + """Returns the output variable definitions.""" + + def get_rcut(self) -> float: + """Get the cutoff radius of this model.""" + return self.deep_eval.get_rcut() + + def get_ntypes(self) -> int: + """Get the number of atom types of this model.""" + return self.deep_eval.get_ntypes() + + def get_type_map(self) -> List[str]: + """Get the type map (element name of the atom types) of this model.""" + return 
self.deep_eval.get_type_map() + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this DP.""" + return self.deep_eval.get_dim_fparam() + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this DP.""" + return self.deep_eval.get_dim_aparam() + + def _get_natoms_and_nframes( + self, + coords: np.ndarray, + atom_types: np.ndarray, + mixed_type: bool = False, + ) -> Tuple[int, int]: + if mixed_type or atom_types.ndim > 1: + natoms = len(atom_types[0]) + else: + natoms = len(atom_types) + if natoms == 0: + assert coords.size == 0 + else: + coords = np.reshape(np.array(coords), [-1, natoms * 3]) + nframes = coords.shape[0] + return natoms, nframes + + def _expande_atype(self, atype: np.ndarray, nframes: int, mixed_type: bool): + if not mixed_type: + atype = np.tile(atype.reshape(1, -1), (nframes, 1)) + return atype + + def eval_descriptor( + self, + coords: np.ndarray, + cells: Optional[np.ndarray], + atom_types: np.ndarray, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + mixed_type: bool = False, + **kwargs: Dict[str, Any], ) -> np.ndarray: - """Make the natom vector used by deepmd-kit. + """Evaluate descriptors by using this DP. Parameters ---------- + coords + The coordinates of atoms. + The array should be of size nframes x natoms x 3 + cells + The cell of the region. + If None then non-PBC is assumed, otherwise using PBC. + The array should be of size nframes x 9 atom_types - The type of atoms + The atom types + The list should contain natoms ints + fparam + The frame parameter. + The array can be of size : + - nframes x dim_fparam. + - dim_fparam. Then all frames are assumed to be provided with the same fparam. + aparam + The atomic parameter + The array can be of size : + - nframes x natoms x dim_aparam. + - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam. + - dim_aparam. 
Then all frames and atoms are provided with the same aparam. + efield + The external field on atoms. + The array should be of size nframes x natoms x 3 mixed_type Whether to perform the mixed_type mode. If True, the input data has the mixed_type format (see doc/model/train_se_atten.md), @@ -315,26 +424,27 @@ def make_natoms_vec( Returns ------- - natoms - The number of atoms. This tensor has the length of Ntypes + 2 - natoms[0]: number of local atoms - natoms[1]: total number of atoms held by this processor - natoms[i]: 2 <= i < Ntypes+2, number of type i atoms - + descriptor + Descriptors. """ - natoms_vec = np.zeros(self.ntypes + 2).astype(int) - if mixed_type: - natoms = atom_types[0].size - else: - natoms = atom_types.size - natoms_vec[0] = natoms - natoms_vec[1] = natoms - if mixed_type: - natoms_vec[2] = natoms - return natoms_vec - for ii in range(self.ntypes): - natoms_vec[ii + 2] = np.count_nonzero(atom_types == ii) - return natoms_vec + ( + coords, + cells, + atom_types, + fparam, + aparam, + nframes, + natoms, + ) = self._standard_input(coords, cells, atom_types, fparam, aparam, mixed_type) + descriptor = self.deep_eval.eval_descriptor( + coords, + cells, + atom_types, + fparam=fparam, + aparam=aparam, + **kwargs, + ) + return descriptor def eval_typeebd(self) -> np.ndarray: """Evaluate output of type embedding network by using this model. @@ -353,105 +463,80 @@ def eval_typeebd(self) -> np.ndarray: See Also -------- - deepmd.utils.type_embed.TypeEmbedNet : The type embedding network. + deepmd.tf.utils.type_embed.TypeEmbedNet : The type embedding network. 
Examples -------- Get the output of type embedding network of `graph.pb`: >>> from deepmd.infer import DeepPotential - >>> dp = DeepPotential('graph.pb') + >>> dp = DeepPotential("graph.pb") >>> dp.eval_typeebd() """ - t_typeebd = self._get_tensor("t_typeebd:0") - [typeebd] = run_sess(self.sess, [t_typeebd], feed_dict={}) - return typeebd - - def build_neighbor_list( - self, - coords: np.ndarray, - cell: Optional[np.ndarray], - atype: np.ndarray, - imap: np.ndarray, - neighbor_list, - ): - """Make the mesh with neighbor list for a single frame. + return self.deep_eval.eval_typeebd() + + def _standard_input(self, coords, cells, atom_types, fparam, aparam, mixed_type): + coords = np.array(coords) + if cells is not None: + cells = np.array(cells) + atom_types = np.array(atom_types, dtype=np.int32) + if fparam is not None: + fparam = np.array(fparam) + if aparam is not None: + aparam = np.array(aparam) + natoms, nframes = self._get_natoms_and_nframes(coords, atom_types, mixed_type) + atom_types = self._expande_atype(atom_types, nframes, mixed_type) + coords = coords.reshape(nframes, natoms, 3) + if cells is not None: + cells = cells.reshape(nframes, 3, 3) + if fparam is not None: + fdim = self.get_dim_fparam() + if fparam.size == nframes * fdim: + fparam = np.reshape(fparam, [nframes, fdim]) + elif fparam.size == fdim: + fparam = np.tile(fparam.reshape([-1]), [nframes, 1]) + else: + raise RuntimeError( + "got wrong size of frame param, should be either %d x %d or %d" + % (nframes, fdim, fdim) + ) + if aparam is not None: + fdim = self.get_dim_aparam() + if aparam.size == nframes * natoms * fdim: + aparam = np.reshape(aparam, [nframes, natoms * fdim]) + elif aparam.size == natoms * fdim: + aparam = np.tile(aparam.reshape([-1]), [nframes, 1]) + elif aparam.size == fdim: + aparam = np.tile(aparam.reshape([-1]), [nframes, natoms]) + else: + raise RuntimeError( + "got wrong size of frame param, should be either %d x %d x %d or %d x %d or %d" + % (nframes, natoms, fdim, 
natoms, fdim, fdim) + ) + return coords, cells, atom_types, fparam, aparam, nframes, natoms - Parameters - ---------- - coords : np.ndarray - The coordinates of atoms. Should be of shape [natoms, 3] - cell : Optional[np.ndarray] - The cell of the system. Should be of shape [3, 3] - atype : np.ndarray - The type of atoms. Should be of shape [natoms] - imap : np.ndarray - The index map of atoms. Should be of shape [natoms] - neighbor_list : ase.neighborlist.NewPrimitiveNeighborList - ASE neighbor list. The following method or attribute will be - used/set: bothways, self_interaction, update, build, first_neigh, - pair_second, offset_vec. + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model. - Returns - ------- - natoms_vec : np.ndarray - The number of atoms. This tensor has the length of Ntypes + 2 - natoms[0]: nloc - natoms[1]: nall - natoms[i]: 2 <= i < Ntypes+2, number of type i atoms for nloc - coords : np.ndarray - The coordinates of atoms, including ghost atoms. Should be of - shape [nframes, nall, 3] - atype : np.ndarray - The type of atoms, including ghost atoms. Should be of shape [nall] - mesh : np.ndarray - The mesh in nei_mode=4. - imap : np.ndarray - The index map of atoms. Should be of shape [nall] - ghost_map : np.ndarray - The index map of ghost atoms. Should be of shape [nghost] + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. 
""" - pbc = np.repeat(cell is not None, 3) - cell = cell.reshape(3, 3) - positions = coords.reshape(-1, 3) - neighbor_list.bothways = True - neighbor_list.self_interaction = False - if neighbor_list.update(pbc, cell, positions): - neighbor_list.build(pbc, cell, positions) - first_neigh = neighbor_list.first_neigh.copy() - pair_second = neighbor_list.pair_second.copy() - offset_vec = neighbor_list.offset_vec.copy() - # get out-of-box neighbors - out_mask = np.any(offset_vec != 0, axis=1) - out_idx = pair_second[out_mask] - out_offset = offset_vec[out_mask] - out_coords = positions[out_idx] + out_offset.dot(cell) - atype = np.array(atype, dtype=int) - out_atype = atype[out_idx] - - nloc = positions.shape[0] - nghost = out_idx.size - all_coords = np.concatenate((positions, out_coords), axis=0) - all_atype = np.concatenate((atype, out_atype), axis=0) - # convert neighbor indexes - ghost_map = pair_second[out_mask] - pair_second[out_mask] = np.arange(nloc, nloc + nghost) - # get the mesh - mesh = np.zeros(16 + nloc * 2 + pair_second.size, dtype=int) - mesh[0] = nloc - # ilist - mesh[16 : 16 + nloc] = np.arange(nloc) - # numnei - mesh[16 + nloc : 16 + nloc * 2] = first_neigh[1:] - first_neigh[:-1] - # jlist - mesh[16 + nloc * 2 :] = pair_second - - # natoms_vec - natoms_vec = np.zeros(self.ntypes + 2).astype(int) - natoms_vec[0] = nloc - natoms_vec[1] = nloc + nghost - for ii in range(self.ntypes): - natoms_vec[ii + 2] = np.count_nonzero(atype == ii) - # imap append ghost atoms - imap = np.concatenate((imap, np.arange(nloc, nloc + nghost))) - return natoms_vec, all_coords, all_atype, mesh, imap, ghost_map + return self.deep_eval.get_sel_type() + + def _get_sel_natoms(self, atype) -> int: + return np.sum(np.isin(atype, self.get_sel_type()).astype(int)) + + @property + def has_efield(self) -> bool: + """Check if the model has efield.""" + return self.deep_eval.get_has_efield() + + @property + def has_spin(self) -> bool: + """Check if the model has spin.""" + return 
self.deep_eval.get_has_spin() + + def get_ntypes_spin(self) -> int: + """Get the number of spin atom types of this model. Only used in old implement.""" + return self.deep_eval.get_ntypes_spin() diff --git a/deepmd/infer/deep_polar.py b/deepmd/infer/deep_polar.py index c1f981ef86..f857619871 100644 --- a/deepmd/infer/deep_polar.py +++ b/deepmd/infer/deep_polar.py @@ -1,8 +1,8 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - TYPE_CHECKING, List, Optional, + Union, ) import numpy as np @@ -11,118 +11,46 @@ DeepTensor, ) -if TYPE_CHECKING: - from pathlib import ( - Path, - ) - class DeepPolar(DeepTensor): - """Constructor. + """Deep polar model. Parameters ---------- model_file : Path The name of the frozen model file. - load_prefix: str - The prefix in the load computational graph - default_tf_graph : bool - If uses the default tf graph, otherwise build a new tf graph for evaluation - input_map : dict, optional - The input map for tf.import_graph_def. Only work with default tf graph - neighbor_list : ase.neighborlist.NeighborList, optional - The neighbor list object. If None, then build the native neighbor list. - - Warnings - -------- - For developers: `DeepTensor` initializer must be called at the end after - `self.tensors` are modified because it uses the data in `self.tensors` dict. - Do not chanage the order! + *args : list + Positional arguments. + auto_batch_size : bool or int or AutoBatchSize, default: True + If True, automatic batch size will be used. If int, it will be used + as the initial batch size. + neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional + The ASE neighbor list class to produce the neighbor list. If None, the + neighbor list will be built natively in the model. + **kwargs : dict + Keyword arguments. 
""" - def __init__( - self, - model_file: "Path", - load_prefix: str = "load", - default_tf_graph: bool = False, - input_map: Optional[dict] = None, - neighbor_list=None, - ) -> None: - # use this in favor of dict update to move attribute from class to - # instance namespace - self.tensors = dict( - { - # output tensor - "t_tensor": "o_polar:0", - }, - **self.tensors, - ) - - DeepTensor.__init__( - self, - model_file, - load_prefix=load_prefix, - default_tf_graph=default_tf_graph, - input_map=input_map, - neighbor_list=neighbor_list, - ) - - def get_dim_fparam(self) -> int: - """Unsupported in this model.""" - raise NotImplementedError("This model type does not support this attribute") - - def get_dim_aparam(self) -> int: - """Unsupported in this model.""" - raise NotImplementedError("This model type does not support this attribute") + @property + def output_tensor_name(self) -> str: + return "polar" class DeepGlobalPolar(DeepTensor): - """Constructor. - - Parameters - ---------- - model_file : str - The name of the frozen model file. - load_prefix: str - The prefix in the load computational graph - default_tf_graph : bool - If uses the default tf graph, otherwise build a new tf graph for evaluation - neighbor_list : ase.neighborlist.NeighborList, optional - The neighbor list object. If None, then build the native neighbor list. 
- """ - - def __init__( - self, - model_file: str, - load_prefix: str = "load", - default_tf_graph: bool = False, - neighbor_list=None, - ) -> None: - self.tensors.update( - { - "t_sel_type": "model_attr/sel_type:0", - # output tensor - "t_tensor": "o_global_polar:0", - } - ) - - DeepTensor.__init__( - self, - model_file, - load_prefix=load_prefix, - default_tf_graph=default_tf_graph, - neighbor_list=None, - ) + @property + def output_tensor_name(self) -> str: + return "global_polar" def eval( self, coords: np.ndarray, - cells: np.ndarray, - atom_types: List[int], + cells: Optional[np.ndarray], + atom_types: Union[List[int], np.ndarray], atomic: bool = False, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, - efield: Optional[np.ndarray] = None, + mixed_type: bool = False, + **kwargs: dict, ) -> np.ndarray: """Evaluate the model. @@ -135,31 +63,35 @@ def eval( The cell of the region. If None then non-PBC is assumed, otherwise using PBC. The array should be of size nframes x 9 - atom_types + atom_types : list[int] or np.ndarray The atom types The list should contain natoms ints atomic - Not used in this model + If True (default), return the atomic tensor + Otherwise return the global tensor fparam Not used in this model aparam Not used in this model - efield - Not used in this model + mixed_type + Whether to perform the mixed_type mode. + If True, the input data has the mixed_type format (see doc/model/train_se_atten.md), + in which frames in a system may have different natoms_vec(s), with the same nloc. 
Returns ------- tensor The returned tensor - If atomic == False then of size nframes x variable_dof - else of size nframes x natoms x variable_dof + If atomic == False then of size nframes x output_dim + else of size nframes x natoms x output_dim """ - return DeepTensor.eval(self, coords, cells, atom_types, atomic=False) - - def get_dim_fparam(self) -> int: - """Unsupported in this model.""" - raise NotImplementedError("This model type does not support this attribute") - - def get_dim_aparam(self) -> int: - """Unsupported in this model.""" - raise NotImplementedError("This model type does not support this attribute") + return super().eval( + coords, + cells, + atom_types, + atomic=atomic, + fparam=fparam, + aparam=aparam, + mixed_type=mixed_type, + **kwargs, + ) diff --git a/deepmd/infer/deep_pot.py b/deepmd/infer/deep_pot.py index 81cfdde7a8..bc0bfc9599 100644 --- a/deepmd/infer/deep_pot.py +++ b/deepmd/infer/deep_pot.py @@ -1,8 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -import logging from typing import ( - TYPE_CHECKING, - Callable, + Any, + Dict, List, Optional, Tuple, @@ -11,681 +10,195 @@ import numpy as np -from deepmd.common import ( - make_default_mesh, +from deepmd.dpmodel.output_def import ( + FittingOutputDef, + ModelOutputDef, + OutputVariableDef, ) -from deepmd.infer.data_modifier import ( - DipoleChargeModifier, -) -from deepmd.infer.deep_eval import ( + +from .deep_eval import ( DeepEval, ) -from deepmd.utils.batch_size import ( - AutoBatchSize, -) -from deepmd.utils.sess import ( - run_sess, -) - -if TYPE_CHECKING: - from pathlib import ( - Path, - ) - -log = logging.getLogger(__name__) class DeepPot(DeepEval): - """Constructor. + """Potential energy model. Parameters ---------- model_file : Path The name of the frozen model file. 
- load_prefix: str - The prefix in the load computational graph - default_tf_graph : bool - If uses the default tf graph, otherwise build a new tf graph for evaluation - auto_batch_size : bool or int or AutomaticBatchSize, default: True + *args : list + Positional arguments. + auto_batch_size : bool or int or AutoBatchSize, default: True If True, automatic batch size will be used. If int, it will be used as the initial batch size. - input_map : dict, optional - The input map for tf.import_graph_def. Only work with default tf graph neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional The ASE neighbor list class to produce the neighbor list. If None, the neighbor list will be built natively in the model. + **kwargs : dict + Keyword arguments. Examples -------- >>> from deepmd.infer import DeepPot >>> import numpy as np - >>> dp = DeepPot('graph.pb') - >>> coord = np.array([[1,0,0], [0,0,1.5], [1,0,3]]).reshape([1, -1]) + >>> dp = DeepPot("graph.pb") + >>> coord = np.array([[1, 0, 0], [0, 0, 1.5], [1, 0, 3]]).reshape([1, -1]) >>> cell = np.diag(10 * np.ones(3)).reshape([1, -1]) - >>> atype = [1,0,1] + >>> atype = [1, 0, 1] >>> e, f, v = dp.eval(coord, cell, atype) where `e`, `f` and `v` are predicted energy, force and virial of the system, respectively. - - Warnings - -------- - For developers: `DeepTensor` initializer must be called at the end after - `self.tensors` are modified because it uses the data in `self.tensors` dict. - Do not chanage the order! 
""" - def __init__( - self, - model_file: "Path", - load_prefix: str = "load", - default_tf_graph: bool = False, - auto_batch_size: Union[bool, int, AutoBatchSize] = True, - input_map: Optional[dict] = None, - neighbor_list=None, - ) -> None: - # add these tensors on top of what is defined by DeepTensor Class - # use this in favor of dict update to move attribute from class to - # instance namespace - self.tensors = { - # descrpt attrs - "t_ntypes": "descrpt_attr/ntypes:0", - "t_rcut": "descrpt_attr/rcut:0", - # fitting attrs - "t_dfparam": "fitting_attr/dfparam:0", - "t_daparam": "fitting_attr/daparam:0", - # model attrs - "t_tmap": "model_attr/tmap:0", - # inputs - "t_coord": "t_coord:0", - "t_type": "t_type:0", - "t_natoms": "t_natoms:0", - "t_box": "t_box:0", - "t_mesh": "t_mesh:0", - # add output tensors - "t_energy": "o_energy:0", - "t_force": "o_force:0", - "t_virial": "o_virial:0", - "t_ae": "o_atom_energy:0", - "t_av": "o_atom_virial:0", - "t_descriptor": "o_descriptor:0", - } - DeepEval.__init__( - self, - model_file, - load_prefix=load_prefix, - default_tf_graph=default_tf_graph, - auto_batch_size=auto_batch_size, - input_map=input_map, - neighbor_list=neighbor_list, - ) - - # load optional tensors - operations = [op.name for op in self.graph.get_operations()] - # check if the graph has these operations: - # if yes add them - - if ("%s/t_efield" % load_prefix) in operations: - self.tensors.update({"t_efield": "t_efield:0"}) - self.has_efield = True - else: - log.debug("Could not get tensor 't_efield:0'") - self.t_efield = None - self.has_efield = False - - if ("%s/t_fparam" % load_prefix) in operations: - self.tensors.update({"t_fparam": "t_fparam:0"}) - self.has_fparam = True - else: - log.debug("Could not get tensor 't_fparam:0'") - self.t_fparam = None - self.has_fparam = False - - if ("%s/t_aparam" % load_prefix) in operations: - self.tensors.update({"t_aparam": "t_aparam:0"}) - self.has_aparam = True - else: - log.debug("Could not get tensor 
't_aparam:0'") - self.t_aparam = None - self.has_aparam = False - - if ("%s/spin_attr/ntypes_spin" % load_prefix) in operations: - self.tensors.update({"t_ntypes_spin": "spin_attr/ntypes_spin:0"}) - self.has_spin = True - else: - self.ntypes_spin = 0 - self.has_spin = False - - # now load tensors to object attributes - for attr_name, tensor_name in self.tensors.items(): - try: - self._get_tensor(tensor_name, attr_name) - except KeyError: - if attr_name != "t_descriptor": - raise - - self._run_default_sess() - self.tmap = self.tmap.decode("UTF-8").split() - - # setup modifier - try: - t_modifier_type = self._get_tensor("modifier_attr/type:0") - self.modifier_type = run_sess(self.sess, t_modifier_type).decode("UTF-8") - except (ValueError, KeyError): - self.modifier_type = None - - try: - t_jdata = self._get_tensor("train_attr/training_script:0") - jdata = run_sess(self.sess, t_jdata).decode("UTF-8") - import json - - jdata = json.loads(jdata) - self.descriptor_type = jdata["model"]["descriptor"]["type"] - except (ValueError, KeyError): - self.descriptor_type = None - - if self.modifier_type == "dipole_charge": - t_mdl_name = self._get_tensor("modifier_attr/mdl_name:0") - t_mdl_charge_map = self._get_tensor("modifier_attr/mdl_charge_map:0") - t_sys_charge_map = self._get_tensor("modifier_attr/sys_charge_map:0") - t_ewald_h = self._get_tensor("modifier_attr/ewald_h:0") - t_ewald_beta = self._get_tensor("modifier_attr/ewald_beta:0") - [mdl_name, mdl_charge_map, sys_charge_map, ewald_h, ewald_beta] = run_sess( - self.sess, + @property + def output_def(self) -> ModelOutputDef: + """Get the output definition of this model.""" + return ModelOutputDef( + FittingOutputDef( [ - t_mdl_name, - t_mdl_charge_map, - t_sys_charge_map, - t_ewald_h, - t_ewald_beta, - ], - ) - mdl_name = mdl_name.decode("UTF-8") - mdl_charge_map = [int(ii) for ii in mdl_charge_map.decode("UTF-8").split()] - sys_charge_map = [int(ii) for ii in sys_charge_map.decode("UTF-8").split()] - self.dm = 
DipoleChargeModifier( - mdl_name, - mdl_charge_map, - sys_charge_map, - ewald_h=ewald_h, - ewald_beta=ewald_beta, + OutputVariableDef( + "energy", + shape=[1], + reduciable=True, + r_differentiable=True, + c_differentiable=True, + atomic=True, + ), + ] ) + ) - def _run_default_sess(self): - if self.has_spin is True: - [ - self.ntypes, - self.ntypes_spin, - self.rcut, - self.dfparam, - self.daparam, - self.tmap, - ] = run_sess( - self.sess, - [ - self.t_ntypes, - self.t_ntypes_spin, - self.t_rcut, - self.t_dfparam, - self.t_daparam, - self.t_tmap, - ], - ) - else: - [self.ntypes, self.rcut, self.dfparam, self.daparam, self.tmap] = run_sess( - self.sess, + @property + def output_def_mag(self) -> ModelOutputDef: + """Get the output definition of this model with magnetic parts.""" + return ModelOutputDef( + FittingOutputDef( [ - self.t_ntypes, - self.t_rcut, - self.t_dfparam, - self.t_daparam, - self.t_tmap, - ], + OutputVariableDef( + "energy", + shape=[1], + reduciable=True, + r_differentiable=True, + c_differentiable=True, + atomic=True, + magnetic=True, + ), + ] ) - - def get_ntypes(self) -> int: - """Get the number of atom types of this model.""" - return self.ntypes - - def get_ntypes_spin(self): - """Get the number of spin atom types of this model.""" - return self.ntypes_spin - - def get_rcut(self) -> float: - """Get the cut-off radius of this model.""" - return self.rcut - - def get_type_map(self) -> List[str]: - """Get the type map (element name of the atom types) of this model.""" - return self.tmap - - def get_sel_type(self) -> List[int]: - """Unsupported in this model.""" - raise NotImplementedError("This model type does not support this attribute") - - def get_descriptor_type(self) -> List[int]: - """Get the descriptor type of this model.""" - return self.descriptor_type - - def get_dim_fparam(self) -> int: - """Get the number (dimension) of frame parameters of this DP.""" - return self.dfparam - - def get_dim_aparam(self) -> int: - """Get the number 
(dimension) of atomic parameters of this DP.""" - return self.daparam - - def _eval_func(self, inner_func: Callable, numb_test: int, natoms: int) -> Callable: - """Wrapper method with auto batch size. - - Parameters - ---------- - inner_func : Callable - the method to be wrapped - numb_test : int - number of tests - natoms : int - number of atoms - - Returns - ------- - Callable - the wrapper - """ - if self.auto_batch_size is not None: - - def eval_func(*args, **kwargs): - return self.auto_batch_size.execute_all( - inner_func, numb_test, natoms, *args, **kwargs - ) - - else: - eval_func = inner_func - return eval_func - - def _get_natoms_and_nframes( - self, - coords: np.ndarray, - atom_types: Union[List[int], np.ndarray], - mixed_type: bool = False, - ) -> Tuple[int, int]: - if mixed_type: - natoms = len(atom_types[0]) - else: - natoms = len(atom_types) - if natoms == 0: - assert coords.size == 0 - else: - coords = np.reshape(np.array(coords), [-1, natoms * 3]) - nframes = coords.shape[0] - return natoms, nframes + ) def eval( self, coords: np.ndarray, - cells: np.ndarray, - atom_types: List[int], + cells: Optional[np.ndarray], + atom_types: Union[List[int], np.ndarray], atomic: bool = False, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, - efield: Optional[np.ndarray] = None, mixed_type: bool = False, + **kwargs: Dict[str, Any], ) -> Tuple[np.ndarray, ...]: - """Evaluate the energy, force and virial by using this DP. + """Evaluate energy, force, and virial. If atomic is True, + also return atomic energy and atomic virial. Parameters ---------- - coords - The coordinates of atoms. - The array should be of size nframes x natoms x 3 - cells - The cell of the region. - If None then non-PBC is assumed, otherwise using PBC. - The array should be of size nframes x 9 - atom_types - The atom types - The list should contain natoms ints - atomic - Calculate the atomic energy and virial - fparam - The frame parameter. 
- The array can be of size : - - nframes x dim_fparam. - - dim_fparam. Then all frames are assumed to be provided with the same fparam. - aparam - The atomic parameter - The array can be of size : - - nframes x natoms x dim_aparam. - - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam. - - dim_aparam. Then all frames and atoms are provided with the same aparam. - efield - The external field on atoms. - The array should be of size nframes x natoms x 3 - mixed_type - Whether to perform the mixed_type mode. - If True, the input data has the mixed_type format (see doc/model/train_se_atten.md), - in which frames in a system may have different natoms_vec(s), with the same nloc. + coords : np.ndarray + The coordinates of the atoms, in shape (nframes, natoms, 3). + cells : np.ndarray + The cell vectors of the system, in shape (nframes, 9). If the system + is not periodic, set it to None. + atom_types : List[int] or np.ndarray + The types of the atoms. If mixed_type is False, the shape is (natoms,); + otherwise, the shape is (nframes, natoms). + atomic : bool, optional + Whether to return atomic energy and atomic virial, by default False. + fparam : np.ndarray, optional + The frame parameters, by default None. + aparam : np.ndarray, optional + The atomic parameters, by default None. + mixed_type : bool, optional + Whether the atom_types is mixed type, by default False. + **kwargs : Dict[str, Any] + Keyword arguments. Returns ------- energy - The system energy. + The energy of the system, in shape (nframes,). force - The force on each atom + The force of the system, in shape (nframes, natoms, 3). virial - The virial - atom_energy - The atomic energy. Only returned when atomic == True - atom_virial - The atomic virial. Only returned when atomic == True + The virial of the system, in shape (nframes, 9). + atomic_energy + The atomic energy of the system, in shape (nframes, natoms). Only returned + when atomic is True. 
+ atomic_virial + The atomic virial of the system, in shape (nframes, natoms, 9). Only returned + when atomic is True. """ - # reshape coords before getting shape - natoms, numb_test = self._get_natoms_and_nframes( - coords, atom_types, mixed_type=mixed_type - ) - output = self._eval_func(self._eval_inner, numb_test, natoms)( + # This method has been used by: + # documentation python.md + # dp model_devi: +fparam, +aparam, +mixed_type + # dp test: +atomic, +fparam, +aparam, +efield, +mixed_type + # finetune: +mixed_type + # dpdata + # ase + ( + coords, + cells, + atom_types, + fparam, + aparam, + nframes, + natoms, + ) = self._standard_input(coords, cells, atom_types, fparam, aparam, mixed_type) + results = self.deep_eval.eval( coords, cells, atom_types, + atomic, fparam=fparam, aparam=aparam, - atomic=atomic, - efield=efield, - mixed_type=mixed_type, + **kwargs, ) + energy = results["energy_redu"].reshape(nframes, 1) + force = results["energy_derv_r"].reshape(nframes, natoms, 3) + virial = results["energy_derv_c_redu"].reshape(nframes, 9) - if self.modifier_type is not None: - if atomic: - raise RuntimeError("modifier does not support atomic modification") - me, mf, mv = self.dm.eval(coords, cells, atom_types) - output = list(output) # tuple to list - e, f, v = output[:3] - output[0] += me.reshape(e.shape) - output[1] += mf.reshape(f.shape) - output[2] += mv.reshape(v.shape) - output = tuple(output) - return output - - def _prepare_feed_dict( - self, - coords, - cells, - atom_types, - fparam=None, - aparam=None, - efield=None, - mixed_type=False, - ): - # standarize the shape of inputs - natoms, nframes = self._get_natoms_and_nframes( - coords, atom_types, mixed_type=mixed_type - ) - if mixed_type: - atom_types = np.array(atom_types, dtype=int).reshape([-1, natoms]) - else: - atom_types = np.array(atom_types, dtype=int).reshape([-1]) - coords = np.reshape(np.array(coords), [nframes, natoms * 3]) - if cells is None: - pbc = False - # make cells to work around the 
requirement of pbc - cells = np.tile(np.eye(3), [nframes, 1]).reshape([nframes, 9]) - else: - pbc = True - cells = np.array(cells).reshape([nframes, 9]) - - if self.has_fparam: - assert fparam is not None - fparam = np.array(fparam) - if self.has_aparam: - assert aparam is not None - aparam = np.array(aparam) - if self.has_efield: - assert ( - efield is not None - ), "you are using a model with external field, parameter efield should be provided" - efield = np.array(efield) - - # reshape the inputs - if self.has_fparam: - fdim = self.get_dim_fparam() - if fparam.size == nframes * fdim: - fparam = np.reshape(fparam, [nframes, fdim]) - elif fparam.size == fdim: - fparam = np.tile(fparam.reshape([-1]), [nframes, 1]) - else: - raise RuntimeError( - "got wrong size of frame param, should be either %d x %d or %d" - % (nframes, fdim, fdim) + if atomic: + if self.get_ntypes_spin() > 0: + ntypes_real = self.get_ntypes() - self.get_ntypes_spin() + natoms_real = sum( + [ + np.count_nonzero(np.array(atom_types[0]) == ii) + for ii in range(ntypes_real) + ] ) - if self.has_aparam: - fdim = self.get_dim_aparam() - if aparam.size == nframes * natoms * fdim: - aparam = np.reshape(aparam, [nframes, natoms * fdim]) - elif aparam.size == natoms * fdim: - aparam = np.tile(aparam.reshape([-1]), [nframes, 1]) - elif aparam.size == fdim: - aparam = np.tile(aparam.reshape([-1]), [nframes, natoms]) else: - raise RuntimeError( - "got wrong size of frame param, should be either %d x %d x %d or %d x %d or %d" - % (nframes, natoms, fdim, natoms, fdim, fdim) - ) - - # sort inputs - coords, atom_types, imap = self.sort_input( - coords, atom_types, mixed_type=mixed_type - ) - if self.has_efield: - efield = np.reshape(efield, [nframes, natoms, 3]) - efield = efield[:, imap, :] - efield = np.reshape(efield, [nframes, natoms * 3]) - if self.has_aparam: - aparam = np.reshape(aparam, [nframes, natoms, fdim]) - aparam = aparam[:, imap, :] - aparam = np.reshape(aparam, [nframes, natoms * fdim]) - - # 
make natoms_vec and default_mesh - if self.neighbor_list is None: - natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type) - assert natoms_vec[0] == natoms - mesh = make_default_mesh(pbc, mixed_type) - ghost_map = None - else: - if nframes > 1: - raise NotImplementedError( - "neighbor_list does not support multiple frames" - ) - ( - natoms_vec, - coords, - atom_types, - mesh, - imap, - ghost_map, - ) = self.build_neighbor_list( - coords, - cells if cells is not None else None, - atom_types, - imap, - self.neighbor_list, - ) - - # evaluate - feed_dict_test = {} - feed_dict_test[self.t_natoms] = natoms_vec - if mixed_type: - feed_dict_test[self.t_type] = atom_types.reshape([-1]) - else: - feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape( - [-1] + natoms_real = natoms + atomic_energy = results["energy"].reshape(nframes, natoms_real, 1) + atomic_virial = results["energy_derv_c"].reshape(nframes, natoms, 9) + result = ( + energy, + force, + virial, + atomic_energy, + atomic_virial, ) - feed_dict_test[self.t_coord] = np.reshape(coords, [-1]) - - if len(self.t_box.shape) == 1: - feed_dict_test[self.t_box] = np.reshape(cells, [-1]) - elif len(self.t_box.shape) == 2: - feed_dict_test[self.t_box] = cells else: - raise RuntimeError - if self.has_efield: - feed_dict_test[self.t_efield] = np.reshape(efield, [-1]) - feed_dict_test[self.t_mesh] = mesh - if self.has_fparam: - feed_dict_test[self.t_fparam] = np.reshape(fparam, [-1]) - if self.has_aparam: - feed_dict_test[self.t_aparam] = np.reshape(aparam, [-1]) - return feed_dict_test, imap, natoms_vec, ghost_map - - def _eval_inner( - self, - coords, - cells, - atom_types, - fparam=None, - aparam=None, - atomic=False, - efield=None, - mixed_type=False, - ): - natoms, nframes = self._get_natoms_and_nframes( - coords, atom_types, mixed_type=mixed_type - ) - feed_dict_test, imap, natoms_vec, ghost_map = self._prepare_feed_dict( - coords, cells, atom_types, fparam, aparam, efield, 
mixed_type=mixed_type - ) - - nloc = natoms_vec[0] - nall = natoms_vec[1] - - t_out = [self.t_energy, self.t_force, self.t_virial] - if atomic: - t_out += [self.t_ae, self.t_av] - - v_out = run_sess(self.sess, t_out, feed_dict=feed_dict_test) - energy = v_out[0] - force = v_out[1] - virial = v_out[2] - if atomic: - ae = v_out[3] - av = v_out[4] - - if self.has_spin: - ntypes_real = self.ntypes - self.ntypes_spin - natoms_real = sum( - [ - np.count_nonzero(np.array(atom_types) == ii) - for ii in range(ntypes_real) - ] + result = ( + energy, + force, + virial, ) - else: - natoms_real = natoms - if ghost_map is not None: - # add the value of ghost atoms to real atoms - force = np.reshape(force, [nframes, -1, 3]) - np.add.at(force[0], ghost_map, force[0, nloc:]) - if atomic: - av = np.reshape(av, [nframes, -1, 9]) - np.add.at(av[0], ghost_map, av[0, nloc:]) + if self.deep_eval.get_has_spin(): + force_mag = results["energy_derv_r_mag"].reshape(nframes, natoms, 3) + mask_mag = results["mask_mag"].reshape(nframes, natoms, 1) + result = (*list(result), force_mag, mask_mag) + return result - # reverse map of the outputs - force = self.reverse_map(np.reshape(force, [nframes, -1, 3]), imap) - if atomic: - ae = self.reverse_map(np.reshape(ae, [nframes, -1, 1]), imap[:natoms_real]) - av = self.reverse_map(np.reshape(av, [nframes, -1, 9]), imap) - - energy = np.reshape(energy, [nframes, 1]) - force = np.reshape(force, [nframes, nall, 3]) - if nloc < nall: - force = force[:, :nloc, :] - virial = np.reshape(virial, [nframes, 9]) - if atomic: - ae = np.reshape(ae, [nframes, natoms_real, 1]) - av = np.reshape(av, [nframes, nall, 9]) - if nloc < nall: - av = av[:, :nloc, :] - return energy, force, virial, ae, av - else: - return energy, force, virial - - def eval_descriptor( - self, - coords: np.ndarray, - cells: np.ndarray, - atom_types: List[int], - fparam: Optional[np.ndarray] = None, - aparam: Optional[np.ndarray] = None, - efield: Optional[np.ndarray] = None, - mixed_type: bool 
= False, - ) -> np.array: - """Evaluate descriptors by using this DP. - - Parameters - ---------- - coords - The coordinates of atoms. - The array should be of size nframes x natoms x 3 - cells - The cell of the region. - If None then non-PBC is assumed, otherwise using PBC. - The array should be of size nframes x 9 - atom_types - The atom types - The list should contain natoms ints - fparam - The frame parameter. - The array can be of size : - - nframes x dim_fparam. - - dim_fparam. Then all frames are assumed to be provided with the same fparam. - aparam - The atomic parameter - The array can be of size : - - nframes x natoms x dim_aparam. - - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam. - - dim_aparam. Then all frames and atoms are provided with the same aparam. - efield - The external field on atoms. - The array should be of size nframes x natoms x 3 - mixed_type - Whether to perform the mixed_type mode. - If True, the input data has the mixed_type format (see doc/model/train_se_atten.md), - in which frames in a system may have different natoms_vec(s), with the same nloc. - - Returns - ------- - descriptor - Descriptors. 
- """ - natoms, numb_test = self._get_natoms_and_nframes( - coords, atom_types, mixed_type=mixed_type - ) - descriptor = self._eval_func(self._eval_descriptor_inner, numb_test, natoms)( - coords, - cells, - atom_types, - fparam=fparam, - aparam=aparam, - efield=efield, - mixed_type=mixed_type, - ) - return descriptor - def _eval_descriptor_inner( - self, - coords: np.ndarray, - cells: np.ndarray, - atom_types: List[int], - fparam: Optional[np.ndarray] = None, - aparam: Optional[np.ndarray] = None, - efield: Optional[np.ndarray] = None, - mixed_type: bool = False, - ) -> np.array: - natoms, nframes = self._get_natoms_and_nframes( - coords, atom_types, mixed_type=mixed_type - ) - feed_dict_test, imap, natoms_vec, ghost_map = self._prepare_feed_dict( - coords, cells, atom_types, fparam, aparam, efield, mixed_type=mixed_type - ) - (descriptor,) = run_sess( - self.sess, [self.t_descriptor], feed_dict=feed_dict_test - ) - imap = imap[:natoms] - return self.reverse_map(np.reshape(descriptor, [nframes, natoms, -1]), imap) +__all__ = ["DeepPot"] diff --git a/deepmd/infer/deep_tensor.py b/deepmd/infer/deep_tensor.py index a803eb0c6b..14e13e7f84 100644 --- a/deepmd/infer/deep_tensor.py +++ b/deepmd/infer/deep_tensor.py @@ -1,160 +1,55 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +from abc import ( + abstractmethod, +) from typing import ( - TYPE_CHECKING, - ClassVar, - Dict, List, Optional, Tuple, + Union, ) import numpy as np -from deepmd.common import ( - make_default_mesh, +from deepmd.dpmodel.output_def import ( + FittingOutputDef, + ModelOutputDef, + OutputVariableDef, ) from deepmd.infer.deep_eval import ( DeepEval, ) -from deepmd.utils.sess import ( - run_sess, -) - -if TYPE_CHECKING: - from pathlib import ( - Path, - ) class DeepTensor(DeepEval): - """Evaluates a tensor model. + """Deep Tensor Model. Parameters ---------- - model_file: str + model_file : Path The name of the frozen model file. 
- load_prefix: str - The prefix in the load computational graph - default_tf_graph : bool - If uses the default tf graph, otherwise build a new tf graph for evaluation - input_map : dict, optional - The input map for tf.import_graph_def. Only work with default tf graph - neighbor_list : ase.neighborlist.NeighborList, optional - The neighbor list object. If None, then build the native neighbor list. + *args : list + Positional arguments. + auto_batch_size : bool or int or AutoBatchSize, default: True + If True, automatic batch size will be used. If int, it will be used + as the initial batch size. + neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional + The ASE neighbor list class to produce the neighbor list. If None, the + neighbor list will be built natively in the model. + **kwargs : dict + Keyword arguments. """ - tensors: ClassVar[Dict[str, str]] = { - # descriptor attrs - "t_ntypes": "descrpt_attr/ntypes:0", - "t_rcut": "descrpt_attr/rcut:0", - # model attrs - "t_tmap": "model_attr/tmap:0", - "t_sel_type": "model_attr/sel_type:0", - "t_ouput_dim": "model_attr/output_dim:0", - # inputs - "t_coord": "t_coord:0", - "t_type": "t_type:0", - "t_natoms": "t_natoms:0", - "t_box": "t_box:0", - "t_mesh": "t_mesh:0", - } - - def __init__( - self, - model_file: "Path", - load_prefix: str = "load", - default_tf_graph: bool = False, - input_map: Optional[dict] = None, - neighbor_list=None, - ) -> None: - """Constructor.""" - DeepEval.__init__( - self, - model_file, - load_prefix=load_prefix, - default_tf_graph=default_tf_graph, - input_map=input_map, - neighbor_list=neighbor_list, - ) - # check model type - model_type = self.tensors["t_tensor"][2:-2] - assert ( - self.model_type == model_type - ), f"expect {model_type} model but got {self.model_type}" - - # now load tensors to object attributes - for attr_name, tensor_name in self.tensors.items(): - self._get_tensor(tensor_name, attr_name) - - # load optional tensors if possible - optional_tensors = { - 
"t_global_tensor": f"o_global_{model_type}:0", - "t_force": "o_force:0", - "t_virial": "o_virial:0", - "t_atom_virial": "o_atom_virial:0", - } - try: - # first make sure these tensor all exists (but do not modify self attr) - for attr_name, tensor_name in optional_tensors.items(): - self._get_tensor(tensor_name) - # then put those into self.attrs - for attr_name, tensor_name in optional_tensors.items(): - self._get_tensor(tensor_name, attr_name) - except KeyError: - self._support_gfv = False - else: - self.tensors.update(optional_tensors) - self._support_gfv = True - - self._run_default_sess() - self.tmap = self.tmap.decode("UTF-8").split() - - def _run_default_sess(self): - [self.ntypes, self.rcut, self.tmap, self.tselt, self.output_dim] = run_sess( - self.sess, - [ - self.t_ntypes, - self.t_rcut, - self.t_tmap, - self.t_sel_type, - self.t_ouput_dim, - ], - ) - - def get_ntypes(self) -> int: - """Get the number of atom types of this model.""" - return self.ntypes - - def get_rcut(self) -> float: - """Get the cut-off radius of this model.""" - return self.rcut - - def get_type_map(self) -> List[str]: - """Get the type map (element name of the atom types) of this model.""" - return self.tmap - - def get_sel_type(self) -> List[int]: - """Get the selected atom types of this model.""" - return self.tselt - - def get_dim_fparam(self) -> int: - """Get the number (dimension) of frame parameters of this DP.""" - return self.dfparam - - def get_dim_aparam(self) -> int: - """Get the number (dimension) of atomic parameters of this DP.""" - return self.daparam - def eval( self, coords: np.ndarray, - cells: np.ndarray, - atom_types: List[int], + cells: Optional[np.ndarray], + atom_types: Union[List[int], np.ndarray], atomic: bool = True, fparam: Optional[np.ndarray] = None, aparam: Optional[np.ndarray] = None, - efield: Optional[np.ndarray] = None, mixed_type: bool = False, + **kwargs: dict, ) -> np.ndarray: """Evaluate the model. 
@@ -167,7 +62,7 @@ def eval( The cell of the region. If None then non-PBC is assumed, otherwise using PBC. The array should be of size nframes x 9 - atom_types + atom_types : list[int] or np.ndarray The atom types The list should contain natoms ints atomic @@ -191,100 +86,39 @@ def eval( If atomic == False then of size nframes x output_dim else of size nframes x natoms x output_dim """ - # standarize the shape of inputs - if mixed_type: - natoms = atom_types[0].size - atom_types = np.array(atom_types, dtype=int).reshape([-1, natoms]) - else: - atom_types = np.array(atom_types, dtype=int).reshape([-1]) - natoms = atom_types.size - coords = np.reshape(np.array(coords), [-1, natoms * 3]) - nframes = coords.shape[0] - if cells is None: - pbc = False - cells = np.tile(np.eye(3), [nframes, 1]).reshape([nframes, 9]) - else: - pbc = True - cells = np.array(cells).reshape([nframes, 9]) - - # sort inputs - coords, atom_types, imap, sel_at, sel_imap = self.sort_input( - coords, atom_types, sel_atoms=self.get_sel_type(), mixed_type=mixed_type + ( + coords, + cells, + atom_types, + fparam, + aparam, + nframes, + natoms, + ) = self._standard_input(coords, cells, atom_types, fparam, aparam, mixed_type) + results = self.deep_eval.eval( + coords, + cells, + atom_types, + atomic, + fparam=fparam, + aparam=aparam, + **kwargs, ) - - # make natoms_vec and default_mesh - if self.neighbor_list is None: - natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type) - assert natoms_vec[0] == natoms - mesh = make_default_mesh(pbc, mixed_type) - else: - if nframes > 1: - raise NotImplementedError( - "neighbor_list does not support multiple frames" - ) - ( - natoms_vec, - coords, - atom_types, - mesh, - imap, - _, - ) = self.build_neighbor_list( - coords, - cells if cells is not None else None, - atom_types, - imap, - self.neighbor_list, - ) - - # evaluate - feed_dict_test = {} - feed_dict_test[self.t_natoms] = natoms_vec - if mixed_type: - feed_dict_test[self.t_type] = 
atom_types.reshape([-1]) - else: - feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape( - [-1] - ) - feed_dict_test[self.t_coord] = np.reshape(coords, [-1]) - feed_dict_test[self.t_box] = np.reshape(cells, [-1]) - feed_dict_test[self.t_mesh] = mesh - if atomic: - assert ( - "global" not in self.model_type - ), f"cannot do atomic evaluation with model type {self.model_type}" - t_out = [self.t_tensor] + return results[self.output_tensor_name].reshape(nframes, natoms, -1) else: - assert ( - self._support_gfv or "global" in self.model_type - ), f"do not support global tensor evaluation with old {self.model_type} model" - t_out = [self.t_global_tensor if self._support_gfv else self.t_tensor] - v_out = self.sess.run(t_out, feed_dict=feed_dict_test) - tensor = v_out[0] - - # reverse map of the outputs - if atomic: - tensor = np.array(tensor) - tensor = self.reverse_map( - np.reshape(tensor, [nframes, -1, self.output_dim]), sel_imap - ) - tensor = np.reshape(tensor, [nframes, len(sel_at), self.output_dim]) - else: - tensor = np.reshape(tensor, [nframes, self.output_dim]) - - return tensor + return results[f"{self.output_tensor_name}_redu"].reshape(nframes, -1) def eval_full( self, coords: np.ndarray, - cells: np.ndarray, - atom_types: List[int], + cells: Optional[np.ndarray], + atom_types: np.ndarray, atomic: bool = False, - fparam: Optional[np.array] = None, - aparam: Optional[np.array] = None, - efield: Optional[np.array] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, mixed_type: bool = False, + **kwargs: dict, ) -> Tuple[np.ndarray, ...]: """Evaluate the model with interface similar to the energy model. Will return global tensor, component-wise force and virial @@ -308,8 +142,6 @@ def eval_full( Not used in this model aparam Not used in this model - efield - Not used in this model mixed_type Whether to perform the mixed_type mode. 
If True, the input data has the mixed_type format (see doc/model/train_se_atten.md), @@ -333,114 +165,72 @@ def eval_full( The atomic virial. Only returned when atomic == True shape: [nframes x nout x natoms x 9] """ - assert self._support_gfv, "do not support eval_full with old tensor model" - - # standarize the shape of inputs - if mixed_type: - natoms = atom_types[0].size - atom_types = np.array(atom_types, dtype=int).reshape([-1, natoms]) - else: - atom_types = np.array(atom_types, dtype=int).reshape([-1]) - natoms = atom_types.size - coords = np.reshape(np.array(coords), [-1, natoms * 3]) - nframes = coords.shape[0] - if cells is None: - pbc = False - cells = np.tile(np.eye(3), [nframes, 1]).reshape([nframes, 9]) - else: - pbc = True - cells = np.array(cells).reshape([nframes, 9]) - nout = self.output_dim - - # sort inputs - coords, atom_types, imap, sel_at, sel_imap = self.sort_input( - coords, atom_types, sel_atoms=self.get_sel_type(), mixed_type=mixed_type + ( + coords, + cells, + atom_types, + fparam, + aparam, + nframes, + natoms, + ) = self._standard_input(coords, cells, atom_types, fparam, aparam, mixed_type) + results = self.deep_eval.eval( + coords, + cells, + atom_types, + atomic, + fparam=fparam, + aparam=aparam, + **kwargs, ) - # make natoms_vec and default_mesh - if self.neighbor_list is None: - natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type) - assert natoms_vec[0] == natoms - mesh = make_default_mesh(pbc, mixed_type) - ghost_map = None - else: - if nframes > 1: - raise NotImplementedError( - "neighbor_list does not support multiple frames" - ) - ( - natoms_vec, - coords, - atom_types, - mesh, - imap, - ghost_map, - ) = self.build_neighbor_list( - coords, - cells if cells is not None else None, - atom_types, - imap, - self.neighbor_list, + energy = results[f"{self.output_tensor_name}_redu"].reshape(nframes, -1) + force = results[f"{self.output_tensor_name}_derv_r"].reshape( + nframes, -1, natoms, 3 + ) + virial = 
results[f"{self.output_tensor_name}_derv_c_redu"].reshape( + nframes, -1, 9 + ) + if atomic: + atomic_energy = results[self.output_tensor_name].reshape( + nframes, natoms, -1 + ) + atomic_virial = results[f"{self.output_tensor_name}_derv_c"].reshape( + nframes, -1, natoms, 9 + ) + return ( + energy, + force, + virial, + atomic_energy, + atomic_virial, ) - - # evaluate - feed_dict_test = {} - feed_dict_test[self.t_natoms] = natoms_vec - if mixed_type: - feed_dict_test[self.t_type] = atom_types.reshape([-1]) else: - feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape( - [-1] + return ( + energy, + force, + virial, ) - feed_dict_test[self.t_coord] = np.reshape(coords, [-1]) - feed_dict_test[self.t_box] = np.reshape(cells, [-1]) - feed_dict_test[self.t_mesh] = mesh - - t_out = [self.t_global_tensor, self.t_force, self.t_virial] - if atomic: - t_out += [self.t_tensor, self.t_atom_virial] - - v_out = self.sess.run(t_out, feed_dict=feed_dict_test) - gt = v_out[0] # global tensor - force = v_out[1] - virial = v_out[2] - if atomic: - at = v_out[3] # atom tensor - av = v_out[4] # atom virial - nloc = natoms_vec[0] - nall = natoms_vec[1] - - if ghost_map is not None: - # add the value of ghost atoms to real atoms - force = np.reshape(force, [nframes * nout, -1, 3]) - # TODO: is there some way not to use for loop? - for ii in range(nframes * nout): - np.add.at(force[ii], ghost_map, force[ii, nloc:]) - if atomic: - av = np.reshape(av, [nframes * nout, -1, 9]) - for ii in range(nframes * nout): - np.add.at(av[ii], ghost_map, av[ii, nloc:]) - - # please note here the shape are wrong! 
- force = self.reverse_map(np.reshape(force, [nframes * nout, nall, 3]), imap) - if atomic: - at = self.reverse_map( - np.reshape(at, [nframes, len(sel_at), nout]), sel_imap + @property + @abstractmethod + def output_tensor_name(self) -> str: + """The name of the tensor.""" + + @property + def output_def(self) -> ModelOutputDef: + """Get the output definition of this model.""" + return ModelOutputDef( + FittingOutputDef( + [ + OutputVariableDef( + self.output_tensor_name, + shape=[-1], + reduciable=True, + r_differentiable=True, + c_differentiable=True, + atomic=True, + ), + ] ) - av = self.reverse_map(np.reshape(av, [nframes * nout, nall, 9]), imap) - - # make sure the shapes are correct here - gt = np.reshape(gt, [nframes, nout]) - force = np.reshape(force, [nframes, nout, nall, 3]) - if nloc < nall: - force = force[:, :, :nloc, :] - virial = np.reshape(virial, [nframes, nout, 9]) - if atomic: - at = np.reshape(at, [nframes, len(sel_at), self.output_dim]) - av = np.reshape(av, [nframes, nout, nall, 9]) - if nloc < nall: - av = av[:, :, :nloc, :] - return gt, force, virial, at, av - else: - return gt, force, virial + ) diff --git a/deepmd/infer/deep_wfc.py b/deepmd/infer/deep_wfc.py index ed682f642b..deed938e04 100644 --- a/deepmd/infer/deep_wfc.py +++ b/deepmd/infer/deep_wfc.py @@ -1,68 +1,28 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - TYPE_CHECKING, - Optional, -) - from deepmd.infer.deep_tensor import ( DeepTensor, ) -if TYPE_CHECKING: - from pathlib import ( - Path, - ) - class DeepWFC(DeepTensor): - """Constructor. + """Deep WFC model. Parameters ---------- model_file : Path The name of the frozen model file. - load_prefix: str - The prefix in the load computational graph - default_tf_graph : bool - If uses the default tf graph, otherwise build a new tf graph for evaluation - input_map : dict, optional - The input map for tf.import_graph_def. 
Only work with default tf graph - - Warnings - -------- - For developers: `DeepTensor` initializer must be called at the end after - `self.tensors` are modified because it uses the data in `self.tensors` dict. - Do not chanage the order! + *args : list + Positional arguments. + auto_batch_size : bool or int or AutoBatchSize, default: True + If True, automatic batch size will be used. If int, it will be used + as the initial batch size. + neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional + The ASE neighbor list class to produce the neighbor list. If None, the + neighbor list will be built natively in the model. + **kwargs : dict + Keyword arguments. """ - def __init__( - self, - model_file: "Path", - load_prefix: str = "load", - default_tf_graph: bool = False, - input_map: Optional[dict] = None, - ) -> None: - # use this in favor of dict update to move attribute from class to - # instance namespace - self.tensors = dict( - { - # output tensor - "t_tensor": "o_wfc:0", - }, - **self.tensors, - ) - DeepTensor.__init__( - self, - model_file, - load_prefix=load_prefix, - default_tf_graph=default_tf_graph, - input_map=input_map, - ) - - def get_dim_fparam(self) -> int: - """Unsupported in this model.""" - raise NotImplementedError("This model type does not support this attribute") - - def get_dim_aparam(self) -> int: - """Unsupported in this model.""" - raise NotImplementedError("This model type does not support this attribute") + @property + def output_tensor_name(self) -> str: + return "wfc" diff --git a/deepmd/infer/model_devi.py b/deepmd/infer/model_devi.py index 8c329a0845..a37dfd34c5 100644 --- a/deepmd/infer/model_devi.py +++ b/deepmd/infer/model_devi.py @@ -10,16 +10,12 @@ from deepmd.common import ( expand_sys_str, ) - -from ..utils.batch_size import ( - AutoBatchSize, +from deepmd.infer.deep_pot import ( + DeepPot, ) -from ..utils.data import ( +from deepmd.utils.data import ( DeepmdData, ) -from .deep_pot import ( - DeepPot, -) try: from 
typing import Literal # python >=3.8 @@ -33,8 +29,7 @@ def calc_model_devi_f( real_f: Optional[np.ndarray] = None, relative: Optional[float] = None, atomic: Literal[False] = False, -) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: - ... +) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: ... @overload @@ -44,8 +39,7 @@ def calc_model_devi_f( relative: Optional[float] = None, *, atomic: Literal[True], -) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: - ... +) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: ... def calc_model_devi_f( @@ -297,19 +291,19 @@ def calc_model_devi( Examples -------- - >>> from deepmd.infer import calc_model_devi - >>> from deepmd.infer import DeepPot as DP + >>> from deepmd.tf.infer import calc_model_devi + >>> from deepmd.tf.infer import DeepPot as DP >>> import numpy as np - >>> coord = np.array([[1,0,0], [0,0,1.5], [1,0,3]]).reshape([1, -1]) + >>> coord = np.array([[1, 0, 0], [0, 0, 1.5], [1, 0, 3]]).reshape([1, -1]) >>> cell = np.diag(10 * np.ones(3)).reshape([1, -1]) - >>> atype = [1,0,1] + >>> atype = [1, 0, 1] >>> graphs = [DP("graph.000.pb"), DP("graph.001.pb")] >>> model_devi = calc_model_devi(coord, cell, atype, graphs) """ energies = [] forces = [] virials = [] - natom = atype.shape[-1] + natom = np.array(atype).shape[-1] for dp in models: ret = dp.eval( coord, @@ -396,9 +390,8 @@ def make_model_devi( **kwargs Arbitrary keyword arguments. 
""" - auto_batch_size = AutoBatchSize() # init models - dp_models = [DeepPot(model, auto_batch_size=auto_batch_size) for model in models] + dp_models = [DeepPot(model, auto_batch_size=True) for model in models] # check type maps tmaps = [dp.get_type_map() for dp in dp_models] diff --git a/deepmd/loggers/__init__.py b/deepmd/loggers/__init__.py index 71057e3056..39aa76139d 100644 --- a/deepmd/loggers/__init__.py +++ b/deepmd/loggers/__init__.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -"""Alias of deepmd_utils.loggers for backward compatibility.""" +"""Module taking care of logging duties.""" -from deepmd_utils.loggers.loggers import ( +from .loggers import ( set_log_handles, ) diff --git a/deepmd/loggers/loggers.py b/deepmd/loggers/loggers.py index 74ca7de63e..33b9497507 100644 --- a/deepmd/loggers/loggers.py +++ b/deepmd/loggers/loggers.py @@ -1,7 +1,277 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -"""Alias of deepmd_utils.loggers.loggers for backward compatibility.""" -from deepmd_utils.loggers.loggers import ( - set_log_handles, +"""Logger initialization for package.""" + +import logging +import os +from typing import ( + TYPE_CHECKING, + Optional, ) +if TYPE_CHECKING: + from pathlib import ( + Path, + ) + + from mpi4py import ( + MPI, + ) + + _MPI_APPEND_MODE = MPI.MODE_CREATE | MPI.MODE_APPEND + +logging.getLogger(__name__) + __all__ = ["set_log_handles"] + +# logger formater +FFORMATTER = logging.Formatter( + "[%(asctime)s] %(app_name)s %(levelname)-7s %(name)-45s %(message)s" +) +CFORMATTER = logging.Formatter( + # "%(app_name)s %(levelname)-7s |-> %(name)-45s %(message)s" + "[%(asctime)s] %(app_name)s %(levelname)-7s %(message)s" +) +FFORMATTER_MPI = logging.Formatter( + "[%(asctime)s] %(app_name)s rank:%(rank)-2s %(levelname)-7s %(name)-45s %(message)s" +) +CFORMATTER_MPI = logging.Formatter( + # "%(app_name)s rank:%(rank)-2s %(levelname)-7s |-> %(name)-45s %(message)s" + "[%(asctime)s] %(app_name)s rank:%(rank)-2s %(levelname)-7s 
%(message)s" +) + + +class _AppFilter(logging.Filter): + """Add field `app_name` to log messages.""" + + def filter(self, record): + record.app_name = "DEEPMD" + return True + + +class _MPIRankFilter(logging.Filter): + """Add MPI rank number to log messages, adds field `rank`.""" + + def __init__(self, rank: int) -> None: + super().__init__(name="MPI_rank_id") + self.mpi_rank = str(rank) + + def filter(self, record): + record.rank = self.mpi_rank + return True + + +class _MPIMasterFilter(logging.Filter): + """Filter that lets through only messages emited from rank==0.""" + + def __init__(self, rank: int) -> None: + super().__init__(name="MPI_master_log") + self.mpi_rank = rank + + def filter(self, record): + if self.mpi_rank == 0: + return True + else: + return False + + +class _MPIFileStream: + """Wrap MPI.File` so it has the same API as python file streams. + + Parameters + ---------- + filename : Path + disk location of the file stream + MPI : MPI + MPI communicator object + mode : str, optional + file write mode, by default _MPI_APPEND_MODE + """ + + def __init__( + self, filename: "Path", MPI: "MPI", mode: str = "_MPI_APPEND_MODE" + ) -> None: + self.stream = MPI.File.Open(MPI.COMM_WORLD, filename, mode) + self.stream.Set_atomicity(True) + self.name = "MPIfilestream" + + def write(self, msg: str): + """Write to MPI shared file stream. + + Parameters + ---------- + msg : str + message to write + """ + b = bytearray() + b.extend(map(ord, msg)) + self.stream.Write_shared(b) + + def close(self): + """Synchronize and close MPI file stream.""" + self.stream.Sync() + self.stream.Close() + + +class _MPIHandler(logging.FileHandler): + """Emulate `logging.FileHandler` with MPI shared File that all ranks can write to. 
+ + Parameters + ---------- + filename : Path + file path + MPI : MPI + MPI communicator object + mode : str, optional + file access mode, by default "_MPI_APPEND_MODE" + """ + + def __init__( + self, + filename: "Path", + MPI: "MPI", + mode: str = "_MPI_APPEND_MODE", + ) -> None: + self.MPI = MPI + super().__init__(filename, mode=mode, encoding=None, delay=False) + + def _open(self): + return _MPIFileStream(self.baseFilename, self.MPI, self.mode) + + def setStream(self, stream): + """Stream cannot be reassigned in MPI mode.""" + raise NotImplementedError("Unable to do for MPI file handler!") + + +def set_log_handles( + level: int, log_path: Optional["Path"] = None, mpi_log: Optional[str] = None +): + """Set desired level for package loggers and add file handlers. + + Parameters + ---------- + level : int + logging level + log_path : Optional[Path] + path to log file, if None logs will be sent only to console. If the parent + directory does not exist it will be automatically created, by default None + mpi_log : Optional[str], optional + mpi log type. Has three options. `master` will output logs to file and console + only from rank==0. `collect` will write messages from all ranks to one file + opened under rank==0 and to console. `workers` will open one log file for each + worker designated by its rank, console behaviour is the same as for `collect`. + If this argument is specified, package 'mpi4py' must be already installed. + by default None + + Raises + ------ + RuntimeError + If the argument `mpi_log` is specified but package `mpi4py` is not installed.
+ + References + ---------- + https://groups.google.com/g/mpi4py/c/SaNzc8bdj6U + https://stackoverflow.com/questions/35869137/avoid-tensorflow-print-on-standard-error + https://stackoverflow.com/questions/56085015/suppress-openmp-debug-messages-when-running-tensorflow-on-cpu + + Notes + ----- + Logging levels: + + +---------+--------------+----------------+----------------+----------------+ + | | our notation | python logging | tensorflow cpp | OpenMP | + +=========+==============+================+================+================+ + | debug | 10 | 10 | 0 | 1/on/true/yes | + +---------+--------------+----------------+----------------+----------------+ + | info | 20 | 20 | 1 | 0/off/false/no | + +---------+--------------+----------------+----------------+----------------+ + | warning | 30 | 30 | 2 | 0/off/false/no | + +---------+--------------+----------------+----------------+----------------+ + | error | 40 | 40 | 3 | 0/off/false/no | + +---------+--------------+----------------+----------------+----------------+ + + """ + # silence logging for OpenMP when running on CPU if level is any other than debug + if level > 10: + os.environ["KMP_WARNINGS"] = "FALSE" + + # set TF cpp internal logging level + os.environ["TF_CPP_MIN_LOG_LEVEL"] = str(int((level / 10) - 1)) + + # get root logger + root_log = logging.getLogger("deepmd") + root_log.propagate = False + + root_log.setLevel(level) + + # check if arguments are present + MPI = None + if mpi_log: + try: + from mpi4py import ( + MPI, + ) + except ImportError as e: + raise RuntimeError( + "You cannot specify 'mpi_log' when mpi4py not installed" + ) from e + + # * add console handler ************************************************************ + ch = logging.StreamHandler() + if MPI: + rank = MPI.COMM_WORLD.Get_rank() + if mpi_log == "master": + ch.setFormatter(CFORMATTER) + ch.addFilter(_MPIMasterFilter(rank)) + else: + ch.setFormatter(CFORMATTER_MPI) + ch.addFilter(_MPIRankFilter(rank)) + else: +
ch.setFormatter(CFORMATTER) + + ch.setLevel(level) + ch.addFilter(_AppFilter()) + # clean old handlers before adding new one + root_log.handlers.clear() + root_log.addHandler(ch) + + # * add file handler *************************************************************** + if log_path: + # create directory + log_path.parent.mkdir(exist_ok=True, parents=True) + + fh = None + + if mpi_log == "master": + rank = MPI.COMM_WORLD.Get_rank() + if rank == 0: + fh = logging.FileHandler(log_path, mode="w") + fh.addFilter(_MPIMasterFilter(rank)) + fh.setFormatter(FFORMATTER) + elif mpi_log == "collect": + rank = MPI.COMM_WORLD.Get_rank() + fh = _MPIHandler(log_path, MPI, mode=MPI.MODE_WRONLY | MPI.MODE_CREATE) + fh.addFilter(_MPIRankFilter(rank)) + fh.setFormatter(FFORMATTER_MPI) + elif mpi_log == "workers": + rank = MPI.COMM_WORLD.Get_rank() + # if file has suffix then insert rank number before suffix + # e.g. deepmd.log -> deepmd_<rank>.log + # if no suffix is present, insert rank as suffix + # e.g. deepmdlog -> deepmdlog.<rank>
+ if log_path.suffix: + worker_log = (log_path.parent / f"{log_path.stem}_{rank}").with_suffix( + log_path.suffix + ) + else: + worker_log = log_path.with_suffix(f".{rank}") + + fh = logging.FileHandler(worker_log, mode="w") + fh.setFormatter(FFORMATTER) + else: + fh = logging.FileHandler(log_path, mode="w") + fh.setFormatter(FFORMATTER) + + if fh: + fh.setLevel(level) + fh.addFilter(_AppFilter()) + root_log.addHandler(fh) diff --git a/deepmd/loggers/training.py b/deepmd/loggers/training.py new file mode 100644 index 0000000000..954473e309 --- /dev/null +++ b/deepmd/loggers/training.py @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Dict, + Optional, +) + + +def format_training_message( + batch: int, + wall_time: float, +): + """Format a training message.""" + return f"batch {batch:7d}: " f"total wall time = {wall_time:.2f} s" + + +def format_training_message_per_task( + batch: int, + task_name: str, + rmse: Dict[str, float], + learning_rate: Optional[float], +): + if task_name: + task_name += ": " + if learning_rate is None: + lr = "" + else: + lr = f", lr = {learning_rate:8.2e}" + # sort rmse + rmse = dict(sorted(rmse.items())) + return ( + f"batch {batch:7d}: {task_name}" + f"{', '.join([f'{kk} = {vv:8.2e}' for kk, vv in rmse.items()])}" + f"{lr}" + ) diff --git a/deepmd_utils/main.py b/deepmd/main.py similarity index 76% rename from deepmd_utils/main.py rename to deepmd/main.py index 19afaeee1f..b503107c73 100644 --- a/deepmd_utils/main.py +++ b/deepmd/main.py @@ -4,16 +4,27 @@ If only printing the help message, this module does not call the main DeePMD-kit module to avoid the slow import of TensorFlow. 
""" + import argparse import logging +import os import textwrap +from collections import ( + defaultdict, +) from typing import ( + Dict, List, Optional, + Type, +) + +from deepmd.backend.backend import ( + Backend, ) try: - from deepmd_utils._version import version as __version__ + from deepmd._version import version as __version__ except ImportError: __version__ = "unknown" @@ -45,6 +56,19 @@ class RawTextArgumentDefaultsHelpFormatter( """This formatter is used to print multile-line help message with default value.""" +BACKENDS: Dict[str, Type[Backend]] = Backend.get_backends_by_feature( + Backend.Feature.ENTRY_POINT +) +BACKEND_TABLE: Dict[str, str] = {kk: vv.name.lower() for kk, vv in BACKENDS.items()} + + +class BackendOption(argparse.Action): + """Map backend alias to unique name.""" + + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, self.dest, BACKEND_TABLE[values]) + + def main_parser() -> argparse.ArgumentParser: """DeePMD-Kit commandline options argument parser. @@ -56,8 +80,49 @@ def main_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( description="DeePMD-kit: A deep learning package for many-body potential energy" " representation and molecular dynamics", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, + formatter_class=RawTextArgumentDefaultsHelpFormatter, + epilog=textwrap.dedent( + """\ + Use --tf or --pt to choose the backend: + dp --tf train input.json + dp --pt train input.json + """ + ), + ) + + # default backend is TF for compatibility + default_backend = os.environ.get("DP_BACKEND", "tensorflow").lower() + if default_backend not in BACKEND_TABLE.keys(): + raise ValueError( + f"Unknown backend {default_backend}. " + "Please set DP_BACKEND to either tensorflow or pytorch." 
+ ) + + parser_backend = parser.add_mutually_exclusive_group() + parser_backend.add_argument( + "-b", + "--backend", + choices=list(BACKEND_TABLE.keys()), + action=BackendOption, + default=default_backend, + help=( + "The backend of the model. Default can be set by environment variable " + "DP_BACKEND." + ), ) + + BACKEND_ALIAS: Dict[str, List[str]] = defaultdict(list) + for alias, backend in BACKEND_TABLE.items(): + BACKEND_ALIAS[backend].append(alias) + for backend, alias in BACKEND_ALIAS.items(): + parser_backend.add_argument( + *[f"--{aa}" for aa in alias], + action="store_const", + dest="backend", + const=backend, + help=f"Alias for --backend {backend}", + ) + subparsers = parser.add_subparsers(title="Valid subcommands", dest="command") # * logging options parser ********************************************************* @@ -98,7 +163,9 @@ def main_parser() -> argparse.ArgumentParser: # * transfer script **************************************************************** parser_transfer = subparsers.add_parser( - "transfer", parents=[parser_log], help="pass parameters to another model" + "transfer", + parents=[parser_log], + help="(Supported backend: TensorFlow) pass parameters to another model", ) parser_transfer.add_argument( "-r", @@ -181,6 +248,18 @@ def main_parser() -> argparse.ArgumentParser: action="store_true", help="Skip calculating neighbor statistics. Sel checking, automatic sel, and model compression will be disabled.", ) + parser_train.add_argument( + # -m has been used by mpi-log + "--model-branch", + type=str, + default="", + help="(Supported backend: PyTorch) Model branch chosen for fine-tuning if multi-task. 
If not specified, it will re-init the fitting net.", + ) + parser_train.add_argument( + "--force-load", + action="store_true", + help="(Supported backend: PyTorch) Force load from ckpt, other missing tensors will init from scratch", + ) # * freeze script ****************************************************************** parser_frz = subparsers.add_parser( @@ -199,36 +278,43 @@ def main_parser() -> argparse.ArgumentParser: parser_frz.add_argument( "-c", "--checkpoint-folder", + "--checkpoint", type=str, default=".", - help="path to checkpoint folder", + help="Path to checkpoint, either a folder containing checkpoint or the checkpoint prefix", ) parser_frz.add_argument( "-o", "--output", type=str, - default="frozen_model.pb", - help="name of graph, will output to the checkpoint folder", + default="frozen_model", + help="Filename (prefix) of the output model file. TensorFlow backend: suffix is .pb; PyTorch backend: suffix is .pth", ) parser_frz.add_argument( "-n", "--node-names", type=str, default=None, - help="the frozen nodes, if not set, determined from the model type", + help="(Supported backend: TensorFlow) the frozen nodes, if not set, determined from the model type", ) parser_frz.add_argument( "-w", "--nvnmd-weight", type=str, default=None, - help="the name of weight file (.npy), if set, save the model's weight into the file", + help="(Supported backend: TensorFlow) the name of weight file (.npy), if set, save the model's weight into the file", ) parser_frz.add_argument( "--united-model", action="store_true", default=False, - help="When in multi-task mode, freeze all nodes into one united model", + help="(Supported backend: TensorFlow) When in multi-task mode, freeze all nodes into one united model", + ) + parser_frz.add_argument( + "--head", + default=None, + type=str, + help="(Supported backend: PyTorch) Task head to freeze if in multi-task mode.", ) # * test script ******************************************************************** @@ -247,9 +333,9 @@ def 
main_parser() -> argparse.ArgumentParser: parser_tst.add_argument( "-m", "--model", - default="frozen_model.pb", + default="frozen_model", type=str, - help="Frozen model file to import", + help="Frozen model file (prefix) to import. TensorFlow backend: suffix is .pb; PyTorch backend: suffix is .pth.", ) parser_tst_subgroup = parser_tst.add_mutually_exclusive_group() parser_tst_subgroup.add_argument( @@ -267,7 +353,11 @@ def main_parser() -> argparse.ArgumentParser: help="The path to file of test list.", ) parser_tst.add_argument( - "-S", "--set-prefix", default="set", type=str, help="The set prefix" + "-S", + "--set-prefix", + default="set", + type=str, + help="(Supported backend: TensorFlow) The set prefix", ) parser_tst.add_argument( "-n", @@ -277,7 +367,11 @@ def main_parser() -> argparse.ArgumentParser: help="The number of data for test. 0 means all data.", ) parser_tst.add_argument( - "-r", "--rand-seed", type=int, default=None, help="The random seed" + "-r", + "--rand-seed", + type=int, + default=None, + help="(Supported backend: TensorFlow) The random seed", ) parser_tst.add_argument( "--shuffle-test", action="store_true", default=False, help="Shuffle test data" @@ -294,7 +388,19 @@ def main_parser() -> argparse.ArgumentParser: "--atomic", action="store_true", default=False, - help="Test the accuracy of atomic label, i.e. energy / tensor (dipole, polar)", + help="(Supported backend: TensorFlow) Test the accuracy of atomic label, i.e. 
energy / tensor (dipole, polar)", + ) + parser_tst.add_argument( + "-i", + "--input_script", + type=str, + help="(Supported backend: PyTorch) The input script of the model", + ) + parser_tst.add_argument( + "--head", + default=None, + type=str, + help="(Supported backend: PyTorch) Task head to test if in multi-task mode.", ) # * compress model ***************************************************************** @@ -308,7 +414,7 @@ def main_parser() -> argparse.ArgumentParser: parser_compress = subparsers.add_parser( "compress", parents=[parser_log, parser_mpi_log], - help="compress a model", + help="(Supported backend: TensorFlow) compress a model", formatter_class=RawTextArgumentDefaultsHelpFormatter, epilog=textwrap.dedent( """\ @@ -409,10 +515,10 @@ def main_parser() -> argparse.ArgumentParser: parser_model_devi.add_argument( "-m", "--models", - default=["graph.000.pb", "graph.001.pb", "graph.002.pb", "graph.003.pb"], + default=["graph.000", "graph.001", "graph.002", "graph.003"], nargs="+", type=str, - help="Frozen models file to import", + help="Frozen models file (prefix) to import. 
TensorFlow backend: suffix is .pb; PyTorch backend: suffix is .pth.", ) parser_model_devi.add_argument( "-s", @@ -465,7 +571,7 @@ def main_parser() -> argparse.ArgumentParser: parser_transform = subparsers.add_parser( "convert-from", parents=[parser_log], - help="convert lower model version to supported version", + help="(Supported backend: TensorFlow) convert lower model version to supported version", formatter_class=RawTextArgumentDefaultsHelpFormatter, epilog=textwrap.dedent( """\ @@ -535,6 +641,7 @@ def main_parser() -> argparse.ArgumentParser: help="type map", ) parser_neighbor_stat.add_argument( + "--mixed-type", "--one-type", action="store_true", default=False, @@ -550,7 +657,7 @@ def main_parser() -> argparse.ArgumentParser: parser_train_nvnmd = subparsers.add_parser( "train-nvnmd", parents=[parser_log], - help="train nvnmd model", + help="(Supported backend: TensorFlow) train nvnmd model", formatter_class=argparse.ArgumentDefaultsHelpFormatter, epilog=textwrap.dedent( """\ @@ -615,6 +722,23 @@ def main_parser() -> argparse.ArgumentParser: "to the network on both IPv4 and IPv6 (where available)." 
), ) + + # convert_backend + parser_convert_backend = subparsers.add_parser( + "convert-backend", + parents=[parser_log], + help="Convert model to another backend.", + formatter_class=RawTextArgumentDefaultsHelpFormatter, + epilog=textwrap.dedent( + """\ + examples: + dp convert-backend model.pb model.pth + dp convert-backend model.pb model.dp + """ + ), + ) + parser_convert_backend.add_argument("INPUT", help="The input model file.") + parser_convert_backend.add_argument("OUTPUT", help="The output model file.") return parser @@ -651,6 +775,33 @@ def main(): if no command was input """ args = parse_args() - from deepmd.entrypoints.main import main as deepmd_main + + if args.backend not in BACKEND_TABLE: + raise ValueError(f"Unknown backend {args.backend}") + + if args.command in ( + "test", + "doc-train-input", + "model-devi", + "neighbor-stat", + "gui", + "convert-backend", + ): + # common entrypoints + from deepmd.entrypoints.main import main as deepmd_main + elif args.command in ( + "train", + "freeze", + "transfer", + "compress", + "convert-from", + "train-nvnmd", + ): + deepmd_main = BACKENDS[args.backend]().entry_point_hook + elif args.command is None: + # help message has been printed in parse_args + return + else: + raise RuntimeError(f"unknown command {args.command}") deepmd_main(args) diff --git a/deepmd/pt/__init__.py b/deepmd/pt/__init__.py new file mode 100644 index 0000000000..ab61736198 --- /dev/null +++ b/deepmd/pt/__init__.py @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +# import customized OPs globally +from deepmd.pt.cxx_op import ( + ENABLE_CUSTOMIZED_OP, +) + +__all__ = [ + "ENABLE_CUSTOMIZED_OP", +] diff --git a/deepmd/pt/cxx_op.py b/deepmd/pt/cxx_op.py new file mode 100644 index 0000000000..7887b5722c --- /dev/null +++ b/deepmd/pt/cxx_op.py @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import platform + +import torch + +from deepmd.env import ( + SHARED_LIB_DIR, +) + + +def load_library(module_name: str) 
-> bool: + """Load OP library. + + Parameters + ---------- + module_name : str + Name of the module + + Returns + ------- + bool + Whether the library is loaded successfully + """ + if platform.system() == "Windows": + ext = ".dll" + prefix = "" + else: + ext = ".so" + prefix = "lib" + + module_file = (SHARED_LIB_DIR / (prefix + module_name)).with_suffix(ext).resolve() + + if module_file.is_file(): + torch.ops.load_library(module_file) + return True + return False + + +ENABLE_CUSTOMIZED_OP = load_library("deepmd_op_pt") + +__all__ = [ + "ENABLE_CUSTOMIZED_OP", +] diff --git a/deepmd_utils/entrypoints/__init__.py b/deepmd/pt/entrypoints/__init__.py similarity index 100% rename from deepmd_utils/entrypoints/__init__.py rename to deepmd/pt/entrypoints/__init__.py diff --git a/deepmd/pt/entrypoints/main.py b/deepmd/pt/entrypoints/main.py new file mode 100644 index 0000000000..adaec0968a --- /dev/null +++ b/deepmd/pt/entrypoints/main.py @@ -0,0 +1,324 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import argparse +import json +import logging +import os +from copy import ( + deepcopy, +) +from pathlib import ( + Path, +) +from typing import ( + List, + Optional, + Union, +) + +import h5py +import torch +import torch.distributed as dist +import torch.version +from torch.distributed.elastic.multiprocessing.errors import ( + record, +) + +from deepmd import ( + __version__, +) +from deepmd.loggers.loggers import ( + set_log_handles, +) +from deepmd.main import ( + parse_args, +) +from deepmd.pt.cxx_op import ( + ENABLE_CUSTOMIZED_OP, +) +from deepmd.pt.infer import ( + inference, +) +from deepmd.pt.model.model import ( + BaseModel, +) +from deepmd.pt.train import ( + training, +) +from deepmd.pt.utils.dataloader import ( + DpLoaderSet, +) +from deepmd.pt.utils.env import ( + DEVICE, +) +from deepmd.pt.utils.finetune import ( + change_finetune_model_params, +) +from deepmd.pt.utils.multi_task import ( + preprocess_shared_params, +) +from deepmd.utils.argcheck import ( + 
normalize, +) +from deepmd.utils.compat import ( + update_deepmd_input, +) +from deepmd.utils.data_system import ( + process_systems, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.summary import SummaryPrinter as BaseSummaryPrinter + +log = logging.getLogger(__name__) + + +def get_trainer( + config, + init_model=None, + restart_model=None, + finetune_model=None, + model_branch="", + force_load=False, + init_frz_model=None, + shared_links=None, +): + multi_task = "model_dict" in config.get("model", {}) + + # Initialize DDP + local_rank = os.environ.get("LOCAL_RANK") + if local_rank is not None: + local_rank = int(local_rank) + assert dist.is_nccl_available() + dist.init_process_group(backend="nccl") + + ckpt = init_model if init_model is not None else restart_model + finetune_links = None + if finetune_model is not None: + config["model"], finetune_links = change_finetune_model_params( + finetune_model, + config["model"], + model_branch=model_branch, + ) + config["model"]["resuming"] = (finetune_model is not None) or (ckpt is not None) + + def prepare_trainer_input_single( + model_params_single, data_dict_single, loss_dict_single, suffix="", rank=0 + ): + training_dataset_params = data_dict_single["training_data"] + type_split = False + if model_params_single["descriptor"]["type"] in ["se_e2_a"]: + type_split = True + validation_dataset_params = data_dict_single.get("validation_data", None) + validation_systems = ( + validation_dataset_params["systems"] if validation_dataset_params else None + ) + training_systems = training_dataset_params["systems"] + training_systems = process_systems(training_systems) + if validation_systems is not None: + validation_systems = process_systems(validation_systems) + + # stat files + stat_file_path_single = data_dict_single.get("stat_file", None) + if rank != 0: + stat_file_path_single = None + elif stat_file_path_single is not None: + if Path(stat_file_path_single).is_dir(): + raise ValueError( + f"stat_file 
should be a file, not a directory: {stat_file_path_single}" + ) + if not Path(stat_file_path_single).is_file(): + with h5py.File(stat_file_path_single, "w") as f: + pass + stat_file_path_single = DPPath(stat_file_path_single, "a") + + # validation and training data + validation_data_single = ( + DpLoaderSet( + validation_systems, + validation_dataset_params["batch_size"], + model_params_single["type_map"], + ) + if validation_systems + else None + ) + if ckpt or finetune_model: + train_data_single = DpLoaderSet( + training_systems, + training_dataset_params["batch_size"], + model_params_single["type_map"], + ) + else: + train_data_single = DpLoaderSet( + training_systems, + training_dataset_params["batch_size"], + model_params_single["type_map"], + ) + return ( + train_data_single, + validation_data_single, + stat_file_path_single, + ) + + rank = dist.get_rank() if dist.is_initialized() else 0 + if not multi_task: + ( + train_data, + validation_data, + stat_file_path, + ) = prepare_trainer_input_single( + config["model"], + config["training"], + config["loss"], + rank=rank, + ) + else: + train_data, validation_data, stat_file_path = {}, {}, {} + for model_key in config["model"]["model_dict"]: + ( + train_data[model_key], + validation_data[model_key], + stat_file_path[model_key], + ) = prepare_trainer_input_single( + config["model"]["model_dict"][model_key], + config["training"]["data_dict"][model_key], + config["loss_dict"][model_key], + suffix=f"_{model_key}", + rank=rank, + ) + + trainer = training.Trainer( + config, + train_data, + stat_file_path=stat_file_path, + validation_data=validation_data, + init_model=init_model, + restart_model=restart_model, + finetune_model=finetune_model, + force_load=force_load, + shared_links=shared_links, + finetune_links=finetune_links, + init_frz_model=init_frz_model, + ) + return trainer + + +class SummaryPrinter(BaseSummaryPrinter): + """Summary printer for PyTorch.""" + + def is_built_with_cuda(self) -> bool: + """Check if 
the backend is built with CUDA.""" + return torch.version.cuda is not None + + def is_built_with_rocm(self) -> bool: + """Check if the backend is built with ROCm.""" + return torch.version.hip is not None + + def get_compute_device(self) -> str: + """Get Compute device.""" + return str(DEVICE) + + def get_ngpus(self) -> int: + """Get the number of GPUs.""" + return torch.cuda.device_count() + + def get_backend_info(self) -> dict: + """Get backend information.""" + return { + "Backend": "PyTorch", + "PT ver": f"v{torch.__version__}-g{torch.version.git_version[:11]}", + "Enable custom OP": ENABLE_CUSTOMIZED_OP, + } + + +def train(FLAGS): + log.info("Configuration path: %s", FLAGS.INPUT) + SummaryPrinter()() + with open(FLAGS.INPUT) as fin: + config = json.load(fin) + + # update multitask config + multi_task = "model_dict" in config["model"] + shared_links = None + if multi_task: + config["model"], shared_links = preprocess_shared_params(config["model"]) + + # argcheck + if not multi_task: + config = update_deepmd_input(config, warning=True, dump="input_v2_compat.json") + config = normalize(config) + + # do neighbor stat + if not FLAGS.skip_neighbor_stat: + log.info( + "Calculate neighbor statistics... 
(add --skip-neighbor-stat to skip this step)" + ) + if not multi_task: + config["model"] = BaseModel.update_sel(config, config["model"]) + else: + training_jdata = deepcopy(config["training"]) + training_jdata.pop("data_dict", {}) + training_jdata.pop("model_prob", {}) + for model_item in config["model"]["model_dict"]: + fake_global_jdata = { + "model": deepcopy(config["model"]["model_dict"][model_item]), + "training": deepcopy(config["training"]["data_dict"][model_item]), + } + fake_global_jdata["training"].update(training_jdata) + config["model"]["model_dict"][model_item] = BaseModel.update_sel( + fake_global_jdata, config["model"]["model_dict"][model_item] + ) + + with open(FLAGS.output, "w") as fp: + json.dump(config, fp, indent=4) + + trainer = get_trainer( + config, + FLAGS.init_model, + FLAGS.restart, + FLAGS.finetune, + FLAGS.model_branch, + FLAGS.force_load, + FLAGS.init_frz_model, + shared_links=shared_links, + ) + trainer.run() + + +def freeze(FLAGS): + model = torch.jit.script(inference.Tester(FLAGS.model, head=FLAGS.head).model) + torch.jit.save( + model, + FLAGS.output, + {}, + ) + + +@record +def main(args: Optional[Union[List[str], argparse.Namespace]] = None): + if not isinstance(args, argparse.Namespace): + FLAGS = parse_args(args=args) + else: + FLAGS = args + + set_log_handles(FLAGS.log_level, FLAGS.log_path, mpi_log=None) + log.debug("Log handles were successfully set") + log.info("DeepMD version: %s", __version__) + + if FLAGS.command == "train": + train(FLAGS) + elif FLAGS.command == "freeze": + if Path(FLAGS.checkpoint_folder).is_dir(): + checkpoint_path = Path(FLAGS.checkpoint_folder) + latest_ckpt_file = (checkpoint_path / "checkpoint").read_text() + FLAGS.model = str(checkpoint_path.joinpath(latest_ckpt_file)) + else: + FLAGS.model = FLAGS.checkpoint_folder + FLAGS.output = str(Path(FLAGS.output).with_suffix(".pth")) + freeze(FLAGS) + else: + raise RuntimeError(f"Invalid command {FLAGS.command}!") + + +if __name__ == "__main__": + main() 
diff --git a/deepmd/pt/infer/__init__.py b/deepmd/pt/infer/__init__.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/deepmd/pt/infer/__init__.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/deepmd/pt/infer/deep_eval.py b/deepmd/pt/infer/deep_eval.py new file mode 100644 index 0000000000..8a3a61400d --- /dev/null +++ b/deepmd/pt/infer/deep_eval.py @@ -0,0 +1,754 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + List, + Optional, + Tuple, + Union, +) + +import numpy as np +import torch + +from deepmd.dpmodel.output_def import ( + ModelOutputDef, + OutputVariableCategory, + OutputVariableDef, +) +from deepmd.infer.deep_dipole import ( + DeepDipole, +) +from deepmd.infer.deep_dos import ( + DeepDOS, +) +from deepmd.infer.deep_eval import DeepEval as DeepEvalWrapper +from deepmd.infer.deep_eval import ( + DeepEvalBackend, +) +from deepmd.infer.deep_polar import ( + DeepGlobalPolar, + DeepPolar, +) +from deepmd.infer.deep_pot import ( + DeepPot, +) +from deepmd.infer.deep_wfc import ( + DeepWFC, +) +from deepmd.pt.model.model import ( + get_model, +) +from deepmd.pt.train.wrapper import ( + ModelWrapper, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.auto_batch_size import ( + AutoBatchSize, +) +from deepmd.pt.utils.env import ( + DEVICE, + GLOBAL_PT_FLOAT_PRECISION, +) +from deepmd.pt.utils.utils import ( + to_torch_tensor, +) + +if TYPE_CHECKING: + import ase.neighborlist + + +class DeepEval(DeepEvalBackend): + """PyTorch backend implementaion of DeepEval. + + Parameters + ---------- + model_file : Path + The name of the frozen model file. + output_def : ModelOutputDef + The output definition of the model. + *args : list + Positional arguments. + auto_batch_size : bool or int or AutomaticBatchSize, default: False + If True, automatic batch size will be used. If int, it will be used + as the initial batch size. 
+ neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional + The ASE neighbor list class to produce the neighbor list. If None, the + neighbor list will be built natively in the model. + **kwargs : dict + Keyword arguments. + """ + + def __init__( + self, + model_file: str, + output_def: ModelOutputDef, + *args: List[Any], + auto_batch_size: Union[bool, int, AutoBatchSize] = True, + neighbor_list: Optional["ase.neighborlist.NewPrimitiveNeighborList"] = None, + head: Optional[str] = None, + **kwargs: Dict[str, Any], + ): + self.output_def = output_def + self.model_path = model_file + if str(self.model_path).endswith(".pt"): + state_dict = torch.load(model_file, map_location=env.DEVICE) + if "model" in state_dict: + state_dict = state_dict["model"] + self.input_param = state_dict["_extra_state"]["model_params"] + self.multi_task = "model_dict" in self.input_param + if self.multi_task: + model_keys = list(self.input_param["model_dict"].keys()) + assert ( + head is not None + ), f"Head must be set for multitask model! Available heads are: {model_keys}" + assert ( + head in model_keys + ), f"No head named {head} in model! Available heads are: {model_keys}" + self.input_param = self.input_param["model_dict"][head] + state_dict_head = {"_extra_state": state_dict["_extra_state"]} + for item in state_dict: + if f"model.{head}." 
in item: + state_dict_head[ + item.replace(f"model.{head}.", "model.Default.") + ] = state_dict[item].clone() + state_dict = state_dict_head + self.input_param["resuming"] = True + model = get_model(self.input_param).to(DEVICE) + model = torch.jit.script(model) + self.dp = ModelWrapper(model) + self.dp.load_state_dict(state_dict) + elif str(self.model_path).endswith(".pth"): + model = torch.jit.load(model_file, map_location=env.DEVICE) + self.dp = ModelWrapper(model) + else: + raise ValueError("Unknown model file format!") + self.rcut = self.dp.model["Default"].get_rcut() + self.type_map = self.dp.model["Default"].get_type_map() + if isinstance(auto_batch_size, bool): + if auto_batch_size: + self.auto_batch_size = AutoBatchSize() + else: + self.auto_batch_size = None + elif isinstance(auto_batch_size, int): + self.auto_batch_size = AutoBatchSize(auto_batch_size) + elif isinstance(auto_batch_size, AutoBatchSize): + self.auto_batch_size = auto_batch_size + else: + raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize") + self._has_spin = getattr(self.dp.model["Default"], "has_spin", False) + if callable(self._has_spin): + self._has_spin = self._has_spin() + + def get_rcut(self) -> float: + """Get the cutoff radius of this model.""" + return self.rcut + + def get_ntypes(self) -> int: + """Get the number of atom types of this model.""" + return len(self.type_map) + + def get_type_map(self) -> List[str]: + """Get the type map (element name of the atom types) of this model.""" + return self.type_map + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this DP.""" + return self.dp.model["Default"].get_dim_fparam() + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this DP.""" + return self.dp.model["Default"].get_dim_aparam() + + @property + def model_type(self) -> "DeepEvalWrapper": + """The the evaluator of the model type.""" + model_output_type = 
self.dp.model["Default"].model_output_type() + if "energy" in model_output_type: + return DeepPot + elif "dos" in model_output_type: + return DeepDOS + elif "dipole" in model_output_type: + return DeepDipole + elif "polar" in model_output_type: + return DeepPolar + elif "global_polar" in model_output_type: + return DeepGlobalPolar + elif "wfc" in model_output_type: + return DeepWFC + else: + raise RuntimeError("Unknown model type") + + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + return self.dp.model["Default"].get_sel_type() + + def get_numb_dos(self) -> int: + """Get the number of DOS.""" + return self.dp.model["Default"].get_numb_dos() + + def get_has_efield(self): + """Check if the model has efield.""" + return False + + def get_ntypes_spin(self): + """Get the number of spin atom types of this model. Only used in old implement.""" + return 0 + + def get_has_spin(self): + """Check if the model has spin atom types.""" + return self._has_spin + + def eval( + self, + coords: np.ndarray, + cells: np.ndarray, + atom_types: np.ndarray, + atomic: bool = False, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + **kwargs: Dict[str, Any], + ) -> Dict[str, np.ndarray]: + """Evaluate the energy, force and virial by using this DP. + + Parameters + ---------- + coords + The coordinates of atoms. + The array should be of size nframes x natoms x 3 + cells + The cell of the region. + If None then non-PBC is assumed, otherwise using PBC. + The array should be of size nframes x 9 + atom_types + The atom types + The list should contain natoms ints + atomic + Calculate the atomic energy and virial + fparam + The frame parameter. + The array can be of size : + - nframes x dim_fparam. + - dim_fparam. 
Then all frames are assumed to be provided with the same fparam. + aparam + The atomic parameter + The array can be of size : + - nframes x natoms x dim_aparam. + - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam. + - dim_aparam. Then all frames and atoms are provided with the same aparam. + **kwargs + Other parameters + + Returns + ------- + output_dict : dict + The output of the evaluation. The keys are the names of the output + variables, and the values are the corresponding output arrays. + """ + # convert all of the input to numpy array + atom_types = np.array(atom_types, dtype=np.int32) + coords = np.array(coords) + if cells is not None: + cells = np.array(cells) + natoms, numb_test = self._get_natoms_and_nframes( + coords, atom_types, len(atom_types.shape) > 1 + ) + request_defs = self._get_request_defs(atomic) + if "spin" not in kwargs or kwargs["spin"] is None: + out = self._eval_func(self._eval_model, numb_test, natoms)( + coords, cells, atom_types, fparam, aparam, request_defs + ) + else: + out = self._eval_func(self._eval_model_spin, numb_test, natoms)( + coords, + cells, + atom_types, + np.array(kwargs["spin"]), + fparam, + aparam, + request_defs, + ) + return dict( + zip( + [x.name for x in request_defs], + out, + ) + ) + + def _get_request_defs(self, atomic: bool) -> List[OutputVariableDef]: + """Get the requested output definitions. + + When atomic is True, all output_def are requested. + When atomic is False, only energy (tensor), force, and virial + are requested. + + Parameters + ---------- + atomic : bool + Whether to request the atomic output. + + Returns + ------- + list[OutputVariableDef] + The requested output definitions. 
+ """ + if atomic: + return list(self.output_def.var_defs.values()) + else: + return [ + x + for x in self.output_def.var_defs.values() + if x.category + in ( + OutputVariableCategory.OUT, + OutputVariableCategory.REDU, + OutputVariableCategory.DERV_R, + OutputVariableCategory.DERV_C_REDU, + ) + ] + + def _eval_func(self, inner_func: Callable, numb_test: int, natoms: int) -> Callable: + """Wrapper method with auto batch size. + + Parameters + ---------- + inner_func : Callable + the method to be wrapped + numb_test : int + number of tests + natoms : int + number of atoms + + Returns + ------- + Callable + the wrapper + """ + if self.auto_batch_size is not None: + + def eval_func(*args, **kwargs): + return self.auto_batch_size.execute_all( + inner_func, numb_test, natoms, *args, **kwargs + ) + + else: + eval_func = inner_func + return eval_func + + def _get_natoms_and_nframes( + self, + coords: np.ndarray, + atom_types: np.ndarray, + mixed_type: bool = False, + ) -> Tuple[int, int]: + if mixed_type: + natoms = len(atom_types[0]) + else: + natoms = len(atom_types) + if natoms == 0: + assert coords.size == 0 + else: + coords = np.reshape(np.array(coords), [-1, natoms * 3]) + nframes = coords.shape[0] + return natoms, nframes + + def _eval_model( + self, + coords: np.ndarray, + cells: Optional[np.ndarray], + atom_types: np.ndarray, + fparam: Optional[np.ndarray], + aparam: Optional[np.ndarray], + request_defs: List[OutputVariableDef], + ): + model = self.dp.to(DEVICE) + + nframes = coords.shape[0] + if len(atom_types.shape) == 1: + natoms = len(atom_types) + atom_types = np.tile(atom_types, nframes).reshape(nframes, -1) + else: + natoms = len(atom_types[0]) + + coord_input = torch.tensor( + coords.reshape([-1, natoms, 3]), + dtype=GLOBAL_PT_FLOAT_PRECISION, + device=DEVICE, + ) + type_input = torch.tensor(atom_types, dtype=torch.long, device=DEVICE) + if cells is not None: + box_input = torch.tensor( + cells.reshape([-1, 3, 3]), + dtype=GLOBAL_PT_FLOAT_PRECISION, + 
device=DEVICE, + ) + else: + box_input = None + if fparam is not None: + fparam_input = to_torch_tensor(fparam.reshape(-1, self.get_dim_fparam())) + else: + fparam_input = None + if aparam is not None: + aparam_input = to_torch_tensor( + aparam.reshape(-1, natoms, self.get_dim_aparam()) + ) + else: + aparam_input = None + do_atomic_virial = any( + x.category == OutputVariableCategory.DERV_C for x in request_defs + ) + batch_output = model( + coord_input, + type_input, + box=box_input, + do_atomic_virial=do_atomic_virial, + fparam=fparam_input, + aparam=aparam_input, + ) + if isinstance(batch_output, tuple): + batch_output = batch_output[0] + + results = [] + for odef in request_defs: + pt_name = self._OUTDEF_DP2BACKEND[odef.name] + if pt_name in batch_output: + shape = self._get_output_shape(odef, nframes, natoms) + out = batch_output[pt_name].reshape(shape).detach().cpu().numpy() + results.append(out) + else: + shape = self._get_output_shape(odef, nframes, natoms) + results.append(np.full(np.abs(shape), np.nan)) # this is kinda hacky + return tuple(results) + + def _eval_model_spin( + self, + coords: np.ndarray, + cells: Optional[np.ndarray], + atom_types: np.ndarray, + spins: np.ndarray, + fparam: Optional[np.ndarray], + aparam: Optional[np.ndarray], + request_defs: List[OutputVariableDef], + ): + model = self.dp.to(DEVICE) + + nframes = coords.shape[0] + if len(atom_types.shape) == 1: + natoms = len(atom_types) + atom_types = np.tile(atom_types, nframes).reshape(nframes, -1) + else: + natoms = len(atom_types[0]) + + coord_input = torch.tensor( + coords.reshape([-1, natoms, 3]), + dtype=GLOBAL_PT_FLOAT_PRECISION, + device=DEVICE, + ) + type_input = torch.tensor(atom_types, dtype=torch.long, device=DEVICE) + spin_input = torch.tensor( + spins.reshape([-1, natoms, 3]), + dtype=GLOBAL_PT_FLOAT_PRECISION, + device=DEVICE, + ) + if cells is not None: + box_input = torch.tensor( + cells.reshape([-1, 3, 3]), + dtype=GLOBAL_PT_FLOAT_PRECISION, + device=DEVICE, + ) + 
else: + box_input = None + if fparam is not None: + fparam_input = to_torch_tensor(fparam.reshape(-1, self.get_dim_fparam())) + else: + fparam_input = None + if aparam is not None: + aparam_input = to_torch_tensor( + aparam.reshape(-1, natoms, self.get_dim_aparam()) + ) + else: + aparam_input = None + + do_atomic_virial = any( + x.category == OutputVariableCategory.DERV_C_REDU for x in request_defs + ) + batch_output = model( + coord_input, + type_input, + spin=spin_input, + box=box_input, + do_atomic_virial=do_atomic_virial, + fparam=fparam_input, + aparam=aparam_input, + ) + if isinstance(batch_output, tuple): + batch_output = batch_output[0] + + results = [] + for odef in request_defs: + pt_name = self._OUTDEF_DP2BACKEND[odef.name] + if pt_name in batch_output: + shape = self._get_output_shape(odef, nframes, natoms) + out = batch_output[pt_name].reshape(shape).detach().cpu().numpy() + results.append(out) + else: + shape = self._get_output_shape(odef, nframes, natoms) + results.append(np.full(np.abs(shape), np.nan)) # this is kinda hacky + return tuple(results) + + def _get_output_shape(self, odef, nframes, natoms): + if odef.category == OutputVariableCategory.DERV_C_REDU: + # virial + return [nframes, *odef.shape[:-1], 9] + elif odef.category == OutputVariableCategory.REDU: + # energy + return [nframes, *odef.shape, 1] + elif odef.category == OutputVariableCategory.DERV_C: + # atom_virial + return [nframes, *odef.shape[:-1], natoms, 9] + elif odef.category == OutputVariableCategory.DERV_R: + # force + return [nframes, *odef.shape[:-1], natoms, 3] + elif odef.category == OutputVariableCategory.OUT: + # atom_energy, atom_tensor + # Something wrong here? 
+ # return [nframes, *shape, natoms, 1] + return [nframes, natoms, *odef.shape, 1] + else: + raise RuntimeError("unknown category") + + +# For tests only +def eval_model( + model, + coords: Union[np.ndarray, torch.Tensor], + cells: Optional[Union[np.ndarray, torch.Tensor]], + atom_types: Union[np.ndarray, torch.Tensor, List[int]], + spins: Optional[Union[np.ndarray, torch.Tensor]] = None, + atomic: bool = False, + infer_batch_size: int = 2, + denoise: bool = False, +): + model = model.to(DEVICE) + energy_out = [] + atomic_energy_out = [] + force_out = [] + force_mag_out = [] + virial_out = [] + atomic_virial_out = [] + updated_coord_out = [] + logits_out = [] + err_msg = ( + f"All inputs should be the same format, " + f"but found {type(coords)}, {type(cells)}, {type(atom_types)} instead! " + ) + return_tensor = True + if isinstance(coords, torch.Tensor): + if cells is not None: + assert isinstance(cells, torch.Tensor), err_msg + if spins is not None: + assert isinstance(spins, torch.Tensor), err_msg + assert isinstance(atom_types, torch.Tensor) or isinstance(atom_types, list) + atom_types = torch.tensor(atom_types, dtype=torch.long, device=DEVICE) + elif isinstance(coords, np.ndarray): + if cells is not None: + assert isinstance(cells, np.ndarray), err_msg + if spins is not None: + assert isinstance(spins, np.ndarray), err_msg + assert isinstance(atom_types, np.ndarray) or isinstance(atom_types, list) + atom_types = np.array(atom_types, dtype=np.int32) + return_tensor = False + + nframes = coords.shape[0] + if len(atom_types.shape) == 1: + natoms = len(atom_types) + if isinstance(atom_types, torch.Tensor): + atom_types = torch.tile(atom_types.unsqueeze(0), [nframes, 1]).reshape( + nframes, -1 + ) + else: + atom_types = np.tile(atom_types, nframes).reshape(nframes, -1) + else: + natoms = len(atom_types[0]) + + coord_input = torch.tensor( + coords.reshape([-1, natoms, 3]), dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE + ) + spin_input = None + if spins is not None: 
+ spin_input = torch.tensor( + spins.reshape([-1, natoms, 3]), + dtype=GLOBAL_PT_FLOAT_PRECISION, + device=DEVICE, + ) + has_spin = getattr(model, "has_spin", False) + if callable(has_spin): + has_spin = has_spin() + type_input = torch.tensor(atom_types, dtype=torch.long, device=DEVICE) + box_input = None + if cells is None: + pbc = False + else: + pbc = True + box_input = torch.tensor( + cells.reshape([-1, 3, 3]), dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE + ) + num_iter = int((nframes + infer_batch_size - 1) / infer_batch_size) + + for ii in range(num_iter): + batch_coord = coord_input[ii * infer_batch_size : (ii + 1) * infer_batch_size] + batch_atype = type_input[ii * infer_batch_size : (ii + 1) * infer_batch_size] + batch_box = None + batch_spin = None + if spin_input is not None: + batch_spin = spin_input[ii * infer_batch_size : (ii + 1) * infer_batch_size] + if pbc: + batch_box = box_input[ii * infer_batch_size : (ii + 1) * infer_batch_size] + input_dict = { + "coord": batch_coord, + "atype": batch_atype, + "box": batch_box, + "do_atomic_virial": atomic, + } + if has_spin: + input_dict["spin"] = batch_spin + batch_output = model(**input_dict) + if isinstance(batch_output, tuple): + batch_output = batch_output[0] + if not return_tensor: + if "energy" in batch_output: + energy_out.append(batch_output["energy"].detach().cpu().numpy()) + if "atom_energy" in batch_output: + atomic_energy_out.append( + batch_output["atom_energy"].detach().cpu().numpy() + ) + if "force" in batch_output: + force_out.append(batch_output["force"].detach().cpu().numpy()) + if "force_mag" in batch_output: + force_mag_out.append(batch_output["force_mag"].detach().cpu().numpy()) + if "virial" in batch_output: + virial_out.append(batch_output["virial"].detach().cpu().numpy()) + if "atom_virial" in batch_output: + atomic_virial_out.append( + batch_output["atom_virial"].detach().cpu().numpy() + ) + if "updated_coord" in batch_output: + updated_coord_out.append( + 
batch_output["updated_coord"].detach().cpu().numpy() + ) + if "logits" in batch_output: + logits_out.append(batch_output["logits"].detach().cpu().numpy()) + else: + if "energy" in batch_output: + energy_out.append(batch_output["energy"]) + if "atom_energy" in batch_output: + atomic_energy_out.append(batch_output["atom_energy"]) + if "force" in batch_output: + force_out.append(batch_output["force"]) + if "force_mag" in batch_output: + force_mag_out.append(batch_output["force_mag"]) + if "virial" in batch_output: + virial_out.append(batch_output["virial"]) + if "atom_virial" in batch_output: + atomic_virial_out.append(batch_output["atom_virial"]) + if "updated_coord" in batch_output: + updated_coord_out.append(batch_output["updated_coord"]) + if "logits" in batch_output: + logits_out.append(batch_output["logits"]) + if not return_tensor: + energy_out = ( + np.concatenate(energy_out) if energy_out else np.zeros([nframes, 1]) + ) + atomic_energy_out = ( + np.concatenate(atomic_energy_out) + if atomic_energy_out + else np.zeros([nframes, natoms, 1]) + ) + force_out = ( + np.concatenate(force_out) if force_out else np.zeros([nframes, natoms, 3]) + ) + force_mag_out = ( + np.concatenate(force_mag_out) + if force_mag_out + else np.zeros([nframes, natoms, 3]) + ) + virial_out = ( + np.concatenate(virial_out) if virial_out else np.zeros([nframes, 3, 3]) + ) + atomic_virial_out = ( + np.concatenate(atomic_virial_out) + if atomic_virial_out + else np.zeros([nframes, natoms, 3, 3]) + ) + updated_coord_out = ( + np.concatenate(updated_coord_out) if updated_coord_out else None + ) + logits_out = np.concatenate(logits_out) if logits_out else None + else: + energy_out = ( + torch.cat(energy_out) + if energy_out + else torch.zeros( + [nframes, 1], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE + ) + ) + atomic_energy_out = ( + torch.cat(atomic_energy_out) + if atomic_energy_out + else torch.zeros( + [nframes, natoms, 1], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE + ) + ) + 
force_out = ( + torch.cat(force_out) + if force_out + else torch.zeros( + [nframes, natoms, 3], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE + ) + ) + force_mag_out = ( + torch.cat(force_mag_out) + if force_mag_out + else torch.zeros( + [nframes, natoms, 3], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE + ) + ) + virial_out = ( + torch.cat(virial_out) + if virial_out + else torch.zeros( + [nframes, 3, 3], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE + ) + ) + atomic_virial_out = ( + torch.cat(atomic_virial_out) + if atomic_virial_out + else torch.zeros( + [nframes, natoms, 3, 3], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE + ) + ) + updated_coord_out = torch.cat(updated_coord_out) if updated_coord_out else None + logits_out = torch.cat(logits_out) if logits_out else None + if denoise: + return updated_coord_out, logits_out + else: + results_dict = { + "energy": energy_out, + "force": force_out, + "virial": virial_out, + } + if has_spin: + results_dict["force_mag"] = force_mag_out + if atomic: + results_dict["atom_energy"] = atomic_energy_out + results_dict["atom_virial"] = atomic_virial_out + return results_dict diff --git a/deepmd/pt/infer/inference.py b/deepmd/pt/infer/inference.py new file mode 100644 index 0000000000..6c13b363bc --- /dev/null +++ b/deepmd/pt/infer/inference.py @@ -0,0 +1,66 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging +from copy import ( + deepcopy, +) + +import torch + +from deepmd.pt.model.model import ( + get_model, +) +from deepmd.pt.train.wrapper import ( + ModelWrapper, +) +from deepmd.pt.utils.env import ( + DEVICE, + JIT, +) + +if torch.__version__.startswith("2"): + import torch._dynamo +log = logging.getLogger(__name__) + + +class Tester: + def __init__( + self, + model_ckpt, + head=None, + ): + """Construct a DeePMD tester. + + Args: + - config: The Dict-like configuration with training options. 
+ """ + # Model + state_dict = torch.load(model_ckpt, map_location=DEVICE) + if "model" in state_dict: + state_dict = state_dict["model"] + model_params = state_dict["_extra_state"]["model_params"] + self.multi_task = "model_dict" in model_params + if self.multi_task: + assert head is not None, "Head must be specified in multitask mode!" + self.head = head + assert head in model_params["model_dict"], ( + f"Specified head {head} not found in model {model_ckpt}! " + f"Available ones are {list(model_params['model_dict'].keys())}." + ) + model_params = model_params["model_dict"][head] + state_dict_head = {"_extra_state": state_dict["_extra_state"]} + for item in state_dict: + if f"model.{head}." in item: + state_dict_head[ + item.replace(f"model.{head}.", "model.Default.") + ] = state_dict[item].clone() + state_dict = state_dict_head + + self.model_params = deepcopy(model_params) + model_params["resuming"] = True + self.model = get_model(model_params).to(DEVICE) + + # Model Wrapper + self.wrapper = ModelWrapper(self.model) # inference only + if JIT: + self.wrapper = torch.jit.script(self.wrapper) + self.wrapper.load_state_dict(state_dict) diff --git a/deepmd/pt/loss/__init__.py b/deepmd/pt/loss/__init__.py new file mode 100644 index 0000000000..e64a129d51 --- /dev/null +++ b/deepmd/pt/loss/__init__.py @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from .denoise import ( + DenoiseLoss, +) +from .dos import ( + DOSLoss, +) +from .ener import ( + EnergyStdLoss, +) +from .ener_spin import ( + EnergySpinLoss, +) +from .loss import ( + TaskLoss, +) +from .tensor import ( + TensorLoss, +) + +__all__ = [ + "DenoiseLoss", + "EnergyStdLoss", + "EnergySpinLoss", + "TensorLoss", + "TaskLoss", + "DOSLoss", +] diff --git a/deepmd/pt/loss/denoise.py b/deepmd/pt/loss/denoise.py new file mode 100644 index 0000000000..57691558cb --- /dev/null +++ b/deepmd/pt/loss/denoise.py @@ -0,0 +1,109 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import torch +import 
torch.nn.functional as F + +from deepmd.pt.loss.loss import ( + TaskLoss, +) +from deepmd.pt.utils import ( + env, +) + + +class DenoiseLoss(TaskLoss): + def __init__( + self, + ntypes, + masked_token_loss=1.0, + masked_coord_loss=1.0, + norm_loss=0.01, + use_l1=True, + beta=1.00, + mask_loss_coord=True, + mask_loss_token=True, + **kwargs, + ): + """Construct a layer to compute loss on coord, and type reconstruction.""" + super().__init__() + self.ntypes = ntypes + self.masked_token_loss = masked_token_loss + self.masked_coord_loss = masked_coord_loss + self.norm_loss = norm_loss + self.has_coord = self.masked_coord_loss > 0.0 + self.has_token = self.masked_token_loss > 0.0 + self.has_norm = self.norm_loss > 0.0 + self.use_l1 = use_l1 + self.beta = beta + self.frac_beta = 1.00 / self.beta + self.mask_loss_coord = mask_loss_coord + self.mask_loss_token = mask_loss_token + + def forward(self, model_pred, label, natoms, learning_rate, mae=False): + """Return loss on coord and type denoise. + + Returns + ------- + - loss: Loss to minimize. 
+ """ + updated_coord = model_pred["updated_coord"] + logits = model_pred["logits"] + clean_coord = label["clean_coord"] + clean_type = label["clean_type"] + coord_mask = label["coord_mask"] + type_mask = label["type_mask"] + + loss = torch.zeros(1, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE)[0] + more_loss = {} + if self.has_coord: + if self.mask_loss_coord: + masked_updated_coord = updated_coord[coord_mask] + masked_clean_coord = clean_coord[coord_mask] + if masked_updated_coord.size(0) > 0: + coord_loss = F.smooth_l1_loss( + masked_updated_coord.view(-1, 3), + masked_clean_coord.view(-1, 3), + reduction="mean", + beta=self.beta, + ) + else: + coord_loss = torch.zeros( + 1, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE + )[0] + else: + coord_loss = F.smooth_l1_loss( + updated_coord.view(-1, 3), + clean_coord.view(-1, 3), + reduction="mean", + beta=self.beta, + ) + loss += self.masked_coord_loss * coord_loss + more_loss["coord_l1_error"] = coord_loss.detach() + if self.has_token: + if self.mask_loss_token: + masked_logits = logits[type_mask] + masked_target = clean_type[type_mask] + if masked_logits.size(0) > 0: + token_loss = F.nll_loss( + F.log_softmax(masked_logits, dim=-1), + masked_target, + reduction="mean", + ) + else: + token_loss = torch.zeros( + 1, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE + )[0] + else: + token_loss = F.nll_loss( + F.log_softmax(logits.view(-1, self.ntypes - 1), dim=-1), + clean_type.view(-1), + reduction="mean", + ) + loss += self.masked_token_loss * token_loss + more_loss["token_error"] = token_loss.detach() + if self.has_norm: + norm_x = model_pred["norm_x"] + norm_delta_pair_rep = model_pred["norm_delta_pair_rep"] + loss += self.norm_loss * (norm_x + norm_delta_pair_rep) + more_loss["norm_loss"] = norm_x.detach() + norm_delta_pair_rep.detach() + + return loss, more_loss diff --git a/deepmd/pt/loss/dos.py b/deepmd/pt/loss/dos.py new file mode 100644 index 0000000000..7fd2e04ff2 --- /dev/null +++ 
b/deepmd/pt/loss/dos.py @@ -0,0 +1,256 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + List, +) + +import torch + +from deepmd.pt.loss.loss import ( + TaskLoss, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.utils.data import ( + DataRequirementItem, +) + + +class DOSLoss(TaskLoss): + def __init__( + self, + starter_learning_rate: float, + numb_dos: int, + start_pref_dos: float = 1.00, + limit_pref_dos: float = 1.00, + start_pref_cdf: float = 1000, + limit_pref_cdf: float = 1.00, + start_pref_ados: float = 0.0, + limit_pref_ados: float = 0.0, + start_pref_acdf: float = 0.0, + limit_pref_acdf: float = 0.0, + inference=False, + **kwargs, + ): + r"""Construct a loss for local and global tensors. + + Parameters + ---------- + tensor_name : str + The name of the tensor in the model predictions to compute the loss. + tensor_size : int + The size (dimension) of the tensor. + label_name : str + The name of the tensor in the labels to compute the loss. + pref_atomic : float + The prefactor of the weight of atomic loss. It should be larger than or equal to 0. + pref : float + The prefactor of the weight of global loss. It should be larger than or equal to 0. + inference : bool + If true, it will output all losses found in output, ignoring the pre-factors. + **kwargs + Other keyword arguments. 
+ """ + super().__init__() + self.starter_learning_rate = starter_learning_rate + self.numb_dos = numb_dos + self.inference = inference + + self.start_pref_dos = start_pref_dos + self.limit_pref_dos = limit_pref_dos + self.start_pref_cdf = start_pref_cdf + self.limit_pref_cdf = limit_pref_cdf + + self.start_pref_ados = start_pref_ados + self.limit_pref_ados = limit_pref_ados + self.start_pref_acdf = start_pref_acdf + self.limit_pref_acdf = limit_pref_acdf + + assert ( + self.start_pref_dos >= 0.0 + and self.limit_pref_dos >= 0.0 + and self.start_pref_cdf >= 0.0 + and self.limit_pref_cdf >= 0.0 + and self.start_pref_ados >= 0.0 + and self.limit_pref_ados >= 0.0 + and self.start_pref_acdf >= 0.0 + and self.limit_pref_acdf >= 0.0 + ), "Can not assign negative weight to `pref` and `pref_atomic`" + + self.has_dos = (start_pref_dos != 0.0 and limit_pref_dos != 0.0) or inference + self.has_cdf = (start_pref_cdf != 0.0 and limit_pref_cdf != 0.0) or inference + self.has_ados = (start_pref_ados != 0.0 and limit_pref_ados != 0.0) or inference + self.has_acdf = (start_pref_acdf != 0.0 and limit_pref_acdf != 0.0) or inference + + assert ( + self.has_dos or self.has_cdf or self.has_ados or self.has_acdf + ), AssertionError("Can not assian zero weight both to `pref` and `pref_atomic`") + + def forward(self, input_dict, model, label, natoms, learning_rate=0.0, mae=False): + """Return loss on local and global tensors. + + Parameters + ---------- + input_dict : dict[str, torch.Tensor] + Model inputs. + model : torch.nn.Module + Model to be used to output the predictions. + label : dict[str, torch.Tensor] + Labels. + natoms : int + The local atom number. + + Returns + ------- + model_pred: dict[str, torch.Tensor] + Model predictions. + loss: torch.Tensor + Loss for model to minimize. + more_loss: dict[str, torch.Tensor] + Other losses for display. 
+ """ + model_pred = model(**input_dict) + + coef = learning_rate / self.starter_learning_rate + pref_dos = ( + self.limit_pref_dos + (self.start_pref_dos - self.limit_pref_dos) * coef + ) + pref_cdf = ( + self.limit_pref_cdf + (self.start_pref_cdf - self.limit_pref_cdf) * coef + ) + pref_ados = ( + self.limit_pref_ados + (self.start_pref_ados - self.limit_pref_ados) * coef + ) + pref_acdf = ( + self.limit_pref_acdf + (self.start_pref_acdf - self.limit_pref_acdf) * coef + ) + + loss = torch.zeros(1, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE)[0] + more_loss = {} + if self.has_ados and "atom_dos" in model_pred and "atom_dos" in label: + find_local = label.get("find_atom_dos", 0.0) + pref_ados = pref_ados * find_local + local_tensor_pred_dos = model_pred["atom_dos"].reshape( + [-1, natoms, self.numb_dos] + ) + local_tensor_label_dos = label["atom_dos"].reshape( + [-1, natoms, self.numb_dos] + ) + diff = (local_tensor_pred_dos - local_tensor_label_dos).reshape( + [-1, self.numb_dos] + ) + if "mask" in model_pred: + diff = diff[model_pred["mask"].reshape([-1]).bool()] + l2_local_loss_dos = torch.mean(torch.square(diff)) + if not self.inference: + more_loss["l2_local_dos_loss"] = self.display_if_exist( + l2_local_loss_dos.detach(), find_local + ) + loss += pref_ados * l2_local_loss_dos + rmse_local_dos = l2_local_loss_dos.sqrt() + more_loss["rmse_local_dos"] = self.display_if_exist( + rmse_local_dos.detach(), find_local + ) + if self.has_acdf and "atom_dos" in model_pred and "atom_dos" in label: + find_local = label.get("find_atom_dos", 0.0) + pref_acdf = pref_acdf * find_local + local_tensor_pred_cdf = torch.cusum( + model_pred["atom_dos"].reshape([-1, natoms, self.numb_dos]), dim=-1 + ) + local_tensor_label_cdf = torch.cusum( + label["atom_dos"].reshape([-1, natoms, self.numb_dos]), dim=-1 + ) + diff = (local_tensor_pred_cdf - local_tensor_label_cdf).reshape( + [-1, self.numb_dos] + ) + if "mask" in model_pred: + diff = 
diff[model_pred["mask"].reshape([-1]).bool()] + l2_local_loss_cdf = torch.mean(torch.square(diff)) + if not self.inference: + more_loss["l2_local_cdf_loss"] = self.display_if_exist( + l2_local_loss_cdf.detach(), find_local + ) + loss += pref_acdf * l2_local_loss_cdf + rmse_local_cdf = l2_local_loss_cdf.sqrt() + more_loss["rmse_local_cdf"] = self.display_if_exist( + rmse_local_cdf.detach(), find_local + ) + if self.has_dos and "dos" in model_pred and "dos" in label: + find_global = label.get("find_dos", 0.0) + pref_dos = pref_dos * find_global + global_tensor_pred_dos = model_pred["dos"].reshape([-1, self.numb_dos]) + global_tensor_label_dos = label["dos"].reshape([-1, self.numb_dos]) + diff = global_tensor_pred_dos - global_tensor_label_dos + if "mask" in model_pred: + atom_num = model_pred["mask"].sum(-1, keepdim=True) + l2_global_loss_dos = torch.mean( + torch.sum(torch.square(diff) * atom_num, dim=0) / atom_num.sum() + ) + atom_num = torch.mean(atom_num.float()) + else: + atom_num = natoms + l2_global_loss_dos = torch.mean(torch.square(diff)) + if not self.inference: + more_loss["l2_global_dos_loss"] = self.display_if_exist( + l2_global_loss_dos.detach(), find_global + ) + loss += pref_dos * l2_global_loss_dos + rmse_global_dos = l2_global_loss_dos.sqrt() / atom_num + more_loss["rmse_global_dos"] = self.display_if_exist( + rmse_global_dos.detach(), find_global + ) + if self.has_cdf and "dos" in model_pred and "dos" in label: + find_global = label.get("find_dos", 0.0) + pref_cdf = pref_cdf * find_global + global_tensor_pred_cdf = torch.cusum( + model_pred["dos"].reshape([-1, self.numb_dos]), dim=-1 + ) + global_tensor_label_cdf = torch.cusum( + label["dos"].reshape([-1, self.numb_dos]), dim=-1 + ) + diff = global_tensor_pred_cdf - global_tensor_label_cdf + if "mask" in model_pred: + atom_num = model_pred["mask"].sum(-1, keepdim=True) + l2_global_loss_cdf = torch.mean( + torch.sum(torch.square(diff) * atom_num, dim=0) / atom_num.sum() + ) + atom_num = 
torch.mean(atom_num.float()) + else: + atom_num = natoms + l2_global_loss_cdf = torch.mean(torch.square(diff)) + if not self.inference: + more_loss["l2_global_cdf_loss"] = self.display_if_exist( + l2_global_loss_cdf.detach(), find_global + ) + loss += pref_cdf * l2_global_loss_cdf + rmse_global_dos = l2_global_loss_cdf.sqrt() / atom_num + more_loss["rmse_global_cdf"] = self.display_if_exist( + rmse_global_dos.detach(), find_global + ) + return model_pred, loss, more_loss + + @property + def label_requirement(self) -> List[DataRequirementItem]: + """Return data label requirements needed for this loss calculation.""" + label_requirement = [] + if self.has_ados or self.has_acdf: + label_requirement.append( + DataRequirementItem( + "atom_dos", + ndof=self.numb_dos, + atomic=True, + must=False, + high_prec=False, + ) + ) + if self.has_dos or self.has_cdf: + label_requirement.append( + DataRequirementItem( + "dos", + ndof=self.numb_dos, + atomic=False, + must=False, + high_prec=False, + ) + ) + return label_requirement diff --git a/deepmd/pt/loss/ener.py b/deepmd/pt/loss/ener.py new file mode 100644 index 0000000000..ccc23b690c --- /dev/null +++ b/deepmd/pt/loss/ener.py @@ -0,0 +1,291 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + List, +) + +import torch +import torch.nn.functional as F + +from deepmd.pt.loss.loss import ( + TaskLoss, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.env import ( + GLOBAL_PT_FLOAT_PRECISION, +) +from deepmd.utils.data import ( + DataRequirementItem, +) + + +class EnergyStdLoss(TaskLoss): + def __init__( + self, + starter_learning_rate=1.0, + start_pref_e=0.0, + limit_pref_e=0.0, + start_pref_f=0.0, + limit_pref_f=0.0, + start_pref_v=0.0, + limit_pref_v=0.0, + start_pref_ae: float = 0.0, + limit_pref_ae: float = 0.0, + start_pref_pf: float = 0.0, + limit_pref_pf: float = 0.0, + use_l1_all: bool = False, + inference=False, + **kwargs, + ): + r"""Construct a layer to compute loss on energy, force 
and virial. + + Parameters + ---------- + starter_learning_rate : float + The learning rate at the start of the training. + start_pref_e : float + The prefactor of energy loss at the start of the training. + limit_pref_e : float + The prefactor of energy loss at the end of the training. + start_pref_f : float + The prefactor of force loss at the start of the training. + limit_pref_f : float + The prefactor of force loss at the end of the training. + start_pref_v : float + The prefactor of virial loss at the start of the training. + limit_pref_v : float + The prefactor of virial loss at the end of the training. + start_pref_ae : float + The prefactor of atomic energy loss at the start of the training. + limit_pref_ae : float + The prefactor of atomic energy loss at the end of the training. + start_pref_pf : float + The prefactor of atomic prefactor force loss at the start of the training. + limit_pref_pf : float + The prefactor of atomic prefactor force loss at the end of the training. + use_l1_all : bool + Whether to use L1 loss, if False (default), it will use L2 loss. + inference : bool + If true, it will output all losses found in output, ignoring the pre-factors. + **kwargs + Other keyword arguments. 
+ """ + super().__init__() + self.starter_learning_rate = starter_learning_rate + self.has_e = (start_pref_e != 0.0 and limit_pref_e != 0.0) or inference + self.has_f = (start_pref_f != 0.0 and limit_pref_f != 0.0) or inference + self.has_v = (start_pref_v != 0.0 and limit_pref_v != 0.0) or inference + + # TODO EnergyStdLoss need support for atomic energy and atomic pref + self.has_ae = (start_pref_ae != 0.0 and limit_pref_ae != 0.0) or inference + self.has_pf = (start_pref_pf != 0.0 and limit_pref_pf != 0.0) or inference + + self.start_pref_e = start_pref_e + self.limit_pref_e = limit_pref_e + self.start_pref_f = start_pref_f + self.limit_pref_f = limit_pref_f + self.start_pref_v = start_pref_v + self.limit_pref_v = limit_pref_v + self.use_l1_all = use_l1_all + self.inference = inference + + def forward(self, input_dict, model, label, natoms, learning_rate, mae=False): + """Return loss on energy and force. + + Parameters + ---------- + input_dict : dict[str, torch.Tensor] + Model inputs. + model : torch.nn.Module + Model to be used to output the predictions. + label : dict[str, torch.Tensor] + Labels. + natoms : int + The local atom number. + + Returns + ------- + model_pred: dict[str, torch.Tensor] + Model predictions. + loss: torch.Tensor + Loss for model to minimize. + more_loss: dict[str, torch.Tensor] + Other losses for display. 
+ """ + model_pred = model(**input_dict) + coef = learning_rate / self.starter_learning_rate + pref_e = self.limit_pref_e + (self.start_pref_e - self.limit_pref_e) * coef + pref_f = self.limit_pref_f + (self.start_pref_f - self.limit_pref_f) * coef + pref_v = self.limit_pref_v + (self.start_pref_v - self.limit_pref_v) * coef + loss = torch.zeros(1, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE)[0] + more_loss = {} + # more_loss['log_keys'] = [] # showed when validation on the fly + # more_loss['test_keys'] = [] # showed when doing dp test + atom_norm = 1.0 / natoms + if self.has_e and "energy" in model_pred and "energy" in label: + find_energy = label.get("find_energy", 0.0) + pref_e = pref_e * find_energy + if not self.use_l1_all: + l2_ener_loss = torch.mean( + torch.square(model_pred["energy"] - label["energy"]) + ) + if not self.inference: + more_loss["l2_ener_loss"] = self.display_if_exist( + l2_ener_loss.detach(), find_energy + ) + loss += atom_norm * (pref_e * l2_ener_loss) + rmse_e = l2_ener_loss.sqrt() * atom_norm + more_loss["rmse_e"] = self.display_if_exist( + rmse_e.detach(), find_energy + ) + # more_loss['log_keys'].append('rmse_e') + else: # use l1 and for all atoms + l1_ener_loss = F.l1_loss( + model_pred["energy"].reshape(-1), + label["energy"].reshape(-1), + reduction="sum", + ) + loss += pref_e * l1_ener_loss + more_loss["mae_e"] = self.display_if_exist( + F.l1_loss( + model_pred["energy"].reshape(-1), + label["energy"].reshape(-1), + reduction="mean", + ).detach(), + find_energy, + ) + # more_loss['log_keys'].append('rmse_e') + if mae: + mae_e = ( + torch.mean(torch.abs(model_pred["energy"] - label["energy"])) + * atom_norm + ) + more_loss["mae_e"] = self.display_if_exist(mae_e.detach(), find_energy) + mae_e_all = torch.mean( + torch.abs(model_pred["energy"] - label["energy"]) + ) + more_loss["mae_e_all"] = self.display_if_exist( + mae_e_all.detach(), find_energy + ) + + if self.has_f and "force" in model_pred and "force" in label: + 
find_force = label.get("find_force", 0.0)
+            # A missing label ("find_force" == 0) zeroes the prefactor, so the
+            # force term then contributes nothing to the optimized loss.
+            pref_f = pref_f * find_force
+            if "force_target_mask" in model_pred:
+                force_target_mask = model_pred["force_target_mask"]
+            else:
+                force_target_mask = None
+            # Compute the force residual before branching on the loss type so
+            # that `diff_f` is defined on every path: the optional MAE report
+            # at the end of this section reads it on the L1 path as well.
+            diff_f = label["force"] - model_pred["force"]
+            if force_target_mask is not None:
+                diff_f = diff_f * force_target_mask
+            if not self.use_l1_all:
+                if force_target_mask is not None:
+                    # Normalize by the mask sum instead of the full atom count.
+                    force_cnt = force_target_mask.squeeze(-1).sum(-1)
+                    l2_force_loss = torch.mean(
+                        torch.square(diff_f).mean(-1).sum(-1) / force_cnt
+                    )
+                else:
+                    l2_force_loss = torch.mean(torch.square(diff_f))
+                if not self.inference:
+                    more_loss["l2_force_loss"] = self.display_if_exist(
+                        l2_force_loss.detach(), find_force
+                    )
+                loss += (pref_f * l2_force_loss).to(GLOBAL_PT_FLOAT_PRECISION)
+                rmse_f = l2_force_loss.sqrt()
+                more_loss["rmse_f"] = self.display_if_exist(rmse_f.detach(), find_force)
+            else:
+                l1_force_loss = F.l1_loss(
+                    label["force"], model_pred["force"], reduction="none"
+                )
+                if force_target_mask is not None:
+                    l1_force_loss *= force_target_mask
+                    force_cnt = force_target_mask.squeeze(-1).sum(-1)
+                    # Displayed metrics are detached, like every other entry
+                    # stored in `more_loss`.
+                    more_loss["mae_f"] = self.display_if_exist(
+                        (l1_force_loss.mean(-1).sum(-1) / force_cnt).mean().detach(),
+                        find_force,
+                    )
+                    l1_force_loss = (l1_force_loss.sum(-1).sum(-1) / force_cnt).sum()
+                else:
+                    more_loss["mae_f"] = self.display_if_exist(
+                        l1_force_loss.mean().detach(), find_force
+                    )
+                    l1_force_loss = l1_force_loss.sum(-1).mean(-1).sum()
+                loss += (pref_f * l1_force_loss).to(GLOBAL_PT_FLOAT_PRECISION)
+            if mae:
+                # Overrides any "mae_f" stored by the L1 branch above.
+                mae_f = torch.mean(torch.abs(diff_f))
+                more_loss["mae_f"] = self.display_if_exist(mae_f.detach(), find_force)
+
+        if self.has_v and "virial" in model_pred and "virial" in label:
+            find_virial = label.get("find_virial", 0.0)
+            pref_v = pref_v * find_virial
+            # The predicted virial is flattened to 9 components per frame
+            # before it is compared with the label.
+            diff_v = label["virial"] - model_pred["virial"].reshape(-1, 9)
+            l2_virial_loss = torch.mean(torch.square(diff_v))
+            if not self.inference:
+                more_loss["l2_virial_loss"] = self.display_if_exist(
+                    l2_virial_loss.detach(),
find_virial + ) + loss += atom_norm * (pref_v * l2_virial_loss) + rmse_v = l2_virial_loss.sqrt() * atom_norm + more_loss["rmse_v"] = self.display_if_exist(rmse_v.detach(), find_virial) + if mae: + mae_v = torch.mean(torch.abs(diff_v)) * atom_norm + more_loss["mae_v"] = self.display_if_exist(mae_v.detach(), find_virial) + if not self.inference: + more_loss["rmse"] = torch.sqrt(loss.detach()) + return model_pred, loss, more_loss + + @property + def label_requirement(self) -> List[DataRequirementItem]: + """Return data label requirements needed for this loss calculation.""" + label_requirement = [] + if self.has_e: + label_requirement.append( + DataRequirementItem( + "energy", + ndof=1, + atomic=False, + must=False, + high_prec=True, + ) + ) + if self.has_f: + label_requirement.append( + DataRequirementItem( + "force", + ndof=3, + atomic=True, + must=False, + high_prec=False, + ) + ) + if self.has_v: + label_requirement.append( + DataRequirementItem( + "virial", + ndof=9, + atomic=False, + must=False, + high_prec=False, + ) + ) + if self.has_ae: + label_requirement.append( + DataRequirementItem( + "atom_ener", + ndof=1, + atomic=True, + must=False, + high_prec=False, + ) + ) + if self.has_pf: + label_requirement.append( + DataRequirementItem( + "atom_pref", + ndof=1, + atomic=True, + must=False, + high_prec=False, + repeat=3, + ) + ) + return label_requirement diff --git a/deepmd/pt/loss/ener_spin.py b/deepmd/pt/loss/ener_spin.py new file mode 100644 index 0000000000..3bd81adf77 --- /dev/null +++ b/deepmd/pt/loss/ener_spin.py @@ -0,0 +1,281 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + List, +) + +import torch +import torch.nn.functional as F + +from deepmd.pt.loss.loss import ( + TaskLoss, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.env import ( + GLOBAL_PT_FLOAT_PRECISION, +) +from deepmd.utils.data import ( + DataRequirementItem, +) + + +class EnergySpinLoss(TaskLoss): + def __init__( + self, + 
starter_learning_rate=1.0, + start_pref_e=0.0, + limit_pref_e=0.0, + start_pref_fr=0.0, + limit_pref_fr=0.0, + start_pref_fm=0.0, + limit_pref_fm=0.0, + start_pref_v=0.0, + limit_pref_v=0.0, + start_pref_ae: float = 0.0, + limit_pref_ae: float = 0.0, + start_pref_pf: float = 0.0, + limit_pref_pf: float = 0.0, + use_l1_all: bool = False, + inference=False, + **kwargs, + ): + """Construct a layer to compute loss on energy, real force, magnetic force and virial.""" + super().__init__() + self.starter_learning_rate = starter_learning_rate + self.has_e = (start_pref_e != 0.0 and limit_pref_e != 0.0) or inference + self.has_fr = (start_pref_fr != 0.0 and limit_pref_fr != 0.0) or inference + self.has_fm = (start_pref_fm != 0.0 and limit_pref_fm != 0.0) or inference + + # TODO EnergySpinLoss needs support for virial, atomic energy and atomic pref + self.has_v = (start_pref_v != 0.0 and limit_pref_v != 0.0) or inference + self.has_ae = (start_pref_ae != 0.0 and limit_pref_ae != 0.0) or inference + self.has_pf = (start_pref_pf != 0.0 and limit_pref_pf != 0.0) or inference + + self.start_pref_e = start_pref_e + self.limit_pref_e = limit_pref_e + self.start_pref_fr = start_pref_fr + self.limit_pref_fr = limit_pref_fr + self.start_pref_fm = start_pref_fm + self.limit_pref_fm = limit_pref_fm + self.start_pref_v = start_pref_v + self.limit_pref_v = limit_pref_v + self.use_l1_all = use_l1_all + self.inference = inference + + def forward(self, input_dict, model, label, natoms, learning_rate, mae=False): + """Return energy loss with magnetic labels. + + Parameters + ---------- + input_dict : dict[str, torch.Tensor] + Model inputs. + model : torch.nn.Module + Model to be used to output the predictions. + label : dict[str, torch.Tensor] + Labels. + natoms : int + The local atom number. + + Returns + ------- + model_pred: dict[str, torch.Tensor] + Model predictions. + loss: torch.Tensor + Loss for model to minimize. + more_loss: dict[str, torch.Tensor] + Other losses for display. 
+ """ + model_pred = model(**input_dict) + coef = learning_rate / self.starter_learning_rate + pref_e = self.limit_pref_e + (self.start_pref_e - self.limit_pref_e) * coef + pref_fr = self.limit_pref_fr + (self.start_pref_fr - self.limit_pref_fr) * coef + pref_fm = self.limit_pref_fm + (self.start_pref_fm - self.limit_pref_fm) * coef + pref_v = self.limit_pref_v + (self.start_pref_v - self.limit_pref_v) * coef + loss = torch.tensor(0.0, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE) + more_loss = {} + # more_loss['log_keys'] = [] # showed when validation on the fly + # more_loss['test_keys'] = [] # showed when doing dp test + atom_norm = 1.0 / natoms + if self.has_e and "energy" in model_pred and "energy" in label: + find_energy = label.get("find_energy", 0.0) + pref_e = pref_e * find_energy + if not self.use_l1_all: + l2_ener_loss = torch.mean( + torch.square(model_pred["energy"] - label["energy"]) + ) + if not self.inference: + more_loss["l2_ener_loss"] = self.display_if_exist( + l2_ener_loss.detach(), find_energy + ) + loss += atom_norm * (pref_e * l2_ener_loss) + rmse_e = l2_ener_loss.sqrt() * atom_norm + more_loss["rmse_e"] = self.display_if_exist( + rmse_e.detach(), find_energy + ) + # more_loss['log_keys'].append('rmse_e') + else: # use l1 and for all atoms + l1_ener_loss = F.l1_loss( + model_pred["energy"].reshape(-1), + label["energy"].reshape(-1), + reduction="sum", + ) + loss += pref_e * l1_ener_loss + more_loss["mae_e"] = self.display_if_exist( + F.l1_loss( + model_pred["energy"].reshape(-1), + label["energy"].reshape(-1), + reduction="mean", + ).detach(), + find_energy, + ) + # more_loss['log_keys'].append('rmse_e') + if mae: + mae_e = ( + torch.mean(torch.abs(model_pred["energy"] - label["energy"])) + * atom_norm + ) + more_loss["mae_e"] = self.display_if_exist(mae_e.detach(), find_energy) + mae_e_all = torch.mean( + torch.abs(model_pred["energy"] - label["energy"]) + ) + more_loss["mae_e_all"] = self.display_if_exist( + mae_e_all.detach(), 
find_energy + ) + + if self.has_fr and "force" in model_pred and "force" in label: + find_force_r = label.get("find_force", 0.0) + pref_fr = pref_fr * find_force_r + if not self.use_l1_all: + diff_fr = label["force"] - model_pred["force"] + l2_force_real_loss = torch.mean(torch.square(diff_fr)) + if not self.inference: + more_loss["l2_force_r_loss"] = self.display_if_exist( + l2_force_real_loss.detach(), find_force_r + ) + loss += (pref_fr * l2_force_real_loss).to(GLOBAL_PT_FLOAT_PRECISION) + rmse_fr = l2_force_real_loss.sqrt() + more_loss["rmse_fr"] = self.display_if_exist( + rmse_fr.detach(), find_force_r + ) + if mae: + mae_fr = torch.mean(torch.abs(diff_fr)) + more_loss["mae_fr"] = self.display_if_exist( + mae_fr.detach(), find_force_r + ) + else: + l1_force_real_loss = F.l1_loss( + label["force"], model_pred["force"], reduction="none" + ) + more_loss["mae_fr"] = self.display_if_exist( + l1_force_real_loss.mean().detach(), find_force_r + ) + l1_force_real_loss = l1_force_real_loss.sum(-1).mean(-1).sum() + loss += (pref_fr * l1_force_real_loss).to(GLOBAL_PT_FLOAT_PRECISION) + + if self.has_fm and "force_mag" in model_pred and "force_mag" in label: + find_force_m = label.get("find_force_mag", 0.0) + pref_fm = pref_fm * find_force_m + nframes = model_pred["force_mag"].shape[0] + atomic_mask = model_pred["mask_mag"].expand([-1, -1, 3]) + label_force_mag = label["force_mag"][atomic_mask].view(nframes, -1, 3) + model_pred_force_mag = model_pred["force_mag"][atomic_mask].view( + nframes, -1, 3 + ) + if not self.use_l1_all: + diff_fm = label_force_mag - model_pred_force_mag + l2_force_mag_loss = torch.mean(torch.square(diff_fm)) + if not self.inference: + more_loss["l2_force_m_loss"] = self.display_if_exist( + l2_force_mag_loss.detach(), find_force_m + ) + loss += (pref_fm * l2_force_mag_loss).to(GLOBAL_PT_FLOAT_PRECISION) + rmse_fm = l2_force_mag_loss.sqrt() + more_loss["rmse_fm"] = self.display_if_exist( + rmse_fm.detach(), find_force_m + ) + if mae: + mae_fm = 
torch.mean(torch.abs(diff_fm)) + more_loss["mae_fm"] = self.display_if_exist( + mae_fm.detach(), find_force_m + ) + else: + l1_force_mag_loss = F.l1_loss( + label_force_mag, model_pred_force_mag, reduction="none" + ) + more_loss["mae_fm"] = self.display_if_exist( + l1_force_mag_loss.mean().detach(), find_force_m + ) + l1_force_mag_loss = l1_force_mag_loss.sum(-1).mean(-1).sum() + loss += (pref_fm * l1_force_mag_loss).to(GLOBAL_PT_FLOAT_PRECISION) + + if not self.inference: + more_loss["rmse"] = torch.sqrt(loss.detach()) + return model_pred, loss, more_loss + + @property + def label_requirement(self) -> List[DataRequirementItem]: + """Return data label requirements needed for this loss calculation.""" + label_requirement = [] + if self.has_e: + label_requirement.append( + DataRequirementItem( + "energy", + ndof=1, + atomic=False, + must=False, + high_prec=True, + ) + ) + if self.has_fr: + label_requirement.append( + DataRequirementItem( + "force", + ndof=3, + atomic=True, + must=False, + high_prec=False, + ) + ) + if self.has_fm: + label_requirement.append( + DataRequirementItem( + "force_mag", + ndof=3, + atomic=True, + must=False, + high_prec=False, + ) + ) + if self.has_v: + label_requirement.append( + DataRequirementItem( + "virial", + ndof=9, + atomic=False, + must=False, + high_prec=False, + ) + ) + if self.has_ae: + label_requirement.append( + DataRequirementItem( + "atom_ener", + ndof=1, + atomic=True, + must=False, + high_prec=False, + ) + ) + if self.has_pf: + label_requirement.append( + DataRequirementItem( + "atom_pref", + ndof=1, + atomic=True, + must=False, + high_prec=False, + repeat=3, + ) + ) + return label_requirement diff --git a/deepmd/pt/loss/loss.py b/deepmd/pt/loss/loss.py new file mode 100644 index 0000000000..7e26f6571a --- /dev/null +++ b/deepmd/pt/loss/loss.py @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + List, +) + +import torch + +from 
deepmd.utils.data import ( + DataRequirementItem, +) + + +class TaskLoss(torch.nn.Module, ABC): + def __init__(self, **kwargs): + """Construct loss.""" + super().__init__() + + def forward(self, input_dict, model, label, natoms, learning_rate): + """Return loss .""" + raise NotImplementedError + + @property + @abstractmethod + def label_requirement(self) -> List[DataRequirementItem]: + """Return data label requirements needed for this loss calculation.""" + pass + + @staticmethod + def display_if_exist(loss: torch.Tensor, find_property: float) -> torch.Tensor: + """Display NaN if labeled property is not found. + + Parameters + ---------- + loss : torch.Tensor + the loss tensor + find_property : float + whether the property is found + """ + return loss if bool(find_property) else torch.nan diff --git a/deepmd/pt/loss/tensor.py b/deepmd/pt/loss/tensor.py new file mode 100644 index 0000000000..3dd91d203e --- /dev/null +++ b/deepmd/pt/loss/tensor.py @@ -0,0 +1,177 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + List, +) + +import torch + +from deepmd.pt.loss.loss import ( + TaskLoss, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.utils.data import ( + DataRequirementItem, +) + + +class TensorLoss(TaskLoss): + def __init__( + self, + tensor_name: str, + tensor_size: int, + label_name: str, + pref_atomic: float = 0.0, + pref: float = 0.0, + inference=False, + **kwargs, + ): + r"""Construct a loss for local and global tensors. + + Parameters + ---------- + tensor_name : str + The name of the tensor in the model predictions to compute the loss. + tensor_size : int + The size (dimension) of the tensor. + label_name : str + The name of the tensor in the labels to compute the loss. + pref_atomic : float + The prefactor of the weight of atomic loss. It should be larger than or equal to 0. + pref : float + The prefactor of the weight of global loss. It should be larger than or equal to 0. 
+        inference : bool
+            If true, it will output all losses found in output, ignoring the pre-factors.
+        **kwargs
+            Other keyword arguments.
+        """
+        super().__init__()
+        self.tensor_name = tensor_name
+        self.tensor_size = tensor_size
+        self.label_name = label_name
+        self.local_weight = pref_atomic
+        self.global_weight = pref
+        self.inference = inference
+
+        assert (
+            self.local_weight >= 0.0 and self.global_weight >= 0.0
+        ), "Can not assign negative weight to `pref` and `pref_atomic`"
+        # In inference mode both terms are evaluated regardless of their weights.
+        self.has_local_weight = self.local_weight > 0.0 or inference
+        self.has_global_weight = self.global_weight > 0.0 or inference
+        # The assert message is a plain string, consistent with the check above.
+        assert (
+            self.has_local_weight or self.has_global_weight
+        ), "Can not assign zero weight both to `pref` and `pref_atomic`"
+
+    def forward(self, input_dict, model, label, natoms, learning_rate=0.0, mae=False):
+        """Return loss on local and global tensors.
+
+        Parameters
+        ----------
+        input_dict : dict[str, torch.Tensor]
+            Model inputs.
+        model : torch.nn.Module
+            Model to be used to output the predictions.
+        label : dict[str, torch.Tensor]
+            Labels.
+        natoms : int
+            The local atom number.
+        learning_rate : float
+            Accepted for interface compatibility; not used by this loss.
+        mae : bool
+            Accepted for interface compatibility; not used by this loss.
+
+        Returns
+        -------
+        model_pred: dict[str, torch.Tensor]
+            Model predictions.
+        loss: torch.Tensor
+            Loss for model to minimize.
+        more_loss: dict[str, torch.Tensor]
+            Other losses for display.
+ """ + model_pred = model(**input_dict) + del learning_rate, mae + loss = torch.zeros(1, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE)[0] + more_loss = {} + if ( + self.has_local_weight + and self.tensor_name in model_pred + and "atomic_" + self.label_name in label + ): + find_local = label.get("find_" + "atomic_" + self.label_name, 0.0) + local_weight = self.local_weight * find_local + local_tensor_pred = model_pred[self.tensor_name].reshape( + [-1, natoms, self.tensor_size] + ) + local_tensor_label = label["atomic_" + self.label_name].reshape( + [-1, natoms, self.tensor_size] + ) + diff = (local_tensor_pred - local_tensor_label).reshape( + [-1, self.tensor_size] + ) + if "mask" in model_pred: + diff = diff[model_pred["mask"].reshape([-1]).bool()] + l2_local_loss = torch.mean(torch.square(diff)) + if not self.inference: + more_loss[f"l2_local_{self.tensor_name}_loss"] = self.display_if_exist( + l2_local_loss.detach(), find_local + ) + loss += local_weight * l2_local_loss + rmse_local = l2_local_loss.sqrt() + more_loss[f"rmse_local_{self.tensor_name}"] = self.display_if_exist( + rmse_local.detach(), find_local + ) + if ( + self.has_global_weight + and "global_" + self.tensor_name in model_pred + and self.label_name in label + ): + find_global = label.get("find_" + self.label_name, 0.0) + global_weight = self.global_weight * find_global + global_tensor_pred = model_pred["global_" + self.tensor_name].reshape( + [-1, self.tensor_size] + ) + global_tensor_label = label[self.label_name].reshape([-1, self.tensor_size]) + diff = global_tensor_pred - global_tensor_label + if "mask" in model_pred: + atom_num = model_pred["mask"].sum(-1, keepdim=True) + l2_global_loss = torch.mean( + torch.sum(torch.square(diff) * atom_num, dim=0) / atom_num.sum() + ) + atom_num = torch.mean(atom_num.float()) + else: + atom_num = natoms + l2_global_loss = torch.mean(torch.square(diff)) + if not self.inference: + more_loss[f"l2_global_{self.tensor_name}_loss"] = 
self.display_if_exist( + l2_global_loss.detach(), find_global + ) + loss += global_weight * l2_global_loss + rmse_global = l2_global_loss.sqrt() / atom_num + more_loss[f"rmse_global_{self.tensor_name}"] = self.display_if_exist( + rmse_global.detach(), find_global + ) + return model_pred, loss, more_loss + + @property + def label_requirement(self) -> List[DataRequirementItem]: + """Return data label requirements needed for this loss calculation.""" + label_requirement = [] + if self.has_local_weight: + label_requirement.append( + DataRequirementItem( + "atomic_" + self.label_name, + ndof=self.tensor_size, + atomic=True, + must=False, + high_prec=False, + ) + ) + if self.has_global_weight: + label_requirement.append( + DataRequirementItem( + self.label_name, + ndof=self.tensor_size, + atomic=False, + must=False, + high_prec=False, + ) + ) + return label_requirement diff --git a/deepmd/pt/model/__init__.py b/deepmd/pt/model/__init__.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/deepmd/pt/model/__init__.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/deepmd/pt/model/atomic_model/__init__.py b/deepmd/pt/model/atomic_model/__init__.py new file mode 100644 index 0000000000..a747f28556 --- /dev/null +++ b/deepmd/pt/model/atomic_model/__init__.py @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""The atomic model provides the prediction of some property on each +atom. All the atomic models are not supposed to be directly accessed +by users, but it provides a convenient interface for the +implementation of models. + +Taking the energy models for example, the developeres only needs to +implement the atomic energy prediction via an atomic model, and the +model can be automatically made by the `deepmd.dpmodel.make_model` +method. 
The `DPModel` is made by +``` +DPModel = make_model(DPAtomicModel) +``` + +""" + +from .base_atomic_model import ( + BaseAtomicModel, +) +from .dp_atomic_model import ( + DPAtomicModel, +) +from .linear_atomic_model import ( + DPZBLLinearEnergyAtomicModel, + LinearEnergyAtomicModel, +) +from .pairtab_atomic_model import ( + PairTabAtomicModel, +) + +__all__ = [ + "BaseAtomicModel", + "DPAtomicModel", + "PairTabAtomicModel", + "LinearEnergyAtomicModel", + "DPZBLLinearEnergyAtomicModel", +] diff --git a/deepmd/pt/model/atomic_model/base_atomic_model.py b/deepmd/pt/model/atomic_model/base_atomic_model.py new file mode 100644 index 0000000000..129b8dc11d --- /dev/null +++ b/deepmd/pt/model/atomic_model/base_atomic_model.py @@ -0,0 +1,304 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + + +import logging +from typing import ( + Callable, + Dict, + List, + Optional, + Tuple, +) + +import numpy as np +import torch + +from deepmd.dpmodel.atomic_model import ( + make_base_atomic_model, +) +from deepmd.dpmodel.output_def import ( + FittingOutputDef, + OutputVariableDef, +) +from deepmd.pt.utils import ( + AtomExcludeMask, + PairExcludeMask, +) +from deepmd.pt.utils.nlist import ( + extend_input_and_build_neighbor_list, +) +from deepmd.pt.utils.stat import ( + compute_output_stats, +) +from deepmd.pt.utils.utils import ( + to_numpy_array, +) +from deepmd.utils.path import ( + DPPath, +) + +log = logging.getLogger(__name__) + +BaseAtomicModel_ = make_base_atomic_model(torch.Tensor) + + +class BaseAtomicModel(BaseAtomicModel_): + def __init__( + self, + atom_exclude_types: List[int] = [], + pair_exclude_types: List[Tuple[int, int]] = [], + ): + super().__init__() + self.reinit_atom_exclude(atom_exclude_types) + self.reinit_pair_exclude(pair_exclude_types) + + def reinit_atom_exclude( + self, + exclude_types: List[int] = [], + ): + self.atom_exclude_types = exclude_types + if exclude_types == []: + self.atom_excl = None + else: + self.atom_excl = 
+ AtomExcludeMask(self.get_ntypes(), self.atom_exclude_types)
+
+    def reinit_pair_exclude(
+        self,
+        exclude_types: List[Tuple[int, int]] = [],
+    ):
+        """Store the excluded type pairs and rebuild the pair-exclusion mask.
+
+        `self.pair_excl` is set to None when `exclude_types` is empty.
+        """
+        self.pair_exclude_types = exclude_types
+        if exclude_types == []:
+            self.pair_excl = None
+        else:
+            self.pair_excl = PairExcludeMask(self.get_ntypes(), self.pair_exclude_types)
+
+    # to make jit happy...
+    def make_atom_mask(
+        self,
+        atype: torch.Tensor,
+    ) -> torch.Tensor:
+        """The atoms with type < 0 are treated as virtual atoms,
+        which serve as place-holders for multi-frame calculations
+        with different numbers of atoms in different frames.
+
+        Parameters
+        ----------
+        atype
+            Atom types. >= 0 for real atoms, < 0 for virtual atoms.
+
+        Returns
+        -------
+        mask
+            True for real atoms and False for virtual atoms.
+
+        """
+        # supposed to be supported by all backends
+        return atype >= 0
+
+    def atomic_output_def(self) -> FittingOutputDef:
+        """Return the fitting output definition extended with a `mask` variable."""
+        old_def = self.fitting_output_def()
+        old_list = list(old_def.get_data().values())
+        return FittingOutputDef(
+            old_list  # noqa:RUF005
+            + [
+                OutputVariableDef(
+                    name="mask",
+                    shape=[1],
+                    reduciable=False,
+                    r_differentiable=False,
+                    c_differentiable=False,
+                )
+            ]
+        )
+
+    def forward_common_atomic(
+        self,
+        extended_coord: torch.Tensor,
+        extended_atype: torch.Tensor,
+        nlist: torch.Tensor,
+        mapping: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+    ) -> Dict[str, torch.Tensor]:
+        """Common interface for atomic inference.
+
+        This method accepts extended coordinates, extended atom types and a neighbor list,
+        and predicts the atomic contribution of the fit property.
+
+        Parameters
+        ----------
+        extended_coord
+            extended coordinates, shape: nf x (nall x 3)
+        extended_atype
+            extended atom types, shape: nf x nall
+            a type < 0 indicates that the atom is virtual.
+ nlist + neighbor list, shape: nf x nloc x nsel + mapping + extended to local index mapping, shape: nf x nall + fparam + frame parameters, shape: nf x dim_fparam + aparam + atomic parameter, shape: nf x nloc x dim_aparam + + Returns + ------- + ret_dict + dict of output atomic properties. + should implement the definition of `fitting_output_def`. + ret_dict["mask"] of shape nf x nloc will be provided. + ret_dict["mask"][ff,ii] == 1 indicating the ii-th atom of the ff-th frame is real. + ret_dict["mask"][ff,ii] == 0 indicating the ii-th atom of the ff-th frame is virtual. + + """ + _, nloc, _ = nlist.shape + atype = extended_atype[:, :nloc] + + if self.pair_excl is not None: + pair_mask = self.pair_excl(nlist, extended_atype) + # exclude neighbors in the nlist + nlist = torch.where(pair_mask == 1, nlist, -1) + + ext_atom_mask = self.make_atom_mask(extended_atype) + ret_dict = self.forward_atomic( + extended_coord, + torch.where(ext_atom_mask, extended_atype, 0), + nlist, + mapping=mapping, + fparam=fparam, + aparam=aparam, + ) + + # nf x nloc + atom_mask = ext_atom_mask[:, :nloc].to(torch.int32) + if self.atom_excl is not None: + atom_mask *= self.atom_excl(atype) + + for kk in ret_dict.keys(): + out_shape = ret_dict[kk].shape + ret_dict[kk] = ( + ret_dict[kk].reshape([out_shape[0], out_shape[1], -1]) + * atom_mask[:, :, None] + ).view(out_shape) + ret_dict["mask"] = atom_mask + + return ret_dict + + def serialize(self) -> dict: + return { + "atom_exclude_types": self.atom_exclude_types, + "pair_exclude_types": self.pair_exclude_types, + } + + def get_forward_wrapper_func(self) -> Callable[..., torch.Tensor]: + """Get a forward wrapper of the atomic model for output bias calculation.""" + + def model_forward(coord, atype, box, fparam=None, aparam=None): + with torch.no_grad(): # it's essential for pure torch forward function to use auto_batchsize + ( + extended_coord, + extended_atype, + mapping, + nlist, + ) = extend_input_and_build_neighbor_list( + coord, + 
atype, + self.get_rcut(), + self.get_sel(), + mixed_types=self.mixed_types(), + box=box, + ) + atomic_ret = self.forward_common_atomic( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + fparam=fparam, + aparam=aparam, + ) + return {kk: vv.detach() for kk, vv in atomic_ret.items()} + + return model_forward + + def compute_or_load_stat( + self, + sampled_func, + stat_file_path: Optional[DPPath] = None, + ): + """ + Compute or load the statistics parameters of the model, + such as mean and standard deviation of descriptors or the energy bias of the fitting net. + When `sampled` is provided, all the statistics parameters will be calculated (or re-calculated for update), + and saved in the `stat_file_path`(s). + When `sampled` is not provided, it will check the existence of `stat_file_path`(s) + and load the calculated statistics parameters. + + Parameters + ---------- + sampled_func + The sampled data frames from different data systems. + stat_file_path + The path to the statistics files. + """ + raise NotImplementedError + + def change_out_bias( + self, + merged, + origin_type_map, + full_type_map, + bias_adjust_mode="change-by-statistic", + ) -> None: + """Change the output bias according to the input data and the pretrained model. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + origin_type_map : List[str] + The original type_map in dataset, they are targets to change the output bias. 
+ full_type_map : List[str] + The full type_map in pre-trained model + bias_adjust_mode : str + The mode for changing output bias : ['change-by-statistic', 'set-by-statistic'] + 'change-by-statistic' : perform predictions on labels of target dataset, + and do least square on the errors to obtain the target shift as bias. + 'set-by-statistic' : directly use the statistic output bias in the target dataset. + """ + sorter = np.argsort(full_type_map) + missing_types = [t for t in origin_type_map if t not in full_type_map] + assert ( + not missing_types + ), f"Some types are not in the pre-trained model: {list(missing_types)} !" + idx_type_map = sorter[ + np.searchsorted(full_type_map, origin_type_map, sorter=sorter) + ] + original_bias = self.get_out_bias() + if bias_adjust_mode == "change-by-statistic": + delta_bias = compute_output_stats( + merged, + self.get_ntypes(), + keys=["energy"], + model_forward=self.get_forward_wrapper_func(), + )["energy"] + self.set_out_bias(delta_bias, add=True) + elif bias_adjust_mode == "set-by-statistic": + bias_atom = compute_output_stats( + merged, + self.get_ntypes(), + keys=["energy"], + )["energy"] + self.set_out_bias(bias_atom) + else: + raise RuntimeError("Unknown bias_adjust_mode mode: " + bias_adjust_mode) + bias_atom = self.get_out_bias() + log.info( + f"Change output bias of {origin_type_map!s} " + f"from {to_numpy_array(original_bias[idx_type_map]).reshape(-1)!s} " + f"to {to_numpy_array(bias_atom[idx_type_map]).reshape(-1)!s}." 
+ ) diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py new file mode 100644 index 0000000000..13b8f09a79 --- /dev/null +++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py @@ -0,0 +1,269 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import functools +import logging +from typing import ( + Dict, + List, + Optional, +) + +import torch + +from deepmd.dpmodel import ( + FittingOutputDef, +) +from deepmd.pt.model.descriptor.base_descriptor import ( + BaseDescriptor, +) +from deepmd.pt.model.task.base_fitting import ( + BaseFitting, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .base_atomic_model import ( + BaseAtomicModel, +) + +log = logging.getLogger(__name__) + + +@BaseAtomicModel.register("standard") +class DPAtomicModel(torch.nn.Module, BaseAtomicModel): + """Model give atomic prediction of some physical property. + + Parameters + ---------- + descriptor + Descriptor + fitting_net + Fitting net + type_map + Mapping atom type to the name (str) of the type. + For example `type_map[1]` gives the name of the type 1. + """ + + def __init__( + self, + descriptor, + fitting, + type_map: List[str], + **kwargs, + ): + torch.nn.Module.__init__(self) + ntypes = len(type_map) + self.type_map = type_map + self.ntypes = ntypes + self.descriptor = descriptor + self.rcut = self.descriptor.get_rcut() + self.sel = self.descriptor.get_sel() + self.fitting_net = fitting + # order matters ntypes and type_map should be initialized first. 
+ BaseAtomicModel.__init__(self, **kwargs) + + def fitting_output_def(self) -> FittingOutputDef: + """Get the output def of the fitting net.""" + return ( + self.fitting_net.output_def() + if self.fitting_net is not None + else self.coord_denoise_net.output_def() + ) + + @torch.jit.export + def get_rcut(self) -> float: + """Get the cut-off radius.""" + return self.rcut + + @torch.jit.export + def get_type_map(self) -> List[str]: + """Get the type map.""" + return self.type_map + + def get_sel(self) -> List[int]: + """Get the neighbor selection.""" + return self.sel + + def mixed_types(self) -> bool: + """If true, the model + 1. assumes total number of atoms aligned across frames; + 2. uses a neighbor list that does not distinguish different atomic types. + + If false, the model + 1. assumes total number of atoms of each atom type aligned across frames; + 2. uses a neighbor list that distinguishes different atomic types. + + """ + return self.descriptor.mixed_types() + + def serialize(self) -> dict: + dd = BaseAtomicModel.serialize(self) + dd.update( + { + "@class": "Model", + "@version": 1, + "type": "standard", + "type_map": self.type_map, + "descriptor": self.descriptor.serialize(), + "fitting": self.fitting_net.serialize(), + } + ) + return dd + + @classmethod + def deserialize(cls, data) -> "DPAtomicModel": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("@class", None) + data.pop("type", None) + descriptor_obj = BaseDescriptor.deserialize(data.pop("descriptor")) + fitting_obj = BaseFitting.deserialize(data.pop("fitting")) + type_map = data.pop("type_map", None) + obj = cls(descriptor_obj, fitting_obj, type_map=type_map, **data) + return obj + + def forward_atomic( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + ) -> Dict[str, torch.Tensor]: + """Return atomic prediction. 
+ + Parameters + ---------- + extended_coord + coordinates in extended region + extended_atype + atomic type in extended region + nlist + neighbor list. nf x nloc x nsel + mapping + maps the extended indices to local indices + fparam + frame parameter. nf x ndf + aparam + atomic parameter. nf x nloc x nda + + Returns + ------- + result_dict + the result dict, defined by the `FittingOutputDef`. + + """ + nframes, nloc, nnei = nlist.shape + atype = extended_atype[:, :nloc] + if self.do_grad_r() or self.do_grad_c(): + extended_coord.requires_grad_(True) + descriptor, rot_mat, g2, h2, sw = self.descriptor( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + ) + assert descriptor is not None + # energy, force + fit_ret = self.fitting_net( + descriptor, + atype, + gr=rot_mat, + g2=g2, + h2=h2, + fparam=fparam, + aparam=aparam, + ) + return fit_ret + + def compute_or_load_stat( + self, + sampled_func, + stat_file_path: Optional[DPPath] = None, + ): + """ + Compute or load the statistics parameters of the model, + such as mean and standard deviation of descriptors or the energy bias of the fitting net. + When `sampled` is provided, all the statistics parameters will be calculated (or re-calculated for update), + and saved in the `stat_file_path`(s). + When `sampled` is not provided, it will check the existence of `stat_file_path`(s) + and load the calculated statistics parameters. + + Parameters + ---------- + sampled_func + The lazy sampled function to get data frames from different data systems. + stat_file_path + The dictionary of paths to the statistics files.
+ """ + if stat_file_path is not None and self.type_map is not None: + # descriptors and fitting net with different type_map + # should not share the same parameters + stat_file_path /= " ".join(self.type_map) + + @functools.lru_cache + def wrapped_sampler(): + sampled = sampled_func() + if self.pair_excl is not None: + pair_exclude_types = self.pair_excl.get_exclude_types() + for sample in sampled: + sample["pair_exclude_types"] = list(pair_exclude_types) + if self.atom_excl is not None: + atom_exclude_types = self.atom_excl.get_exclude_types() + for sample in sampled: + sample["atom_exclude_types"] = list(atom_exclude_types) + return sampled + + self.descriptor.compute_input_stats(wrapped_sampler, stat_file_path) + if self.fitting_net is not None: + self.fitting_net.compute_output_stats(wrapped_sampler, stat_file_path) + + def set_out_bias(self, out_bias: torch.Tensor, add=False) -> None: + """ + Modify the output bias for the atomic model. + + Parameters + ---------- + out_bias : torch.Tensor + The new bias to be applied. + add : bool, optional + Whether to add the new bias to the existing one. + If False, the output bias will be directly replaced by the new bias. + If True, the new bias will be added to the existing one. + """ + self.fitting_net["bias_atom_e"] = ( + out_bias + self.fitting_net["bias_atom_e"] if add else out_bias + ) + + def get_out_bias(self) -> torch.Tensor: + """Return the output bias of the atomic model.""" + return self.fitting_net["bias_atom_e"] + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + return self.fitting_net.get_dim_fparam() + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return self.fitting_net.get_dim_aparam() + + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. 
+ If returning an empty list, all atom types are selected. + """ + return self.fitting_net.get_sel_type() + + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + return False diff --git a/deepmd/pt/model/atomic_model/linear_atomic_model.py b/deepmd/pt/model/atomic_model/linear_atomic_model.py new file mode 100644 index 0000000000..f599399e66 --- /dev/null +++ b/deepmd/pt/model/atomic_model/linear_atomic_model.py @@ -0,0 +1,506 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +from typing import ( + Dict, + List, + Optional, + Tuple, +) + +import torch + +from deepmd.dpmodel import ( + FittingOutputDef, + OutputVariableDef, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.nlist import ( + build_multiple_neighbor_list, + get_multiple_nlist_key, + nlist_distinguish_types, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .base_atomic_model import ( + BaseAtomicModel, +) +from .dp_atomic_model import ( + DPAtomicModel, +) +from .pairtab_atomic_model import ( + PairTabAtomicModel, +) + + +class LinearEnergyAtomicModel(torch.nn.Module, BaseAtomicModel): + """Linear model make linear combinations of several existing models. + + Parameters + ---------- + models : list[DPAtomicModel or PairTabAtomicModel] + A list of models to be combined. PairTabAtomicModel must be used together with a DPAtomicModel. + type_map : list[str] + Mapping atom type to the name (str) of the type. + For example `type_map[1]` gives the name of the type 1. 
+ """ + + def __init__( + self, + models: List[BaseAtomicModel], + type_map: List[str], + **kwargs, + ): + torch.nn.Module.__init__(self) + self.models = torch.nn.ModuleList(models) + sub_model_type_maps = [md.get_type_map() for md in models] + err_msg = [] + self.mapping_list = [] + common_type_map = set(type_map) + self.type_map = type_map + for tpmp in sub_model_type_maps: + if common_type_map.issubset(set(tpmp)): + self.mapping_list.append(self.remap_atype(tpmp, self.type_map)) + else: + # do not call remap_atype here: its dict lookup would raise KeyError before the assert below reports + err_msg.append(f"type_map {type_map} is not a subset of type_map {tpmp}") + assert len(err_msg) == 0, "\n".join(err_msg) + + self.atomic_bias = None + self.mixed_types_list = [model.mixed_types() for model in self.models] + self.rcuts = torch.tensor( + self.get_model_rcuts(), dtype=torch.float64, device=env.DEVICE + ) + self.nsels = torch.tensor(self.get_model_nsels(), device=env.DEVICE) + BaseAtomicModel.__init__(self, **kwargs) + + def mixed_types(self) -> bool: + """If true, the model + 1. assumes total number of atoms aligned across frames; + 2. uses a neighbor list that does not distinguish different atomic types. + + If false, the model + 1. assumes total number of atoms of each atom type aligned across frames; + 2. uses a neighbor list that distinguishes different atomic types. + + """ + return True + + def get_rcut(self) -> float: + """Get the cut-off radius.""" + return max(self.get_model_rcuts()) + + def get_type_map(self) -> List[str]: + """Get the type map.""" + return self.type_map + + def get_model_rcuts(self) -> List[float]: + """Get the cut-off radius for each individual models.""" + return [model.get_rcut() for model in self.models] + + def get_sel(self) -> List[int]: + return [max([model.get_nsel() for model in self.models])] + + def get_model_nsels(self) -> List[int]: + """Get the processed sels for each individual models.
Not distinguishing types.""" + return [model.get_nsel() for model in self.models] + + def get_model_sels(self) -> List[List[int]]: + """Get the sels for each individual models.""" + return [model.get_sel() for model in self.models] + + def _sort_rcuts_sels(self) -> Tuple[List[float], List[int]]: + # sort the pair of rcut and sels in ascending order, first based on sel, then on rcut. + zipped = torch.stack( + [ + self.rcuts, + self.nsels, + ], + dim=0, + ).T + inner_sorting = torch.argsort(zipped[:, 1], dim=0) + inner_sorted = zipped[inner_sorting] + outer_sorting = torch.argsort(inner_sorted[:, 0], stable=True) + outer_sorted = inner_sorted[outer_sorting] + sorted_rcuts: List[float] = outer_sorted[:, 0].tolist() + sorted_sels: List[int] = outer_sorted[:, 1].to(torch.int64).tolist() + return sorted_rcuts, sorted_sels + + def forward_atomic( + self, + extended_coord: torch.Tensor, + extended_atype: torch.Tensor, + nlist: torch.Tensor, + mapping: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + ) -> Dict[str, torch.Tensor]: + """Return atomic prediction. + + Parameters + ---------- + extended_coord + coordinates in extended region, (nframes, nall * 3) + extended_atype + atomic type in extended region, (nframes, nall) + nlist + neighbor list, (nframes, nloc, nsel). + mapping + maps the extended indices to local indices. + fparam + frame parameter. (nframes, ndf) + aparam + atomic parameter. (nframes, nloc, nda) + + Returns + ------- + result_dict + the result dict, defined by the fitting net output def.
+ """ + nframes, nloc, nnei = nlist.shape + if self.do_grad_r() or self.do_grad_c(): + extended_coord.requires_grad_(True) + extended_coord = extended_coord.view(nframes, -1, 3) + sorted_rcuts, sorted_sels = self._sort_rcuts_sels() + nlists = build_multiple_neighbor_list( + extended_coord, + nlist, + sorted_rcuts, + sorted_sels, + ) + raw_nlists = [ + nlists[get_multiple_nlist_key(rcut, sel)] + for rcut, sel in zip(self.get_model_rcuts(), self.get_model_nsels()) + ] + nlists_ = [ + nl if mt else nlist_distinguish_types(nl, extended_atype, sel) + for mt, nl, sel in zip( + self.mixed_types_list, raw_nlists, self.get_model_sels() + ) + ] + ener_list = [] + + for i, model in enumerate(self.models): + type_map_model = self.mapping_list[i] # common atype index -> this sub-model's atype index; must not shadow the `mapping` argument + ener_list.append( + model.forward_atomic( + extended_coord, + type_map_model[extended_atype], + nlists_[i], + mapping, + fparam, + aparam, + )["energy"] + ) + + weights = self._compute_weight(extended_coord, extended_atype, nlists_) + + atype = extended_atype[:, :nloc] + for idx, model in enumerate(self.models): + # TODO: provide interfaces for atomic models to access bias_atom_e + if isinstance(model, DPAtomicModel): + bias_atom_e = model.fitting_net.bias_atom_e + elif isinstance(model, PairTabAtomicModel): + bias_atom_e = model.bias_atom_e + else: + bias_atom_e = None + if bias_atom_e is not None: + ener_list[idx] += bias_atom_e[self.mapping_list[idx][atype]] # the sub-model bias is indexed by the sub-model's own atype + + fit_ret = { + "energy": torch.sum(torch.stack(ener_list) * torch.stack(weights), dim=0), + } # (nframes, nloc, 1) + return fit_ret + + @staticmethod + def remap_atype(ori_map: List[str], new_map: List[str]) -> torch.Tensor: + """ + This method is used to map the atype from the common type_map to the original type_map of + individual AtomicModels. It creates an index mapping for the conversion. + + Parameters + ---------- + ori_map : List[str] + The original type map of an AtomicModel.
+ new_map : List[str] + The common type map of the DPZBLLinearEnergyAtomicModel, created by the `get_type_map` method, + must be a subset of the ori_map. + + Returns + ------- + torch.Tensor + """ + type_2_idx = {atp: idx for idx, atp in enumerate(ori_map)} + # this maps the atype in the new map to the original map + mapping = torch.tensor( + [type_2_idx[new_map[idx]] for idx in range(len(new_map))], device=env.DEVICE + ) + return mapping + + def fitting_output_def(self) -> FittingOutputDef: + return FittingOutputDef( + [ + OutputVariableDef( + name="energy", + shape=[1], + reduciable=True, + r_differentiable=True, + c_differentiable=True, + ) + ] + ) + + def serialize(self) -> dict: + return { + "@class": "Model", + "@version": 1, + "type": "linear", + "models": [model.serialize() for model in self.models], + "type_map": self.type_map, + } + + @classmethod + def deserialize(cls, data: dict) -> "LinearEnergyAtomicModel": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("@class") + data.pop("type") + type_map = data.pop("type_map") + models = [ + BaseAtomicModel.get_class_by_type(model["type"]).deserialize(model) + for model in data["models"] + ] + data.pop("models") + return cls(models, type_map, **data) + + def _compute_weight( + self, extended_coord, extended_atype, nlists_ + ) -> List[torch.Tensor]: + """This should be a list of user defined weights that matches the number of models to be combined.""" + nmodels = len(self.models) + return [ + torch.ones(1, dtype=torch.float64, device=env.DEVICE) / nmodels + for _ in range(nmodels) + ] + + def set_out_bias(self, out_bias: torch.Tensor, add=False) -> None: + """ + Modify the output bias for all the models in the linear atomic model. + + Parameters + ---------- + out_bias : torch.Tensor + The new bias to be applied. + add : bool, optional + Whether to add the new bias to the existing one. + If False, the output bias will be directly replaced by the new bias. 
+ If True, the new bias will be added to the existing one. + """ + for model in self.models: + model.set_out_bias(out_bias, add=add) + + def get_out_bias(self) -> torch.Tensor: + """Return the weighted output bias of the linear atomic model.""" + # TODO add get_out_bias for linear atomic model + raise NotImplementedError + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + # tricky... + return max([model.get_dim_fparam() for model in self.models]) + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return max([model.get_dim_aparam() for model in self.models]) + + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + if any(model.get_sel_type() == [] for model in self.models): + return [] + # join all the selected types + # make torch.jit happy... + return torch.unique( + torch.cat( + [ + torch.as_tensor(model.get_sel_type(), dtype=torch.int32) + for model in self.models + ] + ) + ).tolist() + + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + return False + + +class DPZBLLinearEnergyAtomicModel(LinearEnergyAtomicModel): + """Model linearly combine a list of AtomicModels. + + Parameters + ---------- + dp_model + The DPAtomicModel being combined. + zbl_model + The PairTable model being combined. + sw_rmin + The lower boundary of the interpolation between short-range tabulated interaction and DP. + sw_rmax + The upper boundary of the interpolation between short-range tabulated interaction and DP. + type_map + Mapping atom type to the name (str) of the type. + For example `type_map[1]` gives the name of the type 1. 
+ smin_alpha + The short-range tabulated interaction will be switched according to the distance of the nearest neighbor. + This distance is calculated by softmin. + """ + + def __init__( + self, + dp_model: DPAtomicModel, + zbl_model: PairTabAtomicModel, + sw_rmin: float, + sw_rmax: float, + type_map: List[str], + smin_alpha: Optional[float] = 0.1, + **kwargs, + ): + models = [dp_model, zbl_model] + super().__init__(models, type_map, **kwargs) + + self.sw_rmin = sw_rmin + self.sw_rmax = sw_rmax + self.smin_alpha = smin_alpha + + # this is a placeholder being updated in _compute_weight, to handle Jit attribute init error. + self.zbl_weight = torch.empty(0, dtype=torch.float64, device=env.DEVICE) + + def compute_or_load_stat( + self, + sampled_func, + stat_file_path: Optional[DPPath] = None, + ): + """ + Compute or load the statistics parameters of the model, + such as mean and standard deviation of descriptors or the energy bias of the fitting net. + When `sampled` is provided, all the statistics parameters will be calculated (or re-calculated for update), + and saved in the `stat_file_path`(s). + When `sampled` is not provided, it will check the existence of `stat_file_path`(s) + and load the calculated statistics parameters. + + Parameters + ---------- + sampled_func + The lazy sampled function to get data frames from different data systems. + stat_file_path + The dictionary of paths to the statistics files.
+ """ + self.models[0].compute_or_load_stat(sampled_func, stat_file_path) + self.models[1].compute_or_load_stat(sampled_func, stat_file_path) + + def serialize(self) -> dict: + dd = BaseAtomicModel.serialize(self) + dd.update( + { + "@class": "Model", + "@version": 2, + "type": "zbl", + "models": LinearEnergyAtomicModel( + models=[self.models[0], self.models[1]], type_map=self.type_map + ).serialize(), + "sw_rmin": self.sw_rmin, + "sw_rmax": self.sw_rmax, + "smin_alpha": self.smin_alpha, + } + ) + return dd + + @classmethod + def deserialize(cls, data) -> "DPZBLLinearEnergyAtomicModel": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 2, 1) + sw_rmin = data.pop("sw_rmin") + sw_rmax = data.pop("sw_rmax") + smin_alpha = data.pop("smin_alpha") + linear_model = LinearEnergyAtomicModel.deserialize(data.pop("models")) + dp_model, zbl_model = linear_model.models + type_map = linear_model.type_map + + data.pop("@class", None) + data.pop("type", None) + return cls( + dp_model=dp_model, + zbl_model=zbl_model, + sw_rmin=sw_rmin, + sw_rmax=sw_rmax, + type_map=type_map, + smin_alpha=smin_alpha, + **data, + ) + + def _compute_weight( + self, + extended_coord: torch.Tensor, + extended_atype: torch.Tensor, + nlists_: List[torch.Tensor], + ) -> List[torch.Tensor]: + """ZBL weight. + + Returns + ------- + List[torch.Tensor] + the atomic ZBL weight for interpolation. (nframes, nloc, 1) + """ + assert ( + self.sw_rmax > self.sw_rmin + ), "The upper boundary `sw_rmax` must be greater than the lower boundary `sw_rmin`." 
+ + dp_nlist = nlists_[0] + zbl_nlist = nlists_[1] + + zbl_nnei = zbl_nlist.shape[-1] + dp_nnei = dp_nlist.shape[-1] + + # use the larger rr based on nlist + nlist_larger = zbl_nlist if zbl_nnei >= dp_nnei else dp_nlist + masked_nlist = torch.clamp(nlist_larger, 0) + pairwise_rr = PairTabAtomicModel._get_pairwise_dist( + extended_coord, masked_nlist + ) + # padded neighbors (-1) were clamped to index 0 above, so their entries in + # pairwise_rr are real distances to atom 0; mask them out of BOTH softmin sums. + has_nei = nlist_larger != -1 + exp_rr = torch.where( + has_nei, + torch.exp(-pairwise_rr / self.smin_alpha), + torch.zeros_like(pairwise_rr), + ) # softmin weights, zero for padded neighbors + numerator = torch.sum(pairwise_rr * exp_rr, dim=-1) + denominator = torch.sum(exp_rr, dim=-1) + isolated = ~torch.any(has_nei, dim=-1) # atoms without any real neighbor + + sigma = numerator / torch.clamp(denominator, 1e-20) # nframes, nloc + u = (sigma - self.sw_rmin) / (self.sw_rmax - self.sw_rmin) + coef = torch.zeros_like(u) + left_mask = sigma < self.sw_rmin + mid_mask = (self.sw_rmin <= sigma) & (sigma < self.sw_rmax) + right_mask = (sigma >= self.sw_rmax) | isolated # isolated atoms have sigma == 0; keep them on the pure DP weight + coef[left_mask] = 1 + smooth = -6 * u**5 + 15 * u**4 - 10 * u**3 + 1 + coef[mid_mask] = smooth[mid_mask] + coef[right_mask] = 0 + self.zbl_weight = coef # nframes, nloc + return [1 - coef.unsqueeze(-1), coef.unsqueeze(-1)] # to match the model order.
diff --git a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py new file mode 100644 index 0000000000..4db77790e9 --- /dev/null +++ b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py @@ -0,0 +1,505 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +from typing import ( + Callable, + Dict, + List, + Optional, + Union, +) + +import torch + +from deepmd.dpmodel import ( + FittingOutputDef, + OutputVariableDef, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.stat import ( + compute_output_stats, +) +from deepmd.utils.pair_tab import ( + PairTab, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .base_atomic_model import ( + BaseAtomicModel, +) + + +@BaseAtomicModel.register("pairtab") +class PairTabAtomicModel(torch.nn.Module, BaseAtomicModel): + """Pairwise tabulation energy model. + + This model can be used to tabulate the pairwise energy between atoms for either + short-range or long-range interactions, such as D3, LJ, ZBL, etc. It should not + be used alone, but rather as one submodel of a linear (sum) model, such as + DP+D3. + + Do not put the model on the first model of a linear model, since the linear + model fetches the type map from the first model. + + At this moment, the model does not smooth the energy at the cutoff radius, so + one needs to make sure the energy has been smoothed to zero. + + Parameters + ---------- + tab_file : str + The path to the tabulation file. + rcut : float + The cutoff radius. + sel : int or list[int] + The maxmum number of atoms in the cut-off radius. + type_map : List[str] + Mapping atom type to the name (str) of the type. + For example `type_map[1]` gives the name of the type 1. + rcond : float, optional + The condition number for the regression of atomic energy. + atom_ener + Specifying atomic energy contribution in vacuum. 
The `set_davg_zero` key in the descriptor should be set. + + """ + + def __init__( + self, + tab_file: str, + rcut: float, + sel: Union[int, List[int]], + type_map: List[str], + rcond: Optional[float] = None, + atom_ener: Optional[List[float]] = None, + **kwargs, + ): + torch.nn.Module.__init__(self) + self.tab_file = tab_file + self.rcut = rcut + self.tab = self._set_pairtab(tab_file, rcut) + + BaseAtomicModel.__init__(self, **kwargs) + self.rcond = rcond + self.atom_ener = atom_ener + self.type_map = type_map + self.ntypes = len(type_map) + + # handle deserialization with no input file + if self.tab_file is not None: + ( + tab_info, + tab_data, + ) = self.tab.get() # this returns -> Tuple[np.array, np.array] + nspline, ntypes_tab = tab_info[-2:].astype(int) + self.register_buffer("tab_info", torch.from_numpy(tab_info)) + self.register_buffer( + "tab_data", + torch.from_numpy(tab_data).reshape(ntypes_tab, ntypes_tab, nspline, 4), + ) + if self.ntypes != ntypes_tab: + raise ValueError( + "The `type_map` provided does not match the number of columns in the table."
+ ) + else: + self.register_buffer("tab_info", None) + self.register_buffer("tab_data", None) + self.bias_atom_e = torch.zeros( + self.ntypes, 1, dtype=env.GLOBAL_PT_ENER_FLOAT_PRECISION, device=env.DEVICE + ) + + # self.model_type = "ener" + # self.model_version = MODEL_VERSION ## this shoud be in the parent class + + if isinstance(sel, int): + self.sel = sel + elif isinstance(sel, list): + self.sel = sum(sel) + else: + raise TypeError("sel must be int or list[int]") + + @torch.jit.ignore + def _set_pairtab(self, tab_file: str, rcut: float) -> PairTab: + return PairTab(tab_file, rcut) + + def fitting_output_def(self) -> FittingOutputDef: + return FittingOutputDef( + [ + OutputVariableDef( + name="energy", + shape=[1], + reduciable=True, + r_differentiable=True, + c_differentiable=True, + ) + ] + ) + + def get_rcut(self) -> float: + return self.rcut + + def get_type_map(self) -> List[str]: + return self.type_map + + def get_sel(self) -> List[int]: + return [self.sel] + + def get_nsel(self) -> int: + return self.sel + + def mixed_types(self) -> bool: + """If true, the model + 1. assumes total number of atoms aligned across frames; + 2. uses a neighbor list that does not distinguish different atomic types. + + If false, the model + 1. assumes total number of atoms of each atom type aligned across frames; + 2. uses a neighbor list that distinguishes different atomic types. + + """ + # to match DPA1 and DPA2. 
+ return True + + def serialize(self) -> dict: + dd = BaseAtomicModel.serialize(self) + dd.update( + { + "@class": "Model", + "@version": 1, + "type": "pairtab", + "tab": self.tab.serialize(), + "rcut": self.rcut, + "sel": self.sel, + "type_map": self.type_map, + "rcond": self.rcond, + "atom_ener": self.atom_ener, + } + ) + return dd + + @classmethod + def deserialize(cls, data) -> "PairTabAtomicModel": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + rcut = data.pop("rcut") + sel = data.pop("sel") + type_map = data.pop("type_map") + rcond = data.pop("rcond") + atom_ener = data.pop("atom_ener") + tab = PairTab.deserialize(data.pop("tab")) + data.pop("@class", None) + data.pop("type", None) + tab_model = cls(None, rcut, sel, type_map, rcond, atom_ener, **data) + + tab_model.tab = tab + tab_model.register_buffer("tab_info", torch.from_numpy(tab_model.tab.tab_info)) + nspline, ntypes = tab_model.tab.tab_info[-2:].astype(int) + tab_model.register_buffer( + "tab_data", + torch.from_numpy(tab_model.tab.tab_data).reshape( + ntypes, ntypes, nspline, 4 + ), + ) + return tab_model + + def compute_or_load_stat( + self, + merged: Union[Callable[[], List[dict]], List[dict]], + stat_file_path: Optional[DPPath] = None, + ): + """ + Compute the output statistics (e.g. energy bias) for the fitting net from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + stat_file_path : Optional[DPPath] + The path to the stat file. 
+ + """ + bias_atom_e = compute_output_stats( + merged, + self.ntypes, + keys=["energy"], + stat_file_path=stat_file_path, + rcond=self.rcond, + atom_ener=self.atom_ener, + )["energy"] + self.bias_atom_e.copy_( + torch.tensor(bias_atom_e, device=env.DEVICE).view([self.ntypes, 1]) + ) + + def set_out_bias(self, out_bias: torch.Tensor, add=False) -> None: + """ + Modify the output bias for the atomic model. + + Parameters + ---------- + out_bias : torch.Tensor + The new bias to be applied. + add : bool, optional + Whether to add the new bias to the existing one. + If False, the output bias will be directly replaced by the new bias. + If True, the new bias will be added to the existing one. + """ + self.bias_atom_e = out_bias + self.bias_atom_e if add else out_bias + + def get_out_bias(self) -> torch.Tensor: + """Return the output bias of the atomic model.""" + return self.bias_atom_e + + def forward_atomic( + self, + extended_coord: torch.Tensor, + extended_atype: torch.Tensor, + nlist: torch.Tensor, + mapping: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ) -> Dict[str, torch.Tensor]: + nframes, nloc, nnei = nlist.shape + extended_coord = extended_coord.view(nframes, -1, 3) + if self.do_grad_r() or self.do_grad_c(): + extended_coord.requires_grad_(True) + + # this will mask all -1 in the nlist + mask = nlist >= 0 + masked_nlist = nlist * mask + + atype = extended_atype[:, :nloc] # (nframes, nloc) + pairwise_rr = self._get_pairwise_dist( + extended_coord, masked_nlist + ) # (nframes, nloc, nnei) + self.tab_data = self.tab_data.to(device=extended_coord.device).view( + int(self.tab_info[-1]), int(self.tab_info[-1]), int(self.tab_info[2]), 4 + ) + + # to calculate the atomic_energy, we need 3 tensors, i_type, j_type, pairwise_rr + # i_type : (nframes, nloc), this is atype. 
+ # j_type : (nframes, nloc, nnei) + j_type = extended_atype[ + torch.arange(extended_atype.size(0), device=extended_coord.device)[ + :, None, None + ], + masked_nlist, + ] + + raw_atomic_energy = self._pair_tabulated_inter( + nlist, atype, j_type, pairwise_rr + ) + + atomic_energy = 0.5 * torch.sum( + torch.where( + nlist != -1, raw_atomic_energy, torch.zeros_like(raw_atomic_energy) + ), + dim=-1, + ).unsqueeze(-1) + + return {"energy": atomic_energy} + + def _pair_tabulated_inter( + self, + nlist: torch.Tensor, + i_type: torch.Tensor, + j_type: torch.Tensor, + rr: torch.Tensor, + ) -> torch.Tensor: + """Pairwise tabulated energy. + + Parameters + ---------- + nlist : torch.Tensor + The unmasked neighbour list. (nframes, nloc, nnei) + i_type : torch.Tensor + The integer representation of atom type for all local atoms for all frames. (nframes, nloc) + j_type : torch.Tensor + The integer representation of atom type for all neighbour atoms of all local atoms for all frames. (nframes, nloc, nnei) + rr : torch.Tensor + The scalar distance vector between two atoms. (nframes, nloc, nnei) + + Returns + ------- + torch.Tensor + The masked atomic energy for all local atoms for all frames. (nframes, nloc, nnei) + + Raises + ------ + Exception + If the distance is beyond the table. + + Notes + ----- + This function is used to calculate the pairwise energy between two atoms. + It uses a table containing cubic spline coefficients calculated in PairTab. + """ + nframes, nloc, nnei = nlist.shape + rmin = self.tab_info[0] + hh = self.tab_info[1] + hi = 1.0 / hh + + nspline = int(self.tab_info[2] + 0.1) + + uu = (rr - rmin) * hi # this is broadcasted to (nframes,nloc,nnei) + + # if nnei of atom 0 has -1 in the nlist, uu would be 0. + # this is to handle the nlist where the mask is set to 0, so that we don't raise exception for those atoms.
+ uu = torch.where(nlist != -1, uu, nspline + 1) + + if torch.any(uu < 0): + raise Exception("coord go beyond table lower boundary") + + idx = uu.to(torch.int) + + uu -= idx + + table_coef = self._extract_spline_coefficient( + i_type, j_type, idx, self.tab_data, nspline + ) + table_coef = table_coef.view(nframes, nloc, nnei, 4) + ener = self._calculate_ener(table_coef, uu) + + # here we need to overwrite energy to zero at rcut and beyond. + mask_beyond_rcut = rr >= self.rcut + # also overwrite values beyond extrapolation to zero + extrapolation_mask = rr >= rmin + nspline * hh + ener[mask_beyond_rcut] = 0 + ener[extrapolation_mask] = 0 + + return ener + + @staticmethod + def _get_pairwise_dist(coords: torch.Tensor, nlist: torch.Tensor) -> torch.Tensor: + """Get pairwise distance `dr`. + + Parameters + ---------- + coords : torch.Tensor + The coordinate of the atoms, shape of (nframes, nall, 3). + nlist + The masked nlist, shape of (nframes, nloc, nnei) + + Returns + ------- + torch.Tensor + The pairwise distance between the atoms (nframes, nloc, nnei). + """ + nframes, nloc, nnei = nlist.shape + coord_l = coords[:, :nloc].view(nframes, -1, 1, 3) + index = nlist.view(nframes, -1).unsqueeze(-1).expand(-1, -1, 3) + coord_r = torch.gather(coords, 1, index) + coord_r = coord_r.view(nframes, nloc, nnei, 3) + diff = coord_r - coord_l + pairwise_rr = torch.linalg.norm(diff, dim=-1, keepdim=True).squeeze(-1) + return pairwise_rr + + @staticmethod + def _extract_spline_coefficient( + i_type: torch.Tensor, + j_type: torch.Tensor, + idx: torch.Tensor, + tab_data: torch.Tensor, + nspline: int, + ) -> torch.Tensor: + """Extract the spline coefficient from the table. + + Parameters + ---------- + i_type : torch.Tensor + The integer representation of atom type for all local atoms for all frames. (nframes, nloc) + j_type : torch.Tensor + The integer representation of atom type for all neighbour atoms of all local atoms for all frames. 
(nframes, nloc, nnei) + idx : torch.Tensor + The index of the spline coefficient. (nframes, nloc, nnei) + tab_data : torch.Tensor + The table storing all the spline coefficient. (ntype, ntype, nspline, 4) + nspline : int + The number of splines in the table. + + Returns + ------- + torch.Tensor + The spline coefficient. (nframes, nloc, nnei, 4), shape may be squeezed. + + """ + # (nframes, nloc, nnei) + expanded_i_type = i_type.unsqueeze(-1).expand(-1, -1, j_type.shape[-1]) + + # handle the case where idx is beyond the number of splines + clipped_indices = torch.clamp(idx, 0, nspline - 1).to(torch.int64) + + nframes = i_type.shape[0] + nloc = i_type.shape[1] + nnei = j_type.shape[2] + ntypes = tab_data.shape[0] + # tab_data_idx: (nframes, nloc, nnei) + tab_data_idx = ( + expanded_i_type * ntypes * nspline + j_type * nspline + clipped_indices + ) + # tab_data: (ntype, ntype, nspline, 4) + tab_data = tab_data.view(ntypes * ntypes * nspline, 4) + # tab_data_idx: (nframes * nloc * nnei, 4) + tab_data_idx = tab_data_idx.view(nframes * nloc * nnei, 1).expand(-1, 4) + # (nframes, nloc, nnei, 4) + final_coef = torch.gather(tab_data, 0, tab_data_idx).view( + nframes, nloc, nnei, 4 + ) + + # when the spline idx is beyond the table, all spline coefficients are set to `0`, and the resulting ener corresponding to the idx is also `0`. + final_coef[idx > nspline] = 0 + return final_coef + + @staticmethod + def _calculate_ener(coef: torch.Tensor, uu: torch.Tensor) -> torch.Tensor: + """Calculate energy using spline coefficients. + + Parameters + ---------- + coef : torch.Tensor + The spline coefficients. (nframes, nloc, nnei, 4) + uu : torch.Tensor + The atom displacement used in interpolation and extrapolation (nframes, nloc, nnei) + + Returns + ------- + torch.Tensor + The atomic energy for all local atoms for all frames. (nframes, nloc, nnei) + """ + a3, a2, a1, a0 = torch.unbind(coef, dim=-1) + etmp = (a3 * uu + a2) * uu + a1 # this should be elementwise operations.
+ ener = etmp * uu + a0 # this energy has the extrapolated value when rcut > rmax + return ener + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + return 0 + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return 0 + + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + return [] + + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + return False diff --git a/deepmd/pt/model/backbone/__init__.py b/deepmd/pt/model/backbone/__init__.py new file mode 100644 index 0000000000..a76bdb2a2d --- /dev/null +++ b/deepmd/pt/model/backbone/__init__.py @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from .backbone import ( + BackBone, +) +from .evoformer2b import ( + Evoformer2bBackBone, +) + +__all__ = [ + "BackBone", + "Evoformer2bBackBone", +] diff --git a/deepmd/pt/model/backbone/backbone.py b/deepmd/pt/model/backbone/backbone.py new file mode 100644 index 0000000000..ddeedfeff5 --- /dev/null +++ b/deepmd/pt/model/backbone/backbone.py @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import torch + + +class BackBone(torch.nn.Module): + def __init__(self, **kwargs): + """BackBone base method.""" + super().__init__() + + def forward(self, **kwargs): + """Calculate backBone.""" + raise NotImplementedError diff --git a/deepmd/pt/model/backbone/evoformer2b.py b/deepmd/pt/model/backbone/evoformer2b.py new file mode 100644 index 0000000000..1146b3a298 --- /dev/null +++ b/deepmd/pt/model/backbone/evoformer2b.py @@ -0,0 +1,103 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.pt.model.backbone import ( + 
BackBone, +) +from deepmd.pt.model.network.network import ( + Evoformer2bEncoder, +) + + +class Evoformer2bBackBone(BackBone): + def __init__( + self, + nnei, + layer_num=6, + attn_head=8, + atomic_dim=1024, + pair_dim=100, + feature_dim=1024, + ffn_dim=2048, + post_ln=False, + final_layer_norm=True, + final_head_layer_norm=False, + emb_layer_norm=False, + atomic_residual=False, + evo_residual=False, + residual_factor=1.0, + activation_function="gelu", + **kwargs, + ): + """Construct an evoformer backBone.""" + super().__init__() + self.nnei = nnei + self.layer_num = layer_num + self.attn_head = attn_head + self.atomic_dim = atomic_dim + self.pair_dim = pair_dim + self.feature_dim = feature_dim + self.head_dim = feature_dim // attn_head + assert ( + feature_dim % attn_head == 0 + ), f"feature_dim {feature_dim} must be divided by attn_head {attn_head}!" + self.ffn_dim = ffn_dim + self.post_ln = post_ln + self.final_layer_norm = final_layer_norm + self.final_head_layer_norm = final_head_layer_norm + self.emb_layer_norm = emb_layer_norm + self.activation_function = activation_function + self.atomic_residual = atomic_residual + self.evo_residual = evo_residual + self.residual_factor = float(residual_factor) + self.encoder = Evoformer2bEncoder( + nnei=self.nnei, + layer_num=self.layer_num, + attn_head=self.attn_head, + atomic_dim=self.atomic_dim, + pair_dim=self.pair_dim, + feature_dim=self.feature_dim, + ffn_dim=self.ffn_dim, + post_ln=self.post_ln, + final_layer_norm=self.final_layer_norm, + final_head_layer_norm=self.final_head_layer_norm, + emb_layer_norm=self.emb_layer_norm, + atomic_residual=self.atomic_residual, + evo_residual=self.evo_residual, + residual_factor=self.residual_factor, + activation_function=self.activation_function, + ) + + def forward(self, atomic_rep, pair_rep, nlist, nlist_type, nlist_mask): + """Encoder the atomic and pair representations. + + Args: + - atomic_rep: Atomic representation with shape [nframes, nloc, atomic_dim]. 
+ - pair_rep: Pair representation with shape [nframes, nloc, nnei, pair_dim]. + - nlist: Neighbor list with shape [nframes, nloc, nnei]. + - nlist_type: Neighbor types with shape [nframes, nloc, nnei]. + - nlist_mask: Neighbor mask with shape [nframes, nloc, nnei], `False` if blank. + + Returns + ------- + - atomic_rep: Atomic representation after encoder with shape [nframes, nloc, feature_dim]. + - transformed_atomic_rep: Transformed atomic representation after encoder with shape [nframes, nloc, atomic_dim]. + - pair_rep: Pair representation after encoder with shape [nframes, nloc, nnei, attn_head]. + - delta_pair_rep: Delta pair representation after encoder with shape [nframes, nloc, nnei, attn_head]. + - norm_x: Normalization loss of atomic_rep. + - norm_delta_pair_rep: Normalization loss of delta_pair_rep. + """ + ( + atomic_rep, + transformed_atomic_rep, + pair_rep, + delta_pair_rep, + norm_x, + norm_delta_pair_rep, + ) = self.encoder(atomic_rep, pair_rep, nlist, nlist_type, nlist_mask) + return ( + atomic_rep, + transformed_atomic_rep, + pair_rep, + delta_pair_rep, + norm_x, + norm_delta_pair_rep, + ) diff --git a/deepmd/pt/model/descriptor/__init__.py b/deepmd/pt/model/descriptor/__init__.py new file mode 100644 index 0000000000..325cf29e42 --- /dev/null +++ b/deepmd/pt/model/descriptor/__init__.py @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from .base_descriptor import ( + BaseDescriptor, +) +from .descriptor import ( + DescriptorBlock, + make_default_type_embedding, +) +from .dpa1 import ( + DescrptBlockSeAtten, + DescrptDPA1, +) +from .dpa2 import ( + DescrptDPA2, +) +from .env_mat import ( + prod_env_mat, +) +from .gaussian_lcc import ( + DescrptGaussianLcc, +) +from .hybrid import ( + DescrptBlockHybrid, + DescrptHybrid, +) +from .repformers import ( + DescrptBlockRepformers, +) +from .se_a import ( + DescrptBlockSeA, + DescrptSeA, +) +from .se_r import ( + DescrptSeR, +) + +__all__ = [ + "BaseDescriptor", + "DescriptorBlock", + 
"make_default_type_embedding", + "DescrptBlockSeA", + "DescrptBlockSeAtten", + "DescrptSeA", + "DescrptSeR", + "DescrptDPA1", + "DescrptDPA2", + "DescrptHybrid", + "prod_env_mat", + "DescrptGaussianLcc", + "DescrptBlockHybrid", + "DescrptBlockRepformers", +] diff --git a/deepmd/pt/model/descriptor/base_descriptor.py b/deepmd/pt/model/descriptor/base_descriptor.py new file mode 100644 index 0000000000..aa142b3acb --- /dev/null +++ b/deepmd/pt/model/descriptor/base_descriptor.py @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import torch + +from deepmd.dpmodel.descriptor import ( + make_base_descriptor, +) + +BaseDescriptor = make_base_descriptor(torch.Tensor, "forward") diff --git a/deepmd/pt/model/descriptor/descriptor.py b/deepmd/pt/model/descriptor/descriptor.py new file mode 100644 index 0000000000..5aae848aa4 --- /dev/null +++ b/deepmd/pt/model/descriptor/descriptor.py @@ -0,0 +1,168 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + Callable, + Dict, + List, + Optional, + Union, +) + +import torch + +from deepmd.pt.model.network.network import ( + TypeEmbedNet, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.env_mat_stat import ( + EnvMatStatSe, +) +from deepmd.utils.env_mat_stat import ( + StatItem, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.plugin import ( + make_plugin_registry, +) + +log = logging.getLogger(__name__) + + +class DescriptorBlock(torch.nn.Module, ABC, make_plugin_registry("DescriptorBlock")): + """The building block of descriptor. + Given the input descriptor, provide with the atomic coordinates, + atomic types and neighbor list, calculate the new descriptor. 
+ """ + + local_cluster = False + + def __new__(cls, *args, **kwargs): + if cls is DescriptorBlock: + try: + descrpt_type = kwargs["type"] + except KeyError: + raise KeyError("the type of DescriptorBlock should be set by `type`") + cls = cls.get_class_by_type(descrpt_type) + return super().__new__(cls) + + @abstractmethod + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + pass + + @abstractmethod + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + pass + + @abstractmethod + def get_sel(self) -> List[int]: + """Returns the number of selected atoms for each type.""" + pass + + @abstractmethod + def get_ntypes(self) -> int: + """Returns the number of element types.""" + pass + + @abstractmethod + def get_dim_out(self) -> int: + """Returns the output dimension.""" + pass + + @abstractmethod + def get_dim_in(self) -> int: + """Returns the output dimension.""" + pass + + @abstractmethod + def get_dim_emb(self) -> int: + """Returns the embedding dimension.""" + pass + + def compute_input_stats( + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. 
+ + """ + raise NotImplementedError + + def get_stats(self) -> Dict[str, StatItem]: + """Get the statistics of the descriptor.""" + raise NotImplementedError + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + assert ( + self.__class__ == base_class.__class__ + ), "Only descriptors of the same type can share params!" + if shared_level == 0: + # link buffers + if hasattr(self, "mean"): + if not resume: + # in case of change params during resume + base_env = EnvMatStatSe(base_class) + base_env.stats = base_class.stats + for kk in base_class.get_stats(): + base_env.stats[kk] += self.get_stats()[kk] + mean, stddev = base_env() + if not base_class.set_davg_zero: + base_class.mean.copy_(torch.tensor(mean, device=env.DEVICE)) + base_class.stddev.copy_(torch.tensor(stddev, device=env.DEVICE)) + # must share, even if not do stat + self.mean = base_class.mean + self.stddev = base_class.stddev + # self.load_state_dict(base_class.state_dict()) # this does not work, because it only inits the model + # the following will successfully link all the params except buffers + for item in self._modules: + self._modules[item] = base_class._modules[item] + else: + raise NotImplementedError + + @abstractmethod + def forward( + self, + nlist: torch.Tensor, + extended_coord: torch.Tensor, + extended_atype: torch.Tensor, + extended_atype_embd: Optional[torch.Tensor] = None, + mapping: Optional[torch.Tensor] = None, + ): + """Calculate DescriptorBlock.""" + pass + + +def make_default_type_embedding( + ntypes, +): + aux = {} + aux["tebd_dim"] = 8 + return TypeEmbedNet(ntypes, aux["tebd_dim"]), aux diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py new file mode 100644 index 
0000000000..21275317dc --- /dev/null +++ b/deepmd/pt/model/descriptor/dpa1.py @@ -0,0 +1,282 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Callable, + List, + Optional, + Tuple, + Union, +) + +import torch + +from deepmd.pt.model.network.network import ( + TypeEmbedNet, +) +from deepmd.pt.utils.update_sel import ( + UpdateSel, +) +from deepmd.utils.path import ( + DPPath, +) + +from .base_descriptor import ( + BaseDescriptor, +) +from .se_atten import ( + DescrptBlockSeAtten, +) + + +@BaseDescriptor.register("dpa1") +@BaseDescriptor.register("se_atten") +class DescrptDPA1(BaseDescriptor, torch.nn.Module): + def __init__( + self, + rcut, + rcut_smth, + sel, + ntypes: int, + neuron: list = [25, 50, 100], + axis_neuron: int = 16, + tebd_dim: int = 8, + tebd_input_mode: str = "concat", + # set_davg_zero: bool = False, + set_davg_zero: bool = True, # TODO + attn: int = 128, + attn_layer: int = 2, + attn_dotr: bool = True, + attn_mask: bool = False, + post_ln=True, + ffn=False, + ffn_embed_dim=1024, + activation_function="tanh", + scaling_factor=1.0, + head_num=1, + normalize=True, + temperature=None, + return_rot=False, + concat_output_tebd: bool = True, + env_protection: float = 0.0, + type: Optional[str] = None, + # not implemented + resnet_dt: bool = False, + type_one_side: bool = True, + precision: str = "default", + trainable: bool = True, + exclude_types: List[Tuple[int, int]] = [], + stripped_type_embedding: bool = False, + smooth_type_embdding: bool = False, + ): + super().__init__() + if resnet_dt: + raise NotImplementedError("resnet_dt is not supported.") + if not type_one_side: + raise NotImplementedError("type_one_side is not supported.") + if precision != "default" and precision != "float64": + raise NotImplementedError("precison is not supported.") + if stripped_type_embedding: + raise NotImplementedError("stripped_type_embedding is not supported.") + if smooth_type_embdding: + raise NotImplementedError("smooth_type_embdding is 
not supported.") + del type + self.se_atten = DescrptBlockSeAtten( + rcut, + rcut_smth, + sel, + ntypes, + neuron=neuron, + axis_neuron=axis_neuron, + tebd_dim=tebd_dim, + tebd_input_mode=tebd_input_mode, + set_davg_zero=set_davg_zero, + attn=attn, + attn_layer=attn_layer, + attn_dotr=attn_dotr, + attn_mask=attn_mask, + post_ln=post_ln, + ffn=ffn, + ffn_embed_dim=ffn_embed_dim, + activation_function=activation_function, + scaling_factor=scaling_factor, + head_num=head_num, + normalize=normalize, + temperature=temperature, + return_rot=return_rot, + exclude_types=exclude_types, + env_protection=env_protection, + ) + self.type_embedding = TypeEmbedNet(ntypes, tebd_dim) + self.tebd_dim = tebd_dim + self.concat_output_tebd = concat_output_tebd + # set trainable + for param in self.parameters(): + param.requires_grad = trainable + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.se_atten.get_rcut() + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return self.se_atten.get_nsel() + + def get_sel(self) -> List[int]: + """Returns the number of selected atoms for each type.""" + return self.se_atten.get_sel() + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.se_atten.get_ntypes() + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + ret = self.se_atten.get_dim_out() + if self.concat_output_tebd: + ret += self.tebd_dim + return ret + + def get_dim_emb(self) -> int: + return self.se_atten.dim_emb + + def mixed_types(self) -> bool: + """If true, the discriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the discriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. 
+ + """ + return self.se_atten.mixed_types() + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + assert ( + self.__class__ == base_class.__class__ + ), "Only descriptors of the same type can share params!" + # For DPA1 descriptors, the user-defined share-level + # shared_level: 0 + # share all parameters in both type_embedding and se_atten + if shared_level == 0: + self._modules["type_embedding"] = base_class._modules["type_embedding"] + self.se_atten.share_params(base_class.se_atten, 0, resume=resume) + # shared_level: 1 + # share all parameters in type_embedding + elif shared_level == 1: + self._modules["type_embedding"] = base_class._modules["type_embedding"] + # Other shared levels + else: + raise NotImplementedError + + @property + def dim_out(self): + return self.get_dim_out() + + @property + def dim_emb(self): + return self.get_dim_emb() + + def compute_input_stats( + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. 
+ + """ + return self.se_atten.compute_input_stats(merged, path) + + def serialize(self) -> dict: + """Serialize the obj to dict.""" + raise NotImplementedError + + @classmethod + def deserialize(cls) -> "DescrptDPA1": + """Deserialize from a dict.""" + raise NotImplementedError + + def forward( + self, + extended_coord: torch.Tensor, + extended_atype: torch.Tensor, + nlist: torch.Tensor, + mapping: Optional[torch.Tensor] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + coord_ext + The extended coordinates of atoms. shape: nf x (nallx3) + atype_ext + The extended aotm types. shape: nf x nall + nlist + The neighbor list. shape: nf x nloc x nnei + mapping + The index mapping, not required by this descriptor. + + Returns + ------- + descriptor + The descriptor. shape: nf x nloc x (ng x axis_neuron) + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3 + g2 + The rotationally invariant pair-partical representation. + shape: nf x nloc x nnei x ng + h2 + The rotationally equivariant pair-partical representation. + shape: nf x nloc x nnei x 3 + sw + The smooth switch function. shape: nf x nloc x nnei + + """ + del mapping + nframes, nloc, nnei = nlist.shape + nall = extended_coord.view(nframes, -1).shape[1] // 3 + g1_ext = self.type_embedding(extended_atype) + g1_inp = g1_ext[:, :nloc, :] + g1, g2, h2, rot_mat, sw = self.se_atten( + nlist, + extended_coord, + extended_atype, + g1_ext, + mapping=None, + ) + if self.concat_output_tebd: + g1 = torch.cat([g1, g1_inp], dim=-1) + + return g1, rot_mat, g2, h2, sw + + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. 
+ + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + local_jdata_cpy = local_jdata.copy() + return UpdateSel().update_one_sel(global_jdata, local_jdata_cpy, True) diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py new file mode 100644 index 0000000000..fb792a51e2 --- /dev/null +++ b/deepmd/pt/model/descriptor/dpa2.py @@ -0,0 +1,501 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Callable, + List, + Optional, + Tuple, + Union, +) + +import torch + +from deepmd.pt.model.network.network import ( + Identity, + Linear, + TypeEmbedNet, +) +from deepmd.pt.utils.nlist import ( + build_multiple_neighbor_list, + get_multiple_nlist_key, +) +from deepmd.pt.utils.update_sel import ( + UpdateSel, +) +from deepmd.utils.path import ( + DPPath, +) + +from .base_descriptor import ( + BaseDescriptor, +) +from .repformers import ( + DescrptBlockRepformers, +) +from .se_atten import ( + DescrptBlockSeAtten, +) + + +@BaseDescriptor.register("dpa2") +class DescrptDPA2(torch.nn.Module, BaseDescriptor): + def __init__( + self, + ntypes: int, + repinit_rcut: float, + repinit_rcut_smth: float, + repinit_nsel: int, + repformer_rcut: float, + repformer_rcut_smth: float, + repformer_nsel: int, + # kwargs + tebd_dim: int = 8, + concat_output_tebd: bool = True, + repinit_neuron: List[int] = [25, 50, 100], + repinit_axis_neuron: int = 16, + repinit_set_davg_zero: bool = True, # TODO + repinit_activation="tanh", + # repinit still unclear: + # ffn, ffn_embed_dim, scaling_factor, normalize, + repformer_nlayers: int = 3, + repformer_g1_dim: int = 128, + repformer_g2_dim: int = 16, + repformer_axis_dim: int = 4, + repformer_do_bn_mode: str = "no", + repformer_bn_momentum: float = 0.1, + repformer_update_g1_has_conv: bool = True, + repformer_update_g1_has_drrd: bool = True, + repformer_update_g1_has_grrg: bool = True, + 
repformer_update_g1_has_attn: bool = True, + repformer_update_g2_has_g1g1: bool = True, + repformer_update_g2_has_attn: bool = True, + repformer_update_h2: bool = False, + repformer_attn1_hidden: int = 64, + repformer_attn1_nhead: int = 4, + repformer_attn2_hidden: int = 16, + repformer_attn2_nhead: int = 4, + repformer_attn2_has_gate: bool = False, + repformer_activation: str = "tanh", + repformer_update_style: str = "res_avg", + repformer_set_davg_zero: bool = True, # TODO + repformer_add_type_ebd_to_seq: bool = False, + env_protection: float = 0.0, + trainable: bool = True, + exclude_types: List[Tuple[int, int]] = [], + type: Optional[ + str + ] = None, # work around the bad design in get_trainer and DpLoaderSet! + rcut: Optional[ + float + ] = None, # work around the bad design in get_trainer and DpLoaderSet! + rcut_smth: Optional[ + float + ] = None, # work around the bad design in get_trainer and DpLoaderSet! + sel: Optional[ + int + ] = None, # work around the bad design in get_trainer and DpLoaderSet! + ): + r"""The DPA-2 descriptor. see https://arxiv.org/abs/2312.15492. + + Parameters + ---------- + ntypes : int + Number of atom types + repinit_rcut : float + The cut-off radius of the repinit block + repinit_rcut_smth : float + From this position the inverse distance smoothly decays + to 0 at the cut-off. Use in the repinit block. + repinit_nsel : int + Maximally possible number of neighbors for repinit block. + repformer_rcut : float + The cut-off radius of the repformer block + repformer_rcut_smth : float + From this position the inverse distance smoothly decays + to 0 at the cut-off. Use in the repformer block. + repformer_nsel : int + Maximally possible number of neighbors for repformer block. + tebd_dim : int + The dimension of atom type embedding + concat_output_tebd : bool + Whether to concat type embedding at the output of the descriptor. + repinit_neuron : List[int] + repinit block: the number of neurons in the embedding net. 
+ repinit_axis_neuron : int + repinit block: the number of dimension of split in the + symmetrization op. + repinit_activation : str + repinit block: the activation function in the embedding net + repformer_nlayers : int + repformers block: the number of repformer layers + repformer_g1_dim : int + repformers block: the dimension of single-atom rep + repformer_g2_dim : int + repformers block: the dimension of invariant pair-atom rep + repformer_axis_dim : int + repformers block: the number of dimension of split in the + symmetrization ops. + repformer_do_bn_mode : bool + repformers block: do batch norm in the repformer layers + repformer_bn_momentum : float + repformers block: moment in the batch normalization + repformer_update_g1_has_conv : bool + repformers block: update the g1 rep with convolution term + repformer_update_g1_has_drrd : bool + repformers block: update the g1 rep with the drrd term + repformer_update_g1_has_grrg : bool + repformers block: update the g1 rep with the grrg term + repformer_update_g1_has_attn : bool + repformers block: update the g1 rep with the localized + self-attention + repformer_update_g2_has_g1g1 : bool + repformers block: update the g2 rep with the g1xg1 term + repformer_update_g2_has_attn : bool + repformers block: update the g2 rep with the gated self-attention + repformer_update_h2 : bool + repformers block: update the h2 rep + repformer_attn1_hidden : int + repformers block: the hidden dimension of localized self-attention + repformer_attn1_nhead : int + repformers block: the number of heads in localized self-attention + repformer_attn2_hidden : int + repformers block: the hidden dimension of gated self-attention + repformer_attn2_nhead : int + repformers block: the number of heads in gated self-attention + repformer_attn2_has_gate : bool + repformers block: has gate in the gated self-attention + repformer_activation : str + repformers block: the activation function in the MLPs. 
+ repformer_update_style : str + repformers block: style of update a rep. + can be res_avg or res_incr. + res_avg updates a rep `u` with: + u = 1/\sqrt{n+1} (u + u_1 + u_2 + ... + u_n) + res_incr updates a rep `u` with: + u = u + 1/\sqrt{n} (u_1 + u_2 + ... + u_n) + repformer_set_davg_zero : bool + repformers block: set the avg to zero in statistics + repformer_add_type_ebd_to_seq : bool + repformers block: concatenate the type embedding at the output. + trainable : bool + If the parameters in the descriptor are trainable. + exclude_types : List[Tuple[int, int]] = [], + The excluded pairs of types which have no interaction with each other. + For example, `[[0, 1]]` means no interaction between type 0 and type 1. + + Returns + ------- + descriptor: torch.Tensor + the descriptor of shape nb x nloc x g1_dim. + invariant single-atom representation. + g2: torch.Tensor + invariant pair-atom representation. + h2: torch.Tensor + equivariant pair-atom representation. + rot_mat: torch.Tensor + rotation matrix for equivariant fittings + sw: torch.Tensor + The switch function for decaying inverse distance. 
+ + """ + super().__init__() + del type, rcut, rcut_smth, sel + self.repinit = DescrptBlockSeAtten( + repinit_rcut, + repinit_rcut_smth, + repinit_nsel, + ntypes, + attn_layer=0, + neuron=repinit_neuron, + axis_neuron=repinit_axis_neuron, + tebd_dim=tebd_dim, + tebd_input_mode="concat", + # tebd_input_mode='dot_residual_s', + set_davg_zero=repinit_set_davg_zero, + exclude_types=exclude_types, + env_protection=env_protection, + activation_function=repinit_activation, + ) + self.repformers = DescrptBlockRepformers( + repformer_rcut, + repformer_rcut_smth, + repformer_nsel, + ntypes, + nlayers=repformer_nlayers, + g1_dim=repformer_g1_dim, + g2_dim=repformer_g2_dim, + axis_dim=repformer_axis_dim, + direct_dist=False, + do_bn_mode=repformer_do_bn_mode, + bn_momentum=repformer_bn_momentum, + update_g1_has_conv=repformer_update_g1_has_conv, + update_g1_has_drrd=repformer_update_g1_has_drrd, + update_g1_has_grrg=repformer_update_g1_has_grrg, + update_g1_has_attn=repformer_update_g1_has_attn, + update_g2_has_g1g1=repformer_update_g2_has_g1g1, + update_g2_has_attn=repformer_update_g2_has_attn, + update_h2=repformer_update_h2, + attn1_hidden=repformer_attn1_hidden, + attn1_nhead=repformer_attn1_nhead, + attn2_hidden=repformer_attn2_hidden, + attn2_nhead=repformer_attn2_nhead, + attn2_has_gate=repformer_attn2_has_gate, + activation_function=repformer_activation, + update_style=repformer_update_style, + set_davg_zero=repformer_set_davg_zero, + smooth=True, + add_type_ebd_to_seq=repformer_add_type_ebd_to_seq, + exclude_types=exclude_types, + env_protection=env_protection, + ) + self.type_embedding = TypeEmbedNet(ntypes, tebd_dim) + if self.repinit.dim_out == self.repformers.dim_in: + self.g1_shape_tranform = Identity() + else: + self.g1_shape_tranform = Linear( + self.repinit.dim_out, + self.repformers.dim_in, + bias=False, + init="glorot", + ) + assert self.repinit.rcut > self.repformers.rcut + assert self.repinit.sel[0] > self.repformers.sel[0] + self.concat_output_tebd = 
concat_output_tebd + self.tebd_dim = tebd_dim + self.rcut = self.repinit.get_rcut() + self.ntypes = ntypes + self.sel = self.repinit.sel + # set trainable + for param in self.parameters(): + param.requires_grad = trainable + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.rcut + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return sum(self.sel) + + def get_sel(self) -> List[int]: + """Returns the number of selected atoms for each type.""" + return self.sel + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.ntypes + + def get_dim_out(self) -> int: + """Returns the output dimension of this descriptor.""" + ret = self.repformers.dim_out + if self.concat_output_tebd: + ret += self.tebd_dim + return ret + + def get_dim_emb(self) -> int: + """Returns the embedding dimension of this descriptor.""" + return self.repformers.dim_emb + + def mixed_types(self) -> bool: + """If true, the discriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the discriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. + + """ + return True + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + assert ( + self.__class__ == base_class.__class__ + ), "Only descriptors of the same type can share params!" 
+ # For DPA2 descriptors, the user-defined share-level + # shared_level: 0 + # share all parameters in type_embedding, repinit and repformers + if shared_level == 0: + self._modules["type_embedding"] = base_class._modules["type_embedding"] + self.repinit.share_params(base_class.repinit, 0, resume=resume) + self._modules["g1_shape_tranform"] = base_class._modules[ + "g1_shape_tranform" + ] + self.repformers.share_params(base_class.repformers, 0, resume=resume) + # shared_level: 1 + # share all parameters in type_embedding and repinit + elif shared_level == 1: + self._modules["type_embedding"] = base_class._modules["type_embedding"] + self.repinit.share_params(base_class.repinit, 0, resume=resume) + # shared_level: 2 + # share all parameters in type_embedding and repformers + elif shared_level == 2: + self._modules["type_embedding"] = base_class._modules["type_embedding"] + self._modules["g1_shape_tranform"] = base_class._modules[ + "g1_shape_tranform" + ] + self.repformers.share_params(base_class.repformers, 0, resume=resume) + # shared_level: 3 + # share all parameters in type_embedding + elif shared_level == 3: + self._modules["type_embedding"] = base_class._modules["type_embedding"] + # Other shared levels + else: + raise NotImplementedError + + @property + def dim_out(self): + return self.get_dim_out() + + @property + def dim_emb(self): + """Returns the embedding dimension g2.""" + return self.get_dim_emb() + + def compute_input_stats( + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. 
+ - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ + for ii, descrpt in enumerate([self.repinit, self.repformers]): + descrpt.compute_input_stats(merged, path) + + def serialize(self) -> dict: + """Serialize the obj to dict.""" + raise NotImplementedError + + @classmethod + def deserialize(cls) -> "DescrptDPA2": + """Deserialize from a dict.""" + raise NotImplementedError + + def forward( + self, + extended_coord: torch.Tensor, + extended_atype: torch.Tensor, + nlist: torch.Tensor, + mapping: Optional[torch.Tensor] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + coord_ext + The extended coordinates of atoms. shape: nf x (nallx3) + atype_ext + The extended aotm types. shape: nf x nall + nlist + The neighbor list. shape: nf x nloc x nnei + mapping + The index mapping, mapps extended region index to local region. + + Returns + ------- + descriptor + The descriptor. shape: nf x nloc x (ng x axis_neuron) + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3 + g2 + The rotationally invariant pair-partical representation. + shape: nf x nloc x nnei x ng + h2 + The rotationally equivariant pair-partical representation. + shape: nf x nloc x nnei x 3 + sw + The smooth switch function. 
shape: nf x nloc x nnei + + """ + nframes, nloc, nnei = nlist.shape + nall = extended_coord.view(nframes, -1).shape[1] // 3 + # nlists + nlist_dict = build_multiple_neighbor_list( + extended_coord, + nlist, + [self.repformers.get_rcut(), self.repinit.get_rcut()], + [self.repformers.get_nsel(), self.repinit.get_nsel()], + ) + # repinit + g1_ext = self.type_embedding(extended_atype) + g1_inp = g1_ext[:, :nloc, :] + g1, _, _, _, _ = self.repinit( + nlist_dict[ + get_multiple_nlist_key(self.repinit.get_rcut(), self.repinit.get_nsel()) + ], + extended_coord, + extended_atype, + g1_ext, + mapping, + ) + # linear to change shape + g1 = self.g1_shape_tranform(g1) + # mapping g1 + assert mapping is not None + mapping_ext = ( + mapping.view(nframes, nall).unsqueeze(-1).expand(-1, -1, g1.shape[-1]) + ) + g1_ext = torch.gather(g1, 1, mapping_ext) + # repformer + g1, g2, h2, rot_mat, sw = self.repformers( + nlist_dict[ + get_multiple_nlist_key( + self.repformers.get_rcut(), self.repformers.get_nsel() + ) + ], + extended_coord, + extended_atype, + g1_ext, + mapping, + ) + if self.concat_output_tebd: + g1 = torch.cat([g1, g1_inp], dim=-1) + return g1, rot_mat, g2, h2, sw + + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. 
+ + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + local_jdata_cpy = local_jdata.copy() + update_sel = UpdateSel() + local_jdata_cpy = update_sel.update_one_sel( + global_jdata, + local_jdata_cpy, + True, + rcut_key="repinit_rcut", + sel_key="repinit_nsel", + ) + local_jdata_cpy = update_sel.update_one_sel( + global_jdata, + local_jdata_cpy, + True, + rcut_key="repformer_rcut", + sel_key="repformer_nsel", + ) + return local_jdata_cpy diff --git a/deepmd/pt/model/descriptor/env_mat.py b/deepmd/pt/model/descriptor/env_mat.py new file mode 100644 index 0000000000..e89e7467d3 --- /dev/null +++ b/deepmd/pt/model/descriptor/env_mat.py @@ -0,0 +1,82 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +import torch + +from deepmd.pt.utils.preprocess import ( + compute_smooth_weight, +) + + +def _make_env_mat( + nlist, + coord, + rcut: float, + ruct_smth: float, + radial_only: bool = False, + protection: float = 0.0, +): + """Make smooth environment matrix.""" + bsz, natoms, nnei = nlist.shape + coord = coord.view(bsz, -1, 3) + nall = coord.shape[1] + mask = nlist >= 0 + # nlist = nlist * mask ## this impl will contribute nans in Hessian calculation. 
+ nlist = torch.where(mask, nlist, nall - 1) + coord_l = coord[:, :natoms].view(bsz, -1, 1, 3) + index = nlist.view(bsz, -1).unsqueeze(-1).expand(-1, -1, 3) + coord_r = torch.gather(coord, 1, index) + coord_r = coord_r.view(bsz, natoms, nnei, 3) + diff = coord_r - coord_l + length = torch.linalg.norm(diff, dim=-1, keepdim=True) + # for index 0 nloc atom + length = length + ~mask.unsqueeze(-1) + t0 = 1 / (length + protection) + t1 = diff / (length + protection) ** 2 + weight = compute_smooth_weight(length, ruct_smth, rcut) + weight = weight * mask.unsqueeze(-1) + if radial_only: + env_mat = t0 * weight + else: + env_mat = torch.cat([t0, t1], dim=-1) * weight + return env_mat, diff * mask.unsqueeze(-1), weight + + +def prod_env_mat( + extended_coord, + nlist, + atype, + mean, + stddev, + rcut: float, + rcut_smth: float, + radial_only: bool = False, + protection: float = 0.0, +): + """Generate smooth environment matrix from atom coordinates and other context. + + Args: + - extended_coord: Copied atom coordinates with shape [nframes, nall*3]. + - atype: Atom types with shape [nframes, nloc]. + - mean: Average value of descriptor per element type with shape [len(sec), nnei, 4 or 1]. + - stddev: Standard deviation of descriptor per element type with shape [len(sec), nnei, 4 or 1]. + - rcut: Cut-off radius. + - rcut_smth: Smooth hyper-parameter for pair force & energy. + - radial_only: Whether to return a full description or a radial-only descriptor. + - protection: Protection parameter to prevent division by zero errors during calculations. + + Returns + ------- + - env_mat: Shape is [nframes, natoms[1]*nnei*4]. 
+ """ + _env_mat_se_a, diff, switch = _make_env_mat( + nlist, + extended_coord, + rcut, + rcut_smth, + radial_only, + protection=protection, + ) # shape [n_atom, dim, 4 or 1] + t_avg = mean[atype] # [n_atom, dim, 4 or 1] + t_std = stddev[atype] # [n_atom, dim, 4 or 1] + env_mat_se_a = (_env_mat_se_a - t_avg) / t_std + return env_mat_se_a, diff, switch diff --git a/deepmd/pt/model/descriptor/gaussian_lcc.py b/deepmd/pt/model/descriptor/gaussian_lcc.py new file mode 100644 index 0000000000..e0708dd9e0 --- /dev/null +++ b/deepmd/pt/model/descriptor/gaussian_lcc.py @@ -0,0 +1,320 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + List, + Optional, +) + +import torch +import torch.nn as nn + +from deepmd.pt.model.descriptor.base_descriptor import ( + BaseDescriptor, +) +from deepmd.pt.model.network.network import ( + Evoformer3bEncoder, + GaussianEmbedding, + TypeEmbedNet, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.utils.path import ( + DPPath, +) + + +class DescrptGaussianLcc(torch.nn.Module, BaseDescriptor): + def __init__( + self, + rcut, + rcut_smth, + sel: int, + ntypes: int, + num_pair: int, + embed_dim: int = 768, + kernel_num: int = 128, + pair_embed_dim: int = 64, + num_block: int = 1, + layer_num: int = 12, + attn_head: int = 48, + pair_hidden_dim: int = 16, + ffn_embedding_dim: int = 768, + dropout: float = 0.0, + droppath_prob: float = 0.1, + pair_dropout: float = 0.25, + attention_dropout: float = 0.1, + activation_dropout: float = 0.1, + pre_ln: bool = True, + do_tag_embedding: bool = False, + tag_ener_pref: bool = False, + atomic_sum_gbf: bool = False, + pre_add_seq: bool = True, + tri_update: bool = True, + **kwargs, + ): + """Construct a descriptor of Gaussian Based Local Cluster. + + Args: + - rcut: Cut-off radius. + - rcut_smth: Smooth hyper-parameter for pair force & energy. **Not used in this descriptor**. + - sel: For each element type, how many atoms is selected as neighbors. + - ntypes: Number of atom types. 
+ - num_pair: Number of atom type pairs. Default is 2 * ntypes. + - kernel_num: Number of gaussian kernels. + - embed_dim: Dimension of atomic representation. + - pair_embed_dim: Dimension of pair representation. + - num_block: Number of evoformer blocks. + - layer_num: Number of attention layers. + - attn_head: Number of attention heads. + - pair_hidden_dim: Hidden dimension of pair representation during attention process. + - ffn_embedding_dim: Dimension during feed forward network. + - dropout: Dropout probability of atomic representation. + - droppath_prob: If not zero, it will use drop paths (Stochastic Depth) per sample and ignore `dropout`. + - pair_dropout: Dropout probability of pair representation during triangular update. + - attention_dropout: Dropout probability during attention process. + - activation_dropout: Dropout probability of pair feed forward network. + - pre_ln: Do previous layer norm or not. + - do_tag_embedding: Add tag embedding to atomic and pair representations. (`tags`, `tags2`, `tags3` must exist) + - atomic_sum_gbf: Add sum of gaussian outputs to atomic representation or not. + - pre_add_seq: Add output of other descriptor (if any) to the atomic representation before attention.
+ """ + super().__init__() + self.rcut = rcut + self.rcut_smth = rcut_smth + self.embed_dim = embed_dim + self.num_pair = num_pair + self.kernel_num = kernel_num + self.pair_embed_dim = pair_embed_dim + self.num_block = num_block + self.layer_num = layer_num + self.attention_heads = attn_head + self.pair_hidden_dim = pair_hidden_dim + self.ffn_embedding_dim = ffn_embedding_dim + self.dropout = dropout + self.droppath_prob = droppath_prob + self.pair_dropout = pair_dropout + self.attention_dropout = attention_dropout + self.activation_dropout = activation_dropout + self.pre_ln = pre_ln + self.do_tag_embedding = do_tag_embedding + self.tag_ener_pref = tag_ener_pref + self.atomic_sum_gbf = atomic_sum_gbf + self.local_cluster = True + self.pre_add_seq = pre_add_seq + self.tri_update = tri_update + + if isinstance(sel, int): + sel = [sel] + + self.ntypes = ntypes + self.sec = torch.tensor(sel) + self.nnei = sum(sel) + + if self.do_tag_embedding: + self.tag_encoder = nn.Embedding(3, self.embed_dim) + self.tag_encoder2 = nn.Embedding(2, self.embed_dim) + self.tag_type_embedding = TypeEmbedNet(10, pair_embed_dim) + self.edge_type_embedding = nn.Embedding( + (ntypes + 1) * (ntypes + 1), + pair_embed_dim, + padding_idx=(ntypes + 1) * (ntypes + 1) - 1, + dtype=env.GLOBAL_PT_FLOAT_PRECISION, + ) + self.gaussian_encoder = GaussianEmbedding( + rcut, + kernel_num, + num_pair, + embed_dim, + pair_embed_dim, + sel, + ntypes, + atomic_sum_gbf, + ) + self.backbone = Evoformer3bEncoder( + self.nnei, + layer_num=self.layer_num, + attn_head=self.attention_heads, + atomic_dim=self.embed_dim, + pair_dim=self.pair_embed_dim, + pair_hidden_dim=self.pair_hidden_dim, + ffn_embedding_dim=self.ffn_embedding_dim, + dropout=self.dropout, + droppath_prob=self.droppath_prob, + pair_dropout=self.pair_dropout, + attention_dropout=self.attention_dropout, + activation_dropout=self.activation_dropout, + pre_ln=self.pre_ln, + tri_update=self.tri_update, + ) + + @property + def dim_out(self): + """Returns 
the output dimension of atomic representation.""" + return self.embed_dim + + @property + def dim_in(self): + """Returns the atomic input dimension of this descriptor.""" + return self.embed_dim + + @property + def dim_emb(self): + """Returns the output dimension of pair representation.""" + return self.pair_embed_dim + + def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + """Update mean and stddev for descriptor elements.""" + pass + + def forward( + self, + extended_coord, + nlist, + atype, + nlist_type, + nlist_loc=None, + atype_tebd=None, + nlist_tebd=None, + seq_input=None, + ): + """Calculate the atomic and pair representations of this descriptor. + + Args: + - extended_coord: Copied atom coordinates with shape [nframes, nall, 3]. + - nlist: Neighbor list with shape [nframes, nloc, nnei]. + - atype: Atom type with shape [nframes, nloc]. + - nlist_type: Atom type of neighbors with shape [nframes, nloc, nnei]. + - nlist_loc: Local index of neighbor list with shape [nframes, nloc, nnei]. + - atype_tebd: Atomic type embedding with shape [nframes, nloc, tebd_dim]. + - nlist_tebd: Type embeddings of neighbor with shape [nframes, nloc, nnei, tebd_dim]. + - seq_input: The sequential input from other descriptor with + shape [nframes, nloc, tebd_dim] or [nframes * nloc, 1 + nnei, tebd_dim] + + Returns + ------- + - result: descriptor with shape [nframes, nloc, self.filter_neuron[-1] * self.axis_neuron]. 
+ - ret: environment matrix with shape [nframes, nloc, self.neei, out_size] + """ + nframes, nloc = nlist.shape[:2] + nall = extended_coord.shape[1] + nlist2 = torch.cat( + [ + torch.arange(0, nloc, device=nlist.device) + .reshape(1, nloc, 1) + .expand(nframes, -1, -1), + nlist, + ], + dim=-1, + ) + nlist_loc2 = torch.cat( + [ + torch.arange(0, nloc, device=nlist_loc.device) + .reshape(1, nloc, 1) + .expand(nframes, -1, -1), + nlist_loc, + ], + dim=-1, + ) + nlist_type2 = torch.cat([atype.reshape(nframes, nloc, 1), nlist_type], dim=-1) + nnei2_mask = nlist2 != -1 + padding_mask = nlist2 == -1 + nlist2 = nlist2 * nnei2_mask + nlist_loc2 = nlist_loc2 * nnei2_mask + + # nframes x nloc x (1 + nnei2) x (1 + nnei2) + pair_mask = nnei2_mask.unsqueeze(-1) * nnei2_mask.unsqueeze(-2) + # nframes x nloc x (1 + nnei2) x (1 + nnei2) x head + attn_mask = torch.zeros( + [nframes, nloc, 1 + self.nnei, 1 + self.nnei, self.attention_heads], + device=nlist.device, + dtype=extended_coord.dtype, + ) + attn_mask.masked_fill_(padding_mask.unsqueeze(2).unsqueeze(-1), float("-inf")) + # (nframes x nloc) x head x (1 + nnei2) x (1 + nnei2) + attn_mask = ( + attn_mask.reshape( + nframes * nloc, 1 + self.nnei, 1 + self.nnei, self.attention_heads + ) + .permute(0, 3, 1, 2) + .contiguous() + ) + + # Atomic feature + # [(nframes x nloc) x (1 + nnei2) x tebd_dim] + atom_feature = torch.gather( + atype_tebd, + dim=1, + index=nlist_loc2.reshape(nframes, -1) + .unsqueeze(-1) + .expand(-1, -1, self.embed_dim), + ).reshape(nframes * nloc, 1 + self.nnei, self.embed_dim) + if self.pre_add_seq and seq_input is not None: + first_dim = seq_input.shape[0] + if first_dim == nframes * nloc: + atom_feature += seq_input + elif first_dim == nframes: + atom_feature_seq = torch.gather( + seq_input, + dim=1, + index=nlist_loc2.reshape(nframes, -1) + .unsqueeze(-1) + .expand(-1, -1, self.embed_dim), + ).reshape(nframes * nloc, 1 + self.nnei, self.embed_dim) + atom_feature += atom_feature_seq + else: + raise 
RuntimeError + atom_feature = atom_feature * nnei2_mask.reshape( + nframes * nloc, 1 + self.nnei, 1 + ) + + # Pair feature + # [(nframes x nloc) x (1 + nnei2)] + nlist_type2_reshape = nlist_type2.reshape(nframes * nloc, 1 + self.nnei) + # [(nframes x nloc) x (1 + nnei2) x (1 + nnei2)] + edge_type = nlist_type2_reshape.unsqueeze(-1) * ( + self.ntypes + 1 + ) + nlist_type2_reshape.unsqueeze(-2) + # [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x pair_dim] + edge_feature = self.edge_type_embedding(edge_type) + + # [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x 2] + edge_type_2dim = torch.cat( + [ + nlist_type2_reshape.view(nframes * nloc, 1 + self.nnei, 1, 1).expand( + -1, -1, 1 + self.nnei, -1 + ), + nlist_type2_reshape.view(nframes * nloc, 1, 1 + self.nnei, 1).expand( + -1, 1 + self.nnei, -1, -1 + ) + + self.ntypes, + ], + dim=-1, + ) + # [(nframes x nloc) x (1 + nnei2) x 3] + coord_selected = torch.gather( + extended_coord.unsqueeze(1) + .expand(-1, nloc, -1, -1) + .reshape(nframes * nloc, nall, 3), + dim=1, + index=nlist2.reshape(nframes * nloc, 1 + self.nnei, 1).expand(-1, -1, 3), + ) + + # Update pair features (or and atomic features) with gbf features + # delta_pos: [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x 3]. 
+ atomic_feature, pair_feature, delta_pos = self.gaussian_encoder( + coord_selected, atom_feature, edge_type_2dim, edge_feature + ) + # [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x pair_dim] + attn_bias = pair_feature + + # output: [(nframes x nloc) x (1 + nnei2) x tebd_dim] + # pair: [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x pair_dim] + output, pair = self.backbone( + atomic_feature, + pair=attn_bias, + attn_mask=attn_mask, + pair_mask=pair_mask, + atom_mask=nnei2_mask.reshape(nframes * nloc, 1 + self.nnei), + ) + + return output, pair, delta_pos, None diff --git a/deepmd/pt/model/descriptor/hybrid.py b/deepmd/pt/model/descriptor/hybrid.py new file mode 100644 index 0000000000..204ca7589d --- /dev/null +++ b/deepmd/pt/model/descriptor/hybrid.py @@ -0,0 +1,525 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Any, + Callable, + Dict, + List, + Optional, + Union, +) + +import numpy as np +import torch + +from deepmd.pt.model.descriptor import ( + DescriptorBlock, +) +from deepmd.pt.model.descriptor.base_descriptor import ( + BaseDescriptor, +) +from deepmd.pt.model.network.network import ( + Identity, + Linear, +) +from deepmd.pt.utils.nlist import ( + nlist_distinguish_types, +) +from deepmd.pt.utils.utils import ( + to_torch_tensor, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + + +@BaseDescriptor.register("hybrid") +class DescrptHybrid(BaseDescriptor, torch.nn.Module): + """Concatenate a list of descriptors to form a new descriptor. + + Parameters + ---------- + list : List[Union[BaseDescriptor, Dict[str, Any]]] + Build a descriptor from the concatenation of the list of descriptors. + The descriptor can be either an object or a dictionary.
+ """ + + def __init__( + self, + list: List[Union[BaseDescriptor, Dict[str, Any]]], + **kwargs, + ) -> None: + super().__init__() + # warning: list is conflict with built-in list + descrpt_list = list + if descrpt_list == [] or descrpt_list is None: + raise RuntimeError( + "cannot build descriptor from an empty list of descriptors." + ) + formatted_descript_list: List[BaseDescriptor] = [] + for ii in descrpt_list: + if isinstance(ii, BaseDescriptor): + formatted_descript_list.append(ii) + elif isinstance(ii, dict): + formatted_descript_list.append( + # pass other arguments (e.g. ntypes) to the descriptor + BaseDescriptor(**ii, **kwargs) + ) + else: + raise NotImplementedError + self.descrpt_list = torch.nn.ModuleList(formatted_descript_list) + self.numb_descrpt = len(self.descrpt_list) + for ii in range(1, self.numb_descrpt): + assert ( + self.descrpt_list[ii].get_ntypes() == self.descrpt_list[0].get_ntypes() + ), f"number of atom types in {ii}th descrptor does not match others" + # if hybrid sel is larger than sub sel, the nlist needs to be cut for each type + self.nlist_cut_idx: List[torch.Tensor] = [] + if self.mixed_types() and not all( + descrpt.mixed_types() for descrpt in self.descrpt_list + ): + self.sel_no_mixed_types = np.max( + [ + descrpt.get_sel() + for descrpt in self.descrpt_list + if not descrpt.mixed_types() + ], + axis=0, + ).tolist() + else: + self.sel_no_mixed_types = None + for ii in range(self.numb_descrpt): + if self.mixed_types() == self.descrpt_list[ii].mixed_types(): + hybrid_sel = self.get_sel() + else: + assert self.sel_no_mixed_types is not None + hybrid_sel = self.sel_no_mixed_types + sub_sel = self.descrpt_list[ii].get_sel() + start_idx = np.cumsum(np.pad(hybrid_sel, (1, 0), "constant"))[:-1] + end_idx = start_idx + np.array(sub_sel) + cut_idx = np.concatenate( + [range(ss, ee) for ss, ee in zip(start_idx, end_idx)] + ).astype(np.int64) + self.nlist_cut_idx.append(to_torch_tensor(cut_idx)) + + def get_rcut(self) -> float: + 
"""Returns the cut-off radius.""" + # do not use numpy here - jit is not happy + return max([descrpt.get_rcut() for descrpt in self.descrpt_list]) + + def get_sel(self) -> List[int]: + """Returns the number of selected atoms for each type.""" + if self.mixed_types(): + return [ + np.max( + [descrpt.get_nsel() for descrpt in self.descrpt_list], axis=0 + ).item() + ] + else: + return np.max( + [descrpt.get_sel() for descrpt in self.descrpt_list], axis=0 + ).tolist() + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.descrpt_list[0].get_ntypes() + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + return sum([descrpt.get_dim_out() for descrpt in self.descrpt_list]) + + def get_dim_emb(self) -> int: + """Returns the output dimension.""" + return sum([descrpt.get_dim_emb() for descrpt in self.descrpt_list]) + + def mixed_types(self): + """Returns if the descriptor requires a neighbor list that distinguish different + atomic types or not. + """ + return any(descrpt.mixed_types() for descrpt in self.descrpt_list) + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + assert ( + self.__class__ == base_class.__class__ + ), "Only descriptors of the same type can share params!" 
+ if shared_level == 0: + for ii, des in enumerate(self.descrpt_list): + self.descrpt_list[ii].share_params( + base_class.descrpt_list[ii], shared_level, resume=resume + ) + else: + raise NotImplementedError + + def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + """Update mean and stddev for descriptor elements.""" + for descrpt in self.descrpt_list: + descrpt.compute_input_stats(merged, path) + + def forward( + self, + coord_ext: torch.Tensor, + atype_ext: torch.Tensor, + nlist: torch.Tensor, + mapping: Optional[torch.Tensor] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + coord_ext + The extended coordinates of atoms. shape: nf x (nallx3) + atype_ext + The extended atom types. shape: nf x nall + nlist + The neighbor list. shape: nf x nloc x nnei + mapping + The index mapping, passed through unchanged to every sub-descriptor. + + Returns + ------- + descriptor + The descriptor. shape: nf x nloc x (ng x axis_neuron) + gr + The rotationally equivariant and permutationally invariant single particle + representation, concatenated over the sub-descriptors that return one. None if no sub-descriptor returns it + g2 + The rotationally invariant pair-particle representation. + This descriptor returns None + h2 + The rotationally equivariant pair-particle representation. + This descriptor returns None + sw + The smooth switch function.
this descriptor returns None + """ + out_descriptor = [] + out_gr = [] + out_g2: Optional[torch.Tensor] = None + out_h2: Optional[torch.Tensor] = None + out_sw: Optional[torch.Tensor] = None + if self.sel_no_mixed_types is not None: + nl_distinguish_types = nlist_distinguish_types( + nlist, + atype_ext, + self.sel_no_mixed_types, + ) + else: + nl_distinguish_types = None + # make jit happy + # for descrpt, nci in zip(self.descrpt_list, self.nlist_cut_idx): + for ii, descrpt in enumerate(self.descrpt_list): + # cut the nlist to the correct length + if self.mixed_types() == descrpt.mixed_types(): + nl = nlist[:, :, self.nlist_cut_idx[ii]] + else: + # mixed_types is True, but descrpt.mixed_types is False + assert nl_distinguish_types is not None + nl = nl_distinguish_types[:, :, self.nlist_cut_idx[ii]] + odescriptor, gr, g2, h2, sw = descrpt(coord_ext, atype_ext, nl, mapping) + out_descriptor.append(odescriptor) + if gr is not None: + out_gr.append(gr) + out_descriptor = torch.cat(out_descriptor, dim=-1) + out_gr = torch.cat(out_gr, dim=-2) if out_gr else None + return out_descriptor, out_gr, out_g2, out_h2, out_sw + + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict) -> dict: + """Update the selection and perform neighbor statistics. 
+ + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + local_jdata_cpy = local_jdata.copy() + local_jdata_cpy["list"] = [ + BaseDescriptor.update_sel(global_jdata, sub_jdata) + for sub_jdata in local_jdata["list"] + ] + return local_jdata_cpy + + def serialize(self) -> dict: + return { + "@class": "Descriptor", + "type": "hybrid", + "@version": 1, + "list": [descrpt.serialize() for descrpt in self.descrpt_list], + } + + @classmethod + def deserialize(cls, data: dict) -> "DescrptHybrid": + data = data.copy() + class_name = data.pop("@class") + assert class_name == "Descriptor" + class_type = data.pop("type") + assert class_type == "hybrid" + check_version_compatibility(data.pop("@version"), 1, 1) + obj = cls( + list=[BaseDescriptor.deserialize(ii) for ii in data["list"]], + ) + return obj + + +@DescriptorBlock.register("hybrid") +class DescrptBlockHybrid(DescriptorBlock): + def __init__( + self, + list, + ntypes: int, + tebd_dim: int = 8, + tebd_input_mode: str = "concat", + hybrid_mode: str = "concat", + **kwargs, + ): + """Construct a hybrid descriptor. + + Args: + - descriptor_list: list of descriptors. + - descriptor_param: descriptor configs. + """ + super().__init__() + supported_descrpt = ["se_atten", "se_uni"] + descriptor_list = [] + for descriptor_param_item in list: + descriptor_type_tmp = descriptor_param_item["type"] + assert ( + descriptor_type_tmp in supported_descrpt + ), f"Only descriptors in {supported_descrpt} are supported for `hybrid` descriptor!" 
+ descriptor_param_item["ntypes"] = ntypes + if descriptor_type_tmp == "se_atten": + descriptor_param_item["tebd_dim"] = tebd_dim + descriptor_param_item["tebd_input_mode"] = tebd_input_mode + descriptor_list.append(DescriptorBlock(**descriptor_param_item)) + self.descriptor_list = torch.nn.ModuleList(descriptor_list) + self.descriptor_param = list + self.rcut = [descrpt.rcut for descrpt in self.descriptor_list] + self.sec = [descrpt.sec for descrpt in self.descriptor_list] + self.sel = [descrpt.sel for descrpt in self.descriptor_list] + self.split_sel = [sum(ii) for ii in self.sel] + self.local_cluster_list = [ + descrpt.local_cluster for descrpt in self.descriptor_list + ] + self.local_cluster = True in self.local_cluster_list + self.hybrid_mode = hybrid_mode + self.tebd_dim = tebd_dim + assert self.hybrid_mode in ["concat", "sequential"] + sequential_transform = [] + if self.hybrid_mode == "sequential": + for ii in range(len(descriptor_list) - 1): + if descriptor_list[ii].dim_out == descriptor_list[ii + 1].dim_in: + sequential_transform.append(Identity()) + else: + sequential_transform.append( + Linear( + descriptor_list[ii].dim_out, + descriptor_list[ii + 1].dim_in, + bias=False, + init="glorot", + ) + ) + sequential_transform.append(Identity()) + self.sequential_transform = torch.nn.ModuleList(sequential_transform) + self.ntypes = ntypes + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.rcut + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return [sum(ii) for ii in self.get_sel()] + + def get_sel(self) -> List[int]: + """Returns the number of selected atoms for each type.""" + return self.sel + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.ntypes + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + return self.dim_out + + def get_dim_in(self) -> int: + """Returns the input dimension.""" + return self.dim_in + + 
def get_dim_emb(self): + return self.dim_emb + + def mixed_types(self) -> bool: + """If true, the descriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the descriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. + + """ + return all(descriptor.mixed_types() for descriptor in self.descriptor_list) + + @property + def dim_out(self): + """Returns the output dimension of this descriptor.""" + if self.hybrid_mode == "concat": + return sum([descrpt.dim_out for descrpt in self.descriptor_list]) + elif self.hybrid_mode == "sequential": + return self.descriptor_list[-1].dim_out + else: + raise RuntimeError + + @property + def dim_emb_list(self) -> List[int]: + """Returns the output dimension list of embeddings.""" + return [descrpt.dim_emb for descrpt in self.descriptor_list] + + @property + def dim_emb(self): + """Returns the output dimension of embedding.""" + if self.hybrid_mode == "concat": + return sum(self.dim_emb_list) + elif self.hybrid_mode == "sequential": + return self.descriptor_list[-1].dim_emb + else: + raise RuntimeError + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not starting from a checkpoint (resume is False), + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + assert ( + self.__class__ == base_class.__class__ + ), "Only descriptors of the same type can share params!"
+ if shared_level == 0: + for ii, des in enumerate(self.descriptor_list): + self.descriptor_list[ii].share_params( + base_class.descriptor_list[ii], shared_level, resume=resume + ) + else: + raise NotImplementedError + + def compute_input_stats( + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ + for ii, descrpt in enumerate(self.descriptor_list): + # need support for hybrid descriptors + descrpt.compute_input_stats(merged, path) + + def forward( + self, + nlist: torch.Tensor, + extended_coord: torch.Tensor, + extended_atype: torch.Tensor, + extended_atype_embd: Optional[torch.Tensor] = None, + mapping: Optional[torch.Tensor] = None, + ): + """Calculate decoded embedding for each atom. + + Args: + - extended_coord: Tell atom coordinates with shape [nframes, natoms[1]*3]. + - nlist: Tell atom types with shape [nframes, natoms[1]]. + - atype: Tell atom count and element count. Its shape is [2+self.ntypes]. + - nlist_type: Tell simulation box with shape [nframes, 9]. + - atype_tebd: Tell simulation box with shape [nframes, 9]. + - nlist_tebd: Tell simulation box with shape [nframes, 9]. + + Returns + ------- + - result: descriptor with shape [nframes, nloc, self.filter_neuron[-1] * self.axis_neuron]. 
+ - ret: environment matrix with shape [nframes, nloc, self.neei, out_size] + """ + nlist_list = list(torch.split(nlist, self.split_sel, -1)) + nframes, nloc, nnei = nlist.shape + concat_rot_mat = True + if self.hybrid_mode == "concat": + out_descriptor = [] + # out_env_mat = [] + out_rot_mat_list = [] + # out_diff = [] + for ii, descrpt in enumerate(self.descriptor_list): + descriptor, env_mat, diff, rot_mat, sw = descrpt( + nlist_list[ii], + extended_coord, + extended_atype, + extended_atype_embd, + mapping, + ) + if descriptor.shape[0] == nframes * nloc: + # [nframes * nloc, 1 + nnei, emb_dim] + descriptor = descriptor[:, 0, :].reshape(nframes, nloc, -1) + out_descriptor.append(descriptor) + # out_env_mat.append(env_mat) + # out_diff.append(diff) + out_rot_mat_list.append(rot_mat) + if rot_mat is None: + concat_rot_mat = False + out_descriptor = torch.concat(out_descriptor, dim=-1) + if concat_rot_mat: + out_rot_mat = torch.concat(out_rot_mat_list, dim=-2) + else: + out_rot_mat = None + return out_descriptor, None, None, out_rot_mat, sw + elif self.hybrid_mode == "sequential": + assert extended_atype_embd is not None + assert mapping is not None + nframes, nloc, nnei = nlist.shape + nall = extended_coord.view(nframes, -1).shape[1] // 3 + seq_input_ext = extended_atype_embd + seq_input = ( + seq_input_ext[:, :nloc, :] if len(self.descriptor_list) == 0 else None + ) + env_mat, diff, rot_mat, sw = None, None, None, None + env_mat_list, diff_list = [], [] + for ii, (descrpt, seq_transform) in enumerate( + zip(self.descriptor_list, self.sequential_transform) + ): + seq_output, env_mat, diff, rot_mat, sw = descrpt( + nlist_list[ii], + extended_coord, + extended_atype, + seq_input_ext, + mapping, + ) + seq_input = seq_transform(seq_output) + mapping_ext = ( + mapping.view(nframes, nall) + .unsqueeze(-1) + .expand(-1, -1, seq_input.shape[-1]) + ) + seq_input_ext = torch.gather(seq_input, 1, mapping_ext) + env_mat_list.append(env_mat) + diff_list.append(diff) + return 
seq_input, env_mat_list, diff_list, rot_mat, sw + else: + raise RuntimeError diff --git a/deepmd/pt/model/descriptor/repformer_layer.py b/deepmd/pt/model/descriptor/repformer_layer.py new file mode 100644 index 0000000000..a58d6b0e2c --- /dev/null +++ b/deepmd/pt/model/descriptor/repformer_layer.py @@ -0,0 +1,749 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Callable, + List, +) + +import torch + +from deepmd.pt.model.network.network import ( + SimpleLinear, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.utils import ( + ActivationFn, +) + + +def torch_linear(*args, **kwargs): + return torch.nn.Linear( + *args, **kwargs, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE + ) + + +def _make_nei_g1( + g1_ext: torch.Tensor, + nlist: torch.Tensor, +) -> torch.Tensor: + # nlist: nb x nloc x nnei + nb, nloc, nnei = nlist.shape + # g1_ext: nb x nall x ng1 + ng1 = g1_ext.shape[-1] + # index: nb x (nloc x nnei) x ng1 + index = nlist.reshape(nb, nloc * nnei).unsqueeze(-1).expand(-1, -1, ng1) + # gg1 : nb x (nloc x nnei) x ng1 + gg1 = torch.gather(g1_ext, dim=1, index=index) + # gg1 : nb x nloc x nnei x ng1 + gg1 = gg1.view(nb, nloc, nnei, ng1) + return gg1 + + +def _apply_nlist_mask( + gg: torch.Tensor, + nlist_mask: torch.Tensor, +) -> torch.Tensor: + # gg: nf x nloc x nnei x ng + # msk: nf x nloc x nnei + return gg.masked_fill(~nlist_mask.unsqueeze(-1), 0.0) + + +def _apply_switch(gg: torch.Tensor, sw: torch.Tensor) -> torch.Tensor: + # gg: nf x nloc x nnei x ng + # sw: nf x nloc x nnei + return gg * sw.unsqueeze(-1) + + +def _apply_h_norm( + hh: torch.Tensor, # nf x nloc x nnei x 3 +) -> torch.Tensor: + """Normalize h by the std of vector length. + do not have an idea if this is a good way. 
+ """ + nf, nl, nnei, _ = hh.shape + # nf x nloc x nnei + normh = torch.linalg.norm(hh, dim=-1) + # nf x nloc + std = torch.std(normh, dim=-1) + # nf x nloc x nnei x 3 + hh = hh[:, :, :, :] / (1.0 + std[:, :, None, None]) + return hh + + +class Atten2Map(torch.nn.Module): + def __init__( + self, + ni: int, + nd: int, + nh: int, + has_gate: bool = False, # apply gate to attn map + smooth: bool = True, + attnw_shift: float = 20.0, + ): + super().__init__() + self.ni = ni + self.nd = nd + self.nh = nh + self.mapqk = SimpleLinear(ni, nd * 2 * nh, bias=False) + self.has_gate = has_gate + self.smooth = smooth + self.attnw_shift = attnw_shift + + def forward( + self, + g2: torch.Tensor, # nb x nloc x nnei x ng2 + h2: torch.Tensor, # nb x nloc x nnei x 3 + nlist_mask: torch.Tensor, # nb x nloc x nnei + sw: torch.Tensor, # nb x nloc x nnei + ) -> torch.Tensor: + ( + nb, + nloc, + nnei, + _, + ) = g2.shape + nd, nh = self.nd, self.nh + # nb x nloc x nnei x nd x (nh x 2) + g2qk = self.mapqk(g2).view(nb, nloc, nnei, nd, nh * 2) + # nb x nloc x (nh x 2) x nnei x nd + g2qk = torch.permute(g2qk, (0, 1, 4, 2, 3)) + # nb x nloc x nh x nnei x nd + g2q, g2k = torch.split(g2qk, nh, dim=2) + # g2q = torch.nn.functional.normalize(g2q, dim=-1) + # g2k = torch.nn.functional.normalize(g2k, dim=-1) + # nb x nloc x nh x nnei x nnei + attnw = torch.matmul(g2q, torch.transpose(g2k, -1, -2)) / nd**0.5 + if self.has_gate: + gate = torch.matmul(h2, torch.transpose(h2, -1, -2)).unsqueeze(-3) + attnw = attnw * gate + # mask the attenmap, nb x nloc x 1 x 1 x nnei + attnw_mask = ~nlist_mask.unsqueeze(2).unsqueeze(2) + # mask the attenmap, nb x nloc x 1 x nnei x 1 + attnw_mask_c = ~nlist_mask.unsqueeze(2).unsqueeze(-1) + if self.smooth: + attnw = (attnw + self.attnw_shift) * sw[:, :, None, :, None] * sw[ + :, :, None, None, : + ] - self.attnw_shift + else: + attnw = attnw.masked_fill( + attnw_mask, + float("-inf"), + ) + attnw = torch.softmax(attnw, dim=-1) + attnw = attnw.masked_fill( + attnw_mask, + 
0.0, + ) + # nb x nloc x nh x nnei x nnei + attnw = attnw.masked_fill( + attnw_mask_c, + 0.0, + ) + if self.smooth: + attnw = attnw * sw[:, :, None, :, None] * sw[:, :, None, None, :] + # nb x nloc x nnei x nnei + h2h2t = torch.matmul(h2, torch.transpose(h2, -1, -2)) / 3.0**0.5 + # nb x nloc x nh x nnei x nnei + ret = attnw * h2h2t[:, :, None, :, :] + # ret = torch.softmax(g2qk, dim=-1) + # nb x nloc x nnei x nnei x nh + ret = torch.permute(ret, (0, 1, 3, 4, 2)) + return ret + + +class Atten2MultiHeadApply(torch.nn.Module): + def __init__( + self, + ni: int, + nh: int, + ): + super().__init__() + self.ni = ni + self.nh = nh + self.mapv = SimpleLinear(ni, ni * nh, bias=False) + self.head_map = SimpleLinear(ni * nh, ni) + + def forward( + self, + AA: torch.Tensor, # nf x nloc x nnei x nnei x nh + g2: torch.Tensor, # nf x nloc x nnei x ng2 + ) -> torch.Tensor: + nf, nloc, nnei, ng2 = g2.shape + nh = self.nh + # nf x nloc x nnei x ng2 x nh + g2v = self.mapv(g2).view(nf, nloc, nnei, ng2, nh) + # nf x nloc x nh x nnei x ng2 + g2v = torch.permute(g2v, (0, 1, 4, 2, 3)) + # g2v = torch.nn.functional.normalize(g2v, dim=-1) + # nf x nloc x nh x nnei x nnei + AA = torch.permute(AA, (0, 1, 4, 2, 3)) + # nf x nloc x nh x nnei x ng2 + ret = torch.matmul(AA, g2v) + # nf x nloc x nnei x ng2 x nh + ret = torch.permute(ret, (0, 1, 3, 4, 2)).reshape(nf, nloc, nnei, (ng2 * nh)) + # nf x nloc x nnei x ng2 + return self.head_map(ret) + + +class Atten2EquiVarApply(torch.nn.Module): + def __init__( + self, + ni: int, + nh: int, + ): + super().__init__() + self.ni = ni + self.nh = nh + self.head_map = SimpleLinear(nh, 1, bias=False) + + def forward( + self, + AA: torch.Tensor, # nf x nloc x nnei x nnei x nh + h2: torch.Tensor, # nf x nloc x nnei x 3 + ) -> torch.Tensor: + nf, nloc, nnei, _ = h2.shape + nh = self.nh + # nf x nloc x nh x nnei x nnei + AA = torch.permute(AA, (0, 1, 4, 2, 3)) + h2m = torch.unsqueeze(h2, dim=2) + # nf x nloc x nh x nnei x 3 + h2m = torch.tile(h2m, [1, 1, nh, 1, 
1]) + # nf x nloc x nh x nnei x 3 + ret = torch.matmul(AA, h2m) + # nf x nloc x nnei x 3 x nh + ret = torch.permute(ret, (0, 1, 3, 4, 2)).view(nf, nloc, nnei, 3, nh) + # nf x nloc x nnei x 3 + return torch.squeeze(self.head_map(ret), dim=-1) + + +class LocalAtten(torch.nn.Module): + def __init__( + self, + ni: int, + nd: int, + nh: int, + smooth: bool = True, + attnw_shift: float = 20.0, + ): + super().__init__() + self.ni = ni + self.nd = nd + self.nh = nh + self.mapq = SimpleLinear(ni, nd * 1 * nh, bias=False) + self.mapkv = SimpleLinear(ni, (nd + ni) * nh, bias=False) + self.head_map = SimpleLinear(ni * nh, ni) + self.smooth = smooth + self.attnw_shift = attnw_shift + + def forward( + self, + g1: torch.Tensor, # nb x nloc x ng1 + gg1: torch.Tensor, # nb x nloc x nnei x ng1 + nlist_mask: torch.Tensor, # nb x nloc x nnei + sw: torch.Tensor, # nb x nloc x nnei + ) -> torch.Tensor: + nb, nloc, nnei = nlist_mask.shape + ni, nd, nh = self.ni, self.nd, self.nh + assert ni == g1.shape[-1] + assert ni == gg1.shape[-1] + # nb x nloc x nd x nh + g1q = self.mapq(g1).view(nb, nloc, nd, nh) + # nb x nloc x nh x nd + g1q = torch.permute(g1q, (0, 1, 3, 2)) + # nb x nloc x nnei x (nd+ni) x nh + gg1kv = self.mapkv(gg1).view(nb, nloc, nnei, nd + ni, nh) + gg1kv = torch.permute(gg1kv, (0, 1, 4, 2, 3)) + # nb x nloc x nh x nnei x nd, nb x nloc x nh x nnei x ng1 + gg1k, gg1v = torch.split(gg1kv, [nd, ni], dim=-1) + + # nb x nloc x nh x 1 x nnei + attnw = torch.matmul(g1q.unsqueeze(-2), torch.transpose(gg1k, -1, -2)) / nd**0.5 + # nb x nloc x nh x nnei + attnw = attnw.squeeze(-2) + # mask the attenmap, nb x nloc x 1 x nnei + attnw_mask = ~nlist_mask.unsqueeze(-2) + # nb x nloc x nh x nnei + if self.smooth: + attnw = (attnw + self.attnw_shift) * sw.unsqueeze(-2) - self.attnw_shift + else: + attnw = attnw.masked_fill( + attnw_mask, + float("-inf"), + ) + attnw = torch.softmax(attnw, dim=-1) + attnw = attnw.masked_fill( + attnw_mask, + 0.0, + ) + if self.smooth: + attnw = attnw * 
sw.unsqueeze(-2) + + # nb x nloc x nh x ng1 + ret = ( + torch.matmul(attnw.unsqueeze(-2), gg1v).squeeze(-2).view(nb, nloc, nh * ni) + ) + # nb x nloc x ng1 + ret = self.head_map(ret) + return ret + + +class RepformerLayer(torch.nn.Module): + def __init__( + self, + rcut, + rcut_smth, + sel: int, + ntypes: int, + g1_dim=128, + g2_dim=16, + axis_dim: int = 4, + update_chnnl_2: bool = True, + do_bn_mode: str = "no", + bn_momentum: float = 0.1, + update_g1_has_conv: bool = True, + update_g1_has_drrd: bool = True, + update_g1_has_grrg: bool = True, + update_g1_has_attn: bool = True, + update_g2_has_g1g1: bool = True, + update_g2_has_attn: bool = True, + update_h2: bool = False, + attn1_hidden: int = 64, + attn1_nhead: int = 4, + attn2_hidden: int = 16, + attn2_nhead: int = 4, + attn2_has_gate: bool = False, + activation_function: str = "tanh", + update_style: str = "res_avg", + set_davg_zero: bool = True, # TODO + smooth: bool = True, + ): + super().__init__() + self.epsilon = 1e-4 # protection of 1./nnei + self.rcut = rcut + self.rcut_smth = rcut_smth + self.ntypes = ntypes + sel = [sel] if isinstance(sel, int) else sel + self.nnei = sum(sel) + assert len(sel) == 1 + self.sel = torch.tensor(sel, device=env.DEVICE) + self.sec = self.sel + self.axis_dim = axis_dim + self.set_davg_zero = set_davg_zero + self.do_bn_mode = do_bn_mode + self.bn_momentum = bn_momentum + self.act = ActivationFn(activation_function) + self.update_g1_has_grrg = update_g1_has_grrg + self.update_g1_has_drrd = update_g1_has_drrd + self.update_g1_has_conv = update_g1_has_conv + self.update_g1_has_attn = update_g1_has_attn + self.update_chnnl_2 = update_chnnl_2 + self.update_g2_has_g1g1 = update_g2_has_g1g1 if self.update_chnnl_2 else False + self.update_g2_has_attn = update_g2_has_attn if self.update_chnnl_2 else False + self.update_h2 = update_h2 if self.update_chnnl_2 else False + del update_g2_has_g1g1, update_g2_has_attn, update_h2 + self.update_style = update_style + self.smooth = smooth + 
self.g1_dim = g1_dim + self.g2_dim = g2_dim + + g1_in_dim = self.cal_1_dim(g1_dim, g2_dim, self.axis_dim) + self.linear1 = SimpleLinear(g1_in_dim, g1_dim) + self.linear2 = None + self.proj_g1g2 = None + self.proj_g1g1g2 = None + self.attn2g_map = None + self.attn2_mh_apply = None + self.attn2_lm = None + self.attn2h_map = None + self.attn2_ev_apply = None + self.loc_attn = None + + if self.update_chnnl_2: + self.linear2 = SimpleLinear(g2_dim, g2_dim) + if self.update_g1_has_conv: + self.proj_g1g2 = SimpleLinear(g1_dim, g2_dim, bias=False) + if self.update_g2_has_g1g1: + self.proj_g1g1g2 = SimpleLinear(g1_dim, g2_dim, bias=False) + if self.update_g2_has_attn: + self.attn2g_map = Atten2Map( + g2_dim, attn2_hidden, attn2_nhead, attn2_has_gate, self.smooth + ) + self.attn2_mh_apply = Atten2MultiHeadApply(g2_dim, attn2_nhead) + self.attn2_lm = torch.nn.LayerNorm( + g2_dim, + elementwise_affine=True, + device=env.DEVICE, + dtype=env.GLOBAL_PT_FLOAT_PRECISION, + ) + if self.update_h2: + self.attn2h_map = Atten2Map( + g2_dim, attn2_hidden, attn2_nhead, attn2_has_gate, self.smooth + ) + self.attn2_ev_apply = Atten2EquiVarApply(g2_dim, attn2_nhead) + if self.update_g1_has_attn: + self.loc_attn = LocalAtten(g1_dim, attn1_hidden, attn1_nhead, self.smooth) + + if self.do_bn_mode == "uniform": + self.bn1 = self._bn_layer() + self.bn2 = self._bn_layer() + elif self.do_bn_mode == "component": + self.bn1 = self._bn_layer(nf=g1_dim) + self.bn2 = self._bn_layer(nf=g2_dim) + elif self.do_bn_mode == "no": + self.bn1, self.bn2 = None, None + else: + raise RuntimeError(f"unknown bn_mode {self.do_bn_mode}") + + def cal_1_dim(self, g1d: int, g2d: int, ax: int) -> int: + ret = g1d + if self.update_g1_has_grrg: + ret += g2d * ax + if self.update_g1_has_drrd: + ret += g1d * ax + if self.update_g1_has_conv: + ret += g2d + return ret + + def _update_h2( + self, + g2: torch.Tensor, + h2: torch.Tensor, + nlist_mask: torch.Tensor, + sw: torch.Tensor, + ) -> torch.Tensor: + assert self.attn2h_map 
is not None + assert self.attn2_ev_apply is not None + nb, nloc, nnei, _ = g2.shape + # # nb x nloc x nnei x nh2 + # h2_1 = self.attn2_ev_apply(AA, h2) + # h2_update.append(h2_1) + # nb x nloc x nnei x nnei x nh + AAh = self.attn2h_map(g2, h2, nlist_mask, sw) + # nb x nloc x nnei x nh2 + h2_1 = self.attn2_ev_apply(AAh, h2) + return h2_1 + + def _update_g1_conv( + self, + gg1: torch.Tensor, + g2: torch.Tensor, + nlist_mask: torch.Tensor, + sw: torch.Tensor, + ) -> torch.Tensor: + assert self.proj_g1g2 is not None + nb, nloc, nnei, _ = g2.shape + ng1 = gg1.shape[-1] + ng2 = g2.shape[-1] + # gg1 : nb x nloc x nnei x ng2 + gg1 = self.proj_g1g2(gg1).view(nb, nloc, nnei, ng2) + # nb x nloc x nnei x ng2 + gg1 = _apply_nlist_mask(gg1, nlist_mask) + if not self.smooth: + # normalized by number of neighbors, not smooth + # nb x nloc x 1 + invnnei = 1.0 / (self.epsilon + torch.sum(nlist_mask, dim=-1)).unsqueeze(-1) + else: + gg1 = _apply_switch(gg1, sw) + invnnei = (1.0 / float(nnei)) * torch.ones( + (nb, nloc, 1), dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=gg1.device + ) + # nb x nloc x ng2 + g1_11 = torch.sum(g2 * gg1, dim=2) * invnnei + return g1_11 + + def _cal_h2g2( + self, + g2: torch.Tensor, + h2: torch.Tensor, + nlist_mask: torch.Tensor, + sw: torch.Tensor, + ) -> torch.Tensor: + # g2: nf x nloc x nnei x ng2 + # h2: nf x nloc x nnei x 3 + # msk: nf x nloc x nnei + nb, nloc, nnei, _ = g2.shape + ng2 = g2.shape[-1] + # nb x nloc x nnei x ng2 + g2 = _apply_nlist_mask(g2, nlist_mask) + if not self.smooth: + # nb x nloc + invnnei = 1.0 / (self.epsilon + torch.sum(nlist_mask, dim=-1)) + # nb x nloc x 1 x 1 + invnnei = invnnei.unsqueeze(-1).unsqueeze(-1) + else: + g2 = _apply_switch(g2, sw) + invnnei = (1.0 / float(nnei)) * torch.ones( + (nb, nloc, 1, 1), dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=g2.device + ) + # nb x nloc x 3 x ng2 + h2g2 = torch.matmul(torch.transpose(h2, -1, -2), g2) * invnnei + return h2g2 + + def _cal_grrg(self, h2g2: torch.Tensor) -> torch.Tensor: 
+ # nb x nloc x 3 x ng2 + nb, nloc, _, ng2 = h2g2.shape + # nb x nloc x 3 x axis + h2g2m = torch.split(h2g2, self.axis_dim, dim=-1)[0] + # nb x nloc x axis x ng2 + g1_13 = torch.matmul(torch.transpose(h2g2m, -1, -2), h2g2) / (3.0**1) + # nb x nloc x (axisxng2) + g1_13 = g1_13.view(nb, nloc, self.axis_dim * ng2) + return g1_13 + + def _update_g1_grrg( + self, + g2: torch.Tensor, + h2: torch.Tensor, + nlist_mask: torch.Tensor, + sw: torch.Tensor, + ) -> torch.Tensor: + # g2: nf x nloc x nnei x ng2 + # h2: nf x nloc x nnei x 3 + # msk: nf x nloc x nnei + nb, nloc, nnei, _ = g2.shape + ng2 = g2.shape[-1] + # nb x nloc x 3 x ng2 + h2g2 = self._cal_h2g2(g2, h2, nlist_mask, sw) + # nb x nloc x (axisxng2) + g1_13 = self._cal_grrg(h2g2) + return g1_13 + + def _update_g2_g1g1( + self, + g1: torch.Tensor, # nb x nloc x ng1 + gg1: torch.Tensor, # nb x nloc x nnei x ng1 + nlist_mask: torch.Tensor, # nb x nloc x nnei + sw: torch.Tensor, # nb x nloc x nnei + ) -> torch.Tensor: + ret = g1.unsqueeze(-2) * gg1 + # nb x nloc x nnei x ng1 + ret = _apply_nlist_mask(ret, nlist_mask) + if self.smooth: + ret = _apply_switch(ret, sw) + return ret + + def _apply_bn( + self, + bn_number: int, + gg: torch.Tensor, + ): + if self.do_bn_mode == "uniform": + return self._apply_bn_uni(bn_number, gg) + elif self.do_bn_mode == "component": + return self._apply_bn_comp(bn_number, gg) + else: + return gg + + def _apply_nb_1(self, bn_number: int, gg: torch.Tensor) -> torch.Tensor: + nb, nl, nf = gg.shape + gg = gg.view([nb, 1, nl * nf]) + if bn_number == 1: + assert self.bn1 is not None + gg = self.bn1(gg) + else: + assert self.bn2 is not None + gg = self.bn2(gg) + return gg.view([nb, nl, nf]) + + def _apply_nb_2( + self, + bn_number: int, + gg: torch.Tensor, + ) -> torch.Tensor: + nb, nl, nnei, nf = gg.shape + gg = gg.view([nb, 1, nl * nnei * nf]) + if bn_number == 1: + assert self.bn1 is not None + gg = self.bn1(gg) + else: + assert self.bn2 is not None + gg = self.bn2(gg) + return gg.view([nb, nl, 
nnei, nf]) + + def _apply_bn_uni( + self, + bn_number: int, + gg: torch.Tensor, + mode: str = "1", + ) -> torch.Tensor: + if len(gg.shape) == 3: + return self._apply_nb_1(bn_number, gg) + elif len(gg.shape) == 4: + return self._apply_nb_2(bn_number, gg) + else: + raise RuntimeError(f"unsupported input shape {gg.shape}") + + def _apply_bn_comp( + self, + bn_number: int, + gg: torch.Tensor, + ) -> torch.Tensor: + ss = gg.shape + nf = ss[-1] + gg = gg.view([-1, nf]) + if bn_number == 1: + assert self.bn1 is not None + gg = self.bn1(gg).view(ss) + else: + assert self.bn2 is not None + gg = self.bn2(gg).view(ss) + return gg + + def forward( + self, + g1_ext: torch.Tensor, # nf x nall x ng1 + g2: torch.Tensor, # nf x nloc x nnei x ng2 + h2: torch.Tensor, # nf x nloc x nnei x 3 + nlist: torch.Tensor, # nf x nloc x nnei + nlist_mask: torch.Tensor, # nf x nloc x nnei + sw: torch.Tensor, # switch func, nf x nloc x nnei + ): + """ + Parameters + ---------- + g1_ext : nf x nall x ng1 extended single-atom chanel + g2 : nf x nloc x nnei x ng2 pair-atom channel, invariant + h2 : nf x nloc x nnei x 3 pair-atom channel, equivariant + nlist : nf x nloc x nnei neighbor list (padded neis are set to 0) + nlist_mask : nf x nloc x nnei masks of the neighbor list. 
real nei 1 otherwise 0 + sw : nf x nloc x nnei switch function + + Returns + ------- + g1: nf x nloc x ng1 updated single-atom chanel + g2: nf x nloc x nnei x ng2 updated pair-atom channel, invariant + h2: nf x nloc x nnei x 3 updated pair-atom channel, equivariant + """ + cal_gg1 = ( + self.update_g1_has_drrd + or self.update_g1_has_conv + or self.update_g1_has_attn + or self.update_g2_has_g1g1 + ) + + nb, nloc, nnei, _ = g2.shape + nall = g1_ext.shape[1] + g1, _ = torch.split(g1_ext, [nloc, nall - nloc], dim=1) + assert (nb, nloc) == g1.shape[:2] + assert (nb, nloc, nnei) == h2.shape[:3] + ng1 = g1.shape[-1] + ng2 = g2.shape[-1] + nh2 = h2.shape[-1] + + if self.bn1 is not None: + g1 = self._apply_bn(1, g1) + if self.bn2 is not None: + g2 = self._apply_bn(2, g2) + if self.update_h2: + h2 = _apply_h_norm(h2) + + g2_update: List[torch.Tensor] = [g2] + h2_update: List[torch.Tensor] = [h2] + g1_update: List[torch.Tensor] = [g1] + g1_mlp: List[torch.Tensor] = [g1] + + if cal_gg1: + gg1 = _make_nei_g1(g1_ext, nlist) + else: + gg1 = None + + if self.update_chnnl_2: + # nb x nloc x nnei x ng2 + assert self.linear2 is not None + g2_1 = self.act(self.linear2(g2)) + g2_update.append(g2_1) + + if self.update_g2_has_g1g1: + assert gg1 is not None + assert self.proj_g1g1g2 is not None + g2_update.append( + self.proj_g1g1g2(self._update_g2_g1g1(g1, gg1, nlist_mask, sw)) + ) + + if self.update_g2_has_attn: + assert self.attn2g_map is not None + assert self.attn2_mh_apply is not None + assert self.attn2_lm is not None + # nb x nloc x nnei x nnei x nh + AAg = self.attn2g_map(g2, h2, nlist_mask, sw) + # nb x nloc x nnei x ng2 + g2_2 = self.attn2_mh_apply(AAg, g2) + g2_2 = self.attn2_lm(g2_2) + g2_update.append(g2_2) + + if self.update_h2: + h2_update.append(self._update_h2(g2, h2, nlist_mask, sw)) + + if self.update_g1_has_conv: + assert gg1 is not None + g1_mlp.append(self._update_g1_conv(gg1, g2, nlist_mask, sw)) + + if self.update_g1_has_grrg: + 
g1_mlp.append(self._update_g1_grrg(g2, h2, nlist_mask, sw)) + + if self.update_g1_has_drrd: + assert gg1 is not None + g1_mlp.append(self._update_g1_grrg(gg1, h2, nlist_mask, sw)) + + # nb x nloc x [ng1+ng2+(axisxng2)+(axisxng1)] + # conv grrg drrd + g1_1 = self.act(self.linear1(torch.cat(g1_mlp, dim=-1))) + g1_update.append(g1_1) + + if self.update_g1_has_attn: + assert gg1 is not None + assert self.loc_attn is not None + g1_update.append(self.loc_attn(g1, gg1, nlist_mask, sw)) + + # update + if self.update_chnnl_2: + g2_new = self.list_update(g2_update) + h2_new = self.list_update(h2_update) + else: + g2_new, h2_new = g2, h2 + g1_new = self.list_update(g1_update) + return g1_new, g2_new, h2_new + + @torch.jit.export + def list_update_res_avg( + self, + update_list: List[torch.Tensor], + ) -> torch.Tensor: + nitem = len(update_list) + uu = update_list[0] + for ii in range(1, nitem): + uu = uu + update_list[ii] + return uu / (float(nitem) ** 0.5) + + @torch.jit.export + def list_update_res_incr(self, update_list: List[torch.Tensor]) -> torch.Tensor: + nitem = len(update_list) + uu = update_list[0] + scale = 1.0 / (float(nitem - 1) ** 0.5) if nitem > 1 else 0.0 + for ii in range(1, nitem): + uu = uu + scale * update_list[ii] + return uu + + @torch.jit.export + def list_update(self, update_list: List[torch.Tensor]) -> torch.Tensor: + if self.update_style == "res_avg": + return self.list_update_res_avg(update_list) + elif self.update_style == "res_incr": + return self.list_update_res_incr(update_list) + else: + raise RuntimeError(f"unknown update style {self.update_style}") + + def _bn_layer( + self, + nf: int = 1, + ) -> Callable: + return torch.nn.BatchNorm1d( + nf, + eps=1e-5, + momentum=self.bn_momentum, + affine=False, + track_running_stats=True, + device=env.DEVICE, + dtype=env.GLOBAL_PT_FLOAT_PRECISION, + ) diff --git a/deepmd/pt/model/descriptor/repformers.py b/deepmd/pt/model/descriptor/repformers.py new file mode 100644 index 0000000000..16a38052b1 --- 
/dev/null +++ b/deepmd/pt/model/descriptor/repformers.py @@ -0,0 +1,345 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Callable, + Dict, + List, + Optional, + Tuple, + Union, +) + +import torch + +from deepmd.pt.model.descriptor.descriptor import ( + DescriptorBlock, +) +from deepmd.pt.model.descriptor.env_mat import ( + prod_env_mat, +) +from deepmd.pt.model.network.network import ( + SimpleLinear, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.env_mat_stat import ( + EnvMatStatSe, +) +from deepmd.pt.utils.exclude_mask import ( + PairExcludeMask, +) +from deepmd.pt.utils.utils import ( + ActivationFn, +) +from deepmd.utils.env_mat_stat import ( + StatItem, +) +from deepmd.utils.path import ( + DPPath, +) + +from .repformer_layer import ( + RepformerLayer, +) + +mydtype = env.GLOBAL_PT_FLOAT_PRECISION +mydev = env.DEVICE + + +def torch_linear(*args, **kwargs): + return torch.nn.Linear(*args, **kwargs, dtype=mydtype, device=mydev) + + +simple_linear = SimpleLinear +mylinear = simple_linear + + +@DescriptorBlock.register("se_repformer") +@DescriptorBlock.register("se_uni") +class DescrptBlockRepformers(DescriptorBlock): + def __init__( + self, + rcut, + rcut_smth, + sel: int, + ntypes: int, + nlayers: int = 3, + g1_dim=128, + g2_dim=16, + axis_dim: int = 4, + direct_dist: bool = False, + do_bn_mode: str = "no", + bn_momentum: float = 0.1, + update_g1_has_conv: bool = True, + update_g1_has_drrd: bool = True, + update_g1_has_grrg: bool = True, + update_g1_has_attn: bool = True, + update_g2_has_g1g1: bool = True, + update_g2_has_attn: bool = True, + update_h2: bool = False, + attn1_hidden: int = 64, + attn1_nhead: int = 4, + attn2_hidden: int = 16, + attn2_nhead: int = 4, + attn2_has_gate: bool = False, + activation_function: str = "tanh", + update_style: str = "res_avg", + set_davg_zero: bool = True, # TODO + smooth: bool = True, + add_type_ebd_to_seq: bool = False, + exclude_types: List[Tuple[int, int]] = [], + 
env_protection: float = 0.0, + type: Optional[str] = None, + ): + """ + smooth: + If strictly smooth, cannot be used with update_g1_has_attn + add_type_ebd_to_seq: + At the presence of seq_input (optional input to forward), + whether or not add an type embedding to seq_input. + If no seq_input is given, it has no effect. + """ + super().__init__() + del type + self.epsilon = 1e-4 # protection of 1./nnei + self.rcut = rcut + self.rcut_smth = rcut_smth + self.ntypes = ntypes + self.nlayers = nlayers + sel = [sel] if isinstance(sel, int) else sel + self.nnei = sum(sel) + self.ndescrpt = self.nnei * 4 # use full descriptor. + assert len(sel) == 1 + self.sel = sel + self.sec = self.sel + self.split_sel = self.sel + self.axis_dim = axis_dim + self.set_davg_zero = set_davg_zero + self.g1_dim = g1_dim + self.g2_dim = g2_dim + self.act = ActivationFn(activation_function) + self.direct_dist = direct_dist + self.add_type_ebd_to_seq = add_type_ebd_to_seq + # order matters, placed after the assignment of self.ntypes + self.reinit_exclude(exclude_types) + self.env_protection = env_protection + + self.g2_embd = mylinear(1, self.g2_dim) + layers = [] + for ii in range(nlayers): + layers.append( + RepformerLayer( + rcut, + rcut_smth, + sel, + ntypes, + self.g1_dim, + self.g2_dim, + axis_dim=self.axis_dim, + update_chnnl_2=(ii != nlayers - 1), + do_bn_mode=do_bn_mode, + bn_momentum=bn_momentum, + update_g1_has_conv=update_g1_has_conv, + update_g1_has_drrd=update_g1_has_drrd, + update_g1_has_grrg=update_g1_has_grrg, + update_g1_has_attn=update_g1_has_attn, + update_g2_has_g1g1=update_g2_has_g1g1, + update_g2_has_attn=update_g2_has_attn, + update_h2=update_h2, + attn1_hidden=attn1_hidden, + attn1_nhead=attn1_nhead, + attn2_has_gate=attn2_has_gate, + attn2_hidden=attn2_hidden, + attn2_nhead=attn2_nhead, + activation_function=activation_function, + update_style=update_style, + smooth=smooth, + ) + ) + self.layers = torch.nn.ModuleList(layers) + + sshape = (self.ntypes, self.nnei, 4) + 
mean = torch.zeros(sshape, dtype=mydtype, device=mydev) + stddev = torch.ones(sshape, dtype=mydtype, device=mydev) + self.register_buffer("mean", mean) + self.register_buffer("stddev", stddev) + self.stats = None + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.rcut + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return sum(self.sel) + + def get_sel(self) -> List[int]: + """Returns the number of selected atoms for each type.""" + return self.sel + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.ntypes + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + return self.dim_out + + def get_dim_in(self) -> int: + """Returns the input dimension.""" + return self.dim_in + + def get_dim_emb(self) -> int: + """Returns the embedding dimension g2.""" + return self.g2_dim + + def mixed_types(self) -> bool: + """If true, the discriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the discriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. 
+ + """ + return True + + @property + def dim_out(self): + """Returns the output dimension of this descriptor.""" + return self.g1_dim + + @property + def dim_in(self): + """Returns the atomic input dimension of this descriptor.""" + return self.g1_dim + + @property + def dim_emb(self): + """Returns the embedding dimension g2.""" + return self.get_dim_emb() + + def reinit_exclude( + self, + exclude_types: List[Tuple[int, int]] = [], + ): + self.exclude_types = exclude_types + self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) + + def forward( + self, + nlist: torch.Tensor, + extended_coord: torch.Tensor, + extended_atype: torch.Tensor, + extended_atype_embd: Optional[torch.Tensor] = None, + mapping: Optional[torch.Tensor] = None, + ): + assert mapping is not None + assert extended_atype_embd is not None + nframes, nloc, nnei = nlist.shape + nall = extended_coord.view(nframes, -1).shape[1] // 3 + atype = extended_atype[:, :nloc] + # nb x nloc x nnei x 4, nb x nloc x nnei x 3, nb x nloc x nnei x 1 + dmatrix, diff, sw = prod_env_mat( + extended_coord, + nlist, + atype, + self.mean, + self.stddev, + self.rcut, + self.rcut_smth, + protection=self.env_protection, + ) + nlist_mask = nlist != -1 + sw = torch.squeeze(sw, -1) + # beyond the cutoff sw should be 0.0 + sw = sw.masked_fill(~nlist_mask, 0.0) + + # [nframes, nloc, tebd_dim] + atype_embd = extended_atype_embd[:, :nloc, :] + assert list(atype_embd.shape) == [nframes, nloc, self.g1_dim] + + g1 = self.act(atype_embd) + # nb x nloc x nnei x 1, nb x nloc x nnei x 3 + if not self.direct_dist: + g2, h2 = torch.split(dmatrix, [1, 3], dim=-1) + else: + g2, h2 = torch.linalg.norm(diff, dim=-1, keepdim=True), diff + g2 = g2 / self.rcut + h2 = h2 / self.rcut + # nb x nloc x nnei x ng2 + g2 = self.act(self.g2_embd(g2)) + + # set all padding positions to index of 0 + # if the a neighbor is real or not is indicated by nlist_mask + nlist[nlist == -1] = 0 + # nb x nall x ng1 + mapping = mapping.view(nframes, 
nall).unsqueeze(-1).expand(-1, -1, self.g1_dim) + for idx, ll in enumerate(self.layers): + # g1: nb x nloc x ng1 + # g1_ext: nb x nall x ng1 + g1_ext = torch.gather(g1, 1, mapping) + g1, g2, h2 = ll.forward( + g1_ext, + g2, + h2, + nlist, + nlist_mask, + sw, + ) + + # uses the last layer. + # nb x nloc x 3 x ng2 + h2g2 = ll._cal_h2g2(g2, h2, nlist_mask, sw) + # (nb x nloc) x ng2 x 3 + rot_mat = torch.permute(h2g2, (0, 1, 3, 2)) + + return g1, g2, h2, rot_mat.view(-1, nloc, self.dim_emb, 3), sw + + def compute_input_stats( + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. 
+ + """ + env_mat_stat = EnvMatStatSe(self) + if path is not None: + path = path / env_mat_stat.get_hash() + if path is None or not path.is_dir(): + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged + else: + sampled = [] + env_mat_stat.load_or_compute_stats(sampled, path) + self.stats = env_mat_stat.stats + mean, stddev = env_mat_stat() + if not self.set_davg_zero: + self.mean.copy_(torch.tensor(mean, device=env.DEVICE)) + self.stddev.copy_(torch.tensor(stddev, device=env.DEVICE)) + + def get_stats(self) -> Dict[str, StatItem]: + """Get the statistics of the descriptor.""" + if self.stats is None: + raise RuntimeError( + "The statistics of the descriptor has not been computed." + ) + return self.stats diff --git a/deepmd/pt/model/descriptor/se_a.py b/deepmd/pt/model/descriptor/se_a.py new file mode 100644 index 0000000000..e17b7c5d54 --- /dev/null +++ b/deepmd/pt/model/descriptor/se_a.py @@ -0,0 +1,657 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import itertools +from typing import ( + Callable, + ClassVar, + Dict, + List, + Optional, + Tuple, + Union, +) + +import numpy as np +import torch + +from deepmd.pt.model.descriptor import ( + DescriptorBlock, + prod_env_mat, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.env import ( + PRECISION_DICT, + RESERVED_PRECISON_DICT, +) +from deepmd.pt.utils.env_mat_stat import ( + EnvMatStatSe, +) +from deepmd.pt.utils.update_sel import ( + UpdateSel, +) +from deepmd.utils.env_mat_stat import ( + StatItem, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +try: + from typing import ( + Final, + ) +except ImportError: + from torch.jit import Final + +from deepmd.dpmodel.utils import EnvMat as DPEnvMat +from deepmd.pt.model.network.mlp import ( + EmbeddingNet, + NetworkCollection, +) +from deepmd.pt.model.network.network import ( + TypeFilter, +) +from deepmd.pt.utils.exclude_mask import ( 
+ PairExcludeMask, +) + +from .base_descriptor import ( + BaseDescriptor, +) + + +@BaseDescriptor.register("se_e2_a") +@BaseDescriptor.register("se_a") +class DescrptSeA(BaseDescriptor, torch.nn.Module): + def __init__( + self, + rcut, + rcut_smth, + sel, + neuron=[25, 50, 100], + axis_neuron=16, + set_davg_zero: bool = False, + activation_function: str = "tanh", + precision: str = "float64", + resnet_dt: bool = False, + exclude_types: List[Tuple[int, int]] = [], + env_protection: float = 0.0, + old_impl: bool = False, + type_one_side: bool = True, + **kwargs, + ): + super().__init__() + self.sea = DescrptBlockSeA( + rcut, + rcut_smth, + sel, + neuron=neuron, + axis_neuron=axis_neuron, + set_davg_zero=set_davg_zero, + activation_function=activation_function, + precision=precision, + resnet_dt=resnet_dt, + exclude_types=exclude_types, + env_protection=env_protection, + old_impl=old_impl, + type_one_side=type_one_side, + **kwargs, + ) + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.sea.get_rcut() + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return self.sea.get_nsel() + + def get_sel(self) -> List[int]: + """Returns the number of selected atoms for each type.""" + return self.sea.get_sel() + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.sea.get_ntypes() + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + return self.sea.get_dim_out() + + def get_dim_emb(self) -> int: + """Returns the output dimension.""" + return self.sea.get_dim_emb() + + def mixed_types(self): + """Returns if the descriptor requires a neighbor list that distinguish different + atomic types or not. + """ + return self.sea.mixed_types() + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. 
+ If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + assert ( + self.__class__ == base_class.__class__ + ), "Only descriptors of the same type can share params!" + # For SeA descriptors, the user-defined share-level + # shared_level: 0 + # share all parameters in sea + if shared_level == 0: + self.sea.share_params(base_class.sea, 0, resume=resume) + # Other shared levels + else: + raise NotImplementedError + + @property + def dim_out(self): + """Returns the output dimension of this descriptor.""" + return self.sea.dim_out + + def compute_input_stats( + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ + return self.sea.compute_input_stats(merged, path) + + def reinit_exclude( + self, + exclude_types: List[Tuple[int, int]] = [], + ): + """Update the type exclusions.""" + self.sea.reinit_exclude(exclude_types) + + def forward( + self, + coord_ext: torch.Tensor, + atype_ext: torch.Tensor, + nlist: torch.Tensor, + mapping: Optional[torch.Tensor] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + coord_ext + The extended coordinates of atoms. shape: nf x (nallx3) + atype_ext + The extended aotm types. shape: nf x nall + nlist + The neighbor list. 
shape: nf x nloc x nnei + mapping + The index mapping, not required by this descriptor. + + Returns + ------- + descriptor + The descriptor. shape: nf x nloc x (ng x axis_neuron) + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3 + g2 + The rotationally invariant pair-particle representation. + this descriptor returns None + h2 + The rotationally equivariant pair-particle representation. + this descriptor returns None + sw + The smooth switch function. + + """ + return self.sea.forward(nlist, coord_ext, atype_ext, None, mapping) + + def set_stat_mean_and_stddev( + self, + mean: torch.Tensor, + stddev: torch.Tensor, + ) -> None: + self.sea.mean = mean + self.sea.stddev = stddev + + def serialize(self) -> dict: + obj = self.sea + return { + "@class": "Descriptor", + "type": "se_e2_a", + "@version": 1, + "rcut": obj.rcut, + "rcut_smth": obj.rcut_smth, + "sel": obj.sel, + "neuron": obj.neuron, + "axis_neuron": obj.axis_neuron, + "resnet_dt": obj.resnet_dt, + "set_davg_zero": obj.set_davg_zero, + "activation_function": obj.activation_function, + # make deterministic + "precision": RESERVED_PRECISON_DICT[obj.prec], + "embeddings": obj.filter_layers.serialize(), + "env_mat": DPEnvMat(obj.rcut, obj.rcut_smth).serialize(), + "exclude_types": obj.exclude_types, + "env_protection": obj.env_protection, + "@variables": { + "davg": obj["davg"].detach().cpu().numpy(), + "dstd": obj["dstd"].detach().cpu().numpy(), + }, + ## to be updated when the options are supported. 
+ "trainable": True, + "type_one_side": obj.type_one_side, + "spin": None, + } + + @classmethod + def deserialize(cls, data: dict) -> "DescrptSeA": + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("@class", None) + data.pop("type", None) + variables = data.pop("@variables") + embeddings = data.pop("embeddings") + env_mat = data.pop("env_mat") + obj = cls(**data) + + def t_cvt(xx): + return torch.tensor(xx, dtype=obj.sea.prec, device=env.DEVICE) + + obj.sea["davg"] = t_cvt(variables["davg"]) + obj.sea["dstd"] = t_cvt(variables["dstd"]) + obj.sea.filter_layers = NetworkCollection.deserialize(embeddings) + return obj + + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. + + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + local_jdata_cpy = local_jdata.copy() + return UpdateSel().update_one_sel(global_jdata, local_jdata_cpy, False) + + +@DescriptorBlock.register("se_e2_a") +class DescrptBlockSeA(DescriptorBlock): + ndescrpt: Final[int] + __constants__: ClassVar[list] = ["ndescrpt"] + + def __init__( + self, + rcut, + rcut_smth, + sel, + neuron=[25, 50, 100], + axis_neuron=16, + set_davg_zero: bool = False, + activation_function: str = "tanh", + precision: str = "float64", + resnet_dt: bool = False, + exclude_types: List[Tuple[int, int]] = [], + env_protection: float = 0.0, + old_impl: bool = False, + type_one_side: bool = True, + trainable: bool = True, + **kwargs, + ): + """Construct an embedding net of type `se_a`. + + Args: + - rcut: Cut-off radius. + - rcut_smth: Smooth hyper-parameter for pair force & energy. + - sel: For each element type, how many atoms is selected as neighbors. + - filter_neuron: Number of neurons in each hidden layers of the embedding net. 
+ - axis_neuron: Number of columns of the sub-matrix of the embedding matrix. + """ + super().__init__() + self.rcut = rcut + self.rcut_smth = rcut_smth + self.neuron = neuron + self.filter_neuron = self.neuron + self.axis_neuron = axis_neuron + self.set_davg_zero = set_davg_zero + self.activation_function = activation_function + self.precision = precision + self.prec = PRECISION_DICT[self.precision] + self.resnet_dt = resnet_dt + self.old_impl = old_impl + self.env_protection = env_protection + self.ntypes = len(sel) + self.type_one_side = type_one_side + # order matters, placed after the assignment of self.ntypes + self.reinit_exclude(exclude_types) + + self.sel = sel + # should be on CPU to avoid D2H, as it is used as slice index + self.sec = [0, *np.cumsum(self.sel).tolist()] + self.split_sel = self.sel + self.nnei = sum(sel) + self.ndescrpt = self.nnei * 4 + + wanted_shape = (self.ntypes, self.nnei, 4) + mean = torch.zeros(wanted_shape, dtype=self.prec, device=env.DEVICE) + stddev = torch.ones(wanted_shape, dtype=self.prec, device=env.DEVICE) + self.register_buffer("mean", mean) + self.register_buffer("stddev", stddev) + self.filter_layers_old = None + self.filter_layers = None + + if self.old_impl: + if not self.type_one_side: + raise ValueError( + "The old implementation does not support type_one_side=False." 
+ ) + filter_layers = [] + # TODO: remove + start_index = 0 + for type_i in range(self.ntypes): + one = TypeFilter(start_index, sel[type_i], self.filter_neuron) + filter_layers.append(one) + start_index += sel[type_i] + self.filter_layers_old = torch.nn.ModuleList(filter_layers) + else: + ndim = 1 if self.type_one_side else 2 + filter_layers = NetworkCollection( + ndim=ndim, ntypes=len(sel), network_type="embedding_network" + ) + for embedding_idx in itertools.product(range(self.ntypes), repeat=ndim): + filter_layers[embedding_idx] = EmbeddingNet( + 1, + self.filter_neuron, + activation_function=self.activation_function, + precision=self.precision, + resnet_dt=self.resnet_dt, + ) + self.filter_layers = filter_layers + self.stats = None + # set trainable + for param in self.parameters(): + param.requires_grad = trainable + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.rcut + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return sum(self.sel) + + def get_sel(self) -> List[int]: + """Returns the number of selected atoms for each type.""" + return self.sel + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.ntypes + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + return self.dim_out + + def get_dim_emb(self) -> int: + """Returns the output dimension.""" + return self.neuron[-1] + + def get_dim_in(self) -> int: + """Returns the input dimension.""" + return self.dim_in + + def mixed_types(self) -> bool: + """If true, the discriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the discriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. 
+ + """ + return False + + @property + def dim_out(self): + """Returns the output dimension of this descriptor.""" + return self.filter_neuron[-1] * self.axis_neuron + + @property + def dim_in(self): + """Returns the atomic input dimension of this descriptor.""" + return 0 + + def __setitem__(self, key, value): + if key in ("avg", "data_avg", "davg"): + self.mean = value + elif key in ("std", "data_std", "dstd"): + self.stddev = value + else: + raise KeyError(key) + + def __getitem__(self, key): + if key in ("avg", "data_avg", "davg"): + return self.mean + elif key in ("std", "data_std", "dstd"): + return self.stddev + else: + raise KeyError(key) + + def compute_input_stats( + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. 
+ + """ + env_mat_stat = EnvMatStatSe(self) + if path is not None: + path = path / env_mat_stat.get_hash() + if path is None or not path.is_dir(): + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged + else: + sampled = [] + env_mat_stat.load_or_compute_stats(sampled, path) + self.stats = env_mat_stat.stats + mean, stddev = env_mat_stat() + if not self.set_davg_zero: + self.mean.copy_(torch.tensor(mean, device=env.DEVICE)) + self.stddev.copy_(torch.tensor(stddev, device=env.DEVICE)) + + def get_stats(self) -> Dict[str, StatItem]: + """Get the statistics of the descriptor.""" + if self.stats is None: + raise RuntimeError( + "The statistics of the descriptor has not been computed." + ) + return self.stats + + def reinit_exclude( + self, + exclude_types: List[Tuple[int, int]] = [], + ): + self.exclude_types = exclude_types + self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) + + def forward( + self, + nlist: torch.Tensor, + extended_coord: torch.Tensor, + extended_atype: torch.Tensor, + extended_atype_embd: Optional[torch.Tensor] = None, + mapping: Optional[torch.Tensor] = None, + ): + """Calculate decoded embedding for each atom. + + Args: + - coord: Tell atom coordinates with shape [nframes, natoms[1]*3]. + - atype: Tell atom types with shape [nframes, natoms[1]]. + - natoms: Tell atom count and element count. Its shape is [2+self.ntypes]. + - box: Tell simulation box with shape [nframes, 9]. + + Returns + ------- + - `torch.Tensor`: descriptor matrix with shape [nframes, natoms[0]*self.filter_neuron[-1]*self.axis_neuron]. 
+ """ + del extended_atype_embd, mapping + nloc = nlist.shape[1] + atype = extended_atype[:, :nloc] + dmatrix, diff, sw = prod_env_mat( + extended_coord, + nlist, + atype, + self.mean, + self.stddev, + self.rcut, + self.rcut_smth, + protection=self.env_protection, + ) + + if self.old_impl: + assert self.filter_layers_old is not None + dmatrix = dmatrix.view( + -1, self.ndescrpt + ) # shape is [nframes*nall, self.ndescrpt] + xyz_scatter = torch.empty( + 1, + device=env.DEVICE, + ) + ret = self.filter_layers_old[0](dmatrix) + xyz_scatter = ret + for ii, transform in enumerate(self.filter_layers_old[1:]): + # shape is [nframes*nall, 4, self.filter_neuron[-1]] + ret = transform.forward(dmatrix) + xyz_scatter = xyz_scatter + ret + else: + assert self.filter_layers is not None + dmatrix = dmatrix.view(-1, self.nnei, 4) + dmatrix = dmatrix.to(dtype=self.prec) + nfnl = dmatrix.shape[0] + # pre-allocate a shape to pass jit + xyz_scatter = torch.zeros( + [nfnl, 4, self.filter_neuron[-1]], + dtype=self.prec, + device=extended_coord.device, + ) + # nfnl x nnei + exclude_mask = self.emask(nlist, extended_atype).view(nfnl, -1) + for embedding_idx, ll in enumerate(self.filter_layers.networks): + if self.type_one_side: + ii = embedding_idx + # torch.jit is not happy with slice(None) + # ti_mask = torch.ones(nfnl, dtype=torch.bool, device=dmatrix.device) + # applying a mask seems to cause performance degradation + ti_mask = None + else: + # ti: center atom type, ii: neighbor type... 
+ ii = embedding_idx // self.ntypes + ti = embedding_idx % self.ntypes + ti_mask = atype.ravel().eq(ti) + # nfnl x nt + if ti_mask is not None: + mm = exclude_mask[ti_mask, self.sec[ii] : self.sec[ii + 1]] + else: + mm = exclude_mask[:, self.sec[ii] : self.sec[ii + 1]] + # nfnl x nt x 4 + if ti_mask is not None: + rr = dmatrix[ti_mask, self.sec[ii] : self.sec[ii + 1], :] + else: + rr = dmatrix[:, self.sec[ii] : self.sec[ii + 1], :] + rr = rr * mm[:, :, None] + ss = rr[:, :, :1] + # nfnl x nt x ng + gg = ll.forward(ss) + # nfnl x 4 x ng + gr = torch.matmul(rr.permute(0, 2, 1), gg) + if ti_mask is not None: + xyz_scatter[ti_mask] += gr + else: + xyz_scatter += gr + + xyz_scatter /= self.nnei + xyz_scatter_1 = xyz_scatter.permute(0, 2, 1) + rot_mat = xyz_scatter_1[:, :, 1:4] + xyz_scatter_2 = xyz_scatter[:, :, 0 : self.axis_neuron] + result = torch.matmul( + xyz_scatter_1, xyz_scatter_2 + ) # shape is [nframes*nall, self.filter_neuron[-1], self.axis_neuron] + result = result.view(-1, nloc, self.filter_neuron[-1] * self.axis_neuron) + rot_mat = rot_mat.view([-1, nloc] + list(rot_mat.shape[1:])) # noqa:RUF005 + return ( + result.to(dtype=env.GLOBAL_PT_FLOAT_PRECISION), + rot_mat.to(dtype=env.GLOBAL_PT_FLOAT_PRECISION), + None, + None, + sw, + ) + + +def analyze_descrpt(matrix, ndescrpt, natoms): + """Collect avg, square avg and count of descriptors in a batch.""" + ntypes = natoms.shape[1] - 2 + start_index = 0 + sysr = [] + sysa = [] + sysn = [] + sysr2 = [] + sysa2 = [] + for type_i in range(ntypes): + end_index = start_index + natoms[0, 2 + type_i] + dd = matrix[:, start_index:end_index] # all descriptors for this element + start_index = end_index + dd = np.reshape( + dd, [-1, 4] + ) # Shape is [nframes*natoms[2+type_id]*self.nnei, 4] + ddr = dd[:, :1] + dda = dd[:, 1:] + sumr = np.sum(ddr) + suma = np.sum(dda) / 3.0 + sumn = dd.shape[0] # Value is nframes*natoms[2+type_id]*self.nnei + sumr2 = np.sum(np.multiply(ddr, ddr)) + suma2 = np.sum(np.multiply(dda, dda)) / 
3.0 + sysr.append(sumr) + sysa.append(suma) + sysn.append(sumn) + sysr2.append(sumr2) + sysa2.append(suma2) + return sysr, sysr2, sysa, sysa2, sysn diff --git a/deepmd/pt/model/descriptor/se_atten.py b/deepmd/pt/model/descriptor/se_atten.py new file mode 100644 index 0000000000..051c66385c --- /dev/null +++ b/deepmd/pt/model/descriptor/se_atten.py @@ -0,0 +1,412 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Callable, + Dict, + List, + Optional, + Tuple, + Union, +) + +import numpy as np +import torch + +from deepmd.pt.model.descriptor.descriptor import ( + DescriptorBlock, +) +from deepmd.pt.model.descriptor.env_mat import ( + prod_env_mat, +) +from deepmd.pt.model.network.network import ( + NeighborWiseAttention, + TypeFilter, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.env_mat_stat import ( + EnvMatStatSe, +) +from deepmd.pt.utils.exclude_mask import ( + PairExcludeMask, +) +from deepmd.utils.env_mat_stat import ( + StatItem, +) +from deepmd.utils.path import ( + DPPath, +) + + +@DescriptorBlock.register("se_atten") +class DescrptBlockSeAtten(DescriptorBlock): + def __init__( + self, + rcut, + rcut_smth, + sel, + ntypes: int, + neuron: list = [25, 50, 100], + axis_neuron: int = 16, + tebd_dim: int = 8, + tebd_input_mode: str = "concat", + # set_davg_zero: bool = False, + set_davg_zero: bool = True, # TODO + attn: int = 128, + attn_layer: int = 2, + attn_dotr: bool = True, + attn_mask: bool = False, + post_ln=True, + ffn=False, + ffn_embed_dim=1024, + activation_function="tanh", + scaling_factor=1.0, + head_num=1, + normalize=True, + temperature=None, + return_rot=False, + exclude_types: List[Tuple[int, int]] = [], + env_protection: float = 0.0, + type: Optional[str] = None, + ): + """Construct an embedding net of type `se_atten`. + + Args: + - rcut: Cut-off radius. + - rcut_smth: Smooth hyper-parameter for pair force & energy. + - sel: For each element type, how many atoms is selected as neighbors. 
+ - filter_neuron: Number of neurons in each hidden layers of the embedding net. + - axis_neuron: Number of columns of the sub-matrix of the embedding matrix. + """ + super().__init__() + del type + self.rcut = rcut + self.rcut_smth = rcut_smth + self.filter_neuron = neuron + self.axis_neuron = axis_neuron + self.tebd_dim = tebd_dim + self.tebd_input_mode = tebd_input_mode + self.set_davg_zero = set_davg_zero + self.attn_dim = attn + self.attn_layer = attn_layer + self.attn_dotr = attn_dotr + self.attn_mask = attn_mask + self.post_ln = post_ln + self.ffn = ffn + self.ffn_embed_dim = ffn_embed_dim + self.activation = activation_function + # TODO: To be fixed: precision should be given from inputs + self.prec = torch.float64 + self.scaling_factor = scaling_factor + self.head_num = head_num + self.normalize = normalize + self.temperature = temperature + self.return_rot = return_rot + self.env_protection = env_protection + + if isinstance(sel, int): + sel = [sel] + + self.ntypes = ntypes + self.sel = sel + self.sec = self.sel + self.split_sel = self.sel + self.nnei = sum(sel) + self.ndescrpt = self.nnei * 4 + # order matters, placed after the assignment of self.ntypes + self.reinit_exclude(exclude_types) + self.dpa1_attention = NeighborWiseAttention( + self.attn_layer, + self.nnei, + self.filter_neuron[-1], + self.attn_dim, + dotr=self.attn_dotr, + do_mask=self.attn_mask, + post_ln=self.post_ln, + ffn=self.ffn, + ffn_embed_dim=self.ffn_embed_dim, + activation=self.activation, + scaling_factor=self.scaling_factor, + head_num=self.head_num, + normalize=self.normalize, + temperature=self.temperature, + ) + + wanted_shape = (self.ntypes, self.nnei, 4) + mean = torch.zeros( + wanted_shape, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE + ) + stddev = torch.ones( + wanted_shape, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE + ) + self.register_buffer("mean", mean) + self.register_buffer("stddev", stddev) + + filter_layers = [] + one = TypeFilter( + 0, + 
self.nnei, + self.filter_neuron, + return_G=True, + tebd_dim=self.tebd_dim, + use_tebd=True, + tebd_mode=self.tebd_input_mode, + ) + filter_layers.append(one) + self.filter_layers = torch.nn.ModuleList(filter_layers) + self.stats = None + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.rcut + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return sum(self.sel) + + def get_sel(self) -> List[int]: + """Returns the number of selected atoms for each type.""" + return self.sel + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.ntypes + + def get_dim_in(self) -> int: + """Returns the input dimension.""" + return self.dim_in + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + return self.dim_out + + def get_dim_emb(self) -> int: + """Returns the output dimension of embedding.""" + return self.filter_neuron[-1] + + def mixed_types(self) -> bool: + """If true, the descriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the descriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. + + """ + return True + + @property + def dim_out(self): + """Returns the output dimension of this descriptor.""" + return self.filter_neuron[-1] * self.axis_neuron + + @property + def dim_in(self): + """Returns the atomic input dimension of this descriptor.""" + return self.tebd_dim + + @property + def dim_emb(self): + """Returns the output dimension of embedding.""" + return self.get_dim_emb() + + def compute_input_stats( + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. 
+ + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ + env_mat_stat = EnvMatStatSe(self) + if path is not None: + path = path / env_mat_stat.get_hash() + if path is None or not path.is_dir(): + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged + else: + sampled = [] + env_mat_stat.load_or_compute_stats(sampled, path) + self.stats = env_mat_stat.stats + mean, stddev = env_mat_stat() + if not self.set_davg_zero: + self.mean.copy_(torch.tensor(mean, device=env.DEVICE)) + self.stddev.copy_(torch.tensor(stddev, device=env.DEVICE)) + + def get_stats(self) -> Dict[str, StatItem]: + """Get the statistics of the descriptor.""" + if self.stats is None: + raise RuntimeError( + "The statistics of the descriptor has not been computed." + ) + return self.stats + + def reinit_exclude( + self, + exclude_types: List[Tuple[int, int]] = [], + ): + self.exclude_types = exclude_types + self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) + + def forward( + self, + nlist: torch.Tensor, + extended_coord: torch.Tensor, + extended_atype: torch.Tensor, + extended_atype_embd: Optional[torch.Tensor] = None, + mapping: Optional[torch.Tensor] = None, + ) -> List[torch.Tensor]: + """Calculate decoded embedding for each atom. + + Args: + - coord: Tell atom coordinates with shape [nframes, natoms[1]*3]. + - atype: Tell atom types with shape [nframes, natoms[1]]. + - natoms: Tell atom count and element count. 
Its shape is [2+self.ntypes]. + - box: Tell simulation box with shape [nframes, 9]. + + Returns + ------- + - result: descriptor with shape [nframes, nloc, self.filter_neuron[-1] * self.axis_neuron]. + - ret: environment matrix with shape [nframes, nloc, self.neei, out_size] + """ + del mapping + assert extended_atype_embd is not None + nframes, nloc, nnei = nlist.shape + atype = extended_atype[:, :nloc] + nb = nframes + nall = extended_coord.view(nb, -1, 3).shape[1] + dmatrix, diff, sw = prod_env_mat( + extended_coord, + nlist, + atype, + self.mean, + self.stddev, + self.rcut, + self.rcut_smth, + protection=self.env_protection, + ) + # [nfxnlocxnnei, self.ndescrpt] + dmatrix = dmatrix.view(-1, self.ndescrpt) + nlist_mask = nlist != -1 + nlist[nlist == -1] = 0 + sw = torch.squeeze(sw, -1) + # beyond the cutoff sw should be 0.0 + sw = sw.masked_fill(~nlist_mask, 0.0) + # nf x nloc x nt -> nf x nloc x nnei x nt + atype_tebd = extended_atype_embd[:, :nloc, :] + atype_tebd_nnei = atype_tebd.unsqueeze(2).expand(-1, -1, self.nnei, -1) + # nf x nall x nt + nt = extended_atype_embd.shape[-1] + atype_tebd_ext = extended_atype_embd + # nb x (nloc x nnei) x nt + index = nlist.reshape(nb, nloc * nnei).unsqueeze(-1).expand(-1, -1, nt) + # nb x (nloc x nnei) x nt + atype_tebd_nlist = torch.gather(atype_tebd_ext, dim=1, index=index) + # nb x nloc x nnei x nt + atype_tebd_nlist = atype_tebd_nlist.view(nb, nloc, nnei, nt) + ret = self.filter_layers[0]( + dmatrix, + atype_tebd=atype_tebd_nnei, + nlist_tebd=atype_tebd_nlist, + ) # shape is [nframes*nall, self.neei, out_size] + input_r = torch.nn.functional.normalize( + dmatrix.reshape(-1, self.nnei, 4)[:, :, 1:4], dim=-1 + ) + ret = self.dpa1_attention( + ret, nlist_mask, input_r=input_r, sw=sw + ) # shape is [nframes*nloc, self.neei, out_size] + inputs_reshape = dmatrix.view(-1, self.nnei, 4).permute( + 0, 2, 1 + ) # shape is [nframes*natoms[0], 4, self.neei] + xyz_scatter = torch.matmul( + inputs_reshape, ret + ) # shape is 
[nframes*natoms[0], 4, out_size] + xyz_scatter = xyz_scatter / self.nnei + xyz_scatter_1 = xyz_scatter.permute(0, 2, 1) + rot_mat = xyz_scatter_1[:, :, 1:4] + xyz_scatter_2 = xyz_scatter[:, :, 0 : self.axis_neuron] + result = torch.matmul( + xyz_scatter_1, xyz_scatter_2 + ) # shape is [nframes*nloc, self.filter_neuron[-1], self.axis_neuron] + return ( + result.view(-1, nloc, self.filter_neuron[-1] * self.axis_neuron), + ret.view(-1, nloc, self.nnei, self.filter_neuron[-1]), + dmatrix.view(-1, nloc, self.nnei, 4)[..., 1:], + rot_mat.view(-1, nloc, self.filter_neuron[-1], 3), + sw, + ) + + +def analyze_descrpt(matrix, ndescrpt, natoms, mixed_types=False, real_atype=None): + """Collect avg, square avg and count of descriptors in a batch.""" + ntypes = natoms.shape[1] - 2 + if not mixed_types: + sysr = [] + sysa = [] + sysn = [] + sysr2 = [] + sysa2 = [] + start_index = 0 + for type_i in range(ntypes): + end_index = start_index + natoms[0, 2 + type_i] + dd = matrix[:, start_index:end_index] + start_index = end_index + dd = np.reshape( + dd, [-1, 4] + ) # Shape is [nframes*natoms[2+type_id]*self.nnei, 4] + ddr = dd[:, :1] + dda = dd[:, 1:] + sumr = np.sum(ddr) + suma = np.sum(dda) / 3.0 + sumn = dd.shape[0] # Value is nframes*natoms[2+type_id]*self.nnei + sumr2 = np.sum(np.multiply(ddr, ddr)) + suma2 = np.sum(np.multiply(dda, dda)) / 3.0 + sysr.append(sumr) + sysa.append(suma) + sysn.append(sumn) + sysr2.append(sumr2) + sysa2.append(suma2) + else: + sysr = [0.0 for i in range(ntypes)] + sysa = [0.0 for i in range(ntypes)] + sysn = [0 for i in range(ntypes)] + sysr2 = [0.0 for i in range(ntypes)] + sysa2 = [0.0 for i in range(ntypes)] + for frame_item in range(matrix.shape[0]): + dd_ff = matrix[frame_item] + atype_frame = real_atype[frame_item] + for type_i in range(ntypes): + type_idx = atype_frame == type_i + dd = dd_ff[type_idx] + dd = np.reshape(dd, [-1, 4]) # typen_atoms * nnei, 4 + ddr = dd[:, :1] + dda = dd[:, 1:] + sumr = np.sum(ddr) + suma = np.sum(dda) / 3.0 + 
sumn = dd.shape[0] + sumr2 = np.sum(np.multiply(ddr, ddr)) + suma2 = np.sum(np.multiply(dda, dda)) / 3.0 + sysr[type_i] += sumr + sysa[type_i] += suma + sysn[type_i] += sumn + sysr2[type_i] += sumr2 + sysa2[type_i] += suma2 + + return sysr, sysr2, sysa, sysa2, sysn diff --git a/deepmd/pt/model/descriptor/se_r.py b/deepmd/pt/model/descriptor/se_r.py new file mode 100644 index 0000000000..ff922e0649 --- /dev/null +++ b/deepmd/pt/model/descriptor/se_r.py @@ -0,0 +1,416 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Callable, + Dict, + List, + Optional, + Tuple, + Union, +) + +import numpy as np +import torch + +from deepmd.dpmodel.utils import EnvMat as DPEnvMat +from deepmd.pt.model.descriptor import ( + prod_env_mat, +) +from deepmd.pt.model.network.mlp import ( + EmbeddingNet, + NetworkCollection, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.env import ( + PRECISION_DICT, + RESERVED_PRECISON_DICT, +) +from deepmd.pt.utils.env_mat_stat import ( + EnvMatStatSe, +) +from deepmd.pt.utils.exclude_mask import ( + PairExcludeMask, +) +from deepmd.pt.utils.update_sel import ( + UpdateSel, +) +from deepmd.utils.env_mat_stat import ( + StatItem, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .base_descriptor import ( + BaseDescriptor, +) + + +@BaseDescriptor.register("se_e2_r") +@BaseDescriptor.register("se_r") +class DescrptSeR(BaseDescriptor, torch.nn.Module): + def __init__( + self, + rcut, + rcut_smth, + sel, + neuron=[25, 50, 100], + set_davg_zero: bool = False, + activation_function: str = "tanh", + precision: str = "float64", + resnet_dt: bool = False, + exclude_types: List[Tuple[int, int]] = [], + env_protection: float = 0.0, + old_impl: bool = False, + trainable: bool = True, + **kwargs, + ): + super().__init__() + self.rcut = rcut + self.rcut_smth = rcut_smth + self.neuron = neuron + self.filter_neuron = self.neuron + self.set_davg_zero = 
set_davg_zero + self.activation_function = activation_function + self.precision = precision + self.prec = PRECISION_DICT[self.precision] + self.resnet_dt = resnet_dt + self.old_impl = False # this does not support old implementation. + self.exclude_types = exclude_types + self.ntypes = len(sel) + # order matters, placed after the assignment of self.ntypes + self.reinit_exclude(exclude_types) + self.env_protection = env_protection + + self.sel = sel + self.sec = torch.tensor( + np.append([0], np.cumsum(self.sel)), dtype=int, device=env.DEVICE + ) + self.split_sel = self.sel + self.nnei = sum(sel) + self.ndescrpt = self.nnei * 1 + + wanted_shape = (self.ntypes, self.nnei, 1) + mean = torch.zeros(wanted_shape, dtype=self.prec, device=env.DEVICE) + stddev = torch.ones(wanted_shape, dtype=self.prec, device=env.DEVICE) + self.register_buffer("mean", mean) + self.register_buffer("stddev", stddev) + self.filter_layers_old = None + self.filter_layers = None + + filter_layers = NetworkCollection( + ndim=1, ntypes=len(sel), network_type="embedding_network" + ) + # TODO: ndim=2 if type_one_side=False + for ii in range(self.ntypes): + filter_layers[(ii,)] = EmbeddingNet( + 1, + self.filter_neuron, + activation_function=self.activation_function, + precision=self.precision, + resnet_dt=self.resnet_dt, + ) + self.filter_layers = filter_layers + self.stats = None + # set trainable + for param in self.parameters(): + param.requires_grad = trainable + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.rcut + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return sum(self.sel) + + def get_sel(self) -> List[int]: + """Returns the number of selected atoms for each type.""" + return self.sel + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.ntypes + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + return self.neuron[-1] + + def 
get_dim_emb(self) -> int: + """Returns the output dimension.""" + raise NotImplementedError + + def get_dim_in(self) -> int: + """Returns the input dimension.""" + return 0 + + def mixed_types(self) -> bool: + """If true, the descriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the descriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. + + """ + return False + + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not starting from a checkpoint (resume is False), + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + assert ( + self.__class__ == base_class.__class__ + ), "Only descriptors of the same type can share params!" 
+ # For SeR descriptors, the user-defined share-level + # shared_level: 0 + if shared_level == 0: + # link buffers + if hasattr(self, "mean") and not resume: + # in case of change params during resume + base_env = EnvMatStatSe(base_class) + base_env.stats = base_class.stats + for kk in base_class.get_stats(): + base_env.stats[kk] += self.get_stats()[kk] + mean, stddev = base_env() + if not base_class.set_davg_zero: + base_class.mean.copy_(torch.tensor(mean, device=env.DEVICE)) + base_class.stddev.copy_(torch.tensor(stddev, device=env.DEVICE)) + self.mean = base_class.mean + self.stddev = base_class.stddev + # self.load_state_dict(base_class.state_dict()) # this does not work, because it only inits the model + # the following will successfully link all the params except buffers + for item in self._modules: + self._modules[item] = base_class._modules[item] + # Other shared levels + else: + raise NotImplementedError + + def compute_input_stats( + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. 
+ + """ + env_mat_stat = EnvMatStatSe(self) + if path is not None: + path = path / env_mat_stat.get_hash() + if path is None or not path.is_dir(): + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged + else: + sampled = [] + env_mat_stat.load_or_compute_stats(sampled, path) + self.stats = env_mat_stat.stats + mean, stddev = env_mat_stat() + if not self.set_davg_zero: + self.mean.copy_(torch.tensor(mean, device=env.DEVICE)) + self.stddev.copy_(torch.tensor(stddev, device=env.DEVICE)) + + def get_stats(self) -> Dict[str, StatItem]: + """Get the statistics of the descriptor.""" + if self.stats is None: + raise RuntimeError( + "The statistics of the descriptor has not been computed." + ) + return self.stats + + def __setitem__(self, key, value): + if key in ("avg", "data_avg", "davg"): + self.mean = value + elif key in ("std", "data_std", "dstd"): + self.stddev = value + else: + raise KeyError(key) + + def __getitem__(self, key): + if key in ("avg", "data_avg", "davg"): + return self.mean + elif key in ("std", "data_std", "dstd"): + return self.stddev + else: + raise KeyError(key) + + def reinit_exclude( + self, + exclude_types: List[Tuple[int, int]] = [], + ): + self.exclude_types = exclude_types + self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) + + def forward( + self, + coord_ext: torch.Tensor, + atype_ext: torch.Tensor, + nlist: torch.Tensor, + mapping: Optional[torch.Tensor] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + coord_ext + The extended coordinates of atoms. shape: nf x (nallx3) + atype_ext + The extended aotm types. shape: nf x nall + nlist + The neighbor list. shape: nf x nloc x nnei + mapping + The index mapping, not required by this descriptor. + + Returns + ------- + descriptor + The descriptor. shape: nf x nloc x (ng x axis_neuron) + gr + The rotationally equivariant and permutationally invariant single particle + representation. 
shape: nf x nloc x ng x 3 + g2 + The rotationally invariant pair-particle representation. + this descriptor returns None + h2 + The rotationally equivariant pair-particle representation. + this descriptor returns None + sw + The smooth switch function. + + """ + del mapping + nloc = nlist.shape[1] + atype = atype_ext[:, :nloc] + dmatrix, diff, sw = prod_env_mat( + coord_ext, + nlist, + atype, + self.mean, + self.stddev, + self.rcut, + self.rcut_smth, + True, + protection=self.env_protection, + ) + + assert self.filter_layers is not None + dmatrix = dmatrix.view(-1, self.nnei, 1) + dmatrix = dmatrix.to(dtype=self.prec) + nfnl = dmatrix.shape[0] + # pre-allocate a shape to pass jit + xyz_scatter = torch.zeros( + [nfnl, 1, self.filter_neuron[-1]], dtype=self.prec, device=coord_ext.device + ) + + # nfnl x nnei + exclude_mask = self.emask(nlist, atype_ext).view(nfnl, -1) + for ii, ll in enumerate(self.filter_layers.networks): + # nfnl x nt + mm = exclude_mask[:, self.sec[ii] : self.sec[ii + 1]] + # nfnl x nt x 1 + ss = dmatrix[:, self.sec[ii] : self.sec[ii + 1], :] + ss = ss * mm[:, :, None] + # nfnl x nt x ng + gg = ll.forward(ss) + gg = torch.mean(gg, dim=1).unsqueeze(1) + xyz_scatter += gg * (self.sel[ii] / self.nnei) + + res_rescale = 1.0 / 5.0 + result = xyz_scatter * res_rescale + result = result.view(-1, nloc, self.filter_neuron[-1]) + return ( + result.to(dtype=env.GLOBAL_PT_FLOAT_PRECISION), + None, + None, + None, + sw, + ) + + def set_stat_mean_and_stddev( + self, + mean: torch.Tensor, + stddev: torch.Tensor, + ) -> None: + self.mean = mean + self.stddev = stddev + + def serialize(self) -> dict: + return { + "@class": "Descriptor", + "type": "se_r", + "@version": 1, + "rcut": self.rcut, + "rcut_smth": self.rcut_smth, + "sel": self.sel, + "neuron": self.neuron, + "resnet_dt": self.resnet_dt, + "set_davg_zero": self.set_davg_zero, + "activation_function": self.activation_function, + # make deterministic + "precision": RESERVED_PRECISON_DICT[self.prec], + 
"embeddings": self.filter_layers.serialize(), + "env_mat": DPEnvMat(self.rcut, self.rcut_smth).serialize(), + "exclude_types": self.exclude_types, + "env_protection": self.env_protection, + "@variables": { + "davg": self["davg"].detach().cpu().numpy(), + "dstd": self["dstd"].detach().cpu().numpy(), + }, + ## to be updated when the options are supported. + "trainable": True, + "type_one_side": True, + "spin": None, + } + + @classmethod + def deserialize(cls, data: dict) -> "DescrptSeR": + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 1, 1) + variables = data.pop("@variables") + embeddings = data.pop("embeddings") + env_mat = data.pop("env_mat") + obj = cls(**data) + + def t_cvt(xx): + return torch.tensor(xx, dtype=obj.prec, device=env.DEVICE) + + obj["davg"] = t_cvt(variables["davg"]) + obj["dstd"] = t_cvt(variables["dstd"]) + obj.filter_layers = NetworkCollection.deserialize(embeddings) + return obj + + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. + + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + local_jdata_cpy = local_jdata.copy() + return UpdateSel().update_one_sel(global_jdata, local_jdata_cpy, False) diff --git a/deepmd/pt/model/model/__init__.py b/deepmd/pt/model/model/__init__.py new file mode 100644 index 0000000000..1675215d7b --- /dev/null +++ b/deepmd/pt/model/model/__init__.py @@ -0,0 +1,194 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""The model that takes the coordinates, cell and atom types as input +and predicts some property. The models are automatically generated from +atomic models by the `deepmd.dpmodel.make_model` method. 
+ +The `make_model` method does the reduction, auto-differentiation and +communication of the atomic properties according to output variable +definition `deepmd.dpmodel.OutputVariableDef`. + +All models should be inherited from :class:`deepmd.pt.model.model.model.BaseModel`. +Models generated by `make_model` have already done it. +""" + +import copy +import json + +import numpy as np + +from deepmd.pt.model.atomic_model import ( + DPAtomicModel, + PairTabAtomicModel, +) +from deepmd.pt.model.descriptor.base_descriptor import ( + BaseDescriptor, +) +from deepmd.pt.model.task import ( + BaseFitting, +) +from deepmd.utils.spin import ( + Spin, +) + +from .dp_model import ( + DPModel, +) +from .dp_zbl_model import ( + DPZBLModel, +) +from .ener_model import ( + EnergyModel, +) +from .frozen import ( + FrozenModel, +) +from .make_hessian_model import ( + make_hessian_model, +) +from .make_model import ( + make_model, +) +from .model import ( + BaseModel, +) +from .spin_model import ( + SpinEnergyModel, + SpinModel, +) + + +def get_spin_model(model_params): + model_params = copy.deepcopy(model_params) + if not model_params["spin"]["use_spin"] or isinstance( + model_params["spin"]["use_spin"][0], int + ): + use_spin = np.full(len(model_params["type_map"]), False) + use_spin[model_params["spin"]["use_spin"]] = True + model_params["spin"]["use_spin"] = use_spin.tolist() + # include virtual spin and placeholder types + model_params["type_map"] += [item + "_spin" for item in model_params["type_map"]] + spin = Spin( + use_spin=model_params["spin"]["use_spin"], + virtual_scale=model_params["spin"]["virtual_scale"], + ) + pair_exclude_types = spin.get_pair_exclude_types( + exclude_types=model_params.get("pair_exclude_types", None) + ) + model_params["pair_exclude_types"] = pair_exclude_types + # for descriptor data stat + model_params["descriptor"]["exclude_types"] = pair_exclude_types + atom_exclude_types = spin.get_atom_exclude_types( + 
exclude_types=model_params.get("atom_exclude_types", None) + ) + model_params["atom_exclude_types"] = atom_exclude_types + if ( + "env_protection" not in model_params["descriptor"] + or model_params["descriptor"]["env_protection"] == 0.0 + ): + model_params["descriptor"]["env_protection"] = 1e-6 + if model_params["descriptor"]["type"] in ["se_e2_a"]: + # only expand sel for se_e2_a + model_params["descriptor"]["sel"] += model_params["descriptor"]["sel"] + backbone_model = get_standard_model(model_params) + return SpinEnergyModel(backbone_model=backbone_model, spin=spin) + + +def get_zbl_model(model_params): + model_params = copy.deepcopy(model_params) + ntypes = len(model_params["type_map"]) + # descriptor + model_params["descriptor"]["ntypes"] = ntypes + descriptor = BaseDescriptor(**model_params["descriptor"]) + # fitting + fitting_net = model_params.get("fitting_net", None) + fitting_net["type"] = fitting_net.get("type", "ener") + fitting_net["ntypes"] = descriptor.get_ntypes() + fitting_net["mixed_types"] = descriptor.mixed_types() + fitting_net["embedding_width"] = descriptor.get_dim_out() + fitting_net["dim_descrpt"] = descriptor.get_dim_out() + grad_force = "direct" not in fitting_net["type"] + if not grad_force: + fitting_net["out_dim"] = descriptor.get_dim_emb() + if "ener" in fitting_net["type"]: + fitting_net["return_energy"] = True + fitting = BaseFitting(**fitting_net) + dp_model = DPAtomicModel(descriptor, fitting, type_map=model_params["type_map"]) + # pairtab + filepath = model_params["use_srtab"] + pt_model = PairTabAtomicModel( + filepath, + model_params["descriptor"]["rcut"], + model_params["descriptor"]["sel"], + type_map=model_params["type_map"], + ) + + rmin = model_params["sw_rmin"] + rmax = model_params["sw_rmax"] + atom_exclude_types = model_params.get("atom_exclude_types", []) + pair_exclude_types = model_params.get("pair_exclude_types", []) + return DPZBLModel( + dp_model, + pt_model, + rmin, + rmax, + type_map=model_params["type_map"], + 
atom_exclude_types=atom_exclude_types, + pair_exclude_types=pair_exclude_types, + ) + + +def get_standard_model(model_params): + model_params = copy.deepcopy(model_params) + ntypes = len(model_params["type_map"]) + # descriptor + model_params["descriptor"]["ntypes"] = ntypes + descriptor = BaseDescriptor(**model_params["descriptor"]) + # fitting + fitting_net = model_params.get("fitting_net", None) + fitting_net["type"] = fitting_net.get("type", "ener") + fitting_net["ntypes"] = descriptor.get_ntypes() + fitting_net["mixed_types"] = descriptor.mixed_types() + if fitting_net["type"] in ["dipole", "polar"]: + fitting_net["embedding_width"] = descriptor.get_dim_emb() + fitting_net["dim_descrpt"] = descriptor.get_dim_out() + grad_force = "direct" not in fitting_net["type"] + if not grad_force: + fitting_net["out_dim"] = descriptor.get_dim_emb() + if "ener" in fitting_net["type"]: + fitting_net["return_energy"] = True + fitting = BaseFitting(**fitting_net) + atom_exclude_types = model_params.get("atom_exclude_types", []) + pair_exclude_types = model_params.get("pair_exclude_types", []) + + model = DPModel( + descriptor=descriptor, + fitting=fitting, + type_map=model_params["type_map"], + atom_exclude_types=atom_exclude_types, + pair_exclude_types=pair_exclude_types, + ) + model.model_def_script = json.dumps(model_params) + return model + + +def get_model(model_params): + if "spin" in model_params: + return get_spin_model(model_params) + elif "use_srtab" in model_params: + return get_zbl_model(model_params) + else: + return get_standard_model(model_params) + + +__all__ = [ + "BaseModel", + "get_model", + "DPModel", + "EnergyModel", + "FrozenModel", + "SpinModel", + "SpinEnergyModel", + "DPZBLModel", + "make_model", + "make_hessian_model", +] diff --git a/deepmd/pt/model/model/dipole_model.py b/deepmd/pt/model/model/dipole_model.py new file mode 100644 index 0000000000..45b120771b --- /dev/null +++ b/deepmd/pt/model/model/dipole_model.py @@ -0,0 +1,94 @@ +# 
SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Dict, + Optional, +) + +import torch + +from .dp_model import ( + DPModel, +) + + +class DipoleModel(DPModel): + model_type = "dipole" + + def __init__( + self, + *args, + **kwargs, + ): + super().__init__(*args, **kwargs) + + def forward( + self, + coord, + atype, + box: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ) -> Dict[str, torch.Tensor]: + model_ret = self.forward_common( + coord, + atype, + box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + if self.get_fitting_net() is not None: + model_predict = {} + model_predict["dipole"] = model_ret["dipole"] + model_predict["global_dipole"] = model_ret["dipole_redu"] + if self.do_grad_r("dipole"): + model_predict["force"] = model_ret["dipole_derv_r"].squeeze(-2) + if self.do_grad_c("dipole"): + model_predict["virial"] = model_ret["dipole_derv_c_redu"].squeeze(-2) + if do_atomic_virial: + model_predict["atom_virial"] = model_ret["dipole_derv_c"].squeeze( + -3 + ) + if "mask" in model_ret: + model_predict["mask"] = model_ret["mask"] + else: + model_predict = model_ret + model_predict["updated_coord"] += coord + return model_predict + + @torch.jit.export + def forward_lower( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ): + model_ret = self.forward_common_lower( + extended_coord, + extended_atype, + nlist, + mapping, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + if self.get_fitting_net() is not None: + model_predict = {} + model_predict["dipole"] = model_ret["dipole"] + model_predict["global_dipole"] = model_ret["dipole_redu"] + if self.do_grad_r("dipole"): + model_predict["force"] = model_ret["dipole_derv_r"].squeeze(-2) + if 
self.do_grad_c("dipole"): + model_predict["virial"] = model_ret["dipole_derv_c_redu"].squeeze(-2) + if do_atomic_virial: + model_predict["atom_virial"] = model_ret["dipole_derv_c"].squeeze( + -3 + ) + else: + model_predict = model_ret + return model_predict diff --git a/deepmd/pt/model/model/dos_model.py b/deepmd/pt/model/model/dos_model.py new file mode 100644 index 0000000000..e043700bee --- /dev/null +++ b/deepmd/pt/model/model/dos_model.py @@ -0,0 +1,85 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Dict, + Optional, +) + +import torch + +from .dp_model import ( + DPModel, +) + + +class DOSModel(DPModel): + model_type = "dos" + + def __init__( + self, + *args, + **kwargs, + ): + super().__init__(*args, **kwargs) + + def forward( + self, + coord, + atype, + box: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ) -> Dict[str, torch.Tensor]: + model_ret = self.forward_common( + coord, + atype, + box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + if self.get_fitting_net() is not None: + model_predict = {} + model_predict["atom_dos"] = model_ret["dos"] + model_predict["dos"] = model_ret["dos_redu"] + + if "mask" in model_ret: + model_predict["mask"] = model_ret["mask"] + else: + model_predict = model_ret + model_predict["updated_coord"] += coord + return model_predict + + @torch.jit.export + def get_numb_dos(self) -> int: + """Get the number of DOS for DOSFittingNet.""" + return self.get_fitting_net().dim_out + + @torch.jit.export + def forward_lower( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ): + model_ret = self.forward_common_lower( + extended_coord, + extended_atype, + nlist, + mapping, + fparam=fparam, + aparam=aparam, + 
do_atomic_virial=do_atomic_virial, + ) + if self.get_fitting_net() is not None: + model_predict = {} + model_predict["atom_dos"] = model_ret["dos"] + model_predict["dos"] = model_ret["dos_redu"] + + else: + model_predict = model_ret + return model_predict diff --git a/deepmd/pt/model/model/dp_model.py b/deepmd/pt/model/model/dp_model.py new file mode 100644 index 0000000000..d7b3c4f4e2 --- /dev/null +++ b/deepmd/pt/model/model/dp_model.py @@ -0,0 +1,124 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Dict, + Optional, +) + +import torch + +from deepmd.pt.model.atomic_model import ( + DPAtomicModel, +) +from deepmd.pt.model.descriptor.base_descriptor import ( + BaseDescriptor, +) +from deepmd.pt.model.model.model import ( + BaseModel, +) +from deepmd.pt.model.task.dipole import ( + DipoleFittingNet, +) +from deepmd.pt.model.task.dos import ( + DOSFittingNet, +) +from deepmd.pt.model.task.ener import ( + EnergyFittingNet, + EnergyFittingNetDirect, +) +from deepmd.pt.model.task.polarizability import ( + PolarFittingNet, +) + +from .make_model import ( + make_model, +) + + +@BaseModel.register("standard") +class DPModel(make_model(DPAtomicModel)): + def __new__( + cls, + descriptor=None, + fitting=None, + *args, + # disallow positional atomic_model_ + atomic_model_: Optional[DPAtomicModel] = None, + **kwargs, + ): + from deepmd.pt.model.model.dipole_model import ( + DipoleModel, + ) + from deepmd.pt.model.model.dos_model import ( + DOSModel, + ) + from deepmd.pt.model.model.ener_model import ( + EnergyModel, + ) + from deepmd.pt.model.model.polar_model import ( + PolarModel, + ) + + if atomic_model_ is not None: + fitting = atomic_model_.fitting_net + else: + assert fitting is not None, "fitting network is not provided" + + # according to the fitting network to decide the type of the model + if cls is DPModel: + # map fitting to model + if isinstance(fitting, EnergyFittingNet) or isinstance( + fitting, EnergyFittingNetDirect + ): + cls = 
EnergyModel + elif isinstance(fitting, DipoleFittingNet): + cls = DipoleModel + elif isinstance(fitting, PolarFittingNet): + cls = PolarModel + elif isinstance(fitting, DOSFittingNet): + cls = DOSModel + # else: unknown fitting type, fall back to DPModel + return super().__new__(cls) + + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. + + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + local_jdata_cpy = local_jdata.copy() + local_jdata_cpy["descriptor"] = BaseDescriptor.update_sel( + global_jdata, local_jdata["descriptor"] + ) + return local_jdata_cpy + + def get_fitting_net(self): + """Get the fitting network.""" + return self.atomic_model.fitting_net + + def get_descriptor(self): + """Get the descriptor.""" + return self.atomic_model.descriptor + + def forward( + self, + coord, + atype, + box: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ) -> Dict[str, torch.Tensor]: + # directly call the forward_common method when no specific transform rule + return self.forward_common( + coord, + atype, + box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) diff --git a/deepmd/pt/model/model/dp_zbl_model.py b/deepmd/pt/model/model/dp_zbl_model.py new file mode 100644 index 0000000000..bbc82b8d77 --- /dev/null +++ b/deepmd/pt/model/model/dp_zbl_model.py @@ -0,0 +1,122 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Dict, + Optional, +) + +import torch + +from deepmd.dpmodel.model.dp_model import ( + DPModel, +) +from deepmd.pt.model.atomic_model import ( + DPZBLLinearEnergyAtomicModel, +) +from deepmd.pt.model.model.model import ( + BaseModel, +) + +from .make_model import ( + make_model, +) + +DPZBLModel_ = 
make_model(DPZBLLinearEnergyAtomicModel) + + +@BaseModel.register("zbl") +class DPZBLModel(DPZBLModel_): + model_type = "ener" + + def __init__( + self, + *args, + **kwargs, + ): + super().__init__(*args, **kwargs) + + def forward( + self, + coord, + atype, + box: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ) -> Dict[str, torch.Tensor]: + model_ret = self.forward_common( + coord, + atype, + box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + + model_predict = {} + model_predict["atom_energy"] = model_ret["energy"] + model_predict["energy"] = model_ret["energy_redu"] + if self.do_grad_r("energy"): + model_predict["force"] = model_ret["energy_derv_r"].squeeze(-2) + if self.do_grad_c("energy"): + model_predict["virial"] = model_ret["energy_derv_c_redu"].squeeze(-2) + if do_atomic_virial: + model_predict["atom_virial"] = model_ret["energy_derv_c"].squeeze(-3) + else: + model_predict["force"] = model_ret["dforce"] + if "mask" in model_ret: + model_predict["mask"] = model_ret["mask"] + return model_predict + + @torch.jit.export + def forward_lower( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ): + model_ret = self.forward_common_lower( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + + model_predict = {} + model_predict["atom_energy"] = model_ret["energy"] + model_predict["energy"] = model_ret["energy_redu"] + if self.do_grad_r("energy"): + model_predict["extended_force"] = model_ret["energy_derv_r"].squeeze(-2) + if self.do_grad_c("energy"): + model_predict["virial"] = model_ret["energy_derv_c_redu"].squeeze(-2) + if do_atomic_virial: + model_predict["extended_virial"] = 
model_ret["energy_derv_c"].squeeze( + -3 + ) + else: + assert model_ret["dforce"] is not None + model_predict["dforce"] = model_ret["dforce"] + return model_predict + + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. + + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + local_jdata_cpy = local_jdata.copy() + local_jdata_cpy["dpmodel"] = DPModel.update_sel( + global_jdata, local_jdata["dpmodel"] + ) + return local_jdata_cpy diff --git a/deepmd/pt/model/model/ener_model.py b/deepmd/pt/model/model/ener_model.py new file mode 100644 index 0000000000..5217293623 --- /dev/null +++ b/deepmd/pt/model/model/ener_model.py @@ -0,0 +1,99 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Dict, + Optional, +) + +import torch + +from .dp_model import ( + DPModel, +) + + +class EnergyModel(DPModel): + model_type = "ener" + + def __init__( + self, + *args, + **kwargs, + ): + super().__init__(*args, **kwargs) + + def forward( + self, + coord, + atype, + box: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ) -> Dict[str, torch.Tensor]: + model_ret = self.forward_common( + coord, + atype, + box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + if self.get_fitting_net() is not None: + model_predict = {} + model_predict["atom_energy"] = model_ret["energy"] + model_predict["energy"] = model_ret["energy_redu"] + if self.do_grad_r("energy"): + model_predict["force"] = model_ret["energy_derv_r"].squeeze(-2) + if self.do_grad_c("energy"): + model_predict["virial"] = model_ret["energy_derv_c_redu"].squeeze(-2) + if do_atomic_virial: + model_predict["atom_virial"] = model_ret["energy_derv_c"].squeeze( + -3 + ) + else: + model_predict["force"] 
= model_ret["dforce"] + if "mask" in model_ret: + model_predict["mask"] = model_ret["mask"] + else: + model_predict = model_ret + model_predict["updated_coord"] += coord + return model_predict + + @torch.jit.export + def forward_lower( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ): + model_ret = self.forward_common_lower( + extended_coord, + extended_atype, + nlist, + mapping, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + if self.get_fitting_net() is not None: + model_predict = {} + model_predict["atom_energy"] = model_ret["energy"] + model_predict["energy"] = model_ret["energy_redu"] + if self.do_grad_r("energy"): + model_predict["extended_force"] = model_ret["energy_derv_r"].squeeze(-2) + if self.do_grad_c("energy"): + model_predict["virial"] = model_ret["energy_derv_c_redu"].squeeze(-2) + if do_atomic_virial: + model_predict["extended_virial"] = model_ret[ + "energy_derv_c" + ].squeeze(-3) + else: + assert model_ret["dforce"] is not None + model_predict["dforce"] = model_ret["dforce"] + else: + model_predict = model_ret + return model_predict diff --git a/deepmd/pt/model/model/frozen.py b/deepmd/pt/model/model/frozen.py new file mode 100644 index 0000000000..e3dcd389bb --- /dev/null +++ b/deepmd/pt/model/model/frozen.py @@ -0,0 +1,174 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import tempfile +from typing import ( + Dict, + List, + Optional, +) + +import torch + +from deepmd.dpmodel.output_def import ( + FittingOutputDef, +) +from deepmd.entrypoints.convert_backend import ( + convert_backend, +) +from deepmd.pt.model.model.model import ( + BaseModel, +) + + +@BaseModel.register("frozen") +class FrozenModel(BaseModel): + """Load model from a frozen model, which cannot be trained. 
+ + Parameters + ---------- + model_file : str + The path to the frozen model + """ + + def __init__(self, model_file: str, **kwargs): + super().__init__(**kwargs) + self.model_file = model_file + if model_file.endswith(".pth"): + self.model = torch.jit.load(model_file) + else: + # try to convert from other formats + with tempfile.NamedTemporaryFile(suffix=".pth") as f: + convert_backend(INPUT=model_file, OUTPUT=f.name) + self.model = torch.jit.load(f.name) + + @torch.jit.export + def fitting_output_def(self) -> FittingOutputDef: + """Get the output def of developer implemented atomic models.""" + return self.model.fitting_output_def() + + @torch.jit.export + def get_rcut(self) -> float: + """Get the cut-off radius.""" + return self.model.get_rcut() + + @torch.jit.export + def get_type_map(self) -> List[str]: + """Get the type map.""" + return self.model.get_type_map() + + @torch.jit.export + def get_sel(self) -> List[int]: + """Returns the number of selected atoms for each type.""" + return self.model.get_sel() + + @torch.jit.export + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + return self.model.get_dim_fparam() + + @torch.jit.export + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return self.model.get_dim_aparam() + + @torch.jit.export + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + return self.model.get_sel_type() + + @torch.jit.export + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + return self.model.is_aparam_nall() + + @torch.jit.export + def mixed_types(self) -> bool: + """If true, the model + 1. 
assumes total number of atoms aligned across frames; + 2. uses a neighbor list that does not distinguish different atomic types. + + If false, the model + 1. assumes total number of atoms of each atom type aligned across frames; + 2. uses a neighbor list that distinguishes different atomic types. + + """ + return self.model.mixed_types() + + @torch.jit.export + def forward( + self, + coord, + atype, + box: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ) -> Dict[str, torch.Tensor]: + return self.model.forward( + coord, + atype, + box=box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + + @torch.jit.export + def get_model_def_script(self) -> str: + """Get the model definition script.""" + # try to use the original script instead of "frozen model" + # Note: this cannot change the script of the parent model + # it may still try to load hard-coded filename, which might + # be a problem + return self.model.get_model_def_script() + + def serialize(self) -> dict: + from deepmd.pt.model.model import ( + get_model, + ) + + # try to recover the original model + model_def_script = json.loads(self.get_model_def_script()) + model = get_model(model_def_script) + model.load_state_dict(self.model.state_dict()) + return model.serialize() + + @classmethod + def deserialize(cls, data: dict): + raise RuntimeError("Should not touch here.") + + @torch.jit.export + def get_nnei(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return self.model.get_nnei() + + @torch.jit.export + def get_nsel(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return self.model.get_nsel() + + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. 
+ + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + return local_jdata + + @torch.jit.export + def model_output_type(self) -> str: + """Get the output type for the model.""" + return self.model.model_output_type() diff --git a/deepmd/pt/model/model/make_hessian_model.py b/deepmd/pt/model/model/make_hessian_model.py new file mode 100644 index 0000000000..9588348f53 --- /dev/null +++ b/deepmd/pt/model/model/make_hessian_model.py @@ -0,0 +1,216 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import math +from typing import ( + Dict, + List, + Optional, + Union, +) + +import torch + +from deepmd.dpmodel import ( + get_hessian_name, +) + + +def make_hessian_model(T_Model): + """Make a model that can compute Hessian. + + LIMITATION: this model is not jitable due to the restrictions of torch jit script. + + LIMITATION: only the hessian of `forward_common` is available. + + Parameters + ---------- + T_Model + The model. Should provide the `forward_common` and `atomic_output_def` methods + + Returns + ------- + The model computes hessian. 
+ + """ + + class CM(T_Model): + def __init__( + self, + *args, + **kwargs, + ): + super().__init__( + *args, + **kwargs, + ) + self.hess_fitting_def = copy.deepcopy(super().atomic_output_def()) + + def requires_hessian( + self, + keys: Union[str, List[str]], + ): + """Set which output variable(s) requires hessian.""" + if isinstance(keys, str): + keys = [keys] + for kk in self.hess_fitting_def.keys(): + if kk in keys: + self.hess_fitting_def[kk].r_hessian = True + + def atomic_output_def(self): + """Get the fitting output def.""" + return self.hess_fitting_def + + def forward_common( + self, + coord, + atype, + box: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ) -> Dict[str, torch.Tensor]: + """Return model prediction. + + Parameters + ---------- + coord + The coordinates of the atoms. + shape: nf x (nloc x 3) + atype + The type of atoms. shape: nf x nloc + box + The simulation box. shape: nf x 9 + fparam + frame parameter. nf x ndf + aparam + atomic parameter. nf x nloc x nda + do_atomic_virial + If calculate the atomic virial. + + Returns + ------- + ret_dict + The result dict of type Dict[str,torch.Tensor]. + The keys are defined by the `ModelOutputDef`. 
+ + """ + ret = super().forward_common( + coord, + atype, + box=box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + vdef = self.atomic_output_def() + hess_yes = [vdef[kk].r_hessian for kk in vdef.keys()] + if any(hess_yes): + hess = self._cal_hessian_all( + coord, + atype, + box=box, + fparam=fparam, + aparam=aparam, + ) + ret.update(hess) + return ret + + def _cal_hessian_all( + self, + coord: torch.Tensor, + atype: torch.Tensor, + box: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + ) -> Dict[str, torch.Tensor]: + nf, nloc = atype.shape + coord = coord.view([nf, (nloc * 3)]) + box = box.view([nf, 9]) if box is not None else None + fparam = fparam.view([nf, -1]) if fparam is not None else None + aparam = aparam.view([nf, nloc, -1]) if aparam is not None else None + fdef = self.atomic_output_def() + # keys of values that require hessian + hess_keys: List[str] = [] + for kk in fdef.keys(): + if fdef[kk].r_hessian: + hess_keys.append(kk) + # result dict init by empty lists + res = {get_hessian_name(kk): [] for kk in hess_keys} + # loop over variable + for kk in hess_keys: + vdef = fdef[kk] + vshape = vdef.shape + vsize = math.prod(vdef.shape) + # loop over frames + for ii in range(nf): + icoord = coord[ii] + iatype = atype[ii] + ibox = box[ii] if box is not None else None + ifparam = fparam[ii] if fparam is not None else None + iaparam = aparam[ii] if aparam is not None else None + # loop over all components + for idx in range(vsize): + hess = self._cal_hessian_one_component( + idx, icoord, iatype, ibox, ifparam, iaparam + ) + res[get_hessian_name(kk)].append(hess) + res[get_hessian_name(kk)] = torch.stack(res[get_hessian_name(kk)]).view( + (nf, *vshape, nloc * 3, nloc * 3) + ) + return res + + def _cal_hessian_one_component( + self, + ci, + coord, + atype, + box: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = 
class wrapper_class_forward_energy:
    """Callable that maps one frame's coordinates to a scalar output component.

    An instance is handed to `torch.autograd.functional.hessian`, which
    differentiates `__call__` twice with respect to its only argument.
    The value returned is component `ci` of the flattened `energy_redu`
    entry of the parent class' `forward_common` result.
    """

    def __init__(
        self,
        obj: CM,
        ci: int,
        atype: torch.Tensor,
        box: Optional[torch.Tensor],
        fparam: Optional[torch.Tensor],
        aparam: Optional[torch.Tensor],
    ):
        # model whose parent-class forward is evaluated
        self.obj = obj
        # index into the flattened reduced output
        self.ci = ci
        # single-frame inputs (no leading frame axis)
        self.atype = atype
        self.box = box
        self.fparam = fparam
        self.aparam = aparam

    @staticmethod
    def _as_batch(tensor: Optional[torch.Tensor]) -> Optional[torch.Tensor]:
        """Prepend a frame axis of size one; `None` is passed through."""
        return tensor.unsqueeze(0) if tensor is not None else None

    def __call__(
        self,
        xx,
    ):
        # `super(CM, obj)` bypasses CM.forward_common, so the Hessian
        # evaluation does not recurse into another Hessian evaluation.
        prediction = super(CM, self.obj).forward_common(
            xx.unsqueeze(0),
            self.atype.unsqueeze(0),
            self._as_batch(self.box),
            self._as_batch(self.fparam),
            self._as_batch(self.aparam),
            do_atomic_virial=False,
        )
        return prediction["energy_redu"][0].view([-1])[self.ci]
def __init__(
    self,
    *args,
    # underscore to prevent conflict with normal inputs
    atomic_model_: Optional[T_AtomicModel] = None,
    **kwargs,
):
    """Build the model around an atomic model.

    Parameters
    ----------
    *args, **kwargs
        Forwarded to the parent constructor and, when no atomic model is
        supplied, to `T_AtomicModel` to construct one.
    atomic_model_
        A ready-made atomic model. When given, it is used as is and
        `T_AtomicModel` is not instantiated.
    """
    super().__init__(*args, **kwargs)
    self.atomic_model: T_AtomicModel = (
        atomic_model_
        if atomic_model_ is not None
        else T_AtomicModel(*args, **kwargs)
    )
    # precision tables and global precisions are kept as attributes
    self.precision_dict = PRECISION_DICT
    self.reverse_precision_dict = RESERVED_PRECISON_DICT
    self.global_pt_float_precision = GLOBAL_PT_FLOAT_PRECISION
    self.global_pt_ener_float_precision = GLOBAL_PT_ENER_FLOAT_PRECISION
# cannot use the name forward. torch script does not work
def forward_common(
    self,
    coord,
    atype,
    box: Optional[torch.Tensor] = None,
    fparam: Optional[torch.Tensor] = None,
    aparam: Optional[torch.Tensor] = None,
    do_atomic_virial: bool = False,
) -> Dict[str, torch.Tensor]:
    """Predict on the local region from coordinates, types and cell.

    Parameters
    ----------
    coord
        The coordinates of the atoms.
        shape: nf x (nloc x 3)
    atype
        The type of atoms. shape: nf x nloc
    box
        The simulation box. shape: nf x 9
    fparam
        frame parameter. nf x ndf
    aparam
        atomic parameter. nf x nloc x nda
    do_atomic_virial
        If calculate the atomic virial.

    Returns
    -------
    ret_dict
        The result dict of type Dict[str,torch.Tensor].
        The keys are defined by the `ModelOutputDef`.

    """
    # 1. bring all floating-point inputs to the working precision
    cc, bb, fp, ap, input_prec = self.input_type_cast(
        coord, box=box, fparam=fparam, aparam=aparam
    )
    del coord, box, fparam, aparam
    # 2. extend the system and build the neighbor list
    rcut = self.get_rcut()
    sel = self.get_sel()
    (
        extended_coord,
        extended_atype,
        mapping,
        nlist,
    ) = extend_input_and_build_neighbor_list(
        cc,
        atype,
        rcut,
        sel,
        mixed_types=self.mixed_types(),
        box=bb,
    )
    # 3. evaluate on the extended region
    lower_ret = self.forward_common_lower(
        extended_coord,
        extended_atype,
        nlist,
        mapping,
        do_atomic_virial=do_atomic_virial,
        fparam=fp,
        aparam=ap,
    )
    # 4. map the extended-region result back and restore the caller's
    #    precision
    local_ret = communicate_extended_output(
        lower_ret,
        self.model_output_def(),
        mapping,
        do_atomic_virial=do_atomic_virial,
    )
    return self.output_type_cast(local_ret, input_prec)
+ Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + origin_type_map : List[str] + The original type_map in dataset, they are targets to change the output bias. + full_type_map : List[str] + The full type_map in pre-trained model + bias_adjust_mode : str + The mode for changing output bias : ['change-by-statistic', 'set-by-statistic'] + 'change-by-statistic' : perform predictions on labels of target dataset, + and do least square on the errors to obtain the target shift as bias. + 'set-by-statistic' : directly use the statistic output bias in the target dataset. + """ + self.atomic_model.change_out_bias( + merged, + origin_type_map, + full_type_map, + bias_adjust_mode=bias_adjust_mode, + ) + + def forward_common_lower( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ): + """Return model prediction. Lower interface that takes + extended atomic coordinates and types, nlist, and mapping + as input, and returns the predictions on the extended region. + The predictions are not reduced. + + Parameters + ---------- + extended_coord + coodinates in extended region. nf x (nall x 3) + extended_atype + atomic type in extended region. nf x nall + nlist + neighbor list. nf x nloc x nsel. + mapping + mapps the extended indices to local indices. nf x nall. + fparam + frame parameter. nf x ndf + aparam + atomic parameter. nf x nloc x nda + do_atomic_virial + whether calculate atomic virial. + + Returns + ------- + result_dict + the result dict, defined by the `FittingOutputDef`. 
def input_type_cast(
    self,
    coord: torch.Tensor,
    box: Optional[torch.Tensor] = None,
    fparam: Optional[torch.Tensor] = None,
    aparam: Optional[torch.Tensor] = None,
) -> Tuple[
    torch.Tensor,
    Optional[torch.Tensor],
    Optional[torch.Tensor],
    Optional[torch.Tensor],
    str,
]:
    """Cast the floating-point inputs to the global float precision.

    Returns
    -------
    tuple
        `(coord, box, fparam, aparam, input_prec)` where the tensors are in
        `self.global_pt_float_precision` and `input_prec` is the name of
        the dtype `coord` had on entry, looked up in
        `self.reverse_precision_dict`.
    """
    input_prec = self.reverse_precision_dict[coord.dtype]
    # A dtype mismatch between coord and the other inputs is not reported
    # (the check would not pass jit); everything is first aligned with the
    # dtype of coord.
    if box is not None:
        box = box.to(coord.dtype)
    if fparam is not None:
        fparam = fparam.to(coord.dtype)
    if aparam is not None:
        aparam = aparam.to(coord.dtype)

    target = self.global_pt_float_precision
    if input_prec == self.reverse_precision_dict[target]:
        # already in working precision: coord is returned untouched
        return coord, box, fparam, aparam, input_prec
    return (
        coord.to(target),
        box.to(target) if box is not None else None,
        fparam.to(target) if fparam is not None else None,
        aparam.to(target) if aparam is not None else None,
        input_prec,
    )
def output_type_cast(
    self,
    model_ret: Dict[str, torch.Tensor],
    input_prec: str,
) -> Dict[str, torch.Tensor]:
    """Cast model outputs back to the precision of the input.

    Reduced outputs are always cast to
    `self.global_pt_ener_float_precision`; every other output is cast to
    the input precision, and only when that differs from the global float
    precision. `model_ret` is updated in place and returned.
    """
    needs_cast = (
        input_prec != self.reverse_precision_dict[self.global_pt_float_precision]
    )
    input_dtype = self.precision_dict[input_prec]
    odef = self.model_output_def()
    for name in odef.keys():
        if name in model_ret.keys():
            # keys missing from model_ret are skipped, e.g. energy_derv_c is
            # not returned if not do_atomic_virial
            value = model_ret[name]
            if check_operation_applied(odef[name], OutputVariableOperation.REDU):
                if value is not None:
                    model_ret[name] = value.to(self.global_pt_ener_float_precision)
            elif needs_cast:
                if value is not None:
                    model_ret[name] = value.to(input_dtype)
    return model_ret
def _format_nlist(
    self,
    extended_coord: torch.Tensor,
    nlist: torch.Tensor,
    nnei: int,
):
    """Pad or truncate a neighbor list to exactly `nnei` columns.

    * fewer than `nnei` columns: the list is padded with -1;
    * more than `nnei` columns: neighbors are sorted by distance, entries
      farther than `self.get_rcut()` are set to -1 and the first `nnei`
      columns are kept;
    * exactly `nnei` columns: the list is returned unchanged.

    Parameters
    ----------
    extended_coord
        Coordinates in the extended region, viewed as nf x nall x 3.
    nlist
        Neighbor list, nf x nloc x nsel.
    nnei
        Required number of neighbor columns.
    """
    nf, nloc, ncol = nlist.shape
    # nf x nall x 3
    extended_coord = extended_coord.view([nf, -1, 3])
    rcut = self.get_rcut()

    if ncol == nnei:
        pass  # nothing to do
    elif ncol < nnei:
        padding = torch.full(
            [nf, nloc, nnei - ncol],
            -1,
            dtype=nlist.dtype,
            device=nlist.device,
        )
        nlist = torch.cat([nlist, padding], dim=-1)
    else:
        is_real = nlist >= 0
        # padded entries point at atom 0 so that gather stays in range;
        # their distance is replaced by +inf below
        nlist = torch.where(is_real, nlist, 0)
        # nf x nloc x 3
        center = extended_coord[:, :nloc, :]
        # nf x (nloc x ncol) x 3
        gather_index = nlist.view(nf, nloc * ncol, 1).expand(-1, -1, 3)
        # nf x nloc x ncol x 3
        neighbor = torch.gather(extended_coord, 1, gather_index).view(
            nf, nloc, ncol, 3
        )
        # nf x nloc x ncol
        dist = torch.linalg.norm(center[:, :, None, :] - neighbor, dim=-1)
        dist = torch.where(is_real, dist, float("inf"))
        dist, order = torch.sort(dist, dim=-1)
        nlist = torch.gather(nlist, 2, order)
        nlist = torch.where(dist > rcut, -1, nlist)
        nlist = nlist[..., :nnei]
    assert nlist.shape[-1] == nnei
    return nlist
def serialize(self) -> dict:
    """Serialize the model.

    The model keeps no serialized state of its own here: the result is
    exactly what `self.atomic_model.serialize()` returns.

    Returns
    -------
    dict
        The serialized atomic model.
    """
    return self.atomic_model.serialize()
def compute_or_load_stat(
    self,
    sampled_func,
    stat_file_path: Optional[DPPath] = None,
):
    """Compute or load the statistics.

    Pure delegation: both arguments are forwarded unchanged to
    `self.atomic_model.compute_or_load_stat` and its result is returned.

    Parameters
    ----------
    sampled_func
        Passed through to the atomic model.
        NOTE(review): presumably a lazy callable returning sampled data
        frames - confirm against the atomic model.
    stat_file_path
        Optional path passed through to the atomic model.
    """
    return self.atomic_model.compute_or_load_stat(sampled_func, stat_file_path)
class BaseModel(torch.nn.Module, make_base_model()):
    """Common base of the PyTorch models.

    Combines `torch.nn.Module` with the class produced by
    `make_base_model()` and stores the model definition script.
    """

    def __init__(self, *args, **kwargs):
        """Construct a basic model for different tasks.

        `*args` and `**kwargs` are accepted but not used here.
        """
        # Only torch.nn.Module is initialised explicitly.
        torch.nn.Module.__init__(self)
        # model definition script; empty until something assigns it
        self.model_def_script = ""

    def compute_or_load_stat(
        self,
        sampled_func,
        stat_file_path: Optional[DPPath] = None,
    ):
        """
        Compute or load the statistics parameters of the model,
        such as mean and standard deviation of descriptors or the energy bias of the fitting net.
        When `sampled` is provided, all the statistics parameters will be calculated (or re-calculated for update),
        and saved in the `stat_file_path`(s).
        When `sampled` is not provided, it will check the existence of `stat_file_path`(s)
        and load the calculated statistics parameters.

        This base implementation always raises; subclasses have to override it.

        Parameters
        ----------
        sampled_func
            The sampled data frames from different data systems.
        stat_file_path
            The path to the statistics files.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError

    @torch.jit.export
    def get_model_def_script(self) -> str:
        """Get the model definition script."""
        return self.model_def_script

    @torch.jit.export
    def get_ntypes(self) -> int:
        """Returns the number of element types, i.e. the length of `get_type_map()`."""
        return len(self.get_type_map())
def process_spin_input(self, coord, atype, spin):
    """Append one virtual atom per real atom to the coordinates and types.

    A virtual atom sits at `coord + spin * scale`, where `scale` is the
    entry of `self.virtual_scale_mask` selected by the atom's type, and
    carries the type `atype + self.ntypes_real`.

    Returns
    -------
    coord_spin
        Real coordinates followed by the virtual ones along the atom axis.
    atype_spin
        Real types followed by the virtual types.
    """
    nframes, nloc = coord.shape[:-1]
    # per-atom scale factor, broadcast over the three Cartesian components
    scale = self.virtual_scale_mask[atype].reshape([nframes, nloc, 1])
    virtual_coord = coord + spin * scale
    virtual_atype = atype + self.ntypes_real
    coord_spin = torch.concat([coord, virtual_coord], dim=-2)
    atype_spin = torch.concat([atype, virtual_atype], dim=-1)
    return coord_spin, atype_spin
def process_spin_output(
    self, atype, out_tensor, add_mag: bool = True, virtual_scale: bool = True
):
    """Split an output over real and virtual atoms and scale the virtual part.

    Parameters
    ----------
    atype
        Types of the real atoms, used to pick the per-atom mask value.
    out_tensor
        Output whose second axis holds the real atoms followed by the same
        number of virtual atoms.
    add_mag
        Whether to add the magnetic (virtual-atom) part onto the real part.
        Default: True. e.g. True for forces and False for atomic virials
        on real atoms.
    virtual_scale
        Whether to scale the magnetic part with `self.virtual_scale_mask`;
        otherwise `self.spin_mask` is used.
        Default: True. e.g. True for forces and False for atomic virials
        on virtual atoms.

    Returns
    -------
    out_real, out_mag, mask
        The real part, the scaled magnetic part, and a boolean mask that is
        True where the applied per-atom factor is positive.
    """
    nframes, nloc_double = out_tensor.shape[:2]
    nloc = nloc_double // 2
    mask_table = self.virtual_scale_mask if virtual_scale else self.spin_mask
    atomic_mask = mask_table[atype].reshape([nframes, nloc, 1])
    out_real, out_mag = torch.split(out_tensor, [nloc, nloc], dim=1)
    if add_mag:
        out_real = out_real + out_mag
    # flatten the trailing axes so the mask broadcasts, then restore the shape
    mag_shape = out_mag.shape
    out_mag = (out_mag.view([nframes, nloc, -1]) * atomic_mask).view(mag_shape)
    return out_real, out_mag, atomic_mask > 0.0
@staticmethod
def extend_nlist(extended_atype, nlist):
    """Extend a neighbor list so that it also covers the virtual (spin) atoms.

    For every local atom the new list holds its own virtual atom, its real
    neighbors and the virtual atoms of those neighbors. `nloc` rows filled
    with -1 are appended for the local virtual atoms. Indices are finally
    remapped to the ordering in which the local virtual atoms directly
    follow the local real atoms.

    The input `nlist` is only read. (It used to be overwritten temporarily
    in place, which fails for expanded/overlapping views.)

    Parameters
    ----------
    extended_atype
        Types in the extended region; only `shape[1]` (nall) is used.
    nlist
        Neighbor list, nf x nloc x nnei, with -1 marking empty slots.

    Returns
    -------
    torch.Tensor
        Extended neighbor list, nf x (nloc + nloc) x (1 + nnei + nnei).
    """
    nframes, nloc, nnei = nlist.shape
    nall = extended_atype.shape[1]
    nlist_mask = nlist != -1
    # index of the virtual partner of every real neighbor; empty slots stay -1
    nlist_shift = torch.where(
        nlist_mask, nlist + nall, torch.full_like(nlist, -1)
    )
    self_spin = torch.arange(0, nloc, dtype=nlist.dtype, device=nlist.device) + nall
    self_spin = self_spin.view(1, -1, 1).expand(nframes, -1, -1)
    # self spin + real neighbor + virtual neighbor
    # nf x nloc x (1 + nnei + nnei)
    extended_nlist = torch.cat([self_spin, nlist, nlist_shift], dim=-1)
    # nf x (nloc + nloc) x (1 + nnei + nnei)
    extended_nlist = torch.cat(
        [extended_nlist, -1 * torch.ones_like(extended_nlist)], dim=-2
    )
    # update the index for switch; both masks are computed before either
    # update so that an index is never shifted twice
    first_part_index = (nloc <= extended_nlist) & (extended_nlist < nall)
    second_part_index = (nall <= extended_nlist) & (extended_nlist < (nall + nloc))
    # ghost real atoms move behind the local virtual atoms
    extended_nlist[first_part_index] += nloc
    # local virtual atoms move directly behind the local real atoms
    extended_nlist[second_part_index] -= nall - nloc
    return extended_nlist
@staticmethod
def expand_aparam(aparam, nloc: int):
    """Expand the atom parameters for virtual atoms if necessary.

    Parameters
    ----------
    aparam
        Atomic parameters, nframes x natom x numb_aparam.
    nloc
        Required number of atoms along the second axis.

    Returns
    -------
    torch.Tensor
        `aparam` itself when `natom == nloc`; otherwise `aparam` padded
        with zeros along the atom axis up to `nloc` atoms.

    Raises
    ------
    ValueError
        If `aparam` holds more than `nloc` atoms.
    """
    # all three axes are needed; slicing the shape would leave only two
    # values to unpack
    nframes, natom, numb_aparam = aparam.shape
    if natom == nloc:  # good
        pass
    elif natom < nloc:  # for spin with virtual atoms
        aparam = torch.concat(
            [
                aparam,
                torch.zeros(
                    [nframes, nloc - natom, numb_aparam],
                    device=aparam.device,
                    dtype=aparam.dtype,
                ),
            ],
            dim=1,
        )
    else:
        # a single message string (no comma between the two parts)
        raise ValueError(
            f"get an input aparam with {aparam.shape[1]} inputs, "
            f"which is larger than {nloc} atoms."
        )
    return aparam
@torch.jit.export
def get_nnei(self) -> int:
    """Returns the total number of selected neighboring atoms in the cut-off radius.

    The value excludes the virtual atoms: for a backbone that does not use
    mixed types, the backbone's count is halved; with mixed types it is
    returned unchanged.
    """
    # for C++ interface
    if self.backbone_model.mixed_types():
        return self.backbone_model.get_nnei()
    # ignore the virtual selected
    return self.backbone_model.get_nnei() // 2
def compute_or_load_stat(
    self,
    sampled_func,
    stat_file_path: Optional[DPPath] = None,
):
    """
    Compute or load the statistics parameters of the backbone model.

    The sampled data is first converted to the spin representation (real
    plus virtual atoms); the conversion is wrapped in a cached lazy function
    that is handed to `self.backbone_model.compute_or_load_stat` together
    with `stat_file_path`.

    Parameters
    ----------
    sampled_func
        The lazy sampled function to get data frames from different data systems.
    stat_file_path
        The dictionary of paths to the statistics files.
    """
    # keys that are rebuilt (or dropped) below instead of being copied
    handled_keys = ["coord", "atype", "spin", "natoms"]

    @functools.lru_cache
    def spin_sampled_func():
        converted = []
        for system in sampled_func():
            coord_spin, atype_spin = self.process_spin_input(
                system["coord"], system["atype"], system["spin"]
            )
            entry = {
                "coord": coord_spin,
                "atype": atype_spin,
            }
            if "natoms" in system:
                counts = system["natoms"]
                # first two columns are doubled, the remaining columns are
                # repeated once
                entry["natoms"] = torch.cat(
                    [2 * counts[:, :2], counts[:, 2:], counts[:, 2:]], dim=-1
                )
            # everything else is passed through unchanged
            for key in system.keys():
                if key in handled_keys:
                    continue
                entry[key] = system[key]
            converted.append(entry)
        return converted

    self.backbone_model.compute_or_load_stat(spin_sampled_func, stat_file_path)
fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + model_output_type = self.backbone_model.model_output_type() + if "mask" in model_output_type: + model_output_type.pop(model_output_type.index("mask")) + var_name = model_output_type[0] + model_ret[f"{var_name}"] = torch.split( + model_ret[f"{var_name}"], [nloc, nloc], dim=1 + )[0] + if self.backbone_model.do_grad_r(var_name): + ( + model_ret[f"{var_name}_derv_r"], + model_ret[f"{var_name}_derv_r_mag"], + model_ret["mask_mag"], + ) = self.process_spin_output(atype, model_ret[f"{var_name}_derv_r"]) + if self.backbone_model.do_grad_c(var_name) and do_atomic_virial: + ( + model_ret[f"{var_name}_derv_c"], + model_ret[f"{var_name}_derv_c_mag"], + model_ret["mask_mag"], + ) = self.process_spin_output( + atype, + model_ret[f"{var_name}_derv_c"], + add_mag=False, + virtual_scale=False, + ) + return model_ret + + def forward_common_lower( + self, + extended_coord, + extended_atype, + extended_spin, + nlist, + mapping: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ): + nframes, nloc = nlist.shape[:2] + ( + extended_coord_updated, + extended_atype_updated, + nlist_updated, + mapping_updated, + ) = self.process_spin_input_lower( + extended_coord, extended_atype, extended_spin, nlist, mapping=mapping + ) + model_ret = self.backbone_model.forward_common_lower( + extended_coord_updated, + extended_atype_updated, + nlist_updated, + mapping=mapping_updated, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + model_output_type = self.backbone_model.model_output_type() + if "mask" in model_output_type: + model_output_type.pop(model_output_type.index("mask")) + var_name = model_output_type[0] + model_ret[f"{var_name}"] = torch.split( + model_ret[f"{var_name}"], [nloc, nloc], dim=1 + )[0] + if self.backbone_model.do_grad_r(var_name): + ( + model_ret[f"{var_name}_derv_r"], + 
model_ret[f"{var_name}_derv_r_mag"], + model_ret["mask_mag"], + ) = self.process_spin_output_lower( + extended_atype, model_ret[f"{var_name}_derv_r"], nloc + ) + if self.backbone_model.do_grad_c(var_name) and do_atomic_virial: + ( + model_ret[f"{var_name}_derv_c"], + model_ret[f"{var_name}_derv_c_mag"], + model_ret["mask_mag"], + ) = self.process_spin_output_lower( + extended_atype, + model_ret[f"{var_name}_derv_c"], + nloc, + add_mag=False, + virtual_scale=False, + ) + return model_ret + + def serialize(self) -> dict: + return { + "backbone_model": self.backbone_model.serialize(), + "spin": self.spin.serialize(), + } + + @classmethod + def deserialize(cls, data) -> "SpinModel": + backbone_model_obj = DPModel.deserialize(data["backbone_model"]) + spin = Spin.deserialize(data["spin"]) + return cls( + backbone_model=backbone_model_obj, + spin=spin, + ) + + +class SpinEnergyModel(SpinModel): + """A spin model for energy.""" + + model_type = "ener" + + def __init__( + self, + backbone_model, + spin: Spin, + ): + super().__init__(backbone_model, spin) + + def forward( + self, + coord, + atype, + spin, + box: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ) -> Dict[str, torch.Tensor]: + if aparam is not None: + aparam = self.expand_aparam(aparam, coord.shape[1]) + model_ret = self.forward_common( + coord, + atype, + spin, + box, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + model_predict = {} + model_predict["atom_energy"] = model_ret["energy"] + model_predict["energy"] = model_ret["energy_redu"] + model_predict["mask_mag"] = model_ret["mask_mag"] + if self.backbone_model.do_grad_r("energy"): + model_predict["force"] = model_ret["energy_derv_r"].squeeze(-2) + model_predict["force_mag"] = model_ret["energy_derv_r_mag"].squeeze(-2) + # not support virial by far + return model_predict + + @torch.jit.export + def forward_lower( + self, + 
extended_coord, + extended_atype, + extended_spin, + nlist, + mapping: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + do_atomic_virial: bool = False, + ): + model_ret = self.forward_common_lower( + extended_coord, + extended_atype, + extended_spin, + nlist, + mapping=mapping, + fparam=fparam, + aparam=aparam, + do_atomic_virial=do_atomic_virial, + ) + model_predict = {} + model_predict["atom_energy"] = model_ret["energy"] + model_predict["energy"] = model_ret["energy_redu"] + model_predict["mask_mag"] = model_ret["mask_mag"] + if self.backbone_model.do_grad_r("energy"): + model_predict["extended_force"] = model_ret["energy_derv_r"].squeeze(-2) + model_predict["extended_force_mag"] = model_ret[ + "energy_derv_r_mag" + ].squeeze(-2) + # not support virial by far + return model_predict diff --git a/deepmd/pt/model/model/transform_output.py b/deepmd/pt/model/model/transform_output.py new file mode 100644 index 0000000000..730e6b29d0 --- /dev/null +++ b/deepmd/pt/model/model/transform_output.py @@ -0,0 +1,249 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Dict, + List, + Optional, +) + +import torch + +from deepmd.dpmodel import ( + FittingOutputDef, + ModelOutputDef, + OutputVariableDef, + get_deriv_name, + get_reduce_name, +) +from deepmd.pt.utils import ( + env, +) + + +def atomic_virial_corr( + extended_coord: torch.Tensor, + atom_energy: torch.Tensor, +): + nall = extended_coord.shape[1] + nloc = atom_energy.shape[1] + coord, _ = torch.split(extended_coord, [nloc, nall - nloc], dim=1) + # no derivative with respect to the loc coord. 
+ coord = coord.detach() + ce = coord * atom_energy + sumce0, sumce1, sumce2 = torch.split(torch.sum(ce, dim=1), [1, 1, 1], dim=-1) + faked_grad = torch.ones_like(sumce0) + lst = torch.jit.annotate(List[Optional[torch.Tensor]], [faked_grad]) + extended_virial_corr0 = torch.autograd.grad( + [sumce0], [extended_coord], grad_outputs=lst, create_graph=True + )[0] + assert extended_virial_corr0 is not None + extended_virial_corr1 = torch.autograd.grad( + [sumce1], [extended_coord], grad_outputs=lst, create_graph=True + )[0] + assert extended_virial_corr1 is not None + extended_virial_corr2 = torch.autograd.grad( + [sumce2], [extended_coord], grad_outputs=lst, create_graph=True + )[0] + assert extended_virial_corr2 is not None + extended_virial_corr = torch.concat( + [ + extended_virial_corr0.unsqueeze(-1), + extended_virial_corr1.unsqueeze(-1), + extended_virial_corr2.unsqueeze(-1), + ], + dim=-1, + ) + return extended_virial_corr + + +def task_deriv_one( + atom_energy: torch.Tensor, + energy: torch.Tensor, + extended_coord: torch.Tensor, + do_virial: bool = True, + do_atomic_virial: bool = False, +): + faked_grad = torch.ones_like(energy) + lst = torch.jit.annotate(List[Optional[torch.Tensor]], [faked_grad]) + extended_force = torch.autograd.grad( + [energy], [extended_coord], grad_outputs=lst, create_graph=True + )[0] + assert extended_force is not None + extended_force = -extended_force + if do_virial: + extended_virial = extended_force.unsqueeze(-1) @ extended_coord.unsqueeze(-2) + # the correction sums to zero, which does not contribute to global virial + if do_atomic_virial: + extended_virial_corr = atomic_virial_corr(extended_coord, atom_energy) + extended_virial = extended_virial + extended_virial_corr + # to [...,3,3] -> [...,9] + extended_virial = extended_virial.view(list(extended_virial.shape[:-2]) + [9]) # noqa:RUF005 + else: + extended_virial = None + return extended_force, extended_virial + + +def get_leading_dims( + vv: torch.Tensor, + vdef: 
OutputVariableDef, +): + """Get the dimensions of nf x nloc.""" + vshape = vv.shape + return list(vshape[: (len(vshape) - len(vdef.shape))]) + + +def get_atom_axis( + vdef: torch.Tensor, +): + """Get the axis of atoms.""" + atom_axis = -(len(vdef.shape) + 1) + return atom_axis + + +def take_deriv( + vv: torch.Tensor, + svv: torch.Tensor, + vdef: OutputVariableDef, + coord_ext: torch.Tensor, + do_virial: bool = False, + do_atomic_virial: bool = False, +): + size = 1 + for ii in vdef.shape: + size *= ii + vv1 = vv.view(list(get_leading_dims(vv, vdef)) + [size]) # noqa: RUF005 + svv1 = svv.view(list(get_leading_dims(svv, vdef)) + [size]) # noqa: RUF005 + split_vv1 = torch.split(vv1, [1] * size, dim=-1) + split_svv1 = torch.split(svv1, [1] * size, dim=-1) + split_ff, split_avir = [], [] + for vvi, svvi in zip(split_vv1, split_svv1): + # nf x nloc x 3, nf x nloc x 9 + ffi, aviri = task_deriv_one( + vvi, + svvi, + coord_ext, + do_virial=do_virial, + do_atomic_virial=do_atomic_virial, + ) + # nf x nloc x 1 x 3, nf x nloc x 1 x 9 + ffi = ffi.unsqueeze(-2) + split_ff.append(ffi) + if do_virial: + assert aviri is not None + aviri = aviri.unsqueeze(-2) + split_avir.append(aviri) + # nf x nall x v_dim x 3, nf x nall x v_dim x 9 + out_lead_shape = list(coord_ext.shape[:-1]) + vdef.shape + ff = torch.concat(split_ff, dim=-2).view(out_lead_shape + [3]) # noqa: RUF005 + if do_virial: + avir = torch.concat(split_avir, dim=-2).view(out_lead_shape + [9]) # noqa: RUF005 + else: + avir = None + return ff, avir + + +def fit_output_to_model_output( + fit_ret: Dict[str, torch.Tensor], + fit_output_def: FittingOutputDef, + coord_ext: torch.Tensor, + do_atomic_virial: bool = False, +) -> Dict[str, torch.Tensor]: + """Transform the output of the fitting network to + the model output. 
+ + """ + redu_prec = env.GLOBAL_PT_ENER_FLOAT_PRECISION + model_ret = dict(fit_ret.items()) + for kk, vv in fit_ret.items(): + vdef = fit_output_def[kk] + shap = vdef.shape + atom_axis = -(len(shap) + 1) + if vdef.reduciable: + kk_redu = get_reduce_name(kk) + model_ret[kk_redu] = torch.sum(vv.to(redu_prec), dim=atom_axis) + if vdef.r_differentiable: + kk_derv_r, kk_derv_c = get_deriv_name(kk) + dr, dc = take_deriv( + vv, + model_ret[kk_redu], + vdef, + coord_ext, + do_virial=vdef.c_differentiable, + do_atomic_virial=do_atomic_virial, + ) + model_ret[kk_derv_r] = dr + if vdef.c_differentiable: + assert dc is not None + model_ret[kk_derv_c] = dc + model_ret[kk_derv_c + "_redu"] = torch.sum( + model_ret[kk_derv_c].to(redu_prec), dim=1 + ) + return model_ret + + +def communicate_extended_output( + model_ret: Dict[str, torch.Tensor], + model_output_def: ModelOutputDef, + mapping: torch.Tensor, # nf x nloc + do_atomic_virial: bool = False, +) -> Dict[str, torch.Tensor]: + """Transform the output of the model network defined on + local and ghost (extended) atoms to local atoms. 
+ + """ + redu_prec = env.GLOBAL_PT_ENER_FLOAT_PRECISION + new_ret = {} + for kk in model_output_def.keys_outp(): + vv = model_ret[kk] + vdef = model_output_def[kk] + new_ret[kk] = vv + if vdef.reduciable: + kk_redu = get_reduce_name(kk) + new_ret[kk_redu] = model_ret[kk_redu] + # nf x nloc + vldims = get_leading_dims(vv, vdef) + # nf x nall + mldims = list(mapping.shape) + kk_derv_r, kk_derv_c = get_deriv_name(kk) + if vdef.r_differentiable: + # vdim x 3 + derv_r_ext_dims = list(vdef.shape) + [3] # noqa:RUF005 + mapping = mapping.view(mldims + [1] * len(derv_r_ext_dims)).expand( + [-1] * len(mldims) + derv_r_ext_dims + ) + force = torch.zeros( + vldims + derv_r_ext_dims, dtype=vv.dtype, device=vv.device + ) + # nf x nloc x nvar x 3 + new_ret[kk_derv_r] = torch.scatter_reduce( + force, + 1, + index=mapping, + src=model_ret[kk_derv_r], + reduce="sum", + ) + if vdef.c_differentiable: + assert vdef.r_differentiable + derv_c_ext_dims = list(vdef.shape) + [9] # noqa:RUF005 + # nf x nloc x nvar x 3 -> nf x nloc x nvar x 9 + mapping = torch.tile( + mapping, + [1] * (len(mldims) + len(vdef.shape)) + [3], + ) + virial = torch.zeros( + vldims + derv_c_ext_dims, dtype=vv.dtype, device=vv.device + ) + # nf x nloc x nvar x 9 + new_ret[kk_derv_c] = torch.scatter_reduce( + virial, + 1, + index=mapping, + src=model_ret[kk_derv_c], + reduce="sum", + ) + new_ret[kk_derv_c + "_redu"] = torch.sum( + new_ret[kk_derv_c].to(redu_prec), dim=1 + ) + if not do_atomic_virial: + # pop atomic virial, because it is not correctly calculated. 
+ new_ret.pop(kk_derv_c) + return new_ret diff --git a/deepmd/pt/model/network/__init__.py b/deepmd/pt/model/network/__init__.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/deepmd/pt/model/network/__init__.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/deepmd/pt/model/network/mlp.py b/deepmd/pt/model/network/mlp.py new file mode 100644 index 0000000000..762461111e --- /dev/null +++ b/deepmd/pt/model/network/mlp.py @@ -0,0 +1,225 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + ClassVar, + Dict, + Optional, +) + +import numpy as np +import torch +import torch.nn as nn + +from deepmd.pt.utils import ( + env, +) + +device = env.DEVICE + +from deepmd.dpmodel.utils import ( + NativeLayer, +) +from deepmd.dpmodel.utils import NetworkCollection as DPNetworkCollection +from deepmd.dpmodel.utils import ( + make_embedding_network, + make_fitting_network, + make_multilayer_network, +) +from deepmd.pt.utils.env import ( + DEFAULT_PRECISION, + PRECISION_DICT, +) +from deepmd.pt.utils.utils import ( + ActivationFn, + to_numpy_array, + to_torch_tensor, +) + +try: + from deepmd._version import version as __version__ +except ImportError: + __version__ = "unknown" + + +def empty_t(shape, precision): + return torch.empty(shape, dtype=precision, device=device) + + +class MLPLayer(nn.Module): + def __init__( + self, + num_in, + num_out, + bias: bool = True, + use_timestep: bool = False, + activation_function: Optional[str] = None, + resnet: bool = False, + bavg: float = 0.0, + stddev: float = 1.0, + precision: str = DEFAULT_PRECISION, + ): + super().__init__() + # only use_timestep when skip connection is established. 
+ self.use_timestep = use_timestep and ( + num_out == num_in or num_out == num_in * 2 + ) + self.activate_name = activation_function + self.activate = ActivationFn(self.activate_name) + self.precision = precision + self.prec = PRECISION_DICT[self.precision] + self.matrix = nn.Parameter(data=empty_t((num_in, num_out), self.prec)) + nn.init.normal_(self.matrix.data, std=stddev / np.sqrt(num_out + num_in)) + if bias: + self.bias = nn.Parameter( + data=empty_t([num_out], self.prec), + ) + nn.init.normal_(self.bias.data, mean=bavg, std=stddev) + else: + self.bias = None + if self.use_timestep: + self.idt = nn.Parameter(data=empty_t([num_out], self.prec)) + nn.init.normal_(self.idt.data, mean=0.1, std=0.001) + else: + self.idt = None + self.resnet = resnet + + def check_type_consistency(self): + precision = self.precision + + def check_var(var): + if var is not None: + # assertion "float64" == "double" would fail + assert PRECISION_DICT[var.dtype.name] is PRECISION_DICT[precision] + + check_var(self.w) + check_var(self.b) + check_var(self.idt) + + def dim_in(self) -> int: + return self.matrix.shape[0] + + def dim_out(self) -> int: + return self.matrix.shape[1] + + def forward( + self, + xx: torch.Tensor, + ) -> torch.Tensor: + """One MLP layer used by DP model. + + Parameters + ---------- + xx : torch.Tensor + The input. + + Returns + ------- + yy: torch.Tensor + The output. + """ + ori_prec = xx.dtype + xx = xx.to(self.prec) + yy = ( + torch.matmul(xx, self.matrix) + self.bias + if self.bias is not None + else torch.matmul(xx, self.matrix) + ) + yy = self.activate(yy).clone() + yy = yy * self.idt if self.idt is not None else yy + if self.resnet: + if xx.shape[-1] == yy.shape[-1]: + yy += xx + elif 2 * xx.shape[-1] == yy.shape[-1]: + yy += torch.concat([xx, xx], dim=-1) + else: + yy = yy + yy = yy.to(ori_prec) + return yy + + def serialize(self) -> dict: + """Serialize the layer to a dict. + + Returns + ------- + dict + The serialized layer. 
+ """ + nl = NativeLayer( + self.matrix.shape[0], + self.matrix.shape[1], + bias=self.bias is not None, + use_timestep=self.idt is not None, + activation_function=self.activate_name, + resnet=self.resnet, + precision=self.precision, + ) + nl.w, nl.b, nl.idt = ( + to_numpy_array(self.matrix), + to_numpy_array(self.bias), + to_numpy_array(self.idt), + ) + return nl.serialize() + + @classmethod + def deserialize(cls, data: dict) -> "MLPLayer": + """Deserialize the layer from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. + """ + nl = NativeLayer.deserialize(data) + obj = cls( + nl["matrix"].shape[0], + nl["matrix"].shape[1], + bias=nl["bias"] is not None, + use_timestep=nl["idt"] is not None, + activation_function=nl["activation_function"], + resnet=nl["resnet"], + precision=nl["precision"], + ) + prec = PRECISION_DICT[obj.precision] + + def check_load_param(ss): + return ( + nn.Parameter(data=to_torch_tensor(nl[ss])) + if nl[ss] is not None + else None + ) + + obj.matrix = check_load_param("matrix") + obj.bias = check_load_param("bias") + obj.idt = check_load_param("idt") + return obj + + +MLP_ = make_multilayer_network(MLPLayer, nn.Module) + + +class MLP(MLP_): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.layers = torch.nn.ModuleList(self.layers) + + forward = MLP_.call + + +EmbeddingNet = make_embedding_network(MLP, MLPLayer) + +FittingNet = make_fitting_network(EmbeddingNet, MLP, MLPLayer) + + +class NetworkCollection(DPNetworkCollection, nn.Module): + """PyTorch implementation of NetworkCollection.""" + + NETWORK_TYPE_MAP: ClassVar[Dict[str, type]] = { + "network": MLP, + "embedding_network": EmbeddingNet, + "fitting_network": FittingNet, + } + + def __init__(self, *args, **kwargs): + # init both two base classes + DPNetworkCollection.__init__(self, *args, **kwargs) + nn.Module.__init__(self) + self.networks = self._networks = torch.nn.ModuleList(self._networks) diff --git 
a/deepmd/pt/model/network/network.py b/deepmd/pt/model/network/network.py new file mode 100644 index 0000000000..c895f642e1 --- /dev/null +++ b/deepmd/pt/model/network/network.py @@ -0,0 +1,2035 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + List, + Optional, +) + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from deepmd.pt.model.network.mlp import ( + EmbeddingNet, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +try: + from typing import ( + Final, + ) +except ImportError: + from torch.jit import Final + +from functools import ( + partial, +) + +import torch.utils.checkpoint + +from deepmd.pt.utils.utils import ( + ActivationFn, +) + + +def Tensor(*shape): + return torch.empty(shape, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE) + + +class Dropout(nn.Module): + def __init__(self, p): + super().__init__() + self.p = p + + def forward(self, x, inplace: bool = False): + if self.p > 0 and self.training: + return F.dropout(x, p=self.p, training=True, inplace=inplace) + else: + return x + + +class Identity(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return x + + +class DropPath(torch.nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" + + def __init__(self, prob=None): + super().__init__() + self.drop_prob = prob + + def forward(self, x): + if self.drop_prob == 0.0 or not self.training: + return x + keep_prob = 1 - self.drop_prob + shape = (x.shape[0],) + (1,) * ( + x.ndim - 1 + ) # work with diff dim tensors, not just 2D ConvNets + random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) + random_tensor.floor_() # binarize + output = x.div(keep_prob) * random_tensor + return output + + def extra_repr(self) -> str: + return f"prob={self.drop_prob}" + + +def softmax_dropout( + input_x, dropout_prob, 
is_training=True, mask=None, bias=None, inplace=True +): + input_x = input_x.contiguous() + if not inplace: + input_x = input_x.clone() + if mask is not None: + input_x += mask + if bias is not None: + input_x += bias + return F.dropout(F.softmax(input_x, dim=-1), p=dropout_prob, training=is_training) + + +def checkpoint_sequential( + functions, + input_x, + enabled=True, +): + def wrap_tuple(a): + return (a,) if type(a) is not tuple else a + + def exec(func, a): + return wrap_tuple(func(*a)) + + def get_wrap_exec(func): + def wrap_exec(*a): + return exec(func, a) + + return wrap_exec + + input_x = wrap_tuple(input_x) + + is_grad_enabled = torch.is_grad_enabled() + + if enabled and is_grad_enabled: + for func in functions: + input_x = torch.utils.checkpoint.checkpoint(get_wrap_exec(func), *input_x) + else: + for func in functions: + input_x = exec(func, input_x) + return input_x + + +class ResidualLinear(nn.Module): + resnet: Final[int] + + def __init__(self, num_in, num_out, bavg=0.0, stddev=1.0, resnet_dt=False): + """Construct a residual linear layer. + + Args: + - num_in: Width of input tensor. + - num_out: Width of output tensor. + - resnet_dt: Using time-step in the ResNet construction. 
+ """ + super().__init__() + self.num_in = num_in + self.num_out = num_out + self.resnet = resnet_dt + + self.matrix = nn.Parameter(data=Tensor(num_in, num_out)) + nn.init.normal_(self.matrix.data, std=stddev / np.sqrt(num_out + num_in)) + self.bias = nn.Parameter(data=Tensor(1, num_out)) + nn.init.normal_(self.bias.data, mean=bavg, std=stddev) + if self.resnet: + self.idt = nn.Parameter(data=Tensor(1, num_out)) + nn.init.normal_(self.idt.data, mean=1.0, std=0.001) + + def forward(self, inputs): + """Return X ?+ X*W+b.""" + xw_plus_b = torch.matmul(inputs, self.matrix) + self.bias + hidden = torch.tanh(xw_plus_b) + if self.resnet: + hidden = hidden * self.idt + if self.num_in == self.num_out: + return inputs + hidden + elif self.num_in * 2 == self.num_out: + return torch.cat([inputs, inputs], dim=1) + hidden + else: + return hidden + + +class TypeFilter(nn.Module): + use_tebd: Final[bool] + tebd_mode: Final[str] + + def __init__( + self, + offset, + length, + neuron, + return_G=False, + tebd_dim=0, + use_tebd=False, + tebd_mode="concat", + ): + """Construct a filter on the given element as neighbor. + + Args: + - offset: Element offset in the descriptor matrix. + - length: Atom count of this element. + - neuron: Number of neurons in each hidden layers of the embedding net. + """ + super().__init__() + self.offset = offset + self.length = length + self.tebd_dim = tebd_dim + self.use_tebd = use_tebd + self.tebd_mode = tebd_mode + supported_tebd_mode = ["concat", "dot", "dot_residual_s", "dot_residual_t"] + assert ( + tebd_mode in supported_tebd_mode + ), f"Unknown tebd_mode {tebd_mode}! Supported are {supported_tebd_mode}." 
+ if use_tebd and tebd_mode == "concat": + self.neuron = [1 + tebd_dim * 2, *neuron] + else: + self.neuron = [1, *neuron] + + deep_layers = [] + for ii in range(1, len(self.neuron)): + one = ResidualLinear(self.neuron[ii - 1], self.neuron[ii]) + deep_layers.append(one) + self.deep_layers = nn.ModuleList(deep_layers) + + deep_layers_t = [] + if use_tebd and tebd_mode in ["dot", "dot_residual_s", "dot_residual_t"]: + self.neuron_t = [tebd_dim * 2, *neuron] + for ii in range(1, len(self.neuron_t)): + one = ResidualLinear(self.neuron_t[ii - 1], self.neuron_t[ii]) + deep_layers_t.append(one) + self.deep_layers_t = nn.ModuleList(deep_layers_t) + + self.return_G = return_G + + def forward( + self, + inputs, + atype_tebd: Optional[torch.Tensor] = None, + nlist_tebd: Optional[torch.Tensor] = None, + ): + """Calculate decoded embedding for each atom. + + Args: + - inputs: Descriptor matrix. Its shape is [nframes*natoms[0], len_descriptor]. + + Returns + ------- + - `torch.Tensor`: Embedding contributed by me. Its shape is [nframes*natoms[0], 4, self.neuron[-1]]. 
+ """ + inputs_i = inputs[:, self.offset * 4 : (self.offset + self.length) * 4] + inputs_reshape = inputs_i.reshape( + -1, 4 + ) # shape is [nframes*natoms[0]*self.length, 4] + xyz_scatter = inputs_reshape[:, 0:1] + + # concat the tebd as input + if self.use_tebd and self.tebd_mode == "concat": + assert nlist_tebd is not None and atype_tebd is not None + nlist_tebd = nlist_tebd.reshape(-1, self.tebd_dim) + atype_tebd = atype_tebd.reshape(-1, self.tebd_dim) + # [nframes * nloc * nnei, 1 + tebd_dim * 2] + xyz_scatter = torch.concat([xyz_scatter, nlist_tebd, atype_tebd], dim=1) + + for linear in self.deep_layers: + xyz_scatter = linear(xyz_scatter) + # [nframes * nloc * nnei, out_size] + + # dot the tebd output + if self.use_tebd and self.tebd_mode in [ + "dot", + "dot_residual_s", + "dot_residual_t", + ]: + assert nlist_tebd is not None and atype_tebd is not None + nlist_tebd = nlist_tebd.reshape(-1, self.tebd_dim) + atype_tebd = atype_tebd.reshape(-1, self.tebd_dim) + # [nframes * nloc * nnei, tebd_dim * 2] + two_side_tebd = torch.concat([nlist_tebd, atype_tebd], dim=1) + for linear in self.deep_layers_t: + two_side_tebd = linear(two_side_tebd) + # [nframes * nloc * nnei, out_size] + if self.tebd_mode == "dot": + xyz_scatter = xyz_scatter * two_side_tebd + elif self.tebd_mode == "dot_residual_s": + xyz_scatter = xyz_scatter * two_side_tebd + xyz_scatter + elif self.tebd_mode == "dot_residual_t": + xyz_scatter = xyz_scatter * two_side_tebd + two_side_tebd + + xyz_scatter = xyz_scatter.view( + -1, self.length, self.neuron[-1] + ) # shape is [nframes*natoms[0], self.length, self.neuron[-1]] + if self.return_G: + return xyz_scatter + else: + # shape is [nframes*natoms[0], 4, self.length] + inputs_reshape = inputs_i.view(-1, self.length, 4).permute(0, 2, 1) + return torch.matmul(inputs_reshape, xyz_scatter) + + +class SimpleLinear(nn.Module): + use_timestep: Final[bool] + + def __init__( + self, + num_in, + num_out, + bavg=0.0, + stddev=1.0, + use_timestep=False, + 
activate=None, + bias: bool = True, + ): + """Construct a linear layer. + + Args: + - num_in: Width of input tensor. + - num_out: Width of output tensor. + - use_timestep: Apply time-step to weight. + - activate: type of activate func. + """ + super().__init__() + self.num_in = num_in + self.num_out = num_out + self.use_timestep = use_timestep + self.activate = ActivationFn(activate) + + self.matrix = nn.Parameter(data=Tensor(num_in, num_out)) + nn.init.normal_(self.matrix.data, std=stddev / np.sqrt(num_out + num_in)) + if bias: + self.bias = nn.Parameter(data=Tensor(1, num_out)) + nn.init.normal_(self.bias.data, mean=bavg, std=stddev) + else: + self.bias = None + if self.use_timestep: + self.idt = nn.Parameter(data=Tensor(1, num_out)) + nn.init.normal_(self.idt.data, mean=0.1, std=0.001) + + def forward(self, inputs): + """Return X*W+b.""" + xw = torch.matmul(inputs, self.matrix) + hidden = xw + self.bias if self.bias is not None else xw + hidden = self.activate(hidden) + if self.use_timestep: + hidden = hidden * self.idt + return hidden + + +class Linear(nn.Linear): + def __init__( + self, + d_in: int, + d_out: int, + bias: bool = True, + init: str = "default", + ): + super().__init__( + d_in, + d_out, + bias=bias, + dtype=env.GLOBAL_PT_FLOAT_PRECISION, + device=env.DEVICE, + ) + + self.use_bias = bias + + if self.use_bias: + with torch.no_grad(): + self.bias.fill_(0) + + if init == "default": + self._trunc_normal_init(1.0) + elif init == "relu": + self._trunc_normal_init(2.0) + elif init == "glorot": + self._glorot_uniform_init() + elif init == "gating": + self._zero_init(self.use_bias) + elif init == "normal": + self._normal_init() + elif init == "final": + self._zero_init(False) + else: + raise ValueError("Invalid init method.") + + def _trunc_normal_init(self, scale=1.0): + # Constant from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.) 
+ TRUNCATED_NORMAL_STDDEV_FACTOR = 0.87962566103423978 + _, fan_in = self.weight.shape + scale = scale / max(1, fan_in) + std = (scale**0.5) / TRUNCATED_NORMAL_STDDEV_FACTOR + nn.init.trunc_normal_(self.weight, mean=0.0, std=std) + + def _glorot_uniform_init(self): + nn.init.xavier_uniform_(self.weight, gain=1) + + def _zero_init(self, use_bias=True): + with torch.no_grad(): + self.weight.fill_(0.0) + if use_bias: + with torch.no_grad(): + self.bias.fill_(1.0) + + def _normal_init(self): + nn.init.kaiming_normal_(self.weight, nonlinearity="linear") + + +class Transition(nn.Module): + def __init__(self, d_in, n, dropout=0.0): + super().__init__() + + self.d_in = d_in + self.n = n + + self.linear_1 = Linear(self.d_in, self.n * self.d_in, init="relu") + self.act = nn.GELU() + self.linear_2 = Linear(self.n * self.d_in, d_in, init="final") + self.dropout = dropout + + def _transition(self, x): + x = self.linear_1(x) + x = self.act(x) + x = F.dropout(x, p=self.dropout, training=self.training) + x = self.linear_2(x) + return x + + def forward( + self, + x: torch.Tensor, + ) -> torch.Tensor: + x = self._transition(x=x) + return x + + +class Embedding(nn.Embedding): + def __init__( + self, + num_embeddings: int, + embedding_dim: int, + padding_idx: Optional[int] = None, + dtype=torch.float64, + ): + super().__init__( + num_embeddings, embedding_dim, padding_idx=padding_idx, dtype=dtype + ) + self._normal_init() + + if padding_idx is not None: + self.weight.data[self.padding_idx].zero_() + + def _normal_init(self, std=0.02): + nn.init.normal_(self.weight, mean=0.0, std=std) + + +class NonLinearHead(nn.Module): + def __init__(self, input_dim, out_dim, activation_fn, hidden=None): + super().__init__() + hidden = input_dim if not hidden else hidden + self.linear1 = SimpleLinear(input_dim, hidden, activate=activation_fn) + self.linear2 = SimpleLinear(hidden, out_dim) + + def forward(self, x): + x = self.linear1(x) + x = self.linear2(x) + return x + + +class NonLinear(nn.Module): 
+ def __init__(self, input, output_size, hidden=None): + super().__init__() + + if hidden is None: + hidden = input + self.layer1 = Linear(input, hidden, init="relu") + self.layer2 = Linear(hidden, output_size, init="final") + + def forward(self, x): + x = F.linear(x, self.layer1.weight) + # x = fused_ops.bias_torch_gelu(x, self.layer1.bias) + x = nn.GELU()(x) + self.layer1.bias + x = self.layer2(x) + return x + + def zero_init(self): + nn.init.zeros_(self.layer2.weight) + nn.init.zeros_(self.layer2.bias) + + +class MaskLMHead(nn.Module): + """Head for masked language modeling.""" + + def __init__(self, embed_dim, output_dim, activation_fn, weight=None): + super().__init__() + self.dense = SimpleLinear(embed_dim, embed_dim) + self.activation_fn = ActivationFn(activation_fn) + self.layer_norm = nn.LayerNorm(embed_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION) + + if weight is None: + weight = nn.Linear( + embed_dim, output_dim, bias=False, dtype=env.GLOBAL_PT_FLOAT_PRECISION + ).weight + self.weight = weight + self.bias = nn.Parameter( + torch.zeros(output_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION) + ) + + def forward(self, features, masked_tokens: Optional[torch.Tensor] = None, **kwargs): + # Only project the masked tokens while training, + # saves both memory and computation + if masked_tokens is not None: + features = features[masked_tokens, :] + + x = self.dense(features) + x = self.activation_fn(x) + x = self.layer_norm(x) + # project back to size of vocabulary with bias + x = F.linear(x, self.weight) + self.bias + return x + + +class ResidualDeep(nn.Module): + def __init__( + self, type_id, embedding_width, neuron, bias_atom_e, out_dim=1, resnet_dt=False + ): + """Construct a filter on the given element as neighbor. + + Args: + - typei: Element ID. + - embedding_width: Embedding width per atom. + - neuron: Number of neurons in each hidden layers of the embedding net. + - resnet_dt: Using time-step in the ResNet construction. 
class TypeEmbedNet(nn.Module):
    """Thin wrapper that looks up per-type embeddings from ``TypeEmbedNetConsistent``."""

    def __init__(self, type_nums, embed_dim, bavg=0.0, stddev=1.0):
        """Construct a type embedding net.

        ``bavg`` and ``stddev`` are accepted but not used by this constructor.
        """
        super().__init__()
        embedding_options = {
            "ntypes": type_nums,
            "neuron": [embed_dim],
            "padding": True,
            "activation_function": "Linear",
            "precision": "default",
        }
        self.embedding = TypeEmbedNetConsistent(**embedding_options)
        # nn.init.normal_(self.embedding.weight[:-1], mean=bavg, std=stddev)

    def forward(self, atype):
        """Look up the embedding of every entry of ``atype``.

        Args:
            atype: Type of each input, [nframes, nloc] or [nframes, nloc, nnei].

        Returns
        -------
        type_embedding:
            The embedding table indexed by ``atype``.
        """
        table = self.embedding(atype.device)
        return table[atype]

    def share_params(self, base_class, shared_level, resume=False):
        """
        Share the parameters of self to the base_class with shared_level during multitask training.
        If not start from checkpoint (resume is False),
        some separated parameters (e.g. mean and stddev) will be re-calculated across different classes.
        """
        assert (
            self.__class__ == base_class.__class__
        ), "Only TypeEmbedNet of the same type can share params!"
        if shared_level != 0:
            raise NotImplementedError
        # Re-pointing the sub-modules links all parameters; buffers would need
        # to be linked manually.
        for name in self._modules:
            self._modules[name] = base_class._modules[name]
+ self.embedding_net = EmbeddingNet( + ntypes, + self.neuron, + self.activation_function, + self.resnet_dt, + self.precision, + ) + for param in self.parameters(): + param.requires_grad = trainable + + def forward(self, device: torch.device): + """Caulate type embedding network. + + Returns + ------- + type_embedding: torch.Tensor + Type embedding network. + """ + embed = self.embedding_net( + torch.eye(self.ntypes, dtype=self.prec, device=device) + ) + if self.padding: + embed = torch.cat( + [embed, torch.zeros(1, embed.shape[1], dtype=self.prec, device=device)] + ) + return embed + + @classmethod + def deserialize(cls, data: dict): + """Deserialize the model. + + Parameters + ---------- + data : dict + The serialized data + + Returns + ------- + TypeEmbedNetConsistent + The deserialized model + """ + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 1, 1) + data_cls = data.pop("@class") + assert data_cls == "TypeEmbedNet", f"Invalid class {data_cls}" + + embedding_net = EmbeddingNet.deserialize(data.pop("embedding")) + type_embedding_net = cls(**data) + type_embedding_net.embedding_net = embedding_net + return type_embedding_net + + def serialize(self) -> dict: + """Serialize the model. 
@torch.jit.script
def gaussian(x, mean, std: float):
    """Evaluate the normalized Gaussian density element-wise.

    Computes ``exp(-0.5 * ((x - mean) / std) ** 2) / (sqrt(2 * pi) * std)``.

    Args:
        x: Tensor of sample positions.
        mean: Tensor of Gaussian centers, combined with ``x`` by subtraction.
        std: Standard deviation shared by all Gaussians.

    Returns
    -------
    torch.Tensor
        Density values, same shape as ``x - mean``.
    """
    # Full double-precision literal: the previous `3.14159` skewed the
    # normalization by roughly 4e-7 (relative). A literal is used so the
    # scripted function needs no extra module in scope.
    pi = 3.141592653589793
    a = (2 * pi) ** 0.5
    return torch.exp(-0.5 * (((x - mean) / std) ** 2)) / (a * std)
+ ntypes: Number of atom types. + """ + super().__init__() + self.gbf = GaussianKernel(K=kernel_num, num_pair=num_pair, stop=rcut) + self.gbf_proj = NonLinear(kernel_num, pair_embed_dim) + self.embed_dim = embed_dim + self.pair_embed_dim = pair_embed_dim + self.atomic_sum_gbf = atomic_sum_gbf + if self.atomic_sum_gbf: + if kernel_num != self.embed_dim: + self.edge_proj = torch.nn.Linear( + kernel_num, self.embed_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION + ) + else: + self.edge_proj = None + self.ntypes = ntypes + self.nnei = sel + + def forward(self, coord_selected, atom_feature, edge_type_2dim, edge_feature): + ## local cluster forward + """Calculate decoded embedding for each atom. + Args: + coord_selected: Clustered atom coordinates with shape [nframes*nloc, natoms, 3]. + atom_feature: Previous calculated atomic features with shape [nframes*nloc, natoms, embed_dim]. + edge_type_2dim: Edge index for gbf calculation with shape [nframes*nloc, natoms, natoms, 2]. + edge_feature: Previous calculated edge features with shape [nframes*nloc, natoms, natoms, pair_dim]. + + Returns + ------- + atom_feature: Updated atomic features with shape [nframes*nloc, natoms, embed_dim]. + attn_bias: Updated edge features as attention bias with shape [nframes*nloc, natoms, natoms, pair_dim]. + delta_pos: Delta position for force/vector prediction with shape [nframes*nloc, natoms, natoms, 3]. 
class NeighborWiseAttention(nn.Module):
    """A stack of ``NeighborWiseAttentionLayer`` modules applied one after another."""

    def __init__(
        self,
        layer_num,
        nnei,
        embed_dim,
        hidden_dim,
        dotr=False,
        do_mask=False,
        post_ln=True,
        ffn=False,
        ffn_embed_dim=1024,
        activation="tanh",
        scaling_factor=1.0,
        head_num=1,
        normalize=True,
        temperature=None,
    ):
        """Construct a neighbor-wise attention net."""
        super().__init__()
        self.layer_num = layer_num
        # Every layer is created with the same hyper-parameters.
        self.attention_layers = nn.ModuleList(
            [
                NeighborWiseAttentionLayer(
                    nnei,
                    embed_dim,
                    hidden_dim,
                    dotr=dotr,
                    do_mask=do_mask,
                    post_ln=post_ln,
                    ffn=ffn,
                    ffn_embed_dim=ffn_embed_dim,
                    activation=activation,
                    scaling_factor=scaling_factor,
                    head_num=head_num,
                    normalize=normalize,
                    temperature=temperature,
                )
                for _ in range(self.layer_num)
            ]
        )

    def forward(
        self,
        input_G,
        nei_mask,
        input_r: Optional[torch.Tensor] = None,
        sw: Optional[torch.Tensor] = None,
    ):
        """Run the input through every attention layer in order.

        Args:
            input_G: Input G, [nframes * nloc, nnei, embed_dim].
            nei_mask: neighbor mask, [nframes * nloc, nnei].
            input_r: normalized radial, [nframes, nloc, nei, 3].
            sw: Optional tensor forwarded unchanged to every layer.

        Returns
        -------
        out: Output G, [nframes * nloc, nnei, embed_dim]

        """
        hidden = input_G
        # Iterate over the ModuleList directly, see
        # https://github.com/pytorch/pytorch/issues/39165#issuecomment-635472592
        for attention_layer in self.attention_layers:
            hidden = attention_layer(hidden, nei_mask, input_r=input_r, sw=sw)
        return hidden
self.activation_fn(x) + x = self.fc2(x) + x = residual + x + if self.post_ln: + x = self.final_layer_norm(x) + return x + + +class GatedSelfAttetion(nn.Module): + def __init__( + self, + nnei, + embed_dim, + hidden_dim, + dotr=False, + do_mask=False, + scaling_factor=1.0, + head_num=1, + normalize=True, + temperature=None, + bias=True, + smooth=True, + ): + """Construct a neighbor-wise attention net.""" + super().__init__() + self.nnei = nnei + self.embed_dim = embed_dim + self.hidden_dim = hidden_dim + self.head_num = head_num + self.dotr = dotr + self.do_mask = do_mask + if temperature is None: + self.scaling = (self.hidden_dim * scaling_factor) ** -0.5 + else: + self.scaling = temperature + self.normalize = normalize + self.in_proj = SimpleLinear( + embed_dim, + hidden_dim * 3, + bavg=0.0, + stddev=1.0, + use_timestep=False, + bias=bias, + ) + self.out_proj = SimpleLinear( + hidden_dim, embed_dim, bavg=0.0, stddev=1.0, use_timestep=False, bias=bias + ) + self.smooth = smooth + + def forward( + self, + query, + nei_mask, + input_r: Optional[torch.Tensor] = None, + sw: Optional[torch.Tensor] = None, + attnw_shift: float = 20.0, + ): + """ + Args: + query: input G, [nframes * nloc, nnei, embed_dim]. + nei_mask: neighbor mask, [nframes * nloc, nnei]. + input_r: normalized radial, [nframes, nloc, nei, 3]. 
class LocalSelfMultiheadAttention(nn.Module):
    """Multi-head self-attention in which each atom attends to its neighbor list.

    Keys and values are gathered through ``nlist``; the query of every local
    atom is compared against its ``nnei`` gathered neighbors only. No output
    projection is applied (it is disabled, see the TODO in ``__init__``).
    """

    def __init__(self, feature_dim, attn_head, scaling_factor=1.0):
        """Build the fused q/k/v projection.

        Args:
            feature_dim: Width of the atomic features; must be divisible by ``attn_head``.
            attn_head: Number of attention heads.
            scaling_factor: Multiplier on ``head_dim`` inside the ``** -0.5`` query scaling.
        """
        super().__init__()
        self.feature_dim = feature_dim
        self.attn_head = attn_head
        self.head_dim = feature_dim // attn_head
        assert (
            feature_dim % attn_head == 0
        ), f"feature_dim {feature_dim} must be divided by attn_head {attn_head}!"
        self.scaling = (self.head_dim * scaling_factor) ** -0.5
        self.in_proj = SimpleLinear(self.feature_dim, self.feature_dim * 3)
        # TODO debug
        # self.out_proj = SimpleLinear(self.feature_dim, self.feature_dim)

    def forward(
        self,
        query,
        attn_bias: Optional[torch.Tensor] = None,
        nlist_mask: Optional[torch.Tensor] = None,
        nlist: Optional[torch.Tensor] = None,
        return_attn=True,
    ):
        """Apply neighbor-list attention.

        Args:
            query: Atomic features, [nframes, nloc, feature_dim].
            attn_bias: Optional additive bias on the attention logits, broadcastable
                to [nframes, attn_head, nloc, nnei]. Applied whenever it is given.
            nlist_mask: Boolean neighbor mask, [nframes, nloc, nnei]; entries that are
                ``False`` get a logit of ``-inf``. Required.
            nlist: Neighbor indices into the ``nloc`` axis, [nframes, nloc, nnei]. Required.
            return_attn: Whether to also return the logits and probabilities.

        Returns
        -------
        o or (o, attn_weights, attn)
            ``o`` is [nframes, nloc, feature_dim]; ``attn_weights`` are the masked
            (and biased) logits, [nframes, attn_head, nloc, nnei]; ``attn`` are the
            softmax probabilities, [nframes * attn_head * nloc, 1, nnei].

        Raises
        ------
        ValueError
            If ``nlist`` or ``nlist_mask`` is missing.
        """
        # Both default to None in the signature but are indispensable here;
        # fail with a clear message instead of an AttributeError on None.
        if nlist is None or nlist_mask is None:
            raise ValueError(
                "nlist and nlist_mask must be provided to LocalSelfMultiheadAttention!"
            )
        nframes, nloc, feature_dim = query.size()
        _, _, nnei = nlist.size()
        assert feature_dim == self.feature_dim
        # [nframes, nloc, feature_dim]
        q, k, v = self.in_proj(query).chunk(3, dim=-1)
        # [nframes * attn_head * nloc, 1, head_dim]
        q = (
            q.view(nframes, nloc, self.attn_head, self.head_dim)
            .transpose(1, 2)
            .contiguous()
            .view(nframes * self.attn_head * nloc, 1, self.head_dim)
            * self.scaling
        )

        # Gather the key/value rows of every neighbor.
        # [nframes, nloc * nnei, feature_dim]
        index = nlist.view(nframes, -1).unsqueeze(-1).expand(-1, -1, feature_dim)
        k = torch.gather(k, dim=1, index=index)
        # [nframes, nloc * nnei, feature_dim]
        v = torch.gather(v, dim=1, index=index)
        # [nframes * attn_head * nloc, nnei, head_dim]
        k = (
            k.view(nframes, nloc, nnei, self.attn_head, self.head_dim)
            .permute(0, 3, 1, 2, 4)
            .contiguous()
            .view(nframes * self.attn_head * nloc, nnei, self.head_dim)
        )
        v = (
            v.view(nframes, nloc, nnei, self.attn_head, self.head_dim)
            .permute(0, 3, 1, 2, 4)
            .contiguous()
            .view(nframes * self.attn_head * nloc, nnei, self.head_dim)
        )
        # [nframes * attn_head * nloc, 1, nnei]
        attn_weights = torch.bmm(q, k.transpose(1, 2))
        # Mask out blank neighbors.
        # [nframes, attn_head, nloc, nnei]
        attn_weights = attn_weights.view(
            nframes, self.attn_head, nloc, nnei
        ).masked_fill(~nlist_mask.unsqueeze(1), float("-inf"))
        # Add the bias whenever one is supplied. This used to be keyed on
        # `return_attn`, which dropped the bias for return_attn=False and
        # crashed for return_attn=True without a bias.
        if attn_bias is not None:
            attn_weights = attn_weights + attn_bias
        # softmax
        # [nframes * attn_head * nloc, 1, nnei]
        attn = F.softmax(attn_weights, dim=-1).view(
            nframes * self.attn_head * nloc, 1, nnei
        )
        # [nframes * attn_head * nloc, 1, head_dim]
        o = torch.bmm(attn, v)
        assert list(o.size()) == [nframes * self.attn_head * nloc, 1, self.head_dim]
        # Merge the heads back: [nframes, nloc, feature_dim]
        o = (
            o.view(nframes, self.attn_head, nloc, self.head_dim)
            .transpose(1, 2)
            .contiguous()
            .view(nframes, nloc, self.feature_dim)
        )
        ## TODO debug:
        # o = self.out_proj(o)
        if return_attn:
            return o, attn_weights, attn
        return o
class EnergyHead(nn.Module):
    """Two-layer head: linear, layer norm, GELU, then an output linear layer."""

    def __init__(
        self,
        input_dim,
        output_dim,
    ):
        """Create the layer norm and the two linear layers.

        Args:
            input_dim: Width of the incoming features (also the hidden width).
            output_dim: Width of the returned tensor.
        """
        super().__init__()
        self.layer_norm = nn.LayerNorm(input_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION)
        self.linear_in = Linear(input_dim, input_dim, init="relu")

        self.linear_out = Linear(input_dim, output_dim, bias=True, init="final")

    def forward(self, x):
        """Cast ``x`` to the dtype of the first linear layer and apply the head."""
        cast = x.type(self.linear_in.weight.dtype)
        normed = self.layer_norm(self.linear_in(cast))
        return self.linear_out(F.gelu(normed))
dtype=env.GLOBAL_PT_FLOAT_PRECISION + ) + self.linear_out = nn.Linear( + d_hid**2, d_pair, dtype=env.GLOBAL_PT_FLOAT_PRECISION + ) + self.act = nn.GELU() + + def _opm(self, a, b): + # [nframes, nloc, d] + nframes, nloc, d = a.shape + a = a.view(nframes, nloc, 1, d, 1) + b = b.view(nframes, 1, nloc, 1, d) + # [nframes, nloc, nloc, d, d] + outer = a * b + outer = outer.view(outer.shape[:-2] + (-1,)) + outer = self.linear_out(outer) + return outer + + def forward( + self, + m: torch.Tensor, + nlist: torch.Tensor, + op_mask: float, + op_norm: float, + ) -> torch.Tensor: + ab = self.linear_in(m) + ab = ab * op_mask + a, b = ab.chunk(2, dim=-1) + # [ncluster, natoms, natoms, d_pair] + z = self._opm(a, b) + z *= op_norm + return z + + +class Attention(nn.Module): + def __init__( + self, + q_dim: int, + k_dim: int, + v_dim: int, + head_dim: int, + num_heads: int, + gating: bool = False, + dropout: float = 0.0, + ): + super().__init__() + + self.num_heads = num_heads + self.head_dim = head_dim + total_dim = head_dim * self.num_heads + self.total_dim = total_dim + self.q_dim = q_dim + self.gating = gating + self.linear_q = Linear(q_dim, total_dim, bias=False, init="glorot") + self.linear_k = Linear(k_dim, total_dim, bias=False, init="glorot") + self.linear_v = Linear(v_dim, total_dim, bias=False, init="glorot") + self.linear_o = Linear(total_dim, q_dim, init="final") + self.linear_g = None + if self.gating: + self.linear_g = Linear(q_dim, total_dim, init="gating") + # precompute the 1/sqrt(head_dim) + self.norm = head_dim**-0.5 + self.dropout = dropout + + def forward( + self, + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + bias: torch.Tensor, + mask: torch.Tensor = None, + ) -> torch.Tensor: + nframes, nloc, embed_dim = q.size() + g = None + if self.linear_g is not None: + # gating, use raw query input + # [nframes, nloc, total_dim] + g = self.linear_g(q) + # [nframes, nloc, total_dim] + q = self.linear_q(q) + q *= self.norm + # [nframes, nloc, total_dim] + k = 
class AtomAttention(nn.Module):
    """Multi-head attention whose logits are biased by a projected pair representation."""

    def __init__(
        self,
        q_dim: int,
        k_dim: int,
        v_dim: int,
        pair_dim: int,
        head_dim: int,
        num_heads: int,
        gating: bool = False,
        dropout: float = 0.0,
    ):
        """Build the attention module and the pair-to-bias projection.

        Args:
            q_dim, k_dim, v_dim: Input widths of query, key and value.
            pair_dim: Width of the pair representation.
            head_dim: Width of each attention head.
            num_heads: Number of attention heads.
            gating: Forwarded to ``Attention``.
            dropout: Forwarded to ``Attention``.
        """
        super().__init__()

        self.mha = Attention(
            q_dim, k_dim, v_dim, head_dim, num_heads, gating=gating, dropout=dropout
        )
        self.layer_norm = nn.LayerNorm(pair_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION)
        self.linear_bias = Linear(pair_dim, num_heads)

    def forward(
        self,
        q: torch.Tensor,
        k: torch.Tensor,
        v: torch.Tensor,
        nlist: torch.Tensor,
        pair: torch.Tensor,
        mask: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """Attend with a bias computed from ``pair``.

        Args:
            q, k, v: Inputs handed to the wrapped ``Attention`` unchanged.
            nlist: Accepted for interface compatibility; not used here.
            pair: 4-D pair representation with ``pair_dim`` as its last axis.
            mask: Optional mask forwarded to the wrapped ``Attention``.

        Returns
        -------
        torch.Tensor
            Whatever the wrapped ``Attention`` returns.
        """
        pair = self.layer_norm(pair)
        # Move the head axis produced by the projection from last to second.
        bias = self.linear_bias(pair).permute(0, 3, 1, 2).contiguous()
        return self.mha(q, k, v, bias=bias, mask=mask)
class EvoformerEncoderLayer(nn.Module):
    """Transformer-style encoder layer: local self-attention followed by a feed-forward block.

    Both sub-blocks are residual. Layer norm is applied before each sub-block
    when ``post_ln`` is false and after the residual sum when it is true.
    """

    def __init__(
        self,
        feature_dim: int = 768,
        ffn_dim: int = 2048,
        attn_head: int = 8,
        activation_fn: str = "gelu",
        post_ln: bool = False,
    ):
        """Build the attention and feed-forward sub-modules.

        Args:
            feature_dim: Width of the atomic features.
            ffn_dim: Hidden width of the feed-forward block.
            attn_head: Number of attention heads.
            activation_fn: Name handed to ``ActivationFn``; ``None`` disables the
                activation between the two feed-forward layers.
            post_ln: Apply layer norm after (instead of before) each sub-block.
        """
        super().__init__()
        self.feature_dim = feature_dim
        self.ffn_dim = ffn_dim
        self.attn_head = attn_head
        self.activation_fn = (
            ActivationFn(activation_fn) if activation_fn is not None else None
        )
        self.post_ln = post_ln
        self.self_attn_layer_norm = nn.LayerNorm(
            self.feature_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION
        )

        self.self_attn = LocalSelfMultiheadAttention(
            self.feature_dim,
            self.attn_head,
        )
        self.final_layer_norm = nn.LayerNorm(
            self.feature_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION
        )
        self.fc1 = SimpleLinear(self.feature_dim, self.ffn_dim)
        self.fc2 = SimpleLinear(self.ffn_dim, self.feature_dim)

    def forward(
        self,
        x,
        attn_bias: Optional[torch.Tensor] = None,
        nlist_mask: Optional[torch.Tensor] = None,
        nlist: Optional[torch.Tensor] = None,
        return_attn=True,
    ):
        """Apply the layer.

        Args:
            x: Atomic features; the last axis is normalized over ``feature_dim``.
            attn_bias, nlist_mask, nlist: Forwarded unchanged to the attention module.
            return_attn: Whether to also return what the attention module reports.

        Returns
        -------
        x or (x, attn_weights, attn_probs)
            The updated features, plus the two extra values returned by the
            attention module when ``return_attn`` is true.
        """
        attn_weights = None
        attn_probs = None

        # --- self-attention sub-block ---
        residual = x
        if not self.post_ln:
            x = self.self_attn_layer_norm(x)
        x = self.self_attn(
            query=x,
            attn_bias=attn_bias,
            nlist_mask=nlist_mask,
            nlist=nlist,
            return_attn=return_attn,
        )
        if return_attn:
            x, attn_weights, attn_probs = x
        x = residual + x
        if self.post_ln:
            x = self.self_attn_layer_norm(x)

        # --- feed-forward sub-block ---
        residual = x
        if not self.post_ln:
            x = self.final_layer_norm(x)
        x = self.fc1(x)
        # The constructor stores None when no activation was requested;
        # calling it unconditionally used to raise a TypeError.
        if self.activation_fn is not None:
            x = self.activation_fn(x)
        x = self.fc2(x)
        x = residual + x
        if self.post_ln:
            x = self.final_layer_norm(x)

        if return_attn:
            return x, attn_weights, attn_probs
        return x
final_head_layer_norm + self._emb_layer_norm = emb_layer_norm + self.activation_function = activation_function + self.evo_residual = evo_residual + self.residual_factor = residual_factor + if atomic_residual and atomic_dim == feature_dim: + self.atomic_residual = True + else: + self.atomic_residual = False + self.in_proj = SimpleLinear( + self.atomic_dim, + self.feature_dim, + bavg=0.0, + stddev=1.0, + use_timestep=False, + activate="tanh", + ) # TODO + self.out_proj = SimpleLinear( + self.feature_dim, + self.atomic_dim, + bavg=0.0, + stddev=1.0, + use_timestep=False, + activate="tanh", + ) + if self._emb_layer_norm: + self.emb_layer_norm = nn.LayerNorm( + self.feature_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION + ) + + ## TODO debug : self.in_proj_pair = NonLinearHead(self.pair_dim, self.attn_head, activation_fn=None) + self.in_proj_pair = SimpleLinear(self.pair_dim, self.attn_head, activate=None) + evoformer_encoder_layers = [] + for i in range(self.layer_num): + evoformer_encoder_layers.append( + EvoformerEncoderLayer( + feature_dim=self.feature_dim, + ffn_dim=self.ffn_dim, + attn_head=self.attn_head, + activation_fn=self.activation_function, + post_ln=self.post_ln, + ) + ) + self.evoformer_encoder_layers = nn.ModuleList(evoformer_encoder_layers) + if self._final_layer_norm: + self.final_layer_norm = nn.LayerNorm( + self.feature_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION + ) + if self._final_head_layer_norm: + self.final_head_layer_norm = nn.LayerNorm( + self.attn_head, dtype=env.GLOBAL_PT_FLOAT_PRECISION + ) + + def forward(self, atomic_rep, pair_rep, nlist, nlist_type, nlist_mask): + """Encoder the atomic and pair representations. + + Args: + - atomic_rep: Atomic representation with shape [nframes, nloc, atomic_dim]. + - pair_rep: Pair representation with shape [nframes, nloc, nnei, pair_dim]. + - nlist: Neighbor list with shape [nframes, nloc, nnei]. + - nlist_type: Neighbor types with shape [nframes, nloc, nnei]. 
+ - nlist_mask: Neighbor mask with shape [nframes, nloc, nnei], `False` if blank. + + Returns + ------- + - atomic_rep: Atomic representation after encoder with shape [nframes, nloc, feature_dim]. + - transformed_atomic_rep: Transformed atomic representation after encoder with shape [nframes, nloc, atomic_dim]. + - pair_rep: Pair representation after encoder with shape [nframes, nloc, nnei, attn_head]. + - delta_pair_rep: Delta pair representation after encoder with shape [nframes, nloc, nnei, attn_head]. + - norm_x: Normalization loss of atomic_rep. + - norm_delta_pair_rep: Normalization loss of delta_pair_rep. + """ + # Global branch + nframes, nloc, _ = atomic_rep.size() + nnei = pair_rep.shape[2] + input_atomic_rep = atomic_rep + # [nframes, nloc, feature_dim] + if self.atomic_residual: + atomic_rep = atomic_rep + self.in_proj(atomic_rep) + else: + atomic_rep = self.in_proj(atomic_rep) + + if self._emb_layer_norm: + atomic_rep = self.emb_layer_norm(atomic_rep) + + # Local branch + # [nframes, nloc, nnei, attn_head] + pair_rep = self.in_proj_pair(pair_rep) + # [nframes, attn_head, nloc, nnei] + pair_rep = pair_rep.permute(0, 3, 1, 2).contiguous() + input_pair_rep = pair_rep + pair_rep = pair_rep.masked_fill(~nlist_mask.unsqueeze(1), float("-inf")) + + for i in range(self.layer_num): + atomic_rep, pair_rep, _ = self.evoformer_encoder_layers[i]( + atomic_rep, + attn_bias=pair_rep, + nlist_mask=nlist_mask, + nlist=nlist, + return_attn=True, + ) + + def norm_loss(x, eps=1e-10, tolerance=1.0): + # x = x.float() + max_norm = x.shape[-1] ** 0.5 + norm = torch.sqrt(torch.sum(x**2, dim=-1) + eps) + error = F.relu((norm - max_norm).abs() - tolerance) + return error + + def masked_mean(mask, value, dim=-1, eps=1e-10): + return ( + torch.sum(mask * value, dim=dim) / (eps + torch.sum(mask, dim=dim)) + ).mean() + + # atomic_rep shape: [nframes, nloc, feature_dim] + # pair_rep shape: [nframes, attn_head, nloc, nnei] + + norm_x = torch.mean(norm_loss(atomic_rep)) + if 
self._final_layer_norm: + atomic_rep = self.final_layer_norm(atomic_rep) + + delta_pair_rep = pair_rep - input_pair_rep + delta_pair_rep = delta_pair_rep.masked_fill(~nlist_mask.unsqueeze(1), 0) + # [nframes, nloc, nnei, attn_head] + delta_pair_rep = ( + delta_pair_rep.view(nframes, self.attn_head, nloc, nnei) + .permute(0, 2, 3, 1) + .contiguous() + ) + + # [nframes, nloc, nnei] + norm_delta_pair_rep = norm_loss(delta_pair_rep) + norm_delta_pair_rep = masked_mean(mask=nlist_mask, value=norm_delta_pair_rep) + if self._final_head_layer_norm: + delta_pair_rep = self.final_head_layer_norm(delta_pair_rep) + + if self.atomic_residual: + transformed_atomic_rep = atomic_rep + self.out_proj(atomic_rep) + else: + transformed_atomic_rep = self.out_proj(atomic_rep) + + if self.evo_residual: + transformed_atomic_rep = ( + self.residual_factor * transformed_atomic_rep + input_atomic_rep + ) * (1 / np.sqrt(2)) + + return ( + atomic_rep, + transformed_atomic_rep, + pair_rep, + delta_pair_rep, + norm_x, + norm_delta_pair_rep, + ) + + +class Evoformer3bEncoderLayer(nn.Module): + def __init__( + self, + nnei, + embedding_dim: int = 768, + pair_dim: int = 64, + pair_hidden_dim: int = 32, + ffn_embedding_dim: int = 3072, + num_attention_heads: int = 8, + dropout: float = 0.1, + droppath_prob: float = 0.0, + pair_dropout: float = 0.25, + attention_dropout: float = 0.1, + activation_dropout: float = 0.1, + pre_ln: bool = True, + tri_update: bool = True, + ): + super().__init__() + # Initialize parameters + self.nnei = nnei + self.embedding_dim = embedding_dim + self.num_attention_heads = num_attention_heads + self.attention_dropout = attention_dropout + + # self.dropout = dropout + self.activation_dropout = activation_dropout + + if droppath_prob > 0.0: + self.dropout_module = DropPath(droppath_prob) + else: + self.dropout_module = Dropout(dropout) + + # self.self_attn = AtomAttentionLocal(embedding_dim, embedding_dim, embedding_dim, pair_dim, + # embedding_dim // num_attention_heads, 
num_attention_heads, + # gating=False, dropout=attention_dropout) + self.self_attn = AtomAttention( + embedding_dim, + embedding_dim, + embedding_dim, + pair_dim, + embedding_dim // num_attention_heads, + num_attention_heads, + gating=False, + dropout=attention_dropout, + ) + # layer norm associated with the self attention layer + self.pre_ln = pre_ln + self.self_attn_layer_norm = nn.LayerNorm( + self.embedding_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION + ) + self.fc1 = nn.Linear( + self.embedding_dim, ffn_embedding_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION + ) + self.fc2 = nn.Linear( + ffn_embedding_dim, self.embedding_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION + ) + self.final_layer_norm = nn.LayerNorm( + self.embedding_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION + ) + + self.x_layer_norm_opm = nn.LayerNorm( + self.embedding_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION + ) + # self.opm = OuterProductLocal(self.embedding_dim, pair_dim, d_hid=pair_hidden_dim) + self.opm = OuterProduct(self.embedding_dim, pair_dim, d_hid=pair_hidden_dim) + # self.pair_layer_norm_opm = nn.LayerNorm(pair_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION) + self.pair_layer_norm_ffn = nn.LayerNorm( + pair_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION + ) + self.pair_ffn = Transition( + pair_dim, + 1, + dropout=activation_dropout, + ) + self.pair_dropout = pair_dropout + self.tri_update = tri_update + if self.tri_update: + self.pair_layer_norm_trimul = nn.LayerNorm( + pair_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION + ) + self.pair_tri_mul = TriangleMultiplication(pair_dim, pair_hidden_dim) + + def update_pair( + self, + x, + pair, + nlist, + op_mask, + op_norm, + ): + # local: + # [nframes, nloc, nnei, pair_dim] + # global: + # [nframes, nloc, nloc, pair_dim] + pair = pair + self.dropout_module( + self.opm(self.x_layer_norm_opm(x), nlist, op_mask, op_norm) + ) + if not self.pre_ln: + pair = self.pair_layer_norm_opm(pair) + return x, pair + + def shared_dropout(self, x, shared_dim, dropout): + shape = list(x.shape) 
+ shape[shared_dim] = 1 + with torch.no_grad(): + mask = x.new_ones(shape) + return F.dropout(mask, p=dropout, training=self.training) * x + + def forward( + self, + x: torch.Tensor, + pair: torch.Tensor, + nlist: torch.Tensor = None, + attn_mask: Optional[torch.Tensor] = None, + pair_mask: Optional[torch.Tensor] = None, + op_mask: float = 1.0, + op_norm: float = 1.0, + ): + """Encoder the atomic and pair representations. + + Args: + - x: Atomic representation with shape [ncluster, natoms, embed_dim]. + - pair: Pair representation with shape [ncluster, natoms, natoms, pair_dim]. + - attn_mask: Attention mask with shape [ncluster, head, natoms, natoms]. + - pair_mask: Neighbor mask with shape [ncluster, natoms, natoms]. + + """ + # [ncluster, natoms, embed_dim] + residual = x + if self.pre_ln: + x = self.self_attn_layer_norm(x) + x = self.self_attn( + x, + x, + x, + nlist=nlist, + pair=pair, + mask=attn_mask, + ) + # x = F.dropout(x, p=self.dropout, training=self.training) + x = self.dropout_module(x) + x = residual + x + if not self.pre_ln: + x = self.self_attn_layer_norm(x) + + residual = x + if self.pre_ln: + x = self.final_layer_norm(x) + x = F.linear(x, self.fc1.weight) + # x = fused_ops.bias_torch_gelu(x, self.fc1.bias) + x = nn.GELU()(x) + self.fc1.bias + x = F.dropout(x, p=self.activation_dropout, training=self.training) + x = self.fc2(x) + # x = F.dropout(x, p=self.dropout, training=self.training) + x = self.dropout_module(x) + + x = residual + x + if not self.pre_ln: + x = self.final_layer_norm(x) + + block = [ + partial( + self.update_pair, + nlist=nlist, + op_mask=op_mask, + op_norm=op_norm, + ) + ] + + x, pair = checkpoint_sequential( + block, + input_x=(x, pair), + ) + + if self.tri_update: + residual_pair = pair + if self.pre_ln: + pair = self.pair_layer_norm_trimul(pair) + + pair = self.shared_dropout( + self.pair_tri_mul(pair, pair_mask), -3, self.pair_dropout + ) + pair = residual_pair + pair + if not self.pre_ln: + pair = 
class Evoformer3bEncoder(nn.Module):
    """Stack of `Evoformer3bEncoderLayer` blocks applied one after another.

    Parameters
    ----------
    nnei
        Stored on the module and forwarded unchanged to every layer.
    layer_num
        Number of encoder layers in the stack.
    attn_head
        Forwarded to every layer as `num_attention_heads`.
    atomic_dim
        Forwarded to every layer as its embedding dimension.
    pair_dim, pair_hidden_dim, ffn_embedding_dim
        Forwarded positionally to every layer.
    dropout, pair_dropout, attention_dropout, activation_dropout
        Dropout rates forwarded unchanged to every layer.
    droppath_prob
        Upper bound of the per-layer drop-path rate. When positive, layer ``i``
        receives the ``i``-th value of ``torch.linspace(0, droppath_prob, layer_num)``;
        otherwise every layer receives ``0.0``.
    pre_ln, tri_update
        Forwarded unchanged to every layer.
    **kwargs
        Accepted and ignored.
    """

    def __init__(
        self,
        nnei,
        layer_num=6,
        attn_head=8,
        atomic_dim=768,
        pair_dim=64,
        pair_hidden_dim=32,
        ffn_embedding_dim=3072,
        dropout: float = 0.1,
        droppath_prob: float = 0.0,
        pair_dropout: float = 0.25,
        attention_dropout: float = 0.1,
        activation_dropout: float = 0.1,
        pre_ln: bool = True,
        tri_update: bool = True,
        **kwargs,
    ):
        super().__init__()
        self.nnei = nnei
        if droppath_prob > 0:
            # Linearly increasing drop-path rate: 0 for the first layer,
            # `droppath_prob` for the last one.
            droppath_probs = [
                x.item() for x in torch.linspace(0, droppath_prob, layer_num)
            ]
        else:
            # One explicit 0.0 per layer. This used to be `None`, which made the
            # `droppath_probs[...]` lookup below raise TypeError for the default
            # `droppath_prob=0.0`, i.e. the class could not be built with defaults.
            droppath_probs = [0.0] * layer_num

        self.layers = nn.ModuleList(
            [
                Evoformer3bEncoderLayer(
                    nnei,
                    atomic_dim,
                    pair_dim,
                    pair_hidden_dim,
                    ffn_embedding_dim,
                    num_attention_heads=attn_head,
                    dropout=dropout,
                    droppath_prob=droppath_probs[layer_idx],
                    pair_dropout=pair_dropout,
                    attention_dropout=attention_dropout,
                    activation_dropout=activation_dropout,
                    pre_ln=pre_ln,
                    tri_update=tri_update,
                )
                for layer_idx in range(layer_num)
            ]
        )

    def forward(self, x, pair, attn_mask=None, pair_mask=None, atom_mask=None):
        """Encode the atomic and pair representations.

        Args:
            x: Atomic representation with shape [ncluster, natoms, atomic_dim].
            pair: Pair representation with shape [ncluster, natoms, natoms, pair_dim].
            attn_mask: Attention mask (with -inf for softmax) with shape [ncluster, head, natoms, natoms].
            pair_mask: Pair mask (with 1 for real atom pair and 0 for padding) with shape [ncluster, natoms, natoms].
            atom_mask: Atom mask (with 1 for real atom and 0 for padding) with shape [ncluster, natoms].
                Despite the `None` default it is dereferenced unconditionally,
                so callers must always supply it.

        Returns
        -------
            x: Atomic representation with shape [ncluster, natoms, atomic_dim].
            pair: Pair representation with shape [ncluster, natoms, natoms, pair_dim].

        """
        # [ncluster, natoms, 1]
        op_mask = atom_mask.unsqueeze(-1)
        # Scale the mask by 1 / sqrt(natoms).
        op_mask = op_mask * (op_mask.size(-2) ** -0.5)
        eps = 1e-3
        # [ncluster, natoms, natoms, 1]; eps keeps the reciprocal finite where
        # the product of the two masks is zero.
        op_norm = 1.0 / (eps + torch.einsum("...bc,...dc->...bdc", op_mask, op_mask))
        for layer in self.layers:
            x, pair = layer(
                x,
                pair,
                nlist=None,
                attn_mask=attn_mask,
                pair_mask=pair_mask,
                op_mask=op_mask,
                op_norm=op_norm,
            )
        return x, pair
= embedding_width + self.engergy_proj = EnergyHead(self.embedding_width, 1) + self.energe_agg_factor = nn.Embedding(4, 1, dtype=env.GLOBAL_PT_FLOAT_PRECISION) + nn.init.normal_(self.energe_agg_factor.weight, 0, 0.01) + bias_atom_e = torch.tensor(bias_atom_e) + self.register_buffer("bias_atom_e", bias_atom_e) + self.pair_embed_dim = pair_embed_dim + self.attention_heads = attention_heads + self.node_proc = NodeTaskHead( + self.embedding_width, self.pair_embed_dim, self.attention_heads + ) + self.node_proc.zero_init() + + def forward(self, output, pair, delta_pos, atype, nframes, nloc): + # [nframes x nloc x tebd_dim] + output_nloc = (output[:, 0, :]).reshape(nframes, nloc, self.embedding_width) + # Optional: GRRG or mean of gbf TODO + + # energy outut + # [nframes, nloc] + energy_out = self.engergy_proj(output_nloc).view(nframes, nloc) + # [nframes, nloc] + energy_factor = self.energe_agg_factor(torch.zeros_like(atype)).view( + nframes, nloc + ) + energy_out = (energy_out * energy_factor) + self.bias_atom_e[atype] + energy_out = energy_out.sum(dim=-1) + + # vector output + # predict_force: [(nframes x nloc) x (1 + nnei2) x 3] + predict_force = self.node_proc(output, pair, delta_pos=delta_pos) + # predict_force_nloc: [nframes x nloc x 3] + predict_force_nloc = (predict_force[:, 0, :]).reshape(nframes, nloc, 3) + return energy_out, predict_force_nloc diff --git a/deepmd/pt/model/task/base_fitting.py b/deepmd/pt/model/task/base_fitting.py new file mode 100644 index 0000000000..884a1bfe57 --- /dev/null +++ b/deepmd/pt/model/task/base_fitting.py @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import torch + +from deepmd.dpmodel.fitting import ( + make_base_fitting, +) + +BaseFitting = make_base_fitting(torch.Tensor, fwd_method_name="forward") diff --git a/deepmd/pt/model/task/denoise.py b/deepmd/pt/model/task/denoise.py new file mode 100644 index 0000000000..5f1e780de3 --- /dev/null +++ b/deepmd/pt/model/task/denoise.py @@ -0,0 +1,137 @@ +# 
@fitting_check_output
class DenoiseNet(Fitting):
    def __init__(
        self,
        feature_dim,
        ntypes,
        attn_head=8,
        prefactor=[0.5, 0.5],
        activation_function="gelu",
        **kwargs,
    ):
        """Construct a denoise net.

        Args:
        - feature_dim: Passed to the token head (`MaskLMHead`) as `embed_dim`.
        - ntypes: Element count; passed to the token head as `output_dim`.
        - attn_head: Input width of the pair-to-coordinate head. An int builds a
          single head; a list builds one head per entry.
        - prefactor: Weights used to combine the per-descriptor coordinate
          updates when `attn_head` is a list.
        - activation_function: Activation name handed to every head.
        - **kwargs: Accepted and ignored.
        """
        super().__init__()
        self.feature_dim = feature_dim
        self.ntypes = ntypes
        self.attn_head = attn_head
        self.prefactor = torch.tensor(
            prefactor, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE
        )

        self.lm_head = MaskLMHead(
            embed_dim=self.feature_dim,
            output_dim=ntypes,
            activation_fn=activation_function,
            weight=None,
        )

        if isinstance(self.attn_head, list):
            # One projection head per descriptor, kept in descriptor order.
            self.ndescriptor = len(self.attn_head)
            heads = [
                NonLinearHead(head_width, 1, activation_fn=activation_function)
                for head_width in self.attn_head
            ]
            self.pair2coord_proj = torch.nn.ModuleList(heads)
        else:
            self.pair2coord_proj = NonLinearHead(
                self.attn_head, 1, activation_fn=activation_function
            )

    def output_def(self):
        """Declare the two outputs, `updated_coord` and `logits`."""
        updated_coord_def = OutputVariableDef(
            "updated_coord",
            [3],
            reduciable=False,
            r_differentiable=False,
            c_differentiable=False,
        )
        logits_def = OutputVariableDef(
            "logits",
            [-1],
            reduciable=False,
            r_differentiable=False,
            c_differentiable=False,
        )
        return FittingOutputDef([updated_coord_def, logits_def])

    def forward(
        self,
        pair_weights,
        diff,
        nlist_mask,
        features,
        sw,
        masked_tokens: Optional[torch.Tensor] = None,
    ):
        """Calculate the updated coord and the token logits.

        Args:
        - pair_weights: Input pair weights with shape [nframes, nloc, nnei, head].
        - diff: Input pair relative coord list with shape [nframes, nloc, nnei, 3].
        - nlist_mask: Input nlist mask with shape [nframes, nloc, nnei].
        - features: Input of the token head.
        - sw: Summed over its last axis to normalize the update in the
          single-descriptor case.
        - masked_tokens: Forwarded to the token head.

        When `attn_head` is a list, `pair_weights`, `diff` and `nlist_mask` are
        sequences with one entry per descriptor.

        Returns
        -------
        - dict with `updated_coord` (the coordinate update) and `logits`
          (the token-head output).
        """
        logits = self.lm_head(features, masked_tokens=masked_tokens)
        if isinstance(self.attn_head, list):
            assert len(self.prefactor) == self.ndescriptor
            assert len(pair_weights) == len(diff) == len(nlist_mask) == self.ndescriptor
            coord_updates = []
            for idx in range(self.ndescriptor):
                # [nframes, nloc, nnei, 1]
                weights = self.pair2coord_proj[idx](pair_weights[idx])
                weighted_sum = (weights * diff[idx]).sum(dim=-2)
                neighbor_count = nlist_mask[idx].sum(dim=-1).unsqueeze(-1)
                coord_updates.append(weighted_sum / (neighbor_count + 1e-6))
            # Prefactor-weighted sum, accumulated in descriptor order.
            out_coord = self.prefactor[0] * coord_updates[0]
            for idx in range(1, self.ndescriptor):
                out_coord += self.prefactor[idx] * coord_updates[idx]
        else:
            # [nframes, nloc, nnei, 1]
            weights = self.pair2coord_proj(pair_weights)
            weighted_sum = (weights * diff).sum(dim=-2)
            out_coord = weighted_sum / (sw.sum(dim=-1).unsqueeze(-1) + 1e-6)
        return {
            "updated_coord": out_coord,
            "logits": logits,
        }
( + env, +) +from deepmd.pt.utils.env import ( + DEFAULT_PRECISION, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +log = logging.getLogger(__name__) + + +@GeneralFitting.register("dipole") +class DipoleFittingNet(GeneralFitting): + """Construct a dipole fitting net. + + Parameters + ---------- + var_name : str + The atomic property to fit, 'dipole'. + ntypes : int + Element count. + dim_descrpt : int + Embedding width per atom. + embedding_width : int + The dimension of rotation matrix, m1. + neuron : List[int] + Number of neurons in each hidden layers of the fitting net. + resnet_dt : bool + Using time-step in the ResNet construction. + numb_fparam : int + Number of frame parameters. + numb_aparam : int + Number of atomic parameters. + activation_function : str + Activation function. + precision : str + Numerical precision. + mixed_types : bool + If true, use a uniform fitting net for all atom types, otherwise use + different fitting nets for different atom types. + rcond : float, optional + The condition number for the regression of atomic energy. + seed : int, optional + Random seed. + r_differentiable + If the variable is differentiated with respect to coordinates of atoms. + Only reduciable variable are differentiable. + c_differentiable + If the variable is differentiated with respect to the cell tensor (pbc case). + Only reduciable variable are differentiable. 
+ """ + + def __init__( + self, + ntypes: int, + dim_descrpt: int, + embedding_width: int, + neuron: List[int] = [128, 128, 128], + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + mixed_types: bool = True, + rcond: Optional[float] = None, + seed: Optional[int] = None, + exclude_types: List[int] = [], + r_differentiable: bool = True, + c_differentiable: bool = True, + **kwargs, + ): + self.embedding_width = embedding_width + self.r_differentiable = r_differentiable + self.c_differentiable = c_differentiable + super().__init__( + var_name=kwargs.pop("var_name", "dipole"), + ntypes=ntypes, + dim_descrpt=dim_descrpt, + neuron=neuron, + resnet_dt=resnet_dt, + numb_fparam=numb_fparam, + numb_aparam=numb_aparam, + activation_function=activation_function, + precision=precision, + mixed_types=mixed_types, + rcond=rcond, + seed=seed, + exclude_types=exclude_types, + **kwargs, + ) + self.old_impl = False # this only supports the new implementation. 
+ + def _net_out_dim(self): + """Set the FittingNet output dim.""" + return self.embedding_width + + def serialize(self) -> dict: + data = super().serialize() + data["type"] = "dipole" + data["embedding_width"] = self.embedding_width + data["old_impl"] = self.old_impl + data["r_differentiable"] = self.r_differentiable + data["c_differentiable"] = self.c_differentiable + return data + + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + return super().deserialize(data) + + def output_def(self) -> FittingOutputDef: + return FittingOutputDef( + [ + OutputVariableDef( + self.var_name, + [3], + reduciable=True, + r_differentiable=self.r_differentiable, + c_differentiable=self.c_differentiable, + ), + ] + ) + + def compute_output_stats( + self, + merged: Union[Callable[[], List[dict]], List[dict]], + stat_file_path: Optional[DPPath] = None, + ): + """ + Compute the output statistics (e.g. energy bias) for the fitting net from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + stat_file_path : Optional[DPPath] + The path to the stat file. 
@Fitting.register("dos")
class DOSFittingNet(InvarFitting):
    """Fitting net registered as "dos": an `InvarFitting` with `var_name="dos"` and `numb_dos` outputs."""

    def __init__(
        self,
        ntypes: int,
        dim_descrpt: int,
        numb_dos: int = 300,
        neuron: List[int] = [128, 128, 128],
        resnet_dt: bool = True,
        numb_fparam: int = 0,
        numb_aparam: int = 0,
        rcond: Optional[float] = None,
        bias_dos: Optional[torch.Tensor] = None,
        trainable: Union[bool, List[bool]] = True,
        seed: Optional[int] = None,
        activation_function: str = "tanh",
        precision: str = DEFAULT_PRECISION,
        exclude_types: List[int] = [],
        mixed_types: bool = True,
    ):
        """Construct a DOS fitting net.

        `numb_dos` is passed to the parent as `dim_out` and `bias_dos` as
        `bias_atom_e`; every other argument is forwarded under its own name.
        When `bias_dos` is None, `self.bias_dos` is a zero tensor of shape
        (ntypes, numb_dos).
        """
        if bias_dos is not None:
            self.bias_dos = bias_dos
        else:
            self.bias_dos = torch.zeros(
                (ntypes, numb_dos), dtype=dtype, device=env.DEVICE
            )
        super().__init__(
            var_name="dos",
            ntypes=ntypes,
            dim_descrpt=dim_descrpt,
            dim_out=numb_dos,
            neuron=neuron,
            bias_atom_e=bias_dos,
            resnet_dt=resnet_dt,
            numb_fparam=numb_fparam,
            numb_aparam=numb_aparam,
            activation_function=activation_function,
            precision=precision,
            mixed_types=mixed_types,
            rcond=rcond,
            seed=seed,
            exclude_types=exclude_types,
            trainable=trainable,
        )

    def output_def(self) -> FittingOutputDef:
        """Declare a single reducible, non-differentiable output of width `dim_out`."""
        return FittingOutputDef(
            [
                OutputVariableDef(
                    self.var_name,
                    [self.dim_out],
                    reduciable=True,
                    r_differentiable=False,
                    c_differentiable=False,
                ),
            ]
        )

    def compute_output_stats(
        self,
        merged: Union[Callable[[], List[dict]], List[dict]],
        stat_file_path: Optional[DPPath] = None,
    ) -> None:
        """
        Compute the output statistics (e.g. dos bias) for the fitting net from packed data.

        The result is stored in `self.bias_dos`. If `stat_file_path / "bias_dos"`
        already exists it is loaded instead of being recomputed; otherwise the
        freshly computed bias is saved there (when a path is given).

        Parameters
        ----------
        merged : Union[Callable[[], List[dict]], List[dict]]
            - List[dict]: A list of data samples from various data systems.
                Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
                originating from the `i`-th data system.
            - Callable[[], List[dict]]: A lazy function that returns data samples in the above format
                only when needed. Since the sampling process can be slow and memory-intensive,
                the lazy function helps by only sampling once.
        stat_file_path : Optional[DPPath]
            The path to the stat file.

        Raises
        ------
        ValueError
            If the statistics must be computed but `merged` yields no systems.

        """
        if stat_file_path is not None:
            stat_file_path = stat_file_path / "bias_dos"
        if stat_file_path is not None and stat_file_path.is_file():
            bias_dos = stat_file_path.load_numpy()
        else:
            # only get data for once
            sampled = merged() if callable(merged) else merged
            if len(sampled) == 0:
                # Without this guard `bias_dos` would be unbound below and the
                # method would fail with an opaque UnboundLocalError.
                raise ValueError(
                    "Cannot compute the DOS bias: no sampled data systems were provided."
                )
            # NOTE(review): `bias_dos` is reassigned for every system, so only
            # the statistics of the LAST system survive the loop. This looks
            # unintended, but the correct cross-system aggregation depends on
            # the helpers' contracts, which are not visible here - confirm.
            for sys_data in sampled:
                nframes = sys_data["atype"].shape[0]

                if "atom_dos" in sys_data:
                    # Atomic labels are available: use them directly.
                    bias_dos = compute_stats_from_atomic(
                        sys_data["atom_dos"].numpy(force=True),
                        sys_data["atype"].numpy(force=True),
                    )[0]
                else:
                    # Only reduced labels: count atoms of each type per frame
                    # and hand both to the reduced-statistics helper.
                    sys_type_count = np.zeros(
                        (nframes, self.ntypes), dtype=env.GLOBAL_NP_FLOAT_PRECISION
                    )
                    for itype in range(self.ntypes):
                        type_mask = sys_data["atype"] == itype
                        sys_type_count[:, itype] = type_mask.sum(dim=1).numpy(
                            force=True
                        )
                    sys_bias_redu = sys_data["dos"].numpy(force=True)

                    bias_dos = compute_stats_from_redu(
                        sys_bias_redu, sys_type_count, rcond=self.rcond
                    )[0]
            if stat_file_path is not None:
                stat_file_path.save_numpy(bias_dos)
        # NOTE(review): only `self.bias_dos` is updated here, while `serialize`
        # writes `self.bias_atom_e` - confirm the two are kept in sync elsewhere.
        self.bias_dos = torch.tensor(bias_dos, device=env.DEVICE)

    @classmethod
    def deserialize(cls, data: dict) -> "DOSFittingNet":
        """Rebuild the fitting net from a dict produced by `serialize`.

        The input is deep-copied, version-checked, stripped of the keys the
        constructor does not accept, and `dim_out` is renamed to `numb_dos`
        before delegating to the parent implementation.
        """
        data = copy.deepcopy(data)
        check_version_compatibility(data.pop("@version", 1), 1, 1)
        data.pop("@class", None)
        data.pop("var_name", None)
        data.pop("tot_ener_zero", None)
        data.pop("layer_name", None)
        data.pop("use_aparam_as_mask", None)
        data.pop("spin", None)
        data.pop("atom_ener", None)
        data["numb_dos"] = data.pop("dim_out")
        obj = super().deserialize(data)

        return obj

    def serialize(self) -> dict:
        """Serialize the fitting to dict."""
        # dd = super(InvarFitting, self).serialize()
        dd = {
            **InvarFitting.serialize(self),
            "type": "dos",
            "dim_out": self.dim_out,
        }
        dd["@variables"]["bias_atom_e"] = to_numpy_array(self.bias_atom_e)

        return dd

    # make jit happy with torch 2.0.0
    exclude_types: List[int]
a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py new file mode 100644 index 0000000000..12c0917dd2 --- /dev/null +++ b/deepmd/pt/model/task/ener.py @@ -0,0 +1,247 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import logging +from typing import ( + List, + Optional, + Tuple, +) + +import numpy as np +import torch + +from deepmd.dpmodel import ( + FittingOutputDef, + OutputVariableDef, + fitting_check_output, +) +from deepmd.pt.model.network.network import ( + ResidualDeep, +) +from deepmd.pt.model.task.fitting import ( + Fitting, + GeneralFitting, +) +from deepmd.pt.model.task.invar_fitting import ( + InvarFitting, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.env import ( + DEFAULT_PRECISION, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +dtype = env.GLOBAL_PT_FLOAT_PRECISION +device = env.DEVICE + +log = logging.getLogger(__name__) + + +@Fitting.register("ener") +class EnergyFittingNet(InvarFitting): + def __init__( + self, + ntypes: int, + dim_descrpt: int, + neuron: List[int] = [128, 128, 128], + bias_atom_e: Optional[torch.Tensor] = None, + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + mixed_types: bool = True, + **kwargs, + ): + super().__init__( + "energy", + ntypes, + dim_descrpt, + 1, + neuron=neuron, + bias_atom_e=bias_atom_e, + resnet_dt=resnet_dt, + numb_fparam=numb_fparam, + numb_aparam=numb_aparam, + activation_function=activation_function, + precision=precision, + mixed_types=mixed_types, + **kwargs, + ) + + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("var_name") + data.pop("dim_out") + return super().deserialize(data) + + def serialize(self) -> dict: + """Serialize the fitting to dict.""" + return { + **super().serialize(), + "type": "ener", + } + 
@Fitting.register("direct_force")
@Fitting.register("direct_force_ener")
@fitting_check_output
class EnergyFittingNetDirect(Fitting):
    def __init__(
        self,
        ntypes,
        dim_descrpt,
        neuron,
        bias_atom_e=None,
        out_dim=1,
        resnet_dt=True,
        use_tebd=True,
        return_energy=False,
        **kwargs,
    ):
        """Construct a fitting net that predicts a per-atom vector and, optionally, energy.

        Args:
        - ntypes: Element count.
        - dim_descrpt: Embedding width per atom.
        - neuron: Number of neurons in each hidden layers of the fitting net.
        - bias_atom_e: Average energy per atom for each element; zeros when None.
        - out_dim: Output width of the vector ("dipole") networks.
        - resnet_dt: Using time-step in the ResNet construction.
        - use_tebd: If True, only the first network of each group is evaluated in
          `forward` and the energy bias is looked up by atom type; otherwise one
          network per element is evaluated and masked by atom type.
        - return_energy: If True, also build and evaluate the energy networks.
        - **kwargs: Only `seed` is read.
        """
        super().__init__()
        self.ntypes = ntypes
        self.dim_descrpt = dim_descrpt
        self.use_tebd = use_tebd
        self.out_dim = out_dim
        if bias_atom_e is None:
            bias_atom_e = np.zeros([self.ntypes])
        if not use_tebd:
            assert self.ntypes == len(bias_atom_e), "Element count mismatches!"
        bias_atom_e = torch.tensor(bias_atom_e, device=env.DEVICE)
        self.register_buffer("bias_atom_e", bias_atom_e)

        # Vector-output networks, one per element.
        filter_layers_dipole = []
        for type_i in range(self.ntypes):
            one = ResidualDeep(
                type_i,
                dim_descrpt,
                neuron,
                0.0,
                out_dim=out_dim,
                resnet_dt=resnet_dt,
            )
            filter_layers_dipole.append(one)
        self.filter_layers_dipole = torch.nn.ModuleList(filter_layers_dipole)

        # Energy networks, only built on request; the list stays empty otherwise.
        self.return_energy = return_energy
        filter_layers = []
        if self.return_energy:
            for type_i in range(self.ntypes):
                bias_type = 0.0 if self.use_tebd else bias_atom_e[type_i]
                one = ResidualDeep(
                    type_i, dim_descrpt, neuron, bias_type, resnet_dt=resnet_dt
                )
                filter_layers.append(one)
        self.filter_layers = torch.nn.ModuleList(filter_layers)

        # NOTE(review): the seed is applied after all networks above have been
        # constructed, so it does not influence their initialization - confirm
        # whether that is intended.
        if "seed" in kwargs:
            torch.manual_seed(kwargs["seed"])

    def output_def(self):
        """Declare the outputs: reducible `energy` ([1]) and non-reducible `dforce` ([3])."""
        return FittingOutputDef(
            [
                OutputVariableDef(
                    "energy",
                    [1],
                    reduciable=True,
                    r_differentiable=False,
                    c_differentiable=False,
                ),
                OutputVariableDef(
                    "dforce",
                    [3],
                    reduciable=False,
                    r_differentiable=False,
                    c_differentiable=False,
                ),
            ]
        )

    def serialize(self) -> dict:
        """Serialization is not supported for this fitting net."""
        raise NotImplementedError

    @classmethod
    def deserialize(cls, data: Optional[dict] = None) -> "EnergyFittingNetDirect":
        """Deserialization is not supported for this fitting net.

        Declared as a classmethod taking `data`, like the other fitting nets in
        this package. `data` is optional so that pre-existing calls without an
        argument still reach the NotImplementedError below.
        """
        raise NotImplementedError

    # NOTE(review): the return annotation below does not match the dict that is
    # actually returned; it is left untouched because correcting it needs a
    # typing import outside this block.
    def forward(
        self,
        inputs: torch.Tensor,
        atype: torch.Tensor,
        gr: Optional[torch.Tensor] = None,
        g2: Optional[torch.Tensor] = None,
        h2: Optional[torch.Tensor] = None,
        fparam: Optional[torch.Tensor] = None,
        aparam: Optional[torch.Tensor] = None,
    ) -> Tuple[torch.Tensor, None]:
        """Based on embedding net output, calculate the per-atom vector and energy.

        Args:
        - inputs: Embedding matrix. Its shape is [nframes, nloc, self.dim_descrpt].
        - atype: Atom types, used to index the energy bias and to mask
          per-element networks.
        - gr: Reshaped to [-1, self.out_dim, 3] and multiplied with the vector
          network output; required when `use_tebd` is True.
        - g2, h2, fparam, aparam: Accepted and unused.

        Returns
        -------
        - dict with `energy` (all zeros unless `return_energy` is True) and
          `dforce` (the vector output).
        """
        nframes, nloc, _ = inputs.size()
        if self.use_tebd:
            # if atype_tebd is not None:
            #     inputs = torch.concat([inputs, atype_tebd], dim=-1)
            vec_out = self.filter_layers_dipole[0](
                inputs
            )  # Shape is [nframes, nloc, m1]
            assert list(vec_out.size()) == [nframes, nloc, self.out_dim]
            # (nf x nloc) x 1 x od
            vec_out = vec_out.view(-1, 1, self.out_dim)
            assert gr is not None
            # (nf x nloc) x od x 3
            gr = gr.view(-1, self.out_dim, 3)
            vec_out = (
                torch.bmm(vec_out, gr).squeeze(-2).view(nframes, nloc, 3)
            )  # Shape is [nframes, nloc, 3]
        else:
            vec_out = torch.zeros_like(atype).unsqueeze(-1)  # jit assertion
            for type_i, filter_layer in enumerate(self.filter_layers_dipole):
                # Keep each network's output only on atoms of its own type.
                mask = atype == type_i
                vec_out_type = filter_layer(inputs)  # Shape is [nframes, nloc, m1]
                vec_out_type = vec_out_type * mask.unsqueeze(-1)
                vec_out = vec_out + vec_out_type  # Shape is [nframes, natoms[0], 1]

        outs = torch.zeros_like(atype).unsqueeze(-1)  # jit assertion
        if self.return_energy:
            if self.use_tebd:
                atom_energy = self.filter_layers[0](inputs) + self.bias_atom_e[
                    atype
                ].unsqueeze(-1)
                outs = outs + atom_energy  # Shape is [nframes, natoms[0], 1]
            else:
                for type_i, filter_layer in enumerate(self.filter_layers):
                    mask = atype == type_i
                    atom_energy = filter_layer(inputs)
                    if not env.ENERGY_BIAS_TRAINABLE:
                        atom_energy = atom_energy + self.bias_atom_e[type_i]
                    atom_energy = atom_energy * mask.unsqueeze(-1)
                    outs = outs + atom_energy  # Shape is [nframes, natoms[0], 1]
        return {
            "energy": outs.to(env.GLOBAL_PT_FLOAT_PRECISION),
            "dforce": vec_out,
        }
+ """ + assert ( + self.__class__ == base_class.__class__ + ), "Only fitting nets of the same type can share params!" + if shared_level == 0: + # link buffers + if hasattr(self, "bias_atom_e"): + self.bias_atom_e = base_class.bias_atom_e + # the following will successfully link all the params except buffers, which need manually link. + for item in self._modules: + self._modules[item] = base_class._modules[item] + elif shared_level == 1: + # only not share the bias_atom_e + # the following will successfully link all the params except buffers, which need manually link. + for item in self._modules: + self._modules[item] = base_class._modules[item] + else: + raise NotImplementedError + + +class GeneralFitting(Fitting): + """Construct a general fitting net. + + Parameters + ---------- + var_name : str + The atomic property to fit, 'energy', 'dipole', and 'polar'. + ntypes : int + Element count. + dim_descrpt : int + Embedding width per atom. + dim_out : int + The output dimension of the fitting net. + neuron : List[int] + Number of neurons in each hidden layers of the fitting net. + bias_atom_e : torch.Tensor, optional + Average enery per atom for each element. + resnet_dt : bool + Using time-step in the ResNet construction. + numb_fparam : int + Number of frame parameters. + numb_aparam : int + Number of atomic parameters. + activation_function : str + Activation function. + precision : str + Numerical precision. + mixed_types : bool + If true, use a uniform fitting net for all atom types, otherwise use + different fitting nets for different atom types. + rcond : float, optional + The condition number for the regression of atomic energy. + seed : int, optional + Random seed. + exclude_types: List[int] + Atomic contributions of the excluded atom types are set zero. + trainable : Union[List[bool], bool] + If the parameters in the fitting net are trainable. + Now this only supports setting all the parameters in the fitting net at one state. 
+ When in List[bool], the trainable will be True only if all the boolean parameters are True. + remove_vaccum_contribution: List[bool], optional + Remove vaccum contribution before the bias is added. The list assigned each + type. For `mixed_types` provide `[True]`, otherwise it should be a list of the same + length as `ntypes` signaling if or not removing the vaccum contribution for the atom types in the list. + """ + + def __init__( + self, + var_name: str, + ntypes: int, + dim_descrpt: int, + neuron: List[int] = [128, 128, 128], + bias_atom_e: Optional[torch.Tensor] = None, + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + mixed_types: bool = True, + rcond: Optional[float] = None, + seed: Optional[int] = None, + exclude_types: List[int] = [], + trainable: Union[bool, List[bool]] = True, + remove_vaccum_contribution: Optional[List[bool]] = None, + **kwargs, + ): + super().__init__() + self.var_name = var_name + self.ntypes = ntypes + self.dim_descrpt = dim_descrpt + self.neuron = neuron + self.mixed_types = mixed_types + self.resnet_dt = resnet_dt + self.numb_fparam = numb_fparam + self.numb_aparam = numb_aparam + self.activation_function = activation_function + self.precision = precision + self.prec = PRECISION_DICT[self.precision] + self.rcond = rcond + # order matters, should be place after the assignment of ntypes + self.reinit_exclude(exclude_types) + self.trainable = trainable + # need support for each layer settings + self.trainable = ( + all(self.trainable) if isinstance(self.trainable, list) else self.trainable + ) + self.remove_vaccum_contribution = remove_vaccum_contribution + + net_dim_out = self._net_out_dim() + # init constants + if bias_atom_e is None: + bias_atom_e = np.zeros([self.ntypes, net_dim_out], dtype=np.float64) + bias_atom_e = torch.tensor(bias_atom_e, dtype=self.prec, device=device) + bias_atom_e = bias_atom_e.view([self.ntypes, 
net_dim_out]) + if not self.mixed_types: + assert self.ntypes == bias_atom_e.shape[0], "Element count mismatches!" + self.register_buffer("bias_atom_e", bias_atom_e) + + if self.numb_fparam > 0: + self.register_buffer( + "fparam_avg", + torch.zeros(self.numb_fparam, dtype=self.prec, device=device), + ) + self.register_buffer( + "fparam_inv_std", + torch.ones(self.numb_fparam, dtype=self.prec, device=device), + ) + else: + self.fparam_avg, self.fparam_inv_std = None, None + if self.numb_aparam > 0: + self.register_buffer( + "aparam_avg", + torch.zeros(self.numb_aparam, dtype=self.prec, device=device), + ) + self.register_buffer( + "aparam_inv_std", + torch.ones(self.numb_aparam, dtype=self.prec, device=device), + ) + else: + self.aparam_avg, self.aparam_inv_std = None, None + + in_dim = self.dim_descrpt + self.numb_fparam + self.numb_aparam + + self.old_impl = kwargs.get("old_impl", False) + if self.old_impl: + filter_layers = [] + for type_i in range(self.ntypes if not self.mixed_types else 1): + bias_type = 0.0 + one = ResidualDeep( + type_i, + self.dim_descrpt, + self.neuron, + bias_type, + resnet_dt=self.resnet_dt, + ) + filter_layers.append(one) + self.filter_layers_old = torch.nn.ModuleList(filter_layers) + self.filter_layers = None + else: + self.filter_layers = NetworkCollection( + 1 if not self.mixed_types else 0, + self.ntypes, + network_type="fitting_network", + networks=[ + FittingNet( + in_dim, + net_dim_out, + self.neuron, + self.activation_function, + self.resnet_dt, + self.precision, + bias_out=True, + ) + for ii in range(self.ntypes if not self.mixed_types else 1) + ], + ) + self.filter_layers_old = None + + if seed is not None: + torch.manual_seed(seed) + # set trainable + for param in self.parameters(): + param.requires_grad = self.trainable + + def reinit_exclude( + self, + exclude_types: List[int] = [], + ): + self.exclude_types = exclude_types + self.emask = AtomExcludeMask(self.ntypes, self.exclude_types) + + def serialize(self) -> dict: + 
"""Serialize the fitting to dict.""" + return { + "@class": "Fitting", + "@version": 1, + "var_name": self.var_name, + "ntypes": self.ntypes, + "dim_descrpt": self.dim_descrpt, + "neuron": self.neuron, + "resnet_dt": self.resnet_dt, + "numb_fparam": self.numb_fparam, + "numb_aparam": self.numb_aparam, + "activation_function": self.activation_function, + "precision": self.precision, + "mixed_types": self.mixed_types, + "nets": self.filter_layers.serialize(), + "rcond": self.rcond, + "exclude_types": self.exclude_types, + "@variables": { + "bias_atom_e": to_numpy_array(self.bias_atom_e), + "fparam_avg": to_numpy_array(self.fparam_avg), + "fparam_inv_std": to_numpy_array(self.fparam_inv_std), + "aparam_avg": to_numpy_array(self.aparam_avg), + "aparam_inv_std": to_numpy_array(self.aparam_inv_std), + }, + # "tot_ener_zero": self.tot_ener_zero , + # "trainable": self.trainable , + # "atom_ener": self.atom_ener , + # "layer_name": self.layer_name , + # "use_aparam_as_mask": self.use_aparam_as_mask , + # "spin": self.spin , + ## NOTICE: not supported by far + "tot_ener_zero": False, + "trainable": [self.trainable] * (len(self.neuron) + 1), + "layer_name": None, + "use_aparam_as_mask": False, + "spin": None, + } + + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + variables = data.pop("@variables") + nets = data.pop("nets") + obj = cls(**data) + for kk in variables.keys(): + obj[kk] = to_torch_tensor(variables[kk]) + obj.filter_layers = NetworkCollection.deserialize(nets) + return obj + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + return self.numb_fparam + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return self.numb_aparam + + # make jit happy + exclude_types: List[int] + + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model. 
+ + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + # make jit happy + sel_type: List[int] = [] + for ii in range(self.ntypes): + if ii not in self.exclude_types: + sel_type.append(ii) + return sel_type + + def __setitem__(self, key, value): + if key in ["bias_atom_e"]: + value = value.view([self.ntypes, self._net_out_dim()]) + self.bias_atom_e = value + elif key in ["fparam_avg"]: + self.fparam_avg = value + elif key in ["fparam_inv_std"]: + self.fparam_inv_std = value + elif key in ["aparam_avg"]: + self.aparam_avg = value + elif key in ["aparam_inv_std"]: + self.aparam_inv_std = value + elif key in ["scale"]: + self.scale = value + else: + raise KeyError(key) + + def __getitem__(self, key): + if key in ["bias_atom_e"]: + return self.bias_atom_e + elif key in ["fparam_avg"]: + return self.fparam_avg + elif key in ["fparam_inv_std"]: + return self.fparam_inv_std + elif key in ["aparam_avg"]: + return self.aparam_avg + elif key in ["aparam_inv_std"]: + return self.aparam_inv_std + elif key in ["scale"]: + return self.scale + else: + raise KeyError(key) + + @abstractmethod + def _net_out_dim(self): + """Set the FittingNet output dim.""" + pass + + def _extend_f_avg_std(self, xx: torch.Tensor, nb: int) -> torch.Tensor: + return torch.tile(xx.view([1, self.numb_fparam]), [nb, 1]) + + def _extend_a_avg_std(self, xx: torch.Tensor, nb: int, nloc: int) -> torch.Tensor: + return torch.tile(xx.view([1, 1, self.numb_aparam]), [nb, nloc, 1]) + + def _forward_common( + self, + descriptor: torch.Tensor, + atype: torch.Tensor, + gr: Optional[torch.Tensor] = None, + g2: Optional[torch.Tensor] = None, + h2: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + ): + xx = descriptor + if self.remove_vaccum_contribution is not None: + # TODO: compute the input for vaccm when remove_vaccum_contribution is set + # 
Idealy, the input for vaccum should be computed; + # we consider it as always zero for convenience. + # Needs a compute_input_stats for vaccum passed from the + # descriptor. + xx_zeros = torch.zeros_like(xx) + else: + xx_zeros = None + nf, nloc, nd = xx.shape + net_dim_out = self._net_out_dim() + + if nd != self.dim_descrpt: + raise ValueError( + "get an input descriptor of dim {nd}," + "which is not consistent with {self.dim_descrpt}." + ) + # check fparam dim, concate to input descriptor + if self.numb_fparam > 0: + assert fparam is not None, "fparam should not be None" + assert self.fparam_avg is not None + assert self.fparam_inv_std is not None + if fparam.shape[-1] != self.numb_fparam: + raise ValueError( + "get an input fparam of dim {fparam.shape[-1]}, ", + "which is not consistent with {self.numb_fparam}.", + ) + fparam = fparam.view([nf, self.numb_fparam]) + nb, _ = fparam.shape + t_fparam_avg = self._extend_f_avg_std(self.fparam_avg, nb) + t_fparam_inv_std = self._extend_f_avg_std(self.fparam_inv_std, nb) + fparam = (fparam - t_fparam_avg) * t_fparam_inv_std + fparam = torch.tile(fparam.reshape([nf, 1, -1]), [1, nloc, 1]) + xx = torch.cat( + [xx, fparam], + dim=-1, + ) + if xx_zeros is not None: + xx_zeros = torch.cat( + [xx_zeros, fparam], + dim=-1, + ) + # check aparam dim, concate to input descriptor + if self.numb_aparam > 0: + assert aparam is not None, "aparam should not be None" + assert self.aparam_avg is not None + assert self.aparam_inv_std is not None + if aparam.shape[-1] != self.numb_aparam: + raise ValueError( + f"get an input aparam of dim {aparam.shape[-1]}, ", + f"which is not consistent with {self.numb_aparam}.", + ) + aparam = aparam.view([nf, -1, self.numb_aparam]) + nb, nloc, _ = aparam.shape + t_aparam_avg = self._extend_a_avg_std(self.aparam_avg, nb, nloc) + t_aparam_inv_std = self._extend_a_avg_std(self.aparam_inv_std, nb, nloc) + aparam = (aparam - t_aparam_avg) * t_aparam_inv_std + xx = torch.cat( + [xx, aparam], + dim=-1, + ) + 
if xx_zeros is not None: + xx_zeros = torch.cat( + [xx_zeros, aparam], + dim=-1, + ) + + outs = torch.zeros( + (nf, nloc, net_dim_out), + dtype=env.GLOBAL_PT_FLOAT_PRECISION, + device=descriptor.device, + ) # jit assertion + if self.old_impl: + assert self.filter_layers_old is not None + assert xx_zeros is None + if self.mixed_types: + atom_property = self.filter_layers_old[0](xx) + self.bias_atom_e[atype] + outs = outs + atom_property # Shape is [nframes, natoms[0], 1] + else: + for type_i, filter_layer in enumerate(self.filter_layers_old): + mask = atype == type_i + atom_property = filter_layer(xx) + atom_property = atom_property + self.bias_atom_e[type_i] + atom_property = atom_property * mask.unsqueeze(-1) + outs = outs + atom_property # Shape is [nframes, natoms[0], 1] + else: + if self.mixed_types: + atom_property = ( + self.filter_layers.networks[0](xx) + self.bias_atom_e[atype] + ) + if xx_zeros is not None: + atom_property -= self.filter_layers.networks[0](xx_zeros) + outs = ( + outs + atom_property + ) # Shape is [nframes, natoms[0], net_dim_out] + else: + for type_i, ll in enumerate(self.filter_layers.networks): + mask = (atype == type_i).unsqueeze(-1) + mask = torch.tile(mask, (1, 1, net_dim_out)) + atom_property = ll(xx) + if xx_zeros is not None: + # must assert, otherwise jit is not happy + assert self.remove_vaccum_contribution is not None + if not ( + len(self.remove_vaccum_contribution) > type_i + and not self.remove_vaccum_contribution[type_i] + ): + atom_property -= ll(xx_zeros) + atom_property = atom_property + self.bias_atom_e[type_i] + atom_property = atom_property * mask + outs = ( + outs + atom_property + ) # Shape is [nframes, natoms[0], net_dim_out] + # nf x nloc + mask = self.emask(atype) + # nf x nloc x nod + outs = outs * mask[:, :, None] + return {self.var_name: outs.to(env.GLOBAL_PT_FLOAT_PRECISION)} diff --git a/deepmd/pt/model/task/invar_fitting.py b/deepmd/pt/model/task/invar_fitting.py new file mode 100644 index 
0000000000..585f697193 --- /dev/null +++ b/deepmd/pt/model/task/invar_fitting.py @@ -0,0 +1,213 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import logging +from typing import ( + Callable, + List, + Optional, + Union, +) + +import torch + +from deepmd.dpmodel import ( + FittingOutputDef, + OutputVariableDef, + fitting_check_output, +) +from deepmd.pt.model.task.fitting import ( + GeneralFitting, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.env import ( + DEFAULT_PRECISION, +) +from deepmd.pt.utils.stat import ( + compute_output_stats, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +dtype = env.GLOBAL_PT_FLOAT_PRECISION +device = env.DEVICE + +log = logging.getLogger(__name__) + + +@GeneralFitting.register("invar") +@fitting_check_output +class InvarFitting(GeneralFitting): + """Construct a fitting net for energy. + + Parameters + ---------- + var_name : str + The atomic property to fit, 'energy', 'dipole', and 'polar'. + ntypes : int + Element count. + dim_descrpt : int + Embedding width per atom. + dim_out : int + The output dimension of the fitting net. + neuron : List[int] + Number of neurons in each hidden layers of the fitting net. + bias_atom_e : torch.Tensor, optional + Average enery per atom for each element. + resnet_dt : bool + Using time-step in the ResNet construction. + numb_fparam : int + Number of frame parameters. + numb_aparam : int + Number of atomic parameters. + activation_function : str + Activation function. + precision : str + Numerical precision. + mixed_types : bool + If true, use a uniform fitting net for all atom types, otherwise use + different fitting nets for different atom types. + rcond : float, optional + The condition number for the regression of atomic energy. + seed : int, optional + Random seed. + exclude_types: List[int] + Atomic contributions of the excluded atom types are set zero. 
+ atom_ener: List[float], optional + Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descrptor should be set. + + """ + + def __init__( + self, + var_name: str, + ntypes: int, + dim_descrpt: int, + dim_out: int, + neuron: List[int] = [128, 128, 128], + bias_atom_e: Optional[torch.Tensor] = None, + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + mixed_types: bool = True, + rcond: Optional[float] = None, + seed: Optional[int] = None, + exclude_types: List[int] = [], + atom_ener: Optional[List[float]] = None, + **kwargs, + ): + self.dim_out = dim_out + self.atom_ener = atom_ener + super().__init__( + var_name=var_name, + ntypes=ntypes, + dim_descrpt=dim_descrpt, + neuron=neuron, + bias_atom_e=bias_atom_e, + resnet_dt=resnet_dt, + numb_fparam=numb_fparam, + numb_aparam=numb_aparam, + activation_function=activation_function, + precision=precision, + mixed_types=mixed_types, + rcond=rcond, + seed=seed, + exclude_types=exclude_types, + remove_vaccum_contribution=None + if atom_ener is None or len([x for x in atom_ener if x is not None]) == 0 + else [x is not None for x in atom_ener], + **kwargs, + ) + + def _net_out_dim(self): + """Set the FittingNet output dim.""" + return self.dim_out + + def serialize(self) -> dict: + data = super().serialize() + data["type"] = "invar" + data["dim_out"] = self.dim_out + data["atom_ener"] = self.atom_ener + return data + + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + return super().deserialize(data) + + def compute_output_stats( + self, + merged: Union[Callable[[], List[dict]], List[dict]], + stat_file_path: Optional[DPPath] = None, + ): + """ + Compute the output statistics (e.g. energy bias) for the fitting net from packed data. 
+ + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + stat_file_path : Optional[DPPath] + The path to the stat file. + + """ + bias_atom_e = compute_output_stats( + merged, + self.ntypes, + keys=["energy"], + stat_file_path=stat_file_path, + rcond=self.rcond, + atom_ener=self.atom_ener, + )["energy"] + self.bias_atom_e.copy_(bias_atom_e.view([self.ntypes, self.dim_out])) + + def output_def(self) -> FittingOutputDef: + return FittingOutputDef( + [ + OutputVariableDef( + self.var_name, + [self.dim_out], + reduciable=True, + r_differentiable=True, + c_differentiable=True, + ), + ] + ) + + def forward( + self, + descriptor: torch.Tensor, + atype: torch.Tensor, + gr: Optional[torch.Tensor] = None, + g2: Optional[torch.Tensor] = None, + h2: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + ): + """Based on embedding net output, alculate total energy. + + Args: + - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.dim_descrpt]. + - natoms: Tell atom count and element count. Its shape is [2+self.ntypes]. + + Returns + ------- + - `torch.Tensor`: Total energy with shape [nframes, natoms[0]]. 
+ """ + return self._forward_common(descriptor, atype, gr, g2, h2, fparam, aparam) + + # make jit happy with torch 2.0.0 + exclude_types: List[int] diff --git a/deepmd/pt/model/task/polarizability.py b/deepmd/pt/model/task/polarizability.py new file mode 100644 index 0000000000..544d23555c --- /dev/null +++ b/deepmd/pt/model/task/polarizability.py @@ -0,0 +1,321 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import logging +from typing import ( + Callable, + List, + Optional, + Union, +) + +import numpy as np +import torch + +from deepmd.dpmodel import ( + FittingOutputDef, + OutputVariableDef, +) +from deepmd.pt.model.task.fitting import ( + GeneralFitting, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.env import ( + DEFAULT_PRECISION, +) +from deepmd.pt.utils.utils import ( + to_numpy_array, +) +from deepmd.utils.out_stat import ( + compute_stats_from_atomic, + compute_stats_from_redu, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +log = logging.getLogger(__name__) + + +@GeneralFitting.register("polar") +class PolarFittingNet(GeneralFitting): + """Construct a polar fitting net. + + Parameters + ---------- + var_name : str + The atomic property to fit, 'polar'. + ntypes : int + Element count. + dim_descrpt : int + Embedding width per atom. + embedding_width : int + The dimension of rotation matrix, m1. + neuron : List[int] + Number of neurons in each hidden layers of the fitting net. + resnet_dt : bool + Using time-step in the ResNet construction. + numb_fparam : int + Number of frame parameters. + numb_aparam : int + Number of atomic parameters. + activation_function : str + Activation function. + precision : str + Numerical precision. + mixed_types : bool + If true, use a uniform fitting net for all atom types, otherwise use + different fitting nets for different atom types. 
+ rcond : float, optional + The condition number for the regression of atomic energy. + seed : int, optional + Random seed. + fit_diag : bool + Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to + normal polarizability matrix by contracting with the rotation matrix. + scale : List[float] + The output of the fitting net (polarizability matrix) for type i atom will be scaled by scale[i] + shift_diag : bool + Whether to shift the diagonal part of the polarizability matrix. The shift operation is carried out after scale. + """ + + def __init__( + self, + ntypes: int, + dim_descrpt: int, + embedding_width: int, + neuron: List[int] = [128, 128, 128], + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + mixed_types: bool = True, + rcond: Optional[float] = None, + seed: Optional[int] = None, + exclude_types: List[int] = [], + fit_diag: bool = True, + scale: Optional[Union[List[float], float]] = None, + shift_diag: bool = True, + **kwargs, + ): + self.embedding_width = embedding_width + self.fit_diag = fit_diag + self.scale = scale + if self.scale is None: + self.scale = [1.0 for _ in range(ntypes)] + else: + if isinstance(self.scale, list): + assert ( + len(self.scale) == ntypes + ), "Scale should be a list of length ntypes." + elif isinstance(self.scale, float): + self.scale = [self.scale for _ in range(ntypes)] + else: + raise ValueError( + "Scale must be a list of float of length ntypes or a float." 
+ ) + self.scale = torch.tensor( + self.scale, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE + ).view(ntypes, 1) + self.shift_diag = shift_diag + self.constant_matrix = torch.zeros( + ntypes, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE + ) + super().__init__( + var_name=kwargs.pop("var_name", "polar"), + ntypes=ntypes, + dim_descrpt=dim_descrpt, + neuron=neuron, + resnet_dt=resnet_dt, + numb_fparam=numb_fparam, + numb_aparam=numb_aparam, + activation_function=activation_function, + precision=precision, + mixed_types=mixed_types, + rcond=rcond, + seed=seed, + exclude_types=exclude_types, + **kwargs, + ) + self.old_impl = False # this only supports the new implementation. + + def _net_out_dim(self): + """Set the FittingNet output dim.""" + return ( + self.embedding_width + if self.fit_diag + else self.embedding_width * self.embedding_width + ) + + def __setitem__(self, key, value): + if key in ["constant_matrix"]: + self.constant_matrix = value + else: + super().__setitem__(key, value) + + def __getitem__(self, key): + if key in ["constant_matrix"]: + return self.constant_matrix + else: + return super().__getitem__(key) + + def serialize(self) -> dict: + data = super().serialize() + data["type"] = "polar" + data["@version"] = 2 + data["embedding_width"] = self.embedding_width + data["old_impl"] = self.old_impl + data["fit_diag"] = self.fit_diag + data["shift_diag"] = self.shift_diag + data["@variables"]["scale"] = to_numpy_array(self.scale) + data["@variables"]["constant_matrix"] = to_numpy_array(self.constant_matrix) + return data + + @classmethod + def deserialize(cls, data: dict) -> "GeneralFitting": + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 2, 1) + return super().deserialize(data) + + def output_def(self) -> FittingOutputDef: + return FittingOutputDef( + [ + OutputVariableDef( + self.var_name, + [3, 3], + reduciable=True, + r_differentiable=False, + c_differentiable=False, + ), + ] + ) + + def 
compute_output_stats( + self, + merged: Union[Callable[[], List[dict]], List[dict]], + stat_file_path: Optional[DPPath] = None, + ) -> None: + """ + Compute the output statistics (e.g. energy bias) for the fitting net from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + stat_file_path : Optional[DPPath] + The path to the stat file. + + """ + if self.shift_diag: + if stat_file_path is not None: + stat_file_path = stat_file_path / "constant_matrix" + if stat_file_path is not None and stat_file_path.is_file(): + constant_matrix = stat_file_path.load_numpy() + else: + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged + + sys_constant_matrix = [] + for sys in range(len(sampled)): + nframs = sampled[sys]["atype"].shape[0] + + if sampled[sys]["find_atomic_polarizability"] > 0.0: + sys_atom_polar = compute_stats_from_atomic( + sampled[sys]["atomic_polarizability"].numpy(force=True), + sampled[sys]["atype"].numpy(force=True), + )[0] + else: + if not sampled[sys]["find_polarizability"] > 0.0: + continue + sys_type_count = np.zeros( + (nframs, self.ntypes), dtype=env.GLOBAL_NP_FLOAT_PRECISION + ) + for itype in range(self.ntypes): + type_mask = sampled[sys]["atype"] == itype + sys_type_count[:, itype] = type_mask.sum(dim=1).numpy( + force=True + ) + + sys_bias_redu = sampled[sys]["polarizability"].numpy(force=True) + + sys_atom_polar = compute_stats_from_redu( + sys_bias_redu, sys_type_count, rcond=self.rcond + )[0] + cur_constant_matrix = np.zeros( + self.ntypes, 
dtype=env.GLOBAL_NP_FLOAT_PRECISION + ) + + for itype in range(self.ntypes): + cur_constant_matrix[itype] = np.mean( + np.diagonal(sys_atom_polar[itype].reshape(3, 3)) + ) + sys_constant_matrix.append(cur_constant_matrix) + constant_matrix = np.stack(sys_constant_matrix).mean(axis=0) + + # handle nan values. + constant_matrix = np.nan_to_num(constant_matrix) + if stat_file_path is not None: + stat_file_path.save_numpy(constant_matrix) + self.constant_matrix = torch.tensor(constant_matrix, device=env.DEVICE) + + def forward( + self, + descriptor: torch.Tensor, + atype: torch.Tensor, + gr: Optional[torch.Tensor] = None, + g2: Optional[torch.Tensor] = None, + h2: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + ): + nframes, nloc, _ = descriptor.shape + assert ( + gr is not None + ), "Must provide the rotation matrix for polarizability fitting." + # (nframes, nloc, _net_out_dim) + out = self._forward_common(descriptor, atype, gr, g2, h2, fparam, aparam)[ + self.var_name + ] + out = out * self.scale[atype] + gr = gr.view(nframes * nloc, -1, 3) # (nframes * nloc, m1, 3) + + if self.fit_diag: + out = out.reshape(-1, self.embedding_width) + out = torch.einsum("ij,ijk->ijk", out, gr) + else: + out = out.reshape(-1, self.embedding_width, self.embedding_width) + out = (out + out.transpose(1, 2)) / 2 + out = torch.einsum("bim,bmj->bij", out, gr) # (nframes * nloc, m1, 3) + out = torch.einsum( + "bim,bmj->bij", gr.transpose(1, 2), out + ) # (nframes * nloc, 3, 3) + out = out.view(nframes, nloc, 3, 3) + if self.shift_diag: + bias = self.constant_matrix[atype] + + # (nframes, nloc, 1) + bias = bias.unsqueeze(-1) * self.scale[atype] + + eye = torch.eye(3, device=env.DEVICE) + eye = eye.repeat(nframes, nloc, 1, 1) + # (nframes, nloc, 3, 3) + bias = bias.unsqueeze(-1) * eye + out = out + bias + + return {self.var_name: out.to(env.GLOBAL_PT_FLOAT_PRECISION)} + + # make jit happy with torch 2.0.0 + exclude_types: 
List[int] diff --git a/deepmd/pt/model/task/task.py b/deepmd/pt/model/task/task.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/deepmd/pt/model/task/task.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/deepmd/pt/model/task/type_predict.py b/deepmd/pt/model/task/type_predict.py new file mode 100644 index 0000000000..c696590043 --- /dev/null +++ b/deepmd/pt/model/task/type_predict.py @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Optional, +) + +import torch + +from deepmd.pt.model.network.network import ( + MaskLMHead, +) +from deepmd.pt.model.task import ( + Fitting, +) + + +class TypePredictNet(Fitting): + def __init__(self, feature_dim, ntypes, activation_function="gelu", **kwargs): + """Construct a type predict net. + + Args: + - feature_dim: Input dm. + - ntypes: Numer of types to predict. + - activation_function: Activate function. + """ + super().__init__() + self.feature_dim = feature_dim + self.ntypes = ntypes + self.lm_head = MaskLMHead( + embed_dim=self.feature_dim, + output_dim=ntypes, + activation_fn=activation_function, + weight=None, + ) + + def forward(self, features, masked_tokens: Optional[torch.Tensor] = None): + """Calculate the predicted logits. + Args: + - features: Input features with shape [nframes, nloc, feature_dim]. + - masked_tokens: Input masked tokens with shape [nframes, nloc]. + + Returns + ------- + - logits: Predicted probs with shape [nframes, nloc, ntypes]. 
# ---------------------------------------------------------------------------
# deepmd/pt/optimizer/KFWrapper.py
# ---------------------------------------------------------------------------
# SPDX-License-Identifier: LGPL-3.0-or-later
import math
from typing import (
    Optional,
    Tuple,
)

import numpy as np
import torch
import torch.distributed as dist
import torch.nn as nn
from torch.optim.optimizer import (
    Optimizer,
)


class KFOptimizerWrapper:
    """Feed energy / force / coordinate errors to a Kalman-filter optimizer.

    Every ``update_*`` method follows the same recipe:

    1. run ``self.model(**inputs, inference_only=True)``;
    2. turn the signed residual ``label - prediction`` into a non-negative
       scalar ``error`` (element-wise absolute value, then mean);
    3. flip the sign of the prediction wherever the residual was negative and
       back-propagate its sum, so the parameter gradients carry the matching
       sign;
    4. call ``self.optimizer.step(error)``.

    The optimizer must provide ``set_grad_prefactor``, ``zero_grad`` and a
    ``step(error)`` method (see ``LKFOptimizer`` below).
    """

    def __init__(
        self,
        model: nn.Module,
        optimizer: Optimizer,
        atoms_selected: int,
        atoms_per_group: int,
        is_distributed: bool = False,
    ) -> None:
        """Store the collaborators and the atom-sampling configuration.

        Parameters
        ----------
        model
            Callable returning ``(model_pred, _, _)`` where ``model_pred`` is a
            dict of predictions.
        optimizer
            Optimizer exposing ``set_grad_prefactor`` and ``step(error)``.
        atoms_selected
            Total number of atom indices drawn per force/coordinate update
            (e.g. 24). Must be a multiple of ``atoms_per_group``.
        atoms_per_group
            Number of atoms handled by one optimizer step (e.g. 6).
        is_distributed
            When true, the scalar error is averaged over all ranks with
            ``torch.distributed.all_reduce``.
        """
        self.model = model
        self.optimizer = optimizer
        self.atoms_selected = atoms_selected
        self.atoms_per_group = atoms_per_group
        self.is_distributed = is_distributed

    def update_energy(
        self, inputs: dict, Etot_label: torch.Tensor, update_prefactor: float = 1
    ) -> torch.Tensor:
        """Perform one optimizer step driven by the total-energy error.

        Parameters
        ----------
        inputs
            Keyword arguments forwarded to the model; ``inputs["atype"]`` is
            read to obtain the atom count from its last dimension.
        Etot_label
            Reference energies; the first dimension is taken as the batch size.
        update_prefactor
            Scale applied to both the error and the back-propagated prediction.

        Returns
        -------
        torch.Tensor
            The energy prediction after scaling by ``update_prefactor`` and
            sign-flipping the entries whose residual was negative.
        """
        model_pred, _, _ = self.model(**inputs, inference_only=True)
        Etot_predict = model_pred["energy"]
        natoms_sum = int(inputs["atype"].shape[-1])
        self.optimizer.set_grad_prefactor(natoms_sum)

        self.optimizer.zero_grad()
        bs = Etot_label.shape[0]
        error = Etot_label - Etot_predict
        error = error / natoms_sum
        mask = error < 0

        # |residual| per frame, scaled, then averaged into one scalar.
        error = error * update_prefactor
        error[mask] = -1 * error[mask]
        error = error.mean()

        if self.is_distributed:
            dist.all_reduce(error)
            error /= dist.get_world_size()

        # Flip the prediction where the residual was negative so that the
        # gradient of the sum points in the direction that reduces |residual|.
        Etot_predict = update_prefactor * Etot_predict
        Etot_predict[mask] = -Etot_predict[mask]

        Etot_predict.sum().backward()
        error = error * math.sqrt(bs)
        self.optimizer.step(error)
        return Etot_predict

    def update_force(
        self, inputs: dict, Force_label: torch.Tensor, update_prefactor: float = 1
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Perform one optimizer step per sampled atom group, driven by force errors.

        ``atoms_selected`` atom indices are drawn at random and split into
        groups of ``atoms_per_group``; each group triggers a fresh forward
        pass, a backward pass and one ``optimizer.step``.

        Returns
        -------
        tuple of torch.Tensor
            ``(energy, force)`` predictions from the last group's forward pass.
        """
        natoms_sum = int(inputs["atype"].shape[-1])
        bs = Force_label.shape[0]
        self.optimizer.set_grad_prefactor(natoms_sum * self.atoms_per_group * 3)

        index = self.__sample(self.atoms_selected, self.atoms_per_group, natoms_sum)

        for i in range(index.shape[0]):
            self.optimizer.zero_grad()
            model_pred, _, _ = self.model(**inputs, inference_only=True)
            Etot_predict = model_pred["energy"]
            force_predict = model_pred["force"]
            error_tmp = Force_label[:, index[i]] - force_predict[:, index[i]]
            error_tmp = update_prefactor * error_tmp
            mask = error_tmp < 0
            error_tmp[mask] = -1 * error_tmp[mask]
            error = error_tmp.mean() / natoms_sum

            if self.is_distributed:
                dist.all_reduce(error)
                error /= dist.get_world_size()

            tmp_force_predict = force_predict[:, index[i]] * update_prefactor
            tmp_force_predict[mask] = -tmp_force_predict[mask]

            # In order to solve a pytorch bug, reference: https://github.com/pytorch/pytorch/issues/43259
            (tmp_force_predict.sum() + Etot_predict.sum() * 0).backward()
            error = error * math.sqrt(bs)
            self.optimizer.step(error)
        return Etot_predict, force_predict

    def update_denoise_coord(
        self,
        inputs: dict,
        clean_coord: torch.Tensor,
        update_prefactor: float = 1,
        mask_loss_coord: bool = True,
        coord_mask: Optional[torch.Tensor] = None,
    ) -> dict:
        """Perform one optimizer step per sampled atom group, driven by coordinate errors.

        Parameters
        ----------
        inputs
            Keyword arguments forwarded to the model.
        clean_coord
            Reference coordinates compared against ``model_pred["updated_coord"]``.
        update_prefactor
            Scale applied to the error and to the back-propagated prediction.
        mask_loss_coord
            When true, residuals of atoms where ``coord_mask`` is false are
            zeroed before the error is computed.
        coord_mask
            Boolean per-atom mask; required when ``mask_loss_coord`` is true.

        Returns
        -------
        dict
            The model prediction dict from the last group's forward pass.

        Raises
        ------
        ValueError
            If ``mask_loss_coord`` is true but ``coord_mask`` is ``None``.
        """
        if mask_loss_coord and coord_mask is None:
            raise ValueError("coord_mask must be provided when mask_loss_coord is True")

        natoms_sum = int(inputs["atype"].shape[-1])
        bs = clean_coord.shape[0]
        self.optimizer.set_grad_prefactor(natoms_sum * self.atoms_per_group * 3)

        index = self.__sample(self.atoms_selected, self.atoms_per_group, natoms_sum)

        for i in range(index.shape[0]):
            self.optimizer.zero_grad()
            model_pred, _, _ = self.model(**inputs, inference_only=True)
            updated_coord = model_pred["updated_coord"]
            error_tmp = clean_coord[:, index[i]] - updated_coord[:, index[i]]
            error_tmp = update_prefactor * error_tmp
            if mask_loss_coord:
                error_tmp[~coord_mask[:, index[i]]] = 0
            mask = error_tmp < 0
            error_tmp[mask] = -1 * error_tmp[mask]
            error = error_tmp.mean() / natoms_sum

            if self.is_distributed:
                dist.all_reduce(error)
                error /= dist.get_world_size()

            # The prefactor is applied exactly once here; the masked entries
            # are only negated (same convention as update_force).
            tmp_coord_predict = updated_coord[:, index[i]] * update_prefactor
            tmp_coord_predict[mask] = -tmp_coord_predict[mask]

            # In order to solve a pytorch bug, reference: https://github.com/pytorch/pytorch/issues/43259
            (tmp_coord_predict.sum() + updated_coord.sum() * 0).backward()
            error = error * math.sqrt(bs)
            self.optimizer.step(error)
        return model_pred

    def __sample(
        self, atoms_selected: int, atoms_per_group: int, natoms: int
    ) -> np.ndarray:
        """Draw ``atoms_selected`` atom indices and arrange them in groups.

        Indices are drawn from ``range(natoms)`` with ``Generator.choice``
        using its default arguments, from a freshly created, unseeded
        generator.

        Returns
        -------
        np.ndarray
            Integer array of shape ``(atoms_selected // atoms_per_group, atoms_per_group)``.

        Raises
        ------
        ValueError
            If ``atoms_selected`` is not a multiple of ``atoms_per_group``.
        """
        if atoms_selected % atoms_per_group:
            raise ValueError(
                f"atoms_selected ({atoms_selected}) must be a multiple of "
                f"atoms_per_group ({atoms_per_group})"
            )
        rng = np.random.default_rng()
        return rng.choice(natoms, atoms_selected).reshape(-1, atoms_per_group)


# To profile a section of code:
# with torch.autograd.profiler.profile(enabled=True, use_cuda=True, record_shapes=False) as prof:
#     ...  # the code to profile
# print(prof.key_averages().table(sort_by="self_cpu_time_total"))


# ---------------------------------------------------------------------------
# deepmd/pt/optimizer/LKF.py
# ---------------------------------------------------------------------------
# SPDX-License-Identifier: LGPL-3.0-or-later
import logging
import math

import torch
import torch.distributed as dist
from torch.optim.optimizer import (
    Optimizer,
)


def distribute_indices(total_length, num_workers):
    """Split ``range(total_length)`` into ``num_workers`` contiguous slices.

    The first ``total_length % num_workers`` workers receive one extra index.

    Returns
    -------
    tuple
        ``(indices, remainder)`` where ``indices`` is a list of half-open
        ``(start, end)`` pairs, one per worker, and ``remainder`` is
        ``total_length % num_workers``.
    """
    indices_per_worker, remainder = divmod(total_length, num_workers)

    indices = []
    start = 0
    for worker in range(num_workers):
        end = start + indices_per_worker + (1 if worker < remainder else 0)
        indices.append((start, end))
        start = end

    return indices, remainder


def _block_sizes(param_num, block_size):
    """Return the sizes of the blocks a packed group of ``param_num`` weights is cut into.

    Groups smaller than ``block_size`` stay whole; larger ones become full
    blocks of ``block_size`` followed by one block holding the remainder.
    """
    if param_num < block_size:
        return [param_num]
    full_blocks, rest = divmod(param_num, block_size)
    return [block_size] * full_blocks + ([rest] if rest else [])


class LKFOptimizer(Optimizer):
    """Kalman-filter style optimizer keeping one covariance matrix ``P`` per weight block.

    Parameters are flattened, greedily packed into groups of at most
    ``block_size`` elements (larger parameters are split), and each resulting
    block owns a square matrix ``P`` initialised to the identity. When
    ``torch.distributed`` is initialised, the blocks are divided among ranks
    and each rank stores and updates only its own ``P`` matrices.
    """

    def __init__(
        self,
        params,
        kalman_lambda=0.98,
        kalman_nue=0.9987,
        block_size=5120,
    ):
        """Create the optimizer state.

        Parameters
        ----------
        params
            Iterable of parameters; a single parameter group is required.
        kalman_lambda
            Initial value of the factor stored in the state as
            ``"kalman_lambda"``; it is updated after every step as
            ``nue * lambda + 1 - nue``.
        kalman_nue
            The ``nue`` coefficient used in that update.
        block_size
            Maximum number of weights per block.

        Raises
        ------
        ValueError
            If more than one parameter group is given, or the group is empty.
        """
        defaults = {"lr": 0.1, "kalman_nue": kalman_nue, "block_size": block_size}

        super().__init__(params, defaults)

        self._params = self.param_groups[0]["params"]

        if len(self.param_groups) != 1 or len(self._params) == 0:
            raise ValueError(
                "LKF doesn't support per-parameter options (parameter groups)"
            )

        # NOTE: LKF has only global state, but we register it as state for
        # the first param, because this helps with casting in load_state_dict
        self._state = self.state[self._params[0]]
        self._state.setdefault("kalman_lambda", kalman_lambda)
        # is_initialized() is only meaningful when the distributed package was built.
        self.dist_init = dist.is_available() and dist.is_initialized()
        self.rank = dist.get_rank() if self.dist_init else 0
        self.dindex = []
        self.remainder = 0
        self.__init_P()

    def __init_P(self):
        """Pack the parameters into blocks and allocate the identity ``P`` matrices."""
        block_size = self.__get_blocksize()
        data_type = self._params[0].dtype
        # P is multiplied with gradients of the parameters, so it must live on
        # the same device as the parameters (not on "cuda:<global rank>").
        device = self._params[0].device

        # Greedy packing: keep adding parameters to the current group until the
        # next one would push it over block_size.
        param_nums = []
        param_sum = 0
        for param_group in self.param_groups:
            for param in param_group["params"]:
                param_num = param.data.nelement()
                if param_sum + param_num > block_size:
                    if param_sum > 0:
                        param_nums.append(param_sum)
                    param_sum = param_num
                else:
                    param_sum += param_num
        param_nums.append(param_sum)

        logging.info("LKF parameter nums: %s", param_nums)

        params_packed_index = []
        for param_num in param_nums:
            params_packed_index.extend(_block_sizes(param_num, block_size))

        if self.dist_init:
            self.dindex, self.remainder = distribute_indices(
                len(params_packed_index), dist.get_world_size()
            )

        # In distributed mode each rank only materialises the blocks it owns.
        P = [
            torch.eye(size, dtype=data_type, device=device)
            for index, size in enumerate(params_packed_index)
            if not self.dist_init or self.rank == self.get_device_id(index)
        ]

        self._state.setdefault("P", P)
        self._state.setdefault("weights_num", len(P))
        self._state.setdefault("params_packed_index", params_packed_index)

    def __get_blocksize(self):
        """Return the ``block_size`` option of the single parameter group."""
        return self.param_groups[0]["block_size"]

    def __get_nue(self):
        """Return the ``kalman_nue`` option of the single parameter group."""
        return self.param_groups[0]["kalman_nue"]

    def __split_weights(self, weight):
        """Cut a flattened ``(n, 1)`` tensor into consecutive chunks of at most ``block_size`` rows."""
        block_size = self.__get_blocksize()
        param_num = weight.nelement()
        if param_num < block_size:
            return [weight]
        return [
            weight[start : start + block_size]
            for start in range(0, param_num, block_size)
        ]

    def __update(self, H, error, weights):
        """Apply one Kalman update to the local blocks and write the weights back.

        Parameters
        ----------
        H
            Per-block flattened gradients, each of shape ``(n_i, 1)``.
        error
            Scalar error supplied by the caller of ``step``.
        weights
            Per-block flattened weights, aligned with ``H``.
        """
        P = self._state.get("P")
        kalman_lambda = self._state.get("kalman_lambda")
        weights_num = self._state.get("weights_num")
        params_packed_index = self._state.get("params_packed_index")

        block_size = self.__get_blocksize()
        kalman_nue = self.__get_nue()

        # Shared scalar denominator: sum over blocks of (lambda + H^T P H).
        tmp = 0
        for i in range(weights_num):
            tmp = tmp + (kalman_lambda + torch.matmul(torch.matmul(H[i].T, P[i]), H[i]))
        if self.dist_init:
            dist.all_reduce(tmp, op=dist.ReduceOp.SUM)
        A = 1 / tmp

        for i in range(weights_num):
            K = torch.matmul(P[i], H[i])

            weights[i] = weights[i] + A * error * K

            P[i] = (1 / kalman_lambda) * (P[i] - A * torch.matmul(K, K.T))

        if self.dist_init:
            # Every rank updated only its own blocks; exchange them so that all
            # ranks end up with the full list of updated weights.
            world_size = dist.get_world_size()
            local_shape = [tensor.shape[0] for tensor in weights]
            shape_list = [None] * world_size
            dist.all_gather_object(shape_list, local_shape)
            weight_tensor = torch.cat(weights)
            world_shape = [sum(inner_list) for inner_list in shape_list]
            # Receive buffers follow the dtype/device of what is being sent.
            weight_list = [
                torch.zeros(
                    length, dtype=weight_tensor.dtype, device=weight_tensor.device
                )
                for length in world_shape
            ]
            dist.all_gather(weight_list, weight_tensor)
            result = []
            for i in range(world_size):
                result = result + list(torch.split(weight_list[i], shape_list[i]))
            weights = result

        kalman_lambda = kalman_nue * kalman_lambda + 1 - kalman_nue
        self._state.update({"kalman_lambda": kalman_lambda})

        # Scatter the flat blocks back into the parameter tensors.
        i = 0
        param_sum = 0
        for param_group in self.param_groups:
            for param in param_group["params"]:
                param_num = param.nelement()
                if param_num < block_size:
                    # Several small parameters may share block i.
                    weight_tmp = weights[i][param_sum : param_sum + param_num]
                    if param.ndim > 1:
                        # step() flattened the transposed tensor; undo that here.
                        param.data = weight_tmp.reshape(
                            param.data.T.shape
                        ).T.contiguous()
                    else:
                        param.data = weight_tmp.reshape(param.data.shape)

                    param_sum += param_num

                    if param_sum == params_packed_index[i]:
                        i += 1
                        param_sum = 0
                else:
                    # A large parameter spans several consecutive blocks.
                    block_num = math.ceil(param_num / block_size)
                    for j in range(block_num):
                        if j == 0:
                            tmp_weight = weights[i]
                        else:
                            tmp_weight = torch.concat([tmp_weight, weights[i]], dim=0)
                        i += 1
                    param.data = tmp_weight.reshape(param.data.T.shape).T.contiguous()

    def set_grad_prefactor(self, grad_prefactor):
        """Set the divisor applied to every gradient in ``step``; call before ``step``."""
        self.grad_prefactor = grad_prefactor

    def step(self, error):
        """Flatten weights and gradients into blocks and run one Kalman update.

        Parameters
        ----------
        error
            Scalar error that scales the weight update.

        Notes
        -----
        Parameters whose ``grad`` is ``None`` contribute a zero gradient.
        Unlike ``torch.optim.Optimizer.step`` this method takes the error
        instead of a closure.
        """
        params_packed_index = self._state.get("params_packed_index")

        weights = []
        H = []
        param_index = 0
        param_sum = 0
        res = None
        res_grad = None

        for param in self._params:
            nelement_total = param.data.nelement()
            if param.ndim > 1:
                tmp = param.data.T.contiguous().reshape(nelement_total, 1)
                if param.grad is None:
                    tmp_grad = torch.zeros_like(tmp)
                else:
                    tmp_grad = (
                        (param.grad / self.grad_prefactor)
                        .T.contiguous()
                        .reshape(param.grad.nelement(), 1)
                    )
            else:
                tmp = param.data.reshape(nelement_total, 1)
                if param.grad is None:
                    tmp_grad = torch.zeros_like(tmp)
                else:
                    tmp_grad = (param.grad / self.grad_prefactor).reshape(
                        param.grad.nelement(), 1
                    )

            tmp = self.__split_weights(tmp)
            tmp_grad = self.__split_weights(tmp_grad)

            for split_grad, split_weight in zip(tmp_grad, tmp):
                nelement = split_grad.nelement()

                # Accumulate pieces until the current block is complete.
                if param_sum == 0:
                    res_grad = split_grad
                    res = split_weight
                else:
                    res_grad = torch.concat((res_grad, split_grad), dim=0)
                    res = torch.concat((res, split_weight), dim=0)

                param_sum += nelement

                if param_sum == params_packed_index[param_index]:
                    param_sum = 0
                    # In distributed mode only the owning rank keeps the block.
                    if not self.dist_init or self.rank == self.get_device_id(
                        param_index
                    ):
                        weights.append(res)
                        H.append(res_grad)
                    param_index += 1

        self.__update(H, error, weights)

    def get_device_id(self, index):
        """Return the rank owning block ``index`` according to ``self.dindex``, or ``None``."""
        for i, (start, end) in enumerate(self.dindex):
            if start <= index < end:
                return i
        return None


# ---------------------------------------------------------------------------
# deepmd/pt/optimizer/__init__.py
# ---------------------------------------------------------------------------
# SPDX-License-Identifier: LGPL-3.0-or-later
# The package __init__ re-exports the two classes defined above; the relative
# imports only resolve inside the package, so they are shown verbatim here:
#
#     from .KFWrapper import (
#         KFOptimizerWrapper,
#     )
#     from .LKF import (
#         LKFOptimizer,
#     )
#
#     __all__ = ["KFOptimizerWrapper", "LKFOptimizer"]
b/deepmd/pt/train/__init__.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py new file mode 100644 index 0000000000..ff1c350f47 --- /dev/null +++ b/deepmd/pt/train/training.py @@ -0,0 +1,1150 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import functools +import logging +import time +from copy import ( + deepcopy, +) +from pathlib import ( + Path, +) +from typing import ( + Any, + Dict, +) + +import numpy as np +import torch + +from deepmd.common import ( + symlink_prefix_files, +) +from deepmd.loggers.training import ( + format_training_message, + format_training_message_per_task, +) +from deepmd.pt.loss import ( + DenoiseLoss, + DOSLoss, + EnergySpinLoss, + EnergyStdLoss, + TensorLoss, +) +from deepmd.pt.model.model import ( + EnergyModel, + get_model, + get_zbl_model, +) +from deepmd.pt.optimizer import ( + KFOptimizerWrapper, + LKFOptimizer, +) +from deepmd.pt.train.wrapper import ( + ModelWrapper, +) +from deepmd.pt.utils import ( + dp_random, +) +from deepmd.pt.utils.dataloader import ( + BufferedIterator, + get_weighted_sampler, +) +from deepmd.pt.utils.env import ( + DEVICE, + JIT, + LOCAL_RANK, + NUM_WORKERS, + SAMPLER_RECORD, +) +from deepmd.pt.utils.learning_rate import ( + LearningRateExp, +) +from deepmd.pt.utils.stat import ( + make_stat_input, +) +from deepmd.pt.utils.utils import ( + to_numpy_array, +) +from deepmd.utils.data import ( + DataRequirementItem, +) + +if torch.__version__.startswith("2"): + import torch._dynamo + +import torch.distributed as dist +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.utils.data import ( + DataLoader, +) + +from deepmd.utils.path import ( + DPH5Path, +) + +log = logging.getLogger(__name__) + + +class Trainer: + def __init__( + self, + config: Dict[str, Any], + training_data, + stat_file_path=None, + validation_data=None, + init_model=None, + restart_model=None, + finetune_model=None, + 
force_load=False, + shared_links=None, + finetune_links=None, + init_frz_model=None, + ): + """Construct a DeePMD trainer. + + Args: + - config: The Dict-like configuration with training options. + """ + if init_model is not None: + resume_model = init_model + elif restart_model is not None: + resume_model = restart_model + elif finetune_model is not None: + resume_model = finetune_model + else: + resume_model = None + resuming = resume_model is not None + self.restart_training = restart_model is not None + model_params = config["model"] + training_params = config["training"] + self.multi_task = "model_dict" in model_params + self.finetune_links = finetune_links + self.model_keys = ( + list(model_params["model_dict"]) if self.multi_task else ["Default"] + ) + self.rank = dist.get_rank() if dist.is_initialized() else 0 + self.world_size = dist.get_world_size() if dist.is_initialized() else 1 + self.num_model = len(self.model_keys) + + # Iteration config + self.num_steps = training_params["numb_steps"] + self.disp_file = training_params.get("disp_file", "lcurve.out") + self.disp_freq = training_params.get("disp_freq", 1000) + self.save_ckpt = training_params.get("save_ckpt", "model.ckpt") + self.save_freq = training_params.get("save_freq", 1000) + self.max_ckpt_keep = training_params.get("max_ckpt_keep", 5) + self.lcurve_should_print_header = True + + def get_opt_param(params): + opt_type = params.get("opt_type", "Adam") + opt_param = { + "kf_blocksize": params.get("kf_blocksize", 5120), + "kf_start_pref_e": params.get("kf_start_pref_e", 1), + "kf_limit_pref_e": params.get("kf_limit_pref_e", 1), + "kf_start_pref_f": params.get("kf_start_pref_f", 1), + "kf_limit_pref_f": params.get("kf_limit_pref_f", 1), + } + return opt_type, opt_param + + def get_data_loader(_training_data, _validation_data, _training_params): + def get_dataloader_and_buffer(_data, _params): + if "auto_prob" in _training_params["training_data"]: + _sampler = get_weighted_sampler( + _data, 
_params["training_data"]["auto_prob"] + ) + elif "sys_probs" in _training_params["training_data"]: + _sampler = get_weighted_sampler( + _data, + _params["training_data"]["sys_probs"], + sys_prob=True, + ) + else: + _sampler = get_weighted_sampler(_data, "prob_sys_size") + + if _sampler is None: + log.warning( + "Sampler not specified!" + ) # None sampler will lead to a premature stop iteration. Replacement should be True in attribute of the sampler to produce expected number of items in one iteration. + _dataloader = DataLoader( + _data, + sampler=_sampler, + batch_size=None, + num_workers=NUM_WORKERS, # setting to 0 diverges the behavior of its iterator; should be >=1 + drop_last=False, + pin_memory=True, + ) + with torch.device("cpu"): + _data_buffered = BufferedIterator(iter(_dataloader)) + return _dataloader, _data_buffered + + training_dataloader, training_data_buffered = get_dataloader_and_buffer( + _training_data, _training_params + ) + + if _validation_data is not None: + ( + validation_dataloader, + validation_data_buffered, + ) = get_dataloader_and_buffer(_validation_data, _training_params) + valid_numb_batch = _training_params["validation_data"].get( + "numb_btch", 1 + ) + else: + validation_dataloader = None + validation_data_buffered = None + valid_numb_batch = 1 + return ( + training_dataloader, + training_data_buffered, + validation_dataloader, + validation_data_buffered, + valid_numb_batch, + ) + + def single_model_stat( + _model, + _data_stat_nbatch, + _training_data, + _validation_data, + _stat_file_path, + _data_requirement, + ): + if _model.get_dim_fparam() > 0: + fparam_requirement_items = [ + DataRequirementItem( + "fparam", _model.get_dim_fparam(), atomic=False, must=True + ) + ] + _data_requirement += fparam_requirement_items + if _model.get_dim_aparam() > 0: + aparam_requirement_items = [ + DataRequirementItem( + "aparam", _model.get_dim_aparam(), atomic=True, must=True + ) + ] + _data_requirement += aparam_requirement_items + has_spin = 
getattr(_model, "has_spin", False) + if callable(has_spin): + has_spin = has_spin() + if has_spin: + spin_requirement_items = [ + DataRequirementItem("spin", ndof=3, atomic=True, must=True) + ] + _data_requirement += spin_requirement_items + _training_data.add_data_requirement(_data_requirement) + if _validation_data is not None: + _validation_data.add_data_requirement(_data_requirement) + + @functools.lru_cache + def get_sample(): + sampled = make_stat_input( + _training_data.systems, + _training_data.dataloaders, + _data_stat_nbatch, + ) + return sampled + + if not resuming and self.rank == 0: + _model.compute_or_load_stat( + sampled_func=get_sample, + stat_file_path=_stat_file_path, + ) + if isinstance(_stat_file_path, DPH5Path): + _stat_file_path.root.close() + return get_sample + + def get_single_model( + _model_params, + ): + if "use_srtab" in _model_params: + model = get_zbl_model(deepcopy(_model_params)).to(DEVICE) + else: + model = get_model(deepcopy(_model_params)).to(DEVICE) + return model + + def get_lr(lr_params): + assert ( + lr_params.get("type", "exp") == "exp" + ), "Only learning rate `exp` is supported!" 
+ lr_params["stop_steps"] = self.num_steps - self.warmup_steps + lr_exp = LearningRateExp(**lr_params) + return lr_exp + + def get_loss(loss_params, start_lr, _ntypes, _model): + loss_type = loss_params.get("type", "ener") + if loss_type == "ener": + loss_params["starter_learning_rate"] = start_lr + return EnergyStdLoss(**loss_params) + elif loss_type == "dos": + loss_params["starter_learning_rate"] = start_lr + loss_params["numb_dos"] = _model.model_output_def()["dos"].output_size + return DOSLoss(**loss_params) + elif loss_type == "ener_spin": + loss_params["starter_learning_rate"] = start_lr + return EnergySpinLoss(**loss_params) + elif loss_type == "denoise": + loss_params["ntypes"] = _ntypes + return DenoiseLoss(**loss_params) + elif loss_type == "tensor": + model_output_type = _model.model_output_type() + if "mask" in model_output_type: + model_output_type.pop(model_output_type.index("mask")) + tensor_name = model_output_type[0] + loss_params["tensor_name"] = tensor_name + loss_params["tensor_size"] = _model.model_output_def()[ + tensor_name + ].output_size + label_name = tensor_name + if label_name == "polar": + label_name = "polarizability" + loss_params["label_name"] = label_name + return TensorLoss(**loss_params) + else: + raise NotImplementedError + + # Optimizer + if self.multi_task and training_params.get("optim_dict", None) is not None: + self.optim_dict = training_params.get("optim_dict") + missing_keys = [ + key for key in self.model_keys if key not in self.optim_dict + ] + assert ( + not missing_keys + ), f"These keys are not in optim_dict: {missing_keys}!" 
+ self.opt_type = {} + self.opt_param = {} + for model_key in self.model_keys: + self.opt_type[model_key], self.opt_param[model_key] = get_opt_param( + self.optim_dict[model_key] + ) + else: + self.opt_type, self.opt_param = get_opt_param(training_params) + + # Model + dp_random.seed(training_params["seed"]) + if not self.multi_task: + self.model = get_single_model( + model_params, + ) + else: + self.model = {} + for model_key in self.model_keys: + self.model[model_key] = get_single_model( + model_params["model_dict"][model_key], + ) + + # Loss + if not self.multi_task: + self.loss = get_loss( + config["loss"], + config["learning_rate"]["start_lr"], + len(model_params["type_map"]), + self.model, + ) + else: + self.loss = {} + for model_key in self.model_keys: + loss_param = config["loss_dict"][model_key] + if config.get("learning_rate_dict", None) is not None: + lr_param = config["learning_rate_dict"][model_key]["start_lr"] + else: + lr_param = config["learning_rate"]["start_lr"] + ntypes = len(model_params["model_dict"][model_key]["type_map"]) + self.loss[model_key] = get_loss( + loss_param, lr_param, ntypes, self.model[model_key] + ) + + # Data + dp_random.seed(training_params["seed"]) + if not self.multi_task: + self.get_sample_func = single_model_stat( + self.model, + model_params.get("data_stat_nbatch", 10), + training_data, + validation_data, + stat_file_path, + self.loss.label_requirement, + ) + ( + self.training_dataloader, + self.training_data, + self.validation_dataloader, + self.validation_data, + self.valid_numb_batch, + ) = get_data_loader(training_data, validation_data, training_params) + training_data.print_summary( + "training", to_numpy_array(self.training_dataloader.sampler.weights) + ) + if validation_data is not None: + validation_data.print_summary( + "validation", + to_numpy_array(self.validation_dataloader.sampler.weights), + ) + else: + ( + self.training_dataloader, + self.training_data, + self.validation_dataloader, + self.validation_data, 
+ self.valid_numb_batch, + self.get_sample_func, + ) = {}, {}, {}, {}, {}, {} + for model_key in self.model_keys: + self.get_sample_func[model_key] = single_model_stat( + self.model[model_key], + model_params["model_dict"][model_key].get("data_stat_nbatch", 10), + training_data[model_key], + validation_data[model_key], + stat_file_path[model_key], + self.loss[model_key].label_requirement, + ) + ( + self.training_dataloader[model_key], + self.training_data[model_key], + self.validation_dataloader[model_key], + self.validation_data[model_key], + self.valid_numb_batch[model_key], + ) = get_data_loader( + training_data[model_key], + validation_data[model_key], + training_params["data_dict"][model_key], + ) + + training_data[model_key].print_summary( + f"training in {model_key}", + to_numpy_array(self.training_dataloader[model_key].sampler.weights), + ) + if ( + validation_data is not None + and validation_data[model_key] is not None + ): + validation_data[model_key].print_summary( + f"validation in {model_key}", + to_numpy_array( + self.validation_dataloader[model_key].sampler.weights + ), + ) + + # Learning rate + self.warmup_steps = training_params.get("warmup_steps", 0) + self.gradient_max_norm = training_params.get("gradient_max_norm", 0.0) + assert ( + self.num_steps - self.warmup_steps > 0 or self.warmup_steps == 0 + ), "Warm up steps must be less than total training steps!" 
+ if self.multi_task and config.get("learning_rate_dict", None) is not None: + self.lr_exp = {} + for model_key in self.model_keys: + self.lr_exp[model_key] = get_lr(config["learning_rate_dict"][model_key]) + else: + self.lr_exp = get_lr(config["learning_rate"]) + + # JIT + if JIT: + self.model = torch.jit.script(self.model) + + # Model Wrapper + self.wrapper = ModelWrapper(self.model, self.loss, model_params=model_params) + self.start_step = 0 + + # resuming and finetune + optimizer_state_dict = None + if resuming: + ntest = model_params.get("data_bias_nsample", 1) + origin_model = ( + finetune_model if finetune_model is not None else resume_model + ) + log.info(f"Resuming from {origin_model}.") + state_dict = torch.load(origin_model, map_location=DEVICE) + if "model" in state_dict: + optimizer_state_dict = ( + state_dict["optimizer"] if finetune_model is None else None + ) + state_dict = state_dict["model"] + self.start_step = ( + state_dict["_extra_state"]["train_infos"]["step"] + if self.restart_training + else 0 + ) + if self.rank == 0: + if force_load: + input_keys = list(state_dict.keys()) + target_keys = list(self.wrapper.state_dict().keys()) + missing_keys = [ + item for item in target_keys if item not in input_keys + ] + if missing_keys: + target_state_dict = self.wrapper.state_dict() + slim_keys = [] + for item in missing_keys: + state_dict[item] = target_state_dict[item].clone().detach() + new_key = True + for slim_key in slim_keys: + if slim_key in item: + new_key = False + break + if new_key: + tmp_keys = ".".join(item.split(".")[:3]) + slim_keys.append(tmp_keys) + slim_keys = [i + ".*" for i in slim_keys] + log.warning( + f"Force load mode allowed! 
These keys are not in ckpt and will re-init: {slim_keys}" + ) + + if finetune_model is not None: + new_state_dict = {} + target_state_dict = self.wrapper.state_dict() + + def update_single_finetune_params( + _model_key, + _model_key_from, + _new_state_dict, + _origin_state_dict, + _random_state_dict, + _new_fitting=False, + ): + target_keys = [ + i + for i in _random_state_dict.keys() + if i != "_extra_state" and f".{_model_key}." in i + ] + for item_key in target_keys: + if _new_fitting and ".fitting_net." in item_key: + # print(f'Keep {item_key} in old model!') + _new_state_dict[item_key] = ( + _random_state_dict[item_key].clone().detach() + ) + else: + new_key = item_key.replace( + f".{_model_key}.", f".{_model_key_from}." + ) + # print(f'Replace {item_key} with {new_key} in pretrained_model!') + _new_state_dict[item_key] = ( + _origin_state_dict[new_key].clone().detach() + ) + + if not self.multi_task: + model_key = "Default" + model_key_from = self.finetune_links[model_key] + new_fitting = model_params.pop("new_fitting", False) + update_single_finetune_params( + model_key, + model_key_from, + new_state_dict, + state_dict, + target_state_dict, + _new_fitting=new_fitting, + ) + else: + for model_key in self.model_keys: + if model_key in self.finetune_links: + model_key_from = self.finetune_links[model_key] + new_fitting = model_params["model_dict"][model_key].pop( + "new_fitting", False + ) + else: + model_key_from = model_key + new_fitting = False + update_single_finetune_params( + model_key, + model_key_from, + new_state_dict, + state_dict, + target_state_dict, + _new_fitting=new_fitting, + ) + state_dict = new_state_dict + state_dict["_extra_state"] = self.wrapper.state_dict()[ + "_extra_state" + ] + self.wrapper.load_state_dict(state_dict) + + if finetune_model is not None: + + def single_model_finetune( + _model, + _model_params, + _sample_func, + ): + old_type_map, new_type_map = ( + _model_params["type_map"], + _model_params["new_type_map"], + ) + if 
isinstance(_model, EnergyModel): + _model.change_out_bias( + _sample_func, + bias_adjust_mode=_model_params.get( + "bias_adjust_mode", "change-by-statistic" + ), + origin_type_map=new_type_map, + full_type_map=old_type_map, + ) + else: + # need to updated + pass + + # finetune + if not self.multi_task: + single_model_finetune( + self.model, model_params, self.get_sample_func + ) + else: + for model_key in self.model_keys: + if model_key in self.finetune_links: + log.info( + f"Model branch {model_key} will be fine-tuned. This may take a long time..." + ) + single_model_finetune( + self.model[model_key], + model_params["model_dict"][model_key], + self.get_sample_func[model_key], + ) + else: + log.info( + f"Model branch {model_key} will resume training." + ) + + if init_frz_model is not None: + frz_model = torch.jit.load(init_frz_model, map_location=DEVICE) + self.model.load_state_dict(frz_model.state_dict()) + + # Multi-task share params + if shared_links is not None: + self.wrapper.share_params(shared_links, resume=resuming or self.rank != 0) + + if dist.is_initialized(): + torch.cuda.set_device(LOCAL_RANK) + # DDP will guarantee the model parameters are identical across all processes + self.wrapper = DDP( + self.wrapper, + device_ids=[LOCAL_RANK], + find_unused_parameters=True, + output_device=LOCAL_RANK, + ) + + # TODO add lr warmups for multitask + # author: iProzd + def warm_up_linear(step, warmup_steps): + if step < warmup_steps: + return step / warmup_steps + else: + return self.lr_exp.value(step - warmup_steps) / self.lr_exp.start_lr + + # TODO add optimizers for multitask + # author: iProzd + if self.opt_type == "Adam": + self.optimizer = torch.optim.Adam( + self.wrapper.parameters(), lr=self.lr_exp.start_lr + ) + if optimizer_state_dict is not None and self.restart_training: + self.optimizer.load_state_dict(optimizer_state_dict) + self.scheduler = torch.optim.lr_scheduler.LambdaLR( + self.optimizer, + lambda step: warm_up_linear(step + self.start_step, 
self.warmup_steps), + ) + elif self.opt_type == "LKF": + self.optimizer = LKFOptimizer( + self.wrapper.parameters(), 0.98, 0.99870, self.opt_param["kf_blocksize"] + ) + else: + raise ValueError("Not supported optimizer type '%s'" % self.opt_type) + + # Get model prob for multi-task + if self.multi_task: + self.model_prob = np.array([0.0 for key in self.model_keys]) + if training_params.get("model_prob", None) is not None: + model_prob = training_params["model_prob"] + for ii, model_key in enumerate(self.model_keys): + if model_key in model_prob: + self.model_prob[ii] += float(model_prob[model_key]) + else: + for ii, model_key in enumerate(self.model_keys): + self.model_prob[ii] += float(len(self.training_data[model_key])) + sum_prob = np.sum(self.model_prob) + assert sum_prob > 0.0, "Sum of model prob must be larger than 0!" + self.model_prob = self.model_prob / sum_prob + + # Tensorboard + self.enable_tensorboard = training_params.get("tensorboard", False) + self.tensorboard_log_dir = training_params.get("tensorboard_log_dir", "log") + self.tensorboard_freq = training_params.get("tensorboard_freq", 1) + self.enable_profiler = training_params.get("enable_profiler", False) + + def run(self): + fout = ( + open(self.disp_file, mode="w", buffering=1) if self.rank == 0 else None + ) # line buffered + if SAMPLER_RECORD: + record_file = f"Sample_rank_{self.rank}.txt" + fout1 = open(record_file, mode="w", buffering=1) + log.info("Start to train %d steps.", self.num_steps) + if dist.is_initialized(): + log.info(f"Rank: {dist.get_rank()}/{dist.get_world_size()}") + if self.enable_tensorboard: + from torch.utils.tensorboard import ( + SummaryWriter, + ) + + writer = SummaryWriter(log_dir=self.tensorboard_log_dir) + if self.enable_profiler: + prof = torch.profiler.profile( + schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=1), + on_trace_ready=torch.profiler.tensorboard_trace_handler( + self.tensorboard_log_dir + ), + record_shapes=True, + with_stack=True, + 
) + prof.start() + + def step(_step_id, task_key="Default"): + # PyTorch Profiler + if self.enable_profiler: + prof.step() + self.wrapper.train() + if isinstance(self.lr_exp, dict): + _lr = self.lr_exp[task_key] + else: + _lr = self.lr_exp + cur_lr = _lr.value(_step_id) + pref_lr = cur_lr + self.optimizer.zero_grad(set_to_none=True) + input_dict, label_dict, log_dict = self.get_data( + is_train=True, task_key=task_key + ) + if SAMPLER_RECORD: + print_str = f"Step {_step_id}: sample system{log_dict['sid']} frame{log_dict['fid']}\n" + fout1.write(print_str) + fout1.flush() + if self.opt_type == "Adam": + cur_lr = self.scheduler.get_last_lr()[0] + if _step_id < self.warmup_steps: + pref_lr = _lr.start_lr + else: + pref_lr = cur_lr + model_pred, loss, more_loss = self.wrapper( + **input_dict, cur_lr=pref_lr, label=label_dict, task_key=task_key + ) + loss.backward() + if self.gradient_max_norm > 0.0: + grad_norm = torch.nn.utils.clip_grad_norm_( + self.wrapper.parameters(), self.gradient_max_norm + ) + if not torch.isfinite(grad_norm).all(): + # check local gradnorm single GPU case, trigger NanDetector + raise FloatingPointError("gradients are Nan/Inf") + with torch.device("cpu"): + self.optimizer.step() + self.scheduler.step() + elif self.opt_type == "LKF": + if isinstance(self.loss, EnergyStdLoss): + KFOptWrapper = KFOptimizerWrapper( + self.wrapper, self.optimizer, 24, 6, dist.is_initialized() + ) + pref_e = self.opt_param["kf_start_pref_e"] * ( + self.opt_param["kf_limit_pref_e"] + / self.opt_param["kf_start_pref_e"] + ) ** (_step_id / self.num_steps) + _ = KFOptWrapper.update_energy( + input_dict, label_dict["energy"], pref_e + ) + pref_f = self.opt_param["kf_start_pref_f"] * ( + self.opt_param["kf_limit_pref_f"] + / self.opt_param["kf_start_pref_f"] + ) ** (_step_id / self.num_steps) + p_energy, p_force = KFOptWrapper.update_force( + input_dict, label_dict["force"], pref_f + ) + # [coord, atype, natoms, mapping, shift, nlist, box] + model_pred = {"energy": 
p_energy, "force": p_force} + module = ( + self.wrapper.module if dist.is_initialized() else self.wrapper + ) + + def fake_model(): + return model_pred + + _, loss, more_loss = module.loss[task_key]( + {}, + fake_model, + label_dict, + int(input_dict["atype"].shape[-1]), + learning_rate=pref_lr, + ) + elif isinstance(self.loss, DenoiseLoss): + KFOptWrapper = KFOptimizerWrapper( + self.wrapper, self.optimizer, 24, 6, dist.is_initialized() + ) + module = ( + self.wrapper.module if dist.is_initialized() else self.wrapper + ) + model_pred = KFOptWrapper.update_denoise_coord( + input_dict, + label_dict["clean_coord"], + 1, + module.loss[task_key].mask_loss_coord, + label_dict["coord_mask"], + ) + loss, more_loss = module.loss[task_key]( + model_pred, + label_dict, + input_dict["natoms"], + learning_rate=pref_lr, + ) + else: + raise ValueError("Not supported optimizer type '%s'" % self.opt_type) + + # Log and persist + if _step_id % self.disp_freq == 0: + self.wrapper.eval() + + def log_loss_train(_loss, _more_loss, _task_key="Default"): + results = {} + rmse_val = { + item: _more_loss[item] + for item in _more_loss + if "l2_" not in item + } + for item in sorted(rmse_val.keys()): + results[item] = rmse_val[item] + return results + + def log_loss_valid(_task_key="Default"): + single_results = {} + sum_natoms = 0 + if not self.multi_task: + valid_numb_batch = self.valid_numb_batch + else: + valid_numb_batch = self.valid_numb_batch[_task_key] + for ii in range(valid_numb_batch): + self.optimizer.zero_grad() + input_dict, label_dict, _ = self.get_data( + is_train=False, task_key=_task_key + ) + if input_dict == {}: + # no validation data + return {} + _, loss, more_loss = self.wrapper( + **input_dict, + cur_lr=pref_lr, + label=label_dict, + task_key=_task_key, + ) + # more_loss.update({"rmse": math.sqrt(loss)}) + natoms = int(input_dict["atype"].shape[-1]) + sum_natoms += natoms + for k, v in more_loss.items(): + if "l2_" not in k: + single_results[k] = ( + 
single_results.get(k, 0.0) + v * natoms + ) + results = {k: v / sum_natoms for k, v in single_results.items()} + return results + + if not self.multi_task: + train_results = log_loss_train(loss, more_loss) + valid_results = log_loss_valid() + if self.rank == 0: + log.info( + format_training_message_per_task( + batch=_step_id, + task_name="trn", + rmse=train_results, + learning_rate=cur_lr, + ) + ) + if valid_results: + log.info( + format_training_message_per_task( + batch=_step_id, + task_name="val", + rmse=valid_results, + learning_rate=None, + ) + ) + else: + train_results = {_key: {} for _key in self.model_keys} + valid_results = {_key: {} for _key in self.model_keys} + train_results[task_key] = log_loss_train( + loss, more_loss, _task_key=task_key + ) + for _key in self.model_keys: + if _key != task_key: + self.optimizer.zero_grad() + input_dict, label_dict, _ = self.get_data( + is_train=True, task_key=_key + ) + _, loss, more_loss = self.wrapper( + **input_dict, + cur_lr=pref_lr, + label=label_dict, + task_key=_key, + ) + train_results[_key] = log_loss_train( + loss, more_loss, _task_key=_key + ) + valid_results[_key] = log_loss_valid(_task_key=_key) + if self.rank == 0: + log.info( + format_training_message_per_task( + batch=_step_id, + task_name=_key + "_trn", + rmse=train_results[_key], + learning_rate=cur_lr, + ) + ) + if valid_results is not None and valid_results[_key]: + log.info( + format_training_message_per_task( + batch=_step_id, + task_name=_key + "_val", + rmse=valid_results[_key], + learning_rate=None, + ) + ) + + current_time = time.time() + train_time = current_time - self.t0 + self.t0 = current_time + if self.rank == 0: + log.info( + format_training_message( + batch=_step_id, + wall_time=train_time, + ) + ) + + if fout: + if self.lcurve_should_print_header: + self.print_header(fout, train_results, valid_results) + self.lcurve_should_print_header = False + self.print_on_training( + fout, _step_id, cur_lr, train_results, valid_results + ) + + if 
( + ((_step_id + 1) % self.save_freq == 0 and _step_id != self.start_step) + or (_step_id + 1) == self.num_steps + ) and (self.rank == 0 or dist.get_rank() == 0): + # Handle the case if rank 0 aborted and re-assigned + self.latest_model = Path(self.save_ckpt + f"-{_step_id + 1}.pt") + + module = self.wrapper.module if dist.is_initialized() else self.wrapper + self.save_model(self.latest_model, lr=cur_lr, step=_step_id) + log.info(f"Saved model to {self.latest_model}") + symlink_prefix_files(self.latest_model.stem, self.save_ckpt) + with open("checkpoint", "w") as f: + f.write(str(self.latest_model)) + + # tensorboard + if self.enable_tensorboard and _step_id % self.tensorboard_freq == 0: + writer.add_scalar(f"{task_key}/lr", cur_lr, _step_id) + writer.add_scalar(f"{task_key}/loss", loss, _step_id) + for item in more_loss: + writer.add_scalar(f"{task_key}/{item}", more_loss[item], _step_id) + + self.t0 = time.time() + for step_id in range(self.num_steps): + if step_id < self.start_step: + continue + if self.multi_task: + chosen_index_list = dp_random.choice( + np.arange(self.num_model), + p=np.array(self.model_prob), + size=self.world_size, + replace=True, + ) + assert chosen_index_list.size == self.world_size + model_index = chosen_index_list[self.rank] + model_key = self.model_keys[model_index] + else: + model_key = "Default" + step(step_id, model_key) + if JIT: + break + + if ( + self.rank == 0 or dist.get_rank() == 0 + ): # Handle the case if rank 0 aborted and re-assigned + if self.num_steps == 0: + # when num_steps is 0, the checkpoint is never not saved + self.latest_model = Path(self.save_ckpt + "-0.pt") + self.save_model(self.latest_model, lr=0, step=0) + log.info(f"Saved model to {self.latest_model}") + symlink_prefix_files(self.latest_model.stem, self.save_ckpt) + with open("checkpoint", "w") as f: + f.write(str(self.latest_model)) + + if JIT: + pth_model_path = ( + "frozen_model.pth" # We use .pth to denote the frozen model + ) + 
def save_model(self, save_path, lr=0.0, step=0):
    """Write a checkpoint and drop the oldest one when too many are kept.

    Parameters
    ----------
    save_path : pathlib.Path
        Destination file of the new checkpoint; its parent directory is
        scanned for older checkpoints.
    lr : float
        Learning rate stored in the wrapper's ``train_infos``.
    step : int
        Training step stored in the wrapper's ``train_infos``.
    """
    # When torch.distributed is initialized the state is read from
    # ``self.wrapper.module`` instead of ``self.wrapper`` itself.
    module = self.wrapper.module if dist.is_initialized() else self.wrapper
    module.train_infos["lr"] = lr
    module.train_infos["step"] = step
    torch.save(
        {"model": module.state_dict(), "optimizer": self.optimizer.state_dict()},
        save_path,
    )
    # ``f.name`` is a bare file name, so it has to be compared with the
    # basename of the prefix; comparing with the full ``self.save_ckpt``
    # never matches once the prefix contains a directory component.
    ckpt_prefix = Path(self.save_ckpt).name
    checkpoint_files = [
        f
        for f in save_path.parent.glob("*.pt")
        if not f.is_symlink() and f.name.startswith(ckpt_prefix)
    ]
    if len(checkpoint_files) > self.max_ckpt_keep:
        # Oldest modification time first; remove exactly one file per save.
        checkpoint_files.sort(key=lambda x: x.stat().st_mtime)
        checkpoint_files[0].unlink()
def print_header(self, fout, train_results, valid_results):
    """Write the column header of the learning-curve file.

    Parameters
    ----------
    fout
        Writable text stream; it is flushed after the header is written.
    train_results : dict
        Single-task: metric name -> value. Multi-task: model key -> such a dict.
    valid_results : dict
        Same layout as ``train_results``. An empty dict means no validation
        results, in which case only the ``_trn`` columns are emitted.
    """
    print_str = ""
    print_str += "# %5s" % "step"
    if not self.multi_task:
        train_keys = sorted(train_results.keys())
        # Use truthiness, exactly like print_on_training does, so that the
        # header and the data rows always have the same number of columns.
        if valid_results:
            prop_fmt = " %11s %11s"
            for k in train_keys:
                print_str += prop_fmt % (k + "_val", k + "_trn")
        else:
            prop_fmt = " %11s"
            for k in train_keys:
                print_str += prop_fmt % (k + "_trn")
    else:
        for model_key in self.model_keys:
            if valid_results[model_key]:
                prop_fmt = " %11s %11s"
                for k in sorted(train_results[model_key].keys()):
                    print_str += prop_fmt % (
                        k + f"_val_{model_key}",
                        k + f"_trn_{model_key}",
                    )
            else:
                prop_fmt = " %11s"
                for k in sorted(train_results[model_key].keys()):
                    print_str += prop_fmt % (k + f"_trn_{model_key}")
    print_str += " %8s\n" % "lr"
    print_str += "# If there is no available reference data, rmse_*_{val,trn} will print nan\n"
    fout.write(print_str)
    fout.flush()
def share_params(self, shared_links, resume=False):
    """
    Share the parameters of classes following rules defined in shared_links during multitask training.
    If not start from checkpoint (resume is False),
    some separated parameters (e.g. mean and stddev) will be re-calculated across different classes.

    Parameters
    ----------
    shared_links : dict
        Maps a shared item name to a dict whose ``"links"`` list holds
        entries with ``"shared_type"``, ``"model_key"`` and
        ``"shared_level"``. The first entry is the base every later entry
        shares with.
    resume : bool
        Forwarded unchanged to each linked object's ``share_params``.

    Raises
    ------
    RuntimeError
        If a descriptor-like ``shared_type`` is neither ``"descriptor"``
        nor a ``hybrid`` variant.
    """
    for shared_item in shared_links:
        links = shared_links[shared_item]["links"]
        shared_base = links[0]
        class_type_base = shared_base["shared_type"]
        model_key_base = shared_base["model_key"]
        # Cast like the link levels below, otherwise a string level coming
        # from the configuration makes the ordering assertion raise TypeError.
        shared_level_base = int(shared_base["shared_level"])
        if "descriptor" in class_type_base:
            if class_type_base == "descriptor":
                base_class = self.model[model_key_base].get_descriptor()
            elif "hybrid" in class_type_base:
                # The trailing "_<n>" selects one entry of descriptor_list.
                hybrid_index = int(class_type_base.split("_")[-1])
                base_class = (
                    self.model[model_key_base]
                    .get_descriptor()
                    .descriptor_list[hybrid_index]
                )
            else:
                raise RuntimeError(f"Unknown class_type {class_type_base}!")
            for link_item in links[1:]:
                class_type_link = link_item["shared_type"]
                model_key_link = link_item["model_key"]
                shared_level_link = int(link_item["shared_level"])
                assert (
                    shared_level_link >= shared_level_base
                ), "The shared_links must be sorted by shared_level!"
                assert (
                    "descriptor" in class_type_link
                ), f"Class type mismatched: {class_type_base} vs {class_type_link}!"
                if class_type_link == "descriptor":
                    link_class = self.model[model_key_link].get_descriptor()
                elif "hybrid" in class_type_link:
                    hybrid_index = int(class_type_link.split("_")[-1])
                    link_class = (
                        self.model[model_key_link]
                        .get_descriptor()
                        .descriptor_list[hybrid_index]
                    )
                else:
                    raise RuntimeError(f"Unknown class_type {class_type_link}!")
                link_class.share_params(base_class, shared_level_link, resume=resume)
                log.warning(
                    f"Shared params of {model_key_base}.{class_type_base} and {model_key_link}.{class_type_link}!"
                )
        else:
            # Non-descriptor items are looked up as attributes of the model;
            # a base model without that attribute is skipped silently.
            if hasattr(self.model[model_key_base], class_type_base):
                # getattr() mirrors the hasattr() check above; calling
                # __getattr__ directly bypasses normal attribute lookup.
                base_class = getattr(self.model[model_key_base], class_type_base)
                for link_item in links[1:]:
                    class_type_link = link_item["shared_type"]
                    model_key_link = link_item["model_key"]
                    shared_level_link = int(link_item["shared_level"])
                    assert (
                        shared_level_link >= shared_level_base
                    ), "The shared_links must be sorted by shared_level!"
                    assert (
                        class_type_base == class_type_link
                    ), f"Class type mismatched: {class_type_base} vs {class_type_link}!"
                    link_class = getattr(self.model[model_key_link], class_type_link)
                    link_class.share_params(
                        base_class, shared_level_link, resume=resume
                    )
                    log.warning(
                        f"Shared params of {model_key_base}.{class_type_base} and {model_key_link}.{class_type_link}!"
                    )
def get_extra_state(self) -> Dict:
    """Return the non-tensor state stored with the module.

    The returned dict carries ``model_params`` and ``train_infos`` under
    those same keys, which are the keys ``set_extra_state`` reads back.
    """
    return {
        "model_params": self.model_params,
        "train_infos": self.train_infos,
    }
def is_oom_error(self, e: Exception) -> bool:
    """Check if the exception is an OOM error.

    Parameters
    ----------
    e : Exception
        Exception

    Returns
    -------
    bool
        True only for a ``RuntimeError`` whose first argument is a string
        containing ``"CUDA out of memory."``.
    """
    # A RuntimeError raised without arguments, or with a non-string first
    # argument, is simply not an OOM report; it must not raise here.
    if not isinstance(e, RuntimeError) or not e.args:
        return False
    message = e.args[0]
    return isinstance(message, str) and "CUDA out of memory." in message
+ """ + + def execute_with_batch_size( + batch_size: int, start_index: int + ) -> Tuple[int, Tuple[torch.Tensor]]: + end_index = start_index + batch_size + end_index = min(end_index, total_size) + return (end_index - start_index), callable( + *[ + ( + vv[start_index:end_index] + if (isinstance(vv, np.ndarray) or isinstance(vv, torch.Tensor)) + and vv.ndim > 1 + else vv + ) + for vv in args + ], + **{ + kk: ( + vv[start_index:end_index] + if (isinstance(vv, np.ndarray) or isinstance(vv, torch.Tensor)) + and vv.ndim > 1 + else vv + ) + for kk, vv in kwargs.items() + }, + ) + + index = 0 + results = None + returned_dict = None + while index < total_size: + n_batch, result = self.execute(execute_with_batch_size, index, natoms) + returned_dict = ( + isinstance(result, dict) if returned_dict is None else returned_dict + ) + if not returned_dict: + result = (result,) if not isinstance(result, tuple) else result + index += n_batch + + def append_to_list(res_list, res): + if n_batch: + res_list.append(res) + return res_list + + if not returned_dict: + results = [] if results is None else results + results = append_to_list(results, result) + else: + results = ( + {kk: [] for kk in result.keys()} if results is None else results + ) + results = { + kk: append_to_list(results[kk], result[kk]) for kk in result.keys() + } + assert results is not None + assert returned_dict is not None + + def concate_result(r): + if isinstance(r[0], np.ndarray): + ret = np.concatenate(r, axis=0) + elif isinstance(r[0], torch.Tensor): + ret = torch.cat(r, dim=0) + else: + raise RuntimeError(f"Unexpected result type {type(r[0])}") + return ret + + if not returned_dict: + r_list = [concate_result(r) for r in zip(*results)] + r = tuple(r_list) + if len(r) == 1: + # avoid returning tuple if callable doesn't return tuple + r = r[0] + else: + r = {kk: concate_result(vv) for kk, vv in results.items()} + return r diff --git a/deepmd/pt/utils/cache.py b/deepmd/pt/utils/cache.py new file mode 100644 index 
def lru_cache(maxsize=16, typed=False, copy=False, deepcopy=False):
    """Build an LRU-cache decorator that can hand out copies of cached values.

    Parameters
    ----------
    maxsize, typed
        Passed straight to ``functools.lru_cache``.
    copy : bool
        Return a shallow copy of the cached value on every call.
    deepcopy : bool
        Return a deep copy of the cached value on every call; takes
        precedence over ``copy``.

    Returns
    -------
    Callable
        A decorator. In every mode the decorated function offers
        ``cache_info()`` and ``cache_clear()``.
    """
    if deepcopy:
        duplicate = copy_lib.deepcopy
    elif copy:
        duplicate = copy_lib.copy
    else:
        return functools.lru_cache(maxsize, typed)

    def decorator(f):
        cached_func = functools.lru_cache(maxsize, typed)(f)

        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            # Callers get their own copy, so mutating a result cannot
            # corrupt the value held in the cache.
            return duplicate(cached_func(*args, **kwargs))

        # Keep the same introspection/reset API as the plain variant.
        wrapper.cache_info = cached_func.cache_info
        wrapper.cache_clear = cached_func.cache_clear
        return wrapper

    return decorator
class DpLoaderSet(Dataset):
    """A dataset for storing DataLoaders to multiple Systems.

    Parameters
    ----------
    systems
        Either a list of system paths, or a single string naming an HDF5
        file whose top-level keys are the systems.
    batch_size
        An int (same batch size for every system), a list with one batch
        size per system, or the string "auto" / "auto:N", which picks the
        smallest batch size such that batch_size * natoms >= N (N is 32
        for "auto").
    type_map
        Gives the name of different atom types
    seed
        Random seed for dataloader
    shuffle
        If the data are shuffled (Only effective in serial mode. Always shuffle in distributed data parallelism)

    Raises
    ------
    ValueError
        If ``batch_size`` is a string other than "auto" or "auto:N".
    """

    def __init__(
        self,
        systems,
        batch_size,
        type_map,
        seed=10,
        shuffle=True,
    ):
        setup_seed(seed)
        if isinstance(systems, str):
            with h5py.File(systems) as file:
                systems = [os.path.join(systems, item) for item in file.keys()]

        self.systems: List[DeepmdDataSetForLoader] = []
        if len(systems) >= 100:
            log.info(f"Constructing DataLoaders from {len(systems)} systems")

        def construct_dataset(system):
            return DeepmdDataSetForLoader(
                system=system,
                type_map=type_map,
            )

        # os.cpu_count() may return None, and the integer division may give
        # 0 when there are more local ranks than cores; Pool needs >= 1.
        local_world_size = (
            int(os.environ["LOCAL_WORLD_SIZE"]) if dist.is_initialized() else 1
        )
        n_threads = max(1, (os.cpu_count() or 1) // local_world_size)
        with Pool(n_threads) as pool:
            self.systems = pool.map(construct_dataset, systems)

        self.sampler_list: List[DistributedSampler] = []
        self.index = []
        self.total_batch = 0

        self.dataloaders = []
        self.batch_sizes = []
        if isinstance(batch_size, str):
            if batch_size == "auto":
                rule = 32
            elif batch_size.startswith("auto:"):
                rule = int(batch_size.split(":")[1])
            else:
                # Fail with a clear message instead of logging and then
                # crashing on ``None // natoms`` below.
                log.error("Unsupported batch size type")
                raise ValueError(f"Unsupported batch size type: {batch_size!r}")
            for ii in self.systems:
                ni = ii._natoms
                # Ceiling division: smallest bsi with bsi * ni >= rule.
                bsi = rule // ni
                if bsi * ni < rule:
                    bsi += 1
                self.batch_sizes.append(bsi)
        elif isinstance(batch_size, list):
            self.batch_sizes = batch_size
        else:
            self.batch_sizes = batch_size * np.ones(len(systems), dtype=int)
        assert len(self.systems) == len(self.batch_sizes)
        for system, system_batch_size in zip(self.systems, self.batch_sizes):
            if dist.is_initialized():
                system_sampler = DistributedSampler(system)
                self.sampler_list.append(system_sampler)
            else:
                system_sampler = None
            system_dataloader = DataLoader(
                dataset=system,
                batch_size=int(system_batch_size),
                num_workers=0,  # Should be 0 to avoid too many threads forked
                sampler=system_sampler,
                collate_fn=collate_batch,
                shuffle=(not dist.is_initialized()) and shuffle,
            )
            self.dataloaders.append(system_dataloader)
            self.index.append(len(system_dataloader))
            self.total_batch += len(system_dataloader)
        # Initialize iterator instances for DataLoader
        self.iters = []
        with torch.device("cpu"):
            for item in self.dataloaders:
                self.iters.append(iter(item))

    def set_noise(self, noise_settings):
        """Forward ``noise_settings`` to every system."""
        # noise_settings['noise_type'] # "trunc_normal", "normal", "uniform"
        # noise_settings['noise'] # float, default 1.0
        # noise_settings['noise_mode'] # "prob", "fix_num"
        # noise_settings['mask_num'] # if "fix_num", int
        # noise_settings['mask_prob'] # if "prob", float
        # noise_settings['same_mask'] # coord and type same mask?
        for system in self.systems:
            system.set_noise(noise_settings)

    def __len__(self):
        """Return the number of per-system dataloaders."""
        return len(self.dataloaders)

    def __getitem__(self, idx):
        """Return the next batch of system ``idx``, tagged with ``"sid"``."""
        # log.warning(str(torch.distributed.get_rank())+" idx: "+str(idx)+" index: "+str(self.index[idx]))
        try:
            batch = next(self.iters[idx])
        except StopIteration:
            # This system's loader is exhausted; restart it and draw again.
            self.iters[idx] = iter(self.dataloaders[idx])
            batch = next(self.iters[idx])
        batch["sid"] = idx
        return batch

    def add_data_requirement(self, data_requirement: List[DataRequirementItem]):
        """Add data requirement for each system in multiple systems."""
        for system in self.systems:
            system.add_data_requirement(data_requirement)

    def print_summary(
        self,
        name: str,
        prob: List[float],
    ):
        """Pass per-system paths, atom counts, batch sizes, batch counts and pbc flags to the module-level ``print_summary``."""
        print_summary(
            name,
            len(self.systems),
            [ss.system for ss in self.systems],
            [ss._natoms for ss in self.systems],
            self.batch_sizes,
            [
                ss._data_system.get_sys_numb_batch(self.batch_sizes[ii])
                for ii, ss in enumerate(self.systems)
            ],
            prob,
            [ss._data_system.pbc for ss in self.systems],
        )
class BackgroundConsumer(Thread):
    """Thread that drains an iterator into a queue.

    Every item of ``source`` is put on ``queue``. If iterating ``source``
    raises, the exception instance is put on the queue so the reader can
    re-raise it. The module-level ``_sentinel`` is always put last to mark
    the end of the stream.
    """

    def __init__(self, queue, source, max_len):
        Thread.__init__(self)
        self._queue = queue
        self._source = source  # Main DL iterator
        self._max_len = max_len  # stored only; not read inside this class

    def run(self):
        try:
            for item in self._source:
                self._queue.put(item)  # Blocking if the queue is full
        except Exception as exc:
            # Without this the thread would die silently and the reader
            # would block forever on an empty queue.
            self._queue.put(exc)
        finally:
            # Signal the consumer we are done.
            self._queue.put(_sentinel)
def collate_batch(batch):
    """Merge a list of per-frame dicts into one batched dict.

    The key set is taken from the first frame. Keys containing ``"find_"``
    and keys whose first-frame value is ``None`` keep the first frame's
    value, ``"fid"`` becomes a plain list, ``"type"`` is dropped, and
    everything else is converted with ``torch.as_tensor`` and passed to
    ``collate_tensor_fn``.
    """
    first = batch[0]
    collated = {}
    for name in first.keys():
        head = first[name]
        if "find_" in name or head is None:
            collated[name] = head
        elif name == "fid":
            collated[name] = [frame[name] for frame in batch]
        elif name != "type":
            tensors = [torch.as_tensor(frame[name]) for frame in batch]
            collated[name] = collate_tensor_fn(tensors)
    return collated
def __init__(self, system: str, type_map: Optional[List[str]] = None):
    """Construct DeePMD-style dataset containing the frames of a single system.

    Args:
    - system: Path to the system.
    - type_map: Atom types.
    """
    self.system = system
    self._type_map = type_map
    # All data access goes through this DeepmdData instance.
    self._data_system = DeepmdData(sys_path=system, type_map=self._type_map)
    self.mixed_type = self._data_system.mixed_type
    self._ntypes = self._data_system.get_ntypes()
    self._natoms = self._data_system.get_natoms()
    # Cached once; __getitem__ attaches it to every frame as "natoms".
    self._natoms_vec = self._data_system.get_natoms_vec(self._ntypes)
torch + +from deepmd.common import ( + VALID_PRECISION, +) +from deepmd.env import ( + GLOBAL_ENER_FLOAT_PRECISION, + GLOBAL_NP_FLOAT_PRECISION, + get_default_nthreads, + set_default_nthreads, +) + +SAMPLER_RECORD = os.environ.get("SAMPLER_RECORD", False) +try: + # only linux + ncpus = len(os.sched_getaffinity(0)) +except AttributeError: + ncpus = os.cpu_count() +NUM_WORKERS = int(os.environ.get("NUM_WORKERS", min(8, ncpus))) +# Make sure DDP uses correct device if applicable +LOCAL_RANK = os.environ.get("LOCAL_RANK") +LOCAL_RANK = int(0 if LOCAL_RANK is None else LOCAL_RANK) + +if os.environ.get("DEVICE") == "cpu" or torch.cuda.is_available() is False: + DEVICE = torch.device("cpu") +else: + DEVICE = torch.device(f"cuda:{LOCAL_RANK}") + +JIT = False +CACHE_PER_SYS = 5 # keep at most so many sets per sys in memory +ENERGY_BIAS_TRAINABLE = True + +PRECISION_DICT = { + "float16": torch.float16, + "float32": torch.float32, + "float64": torch.float64, + "half": torch.float16, + "single": torch.float32, + "double": torch.float64, + "int32": torch.int32, + "int64": torch.int64, + "bfloat16": torch.bfloat16, +} +GLOBAL_PT_FLOAT_PRECISION = PRECISION_DICT[np.dtype(GLOBAL_NP_FLOAT_PRECISION).name] +GLOBAL_PT_ENER_FLOAT_PRECISION = PRECISION_DICT[ + np.dtype(GLOBAL_ENER_FLOAT_PRECISION).name +] +PRECISION_DICT["default"] = GLOBAL_PT_FLOAT_PRECISION +assert VALID_PRECISION.issubset(PRECISION_DICT.keys()) +# cannot automatically generated +RESERVED_PRECISON_DICT = { + torch.float16: "float16", + torch.float32: "float32", + torch.float64: "float64", + torch.int32: "int32", + torch.int64: "int64", + torch.bfloat16: "bfloat16", +} +assert set(PRECISION_DICT.values()) == set(RESERVED_PRECISON_DICT.keys()) +DEFAULT_PRECISION = "float64" + +# throw warnings if threads not set +set_default_nthreads() +inter_nthreads, intra_nthreads = get_default_nthreads() +if inter_nthreads > 0: # the behavior of 0 is not documented + torch.set_num_interop_threads(inter_nthreads) +if intra_nthreads 
class EnvMatStat(BaseEnvMatStat):
    """Environment-matrix statistics accumulated from PyTorch tensors."""

    def compute_stat(self, env_mat: Dict[str, torch.Tensor]) -> Dict[str, StatItem]:
        """Summarise every tensor of one system into a ``StatItem``.

        Parameters
        ----------
        env_mat : Dict[str, torch.Tensor]
            Environment-matrix tensors keyed by name.

        Returns
        -------
        Dict[str, StatItem]
            For each key: the element count, the sum of the elements and the
            sum of the squared elements of the corresponding tensor.
        """
        return {
            name: StatItem(
                number=tensor.numel(),
                sum=tensor.sum().item(),
                squared_sum=torch.square(tensor).sum().item(),
            )
            for name, tensor in env_mat.items()
        }
+ + Parameters + ---------- + descriptor : DescriptorBlock + The descriptor of the model. + """ + + def __init__(self, descriptor: "DescriptorBlock"): + super().__init__() + self.descriptor = descriptor + self.last_dim = ( + self.descriptor.ndescrpt // self.descriptor.nnei + ) # se_r=1, se_a=4 + + def iter( + self, data: List[Dict[str, Union[torch.Tensor, List[Tuple[int, int]]]]] + ) -> Iterator[Dict[str, StatItem]]: + """Get the iterator of the environment matrix. + + Parameters + ---------- + data : List[Dict[str, Union[torch.Tensor, List[Tuple[int, int]]]]] + The data. + + Yields + ------ + Dict[str, StatItem] + The statistics of the environment matrix. + """ + zero_mean = torch.zeros( + self.descriptor.get_ntypes(), + self.descriptor.get_nsel(), + self.last_dim, + dtype=env.GLOBAL_PT_FLOAT_PRECISION, + device=env.DEVICE, + ) + one_stddev = torch.ones( + self.descriptor.get_ntypes(), + self.descriptor.get_nsel(), + self.last_dim, + dtype=env.GLOBAL_PT_FLOAT_PRECISION, + device=env.DEVICE, + ) + if self.last_dim == 4: + radial_only = False + elif self.last_dim == 1: + radial_only = True + else: + raise ValueError( + "last_dim should be 1 for raial-only or 4 for full descriptor." 
+ ) + for system in data: + coord, atype, box, natoms = ( + system["coord"], + system["atype"], + system["box"], + system["natoms"], + ) + ( + extended_coord, + extended_atype, + mapping, + nlist, + ) = extend_input_and_build_neighbor_list( + coord, + atype, + self.descriptor.get_rcut(), + self.descriptor.get_sel(), + mixed_types=self.descriptor.mixed_types(), + box=box, + ) + env_mat, _, _ = prod_env_mat( + extended_coord, + nlist, + atype, + zero_mean, + one_stddev, + self.descriptor.get_rcut(), + # TODO: export rcut_smth from DescriptorBlock + self.descriptor.rcut_smth, + radial_only, + protection=self.descriptor.env_protection, + ) + # reshape to nframes * nloc at the atom level, + # so nframes/mixed_type do not matter + env_mat = env_mat.view( + coord.shape[0] * coord.shape[1], + self.descriptor.get_nsel(), + self.last_dim, + ) + atype = atype.view(coord.shape[0] * coord.shape[1]) + # (1, nloc) eq (ntypes, 1), so broadcast is possible + # shape: (ntypes, nloc) + type_idx = torch.eq( + atype.view(1, -1), + torch.arange( + self.descriptor.get_ntypes(), device=env.DEVICE, dtype=torch.int32 + ).view(-1, 1), + ) + if "pair_exclude_types" in system: + # shape: (1, nloc, nnei) + exclude_mask = PairExcludeMask( + self.descriptor.get_ntypes(), system["pair_exclude_types"] + )(nlist, extended_atype).view(1, coord.shape[0] * coord.shape[1], -1) + # shape: (ntypes, nloc, nnei) + type_idx = torch.logical_and(type_idx.unsqueeze(-1), exclude_mask) + for type_i in range(self.descriptor.get_ntypes()): + dd = env_mat[type_idx[type_i]] + dd = dd.reshape([-1, self.last_dim]) # typen_atoms * unmasked_nnei, 4 + env_mats = {} + env_mats[f"r_{type_i}"] = dd[:, :1] + if self.last_dim == 4: + env_mats[f"a_{type_i}"] = dd[:, 1:] + yield self.compute_stat(env_mats) + + def get_hash(self) -> str: + """Get the hash of the environment matrix. + + Returns + ------- + str + The hash of the environment matrix. 
class AtomExcludeMask(torch.nn.Module):
    """Computes the type exclusion mask for atoms."""

    def __init__(
        self,
        ntypes: int,
        exclude_types: List[int] = [],
    ):
        """Build the mask for `ntypes` atom types.

        Parameters
        ----------
        ntypes
            The number of atom types.
        exclude_types
            The atom types to exclude. The list is copied, never mutated.
        """
        super().__init__()
        self.reinit(ntypes, exclude_types)

    def reinit(
        self,
        ntypes: int,
        exclude_types: List[int] = [],
    ):
        """(Re)build the per-type mask.

        Parameters
        ----------
        ntypes
            The number of atom types.
        exclude_types
            The atom types to exclude. The list is copied, never mutated.
        """
        self.ntypes = ntypes
        # Keep a private copy: storing the argument itself would make every
        # instance built with the default share one list object (and would
        # alias the caller's list), so a later in-place edit would leak.
        self.exclude_types = list(exclude_types)
        # 1 for kept types, 0 for excluded types, indexed by type id.
        self.type_mask = np.array(
            [1 if tt_i not in self.exclude_types else 0 for tt_i in range(ntypes)],
            dtype=np.int32,
        )
        self.type_mask = to_torch_tensor(self.type_mask).view([-1])

    def get_exclude_types(self):
        """Return the list of excluded atom types."""
        return self.exclude_types

    def get_type_mask(self):
        """Return the flattened per-type mask tensor."""
        return self.type_mask

    def forward(
        self,
        atype: torch.Tensor,
    ) -> torch.Tensor:
        """Compute type exclusion mask for atoms.

        Parameters
        ----------
        atype
            The extended atom types. shape: nf x natom

        Returns
        -------
        mask
            The type exclusion mask for atoms. shape: nf x natom
            Element [ff,ii] being 0 if type(ii) is excluded,
            otherwise being 1.

        """
        nf, natom = atype.shape
        # Look up the mask value of every atom by its type id.
        return self.type_mask[atype].view(nf, natom)
def change_finetune_model_params_single(
    _single_param_target,
    _model_param_pretrained,
    from_multitask=False,
    model_branch="Default",
    model_branch_from="",
):
    """Derive the parameters of one model branch from a pretrained model.

    Parameters
    ----------
    _single_param_target
        Model parameters of the branch given in the fine-tuning input.
        It is deep-copied and never modified.
    _model_param_pretrained
        Model parameters stored in the pretrained model. Read via the key
        "type_map" when `from_multitask` is False and via "model_dict"
        otherwise.
    from_multitask
        Whether the pretrained parameters describe a multi-task model.
    model_branch
        Name of the target branch; only used in log messages.
    model_branch_from
        Name of the pretrained branch to start from (multi-task only).
        An empty string selects the first branch of "model_dict" and marks
        the fitting net as new.

    Returns
    -------
    dict
        The updated branch parameters. "new_type_map" is always added;
        "model_branch_chosen" and "new_fitting" are added when
        `from_multitask` is True.

    Raises
    ------
    AssertionError
        If the target type map is not a subset of the pretrained one, or if
        the requested pretrained branch does not exist.
    """
    single_config = deepcopy(_single_param_target)
    # Remember the user's "trainable" choices: the sub-configs may be
    # replaced by the pretrained ones below and are restored at the end.
    trainable_param = {
        "descriptor": True,
        "fitting_net": True,
    }
    for net_type in trainable_param:
        if net_type in single_config:
            trainable_param[net_type] = single_config[net_type].get("trainable", True)
    if not from_multitask:
        old_type_map, new_type_map = (
            _model_param_pretrained["type_map"],
            single_config["type_map"],
        )
        assert set(new_type_map).issubset(
            old_type_map
        ), "Only support for smaller type map when finetuning or resuming."
        # The whole target config is replaced by the pretrained one.
        single_config = deepcopy(_model_param_pretrained)
        log.info(
            f"Change the '{model_branch}' model configurations according to the pretrained one..."
        )
        single_config["new_type_map"] = new_type_map
    else:
        model_dict_params = _model_param_pretrained["model_dict"]
        new_fitting = False
        if model_branch_from == "":
            # No source branch given: take the first pretrained branch for
            # the shared parts and keep the target's own fitting net.
            model_branch_chosen = next(iter(model_dict_params.keys()))
            new_fitting = True
            single_config["bias_adjust_mode"] = (
                "set-by-statistic"  # fitting net re-init
            )
            log.warning(
                "The fitting net will be re-init instead of using that in the pretrained model! "
                "The bias_adjust_mode will be set-by-statistic!"
            )
        else:
            model_branch_chosen = model_branch_from
        assert model_branch_chosen in model_dict_params, (
            f"No model branch named '{model_branch_chosen}'! "
            f"Available ones are {list(model_dict_params.keys())}."
        )
        single_config_chosen = deepcopy(model_dict_params[model_branch_chosen])
        old_type_map, new_type_map = (
            single_config_chosen["type_map"],
            single_config["type_map"],
        )
        assert set(new_type_map).issubset(
            old_type_map
        ), "Only support for smaller type map when finetuning or resuming."
        # Type map and descriptor always come from the pretrained branch.
        for key_item in ["type_map", "descriptor"]:
            if key_item in single_config_chosen:
                single_config[key_item] = single_config_chosen[key_item]
        if not new_fitting:
            single_config["fitting_net"] = single_config_chosen["fitting_net"]
        log.info(
            f"Change the '{model_branch}' model configurations according to the model branch "
            f"'{model_branch_chosen}' in the pretrained one..."
        )
        single_config["new_type_map"] = new_type_map
        single_config["model_branch_chosen"] = model_branch_chosen
        single_config["new_fitting"] = new_fitting
    # Re-apply the "trainable" flags captured from the target input.
    for net_type in trainable_param:
        if net_type in single_config:
            single_config[net_type]["trainable"] = trainable_param[net_type]
        else:
            single_config[net_type] = {"trainable": trainable_param[net_type]}
    return single_config
+ + Parameters + ---------- + finetune_model + The pretrained model. + model_config + The fine-tuning input parameters. + model_branch + The model branch chosen in command-line mode, only for single-task fine-tuning. + + Returns + ------- + model_config: + Updated model parameters. + finetune_links: + Fine-tuning rules in a dict format, with `model_branch`: `model_branch_from` pairs. + If `model_key` is not in this dict, it will do just resuming instead of fine-tuning. + """ + multi_task = "model_dict" in model_config + state_dict = torch.load(finetune_model, map_location=env.DEVICE) + if "model" in state_dict: + state_dict = state_dict["model"] + last_model_params = state_dict["_extra_state"]["model_params"] + finetune_from_multi_task = "model_dict" in last_model_params + finetune_links = {} + if not multi_task: + # use command-line first + if model_branch == "" and "finetune_head" in model_config: + model_branch = model_config["finetune_head"] + model_config = change_finetune_model_params_single( + model_config, + last_model_params, + from_multitask=finetune_from_multi_task, + model_branch="Default", + model_branch_from=model_branch, + ) + finetune_links["Default"] = ( + model_config["model_branch_chosen"] + if finetune_from_multi_task + else "Default" + ) + else: + assert model_branch == "", ( + "Multi-task fine-tuning does not support command-line branches chosen!" + "Please define the 'finetune_head' in each model params!" + ) + target_keys = model_config["model_dict"].keys() + if not finetune_from_multi_task: + pretrained_keys = ["Default"] + else: + pretrained_keys = last_model_params["model_dict"].keys() + for model_key in target_keys: + if "finetune_head" in model_config["model_dict"][model_key]: + pretrained_key = model_config["model_dict"][model_key]["finetune_head"] + assert pretrained_key in pretrained_keys, ( + f"'{pretrained_key}' head chosen to finetune not exist in the pretrained model!" 
class LearningRateExp:
    """Step-wise exponentially decaying learning rate with a lower bound."""

    def __init__(
        self,
        start_lr,
        stop_lr,
        decay_steps,
        stop_steps,
        decay_rate=None,
        **kwargs,
    ):
        """
        Construct an exponential-decayed learning rate.

        Parameters
        ----------
        start_lr
            The learning rate at the start of the training.
        stop_lr
            The desired learning rate at the end of the training.
            When decay_rate is explicitly set, this value will serve as
            the minimum learning rate during training. In other words,
            if the learning rate decays below stop_lr, stop_lr will be applied instead.
        decay_steps
            The learning rate is decaying every this number of training steps.
            If it is not smaller than stop_steps, a default value derived
            from stop_steps is used instead.
        stop_steps
            The total training steps for learning rate scheduler.
        decay_rate
            The decay rate for the learning rate.
            If provided, the decay rate will be set instead of
            calculating it through interpolation between start_lr and stop_lr.
        **kwargs
            Accepted for interface compatibility and ignored.
        """
        self.start_lr = start_lr
        default_ds = 100 if stop_steps // 10 > 100 else stop_steps // 100 + 1
        self.decay_steps = decay_steps
        if self.decay_steps >= stop_steps:
            self.decay_steps = default_ds
        if decay_rate is not None:
            self.decay_rate = decay_rate
        else:
            # Interpolate only when needed: with an explicit decay_rate the
            # result would be discarded, and stop_lr == 0 (a valid floor in
            # that mode) would evaluate log(0) and emit a spurious warning.
            self.decay_rate = np.exp(
                np.log(stop_lr / self.start_lr) / (stop_steps / self.decay_steps)
            )
        self.min_lr = stop_lr

    def value(self, step):
        """Get the learning rate at the given step, never below stop_lr."""
        step_lr = self.start_lr * np.power(self.decay_rate, step // self.decay_steps)
        if step_lr < self.min_lr:
            step_lr = self.min_lr
        return step_lr
+ For example, if one has `model_config` like this: + "model": { + "shared_dict": { + "my_type_map": ["foo", "bar"], + "my_des1": { + "type": "se_e2_a", + "neuron": [10, 20, 40] + }, + }, + "model_dict": { + "model_1": { + "type_map": "my_type_map", + "descriptor": "my_des1", + "fitting_net": { + "neuron": [100, 100, 100] + } + }, + "model_2": { + "type_map": "my_type_map", + "descriptor": "my_des1", + "fitting_net": { + "neuron": [100, 100, 100] + } + } + "model_3": { + "type_map": "my_type_map", + "descriptor": "my_des1:1", + "fitting_net": { + "neuron": [100, 100, 100] + } + } + } + } + The above config will init three model branches named `model_1` and `model_2` and `model_3`, + in which: + - `model_2` and `model_3` will have the same `type_map` as that in `model_1`. + - `model_2` will share all the parameters of `descriptor` with `model_1`, + while `model_3` will share part of parameters of `descriptor` with `model_1` + on human-defined share-level `1` (default is `0`, meaning share all the parameters). + - `model_1`, `model_2` and `model_3` have three different `fitting_net`s. + The returned `model_config` will automatically fulfill the input `model_config` as if there's no sharing, + and the `shared_links` will keep all the sharing information with looking: + { + 'my_des1': { + 'type': 'DescrptSeA', + 'links': [ + {'model_key': 'model_1', + 'shared_type': 'descriptor', + 'shared_level': 0}, + {'model_key': 'model_2', + 'shared_type': 'descriptor', + 'shared_level': 0}, + {'model_key': 'model_3', + 'shared_type': 'descriptor', + 'shared_level': 1} + ] + } + } + + """ + assert "model_dict" in model_config, "only multi-task model can use this method!" 
+ supported_types = ["type_map", "descriptor", "fitting_net"] + shared_dict = model_config.get("shared_dict", {}) + shared_links = {} + type_map_keys = [] + + def replace_one_item(params_dict, key_type, key_in_dict, suffix="", index=None): + shared_type = key_type + shared_key = key_in_dict + shared_level = 0 + if ":" in key_in_dict: + shared_key = key_in_dict.split(":")[0] + shared_level = int(key_in_dict.split(":")[1]) + assert ( + shared_key in shared_dict + ), f"Appointed {shared_type} {shared_key} are not in the shared_dict! Please check the input params." + if index is None: + params_dict[shared_type] = deepcopy(shared_dict[shared_key]) + else: + params_dict[index] = deepcopy(shared_dict[shared_key]) + if shared_type == "type_map": + if key_in_dict not in type_map_keys: + type_map_keys.append(key_in_dict) + else: + if shared_key not in shared_links: + class_name = get_class_name(shared_type, shared_dict[shared_key]) + shared_links[shared_key] = {"type": class_name, "links": []} + link_item = { + "model_key": model_key, + "shared_type": shared_type + suffix, + "shared_level": shared_level, + } + shared_links[shared_key]["links"].append(link_item) + + for model_key in model_config["model_dict"]: + model_params_item = model_config["model_dict"][model_key] + for item_key in model_params_item: + if item_key in supported_types: + item_params = model_params_item[item_key] + if isinstance(item_params, str): + replace_one_item(model_params_item, item_key, item_params) + elif item_params.get("type", "") == "hybrid": + for ii, hybrid_item in enumerate(item_params["list"]): + if isinstance(hybrid_item, str): + replace_one_item( + model_params_item[item_key]["list"], + item_key, + hybrid_item, + suffix=f"_hybrid_{ii}", + index=ii, + ) + for shared_key in shared_links: + shared_links[shared_key]["links"] = sorted( + shared_links[shared_key]["links"], + key=lambda x: x["shared_level"] + - ("spin" in model_config["model_dict"][x["model_key"]]) * 100, + ) + # little trick to 
class NeighborStatOP(torch.nn.Module):
    """Class for getting neighbor statistics data information.

    Parameters
    ----------
    ntypes
        The num of atom types
    rcut
        The cut-off radius
    mixed_types : bool
        If True, treat neighbors of all types as a single type.
    """

    def __init__(
        self,
        ntypes: int,
        rcut: float,
        mixed_types: bool,
    ) -> None:
        super().__init__()
        self.rcut = rcut
        self.ntypes = ntypes
        self.mixed_types = mixed_types

    def forward(
        self,
        coord: torch.Tensor,
        atype: torch.Tensor,
        cell: Optional[torch.Tensor],
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Calculate the nearest neighbor distance between atoms and the
        maximum number of neighbors of atoms.

        Parameters
        ----------
        coord
            The coordinates of atoms.
        atype
            The atom types.
        cell
            The cell.

        Returns
        -------
        torch.Tensor
            The minimal squared distance from each local atom to any other
            atom, in the shape of (nframes, nloc)
        torch.Tensor
            The maximal number of neighbors over the local atoms, in the
            shape of (nframes, ntypes), or (nframes, 1) if mixed_types
        """
        nframes = coord.shape[0]
        coord = coord.view(nframes, -1, 3)
        nloc = coord.shape[1]
        coord = coord.view(nframes, nloc * 3)
        # NOTE(review): presumably returns the extended coordinates, the
        # extended atom types and a mapping (discarded here) -- confirm
        # against extend_coord_with_ghosts.
        extend_coord, extend_atype, _ = extend_coord_with_ghosts(
            coord, atype, cell, self.rcut
        )

        coord1 = extend_coord.reshape(nframes, -1)
        nall = coord1.shape[1] // 3
        # the first nloc atoms of the extended system are taken as local atoms
        coord0 = coord1[:, : nloc * 3]
        # pairwise displacement from every local atom to every extended atom
        diff = (
            coord1.reshape([nframes, -1, 3])[:, None, :, :]
            - coord0.reshape([nframes, -1, 3])[:, :, None, :]
        )
        assert list(diff.shape) == [nframes, nloc, nall, 3]
        # remove the diagonal elements
        mask = torch.eye(nloc, nall, dtype=torch.bool, device=diff.device)
        # self-pairs get an infinite distance, so they are neither the
        # minimum nor inside the cut-off
        diff[:, mask] = torch.inf
        rr2 = torch.sum(torch.square(diff), dim=-1)
        min_rr2, _ = torch.min(rr2, dim=-1)
        # count the number of neighbors
        if not self.mixed_types:
            mask = rr2 < self.rcut**2
            nnei = torch.zeros(
                (nframes, nloc, self.ntypes), dtype=torch.int32, device=mask.device
            )
            # one neighbor count per atom type
            for ii in range(self.ntypes):
                nnei[:, :, ii] = torch.sum(
                    mask & extend_atype.eq(ii)[:, None, :], dim=-1
                )
        else:
            mask = rr2 < self.rcut**2
            # virtual types (<0) are not counted
            nnei = torch.sum(mask & extend_atype.ge(0)[:, None, :], dim=-1).view(
                nframes, nloc, 1
            )
        # reduce over the local atoms
        max_nnei, _ = torch.max(nnei, dim=1)
        return min_rr2, max_nnei
+ """ + + def __init__( + self, + ntypes: int, + rcut: float, + mixed_type: bool = False, + ) -> None: + super().__init__(ntypes, rcut, mixed_type) + op = NeighborStatOP(ntypes, rcut, mixed_type) + self.op = torch.jit.script(op) + self.auto_batch_size = AutoBatchSize() + + def iterator( + self, data: DeepmdDataSystem + ) -> Iterator[Tuple[np.ndarray, float, str]]: + """Abstract method for producing data. + + Yields + ------ + np.ndarray + The maximal number of neighbors + float + The squared minimal distance between two atoms + str + The directory of the data system + """ + for ii in range(len(data.system_dirs)): + for jj in data.data_systems[ii].dirs: + data_set = data.data_systems[ii] + data_set_data = data_set._load_set(jj) + minrr2, max_nnei = self.auto_batch_size.execute_all( + self._execute, + data_set_data["coord"].shape[0], + data_set.get_natoms(), + data_set_data["coord"], + data_set_data["type"], + data_set_data["box"] if data_set.pbc else None, + ) + yield np.max(max_nnei, axis=0), np.min(minrr2), jj + + def _execute( + self, + coord: np.ndarray, + atype: np.ndarray, + cell: Optional[np.ndarray], + ): + """Execute the operation. + + Parameters + ---------- + coord + The coordinates of atoms. + atype + The atom types. + cell + The cell. 
def build_neighbor_list(
    coord: torch.Tensor,
    atype: torch.Tensor,
    nloc: int,
    rcut: float,
    sel: Union[int, List[int]],
    distinguish_types: bool = True,
) -> torch.Tensor:
    """Build neighbor list for a batch of frames. keeps nsel neighbors.

    Parameters
    ----------
    coord : torch.Tensor
        extended coordinates of shape [batch_size, nall x 3]
    atype : torch.Tensor
        extended atomic types of shape [batch_size, nall]
        if type < 0 the atom is treated as a virtual atom.
    nloc : int
        number of local atoms.
    rcut : float
        cut-off radius
    sel : int or List[int]
        maximal number of neighbors (of each type).
        if distinguish_types==True, nsel should be list and
        the length of nsel should be equal to number of
        types.
    distinguish_types : bool
        distinguish different types.

    Returns
    -------
    neighbor_list : torch.Tensor
        Neighbor list of shape [batch_size, nloc, nsel], the neighbors
        are stored in an ascending order. If the number of
        neighbors is less than nsel, the positions are masked
        with -1. The neighbor list of an atom looks like
        |------ nsel ------|
        xx xx xx xx -1 -1 -1
        if distinguish_types==True and we have two types
        |---- nsel[0] -----| |---- nsel[1] -----|
        xx xx xx xx -1 -1 -1 xx xx xx -1 -1 -1 -1
        For virtual atoms all neighboring positions are filled with -1.

    """
    batch_size = coord.shape[0]
    coord = coord.view(batch_size, -1)
    nall = coord.shape[1] // 3
    # fill virtual atoms with large coords so they are not neighbors of any
    # real atom.
    xmax = torch.max(coord) + 2.0 * rcut
    # nf x nall
    is_vir = atype < 0
    coord1 = torch.where(is_vir[:, :, None], xmax, coord.view(-1, nall, 3)).view(
        -1, nall * 3
    )
    if isinstance(sel, int):
        sel = [sel]
    nsel = sum(sel)
    # nf x (nloc x 3)
    coord0 = coord1[:, : nloc * 3]
    # nf x nloc x nall x 3
    diff = coord1.view([batch_size, -1, 3]).unsqueeze(1) - coord0.view(
        [batch_size, -1, 3]
    ).unsqueeze(2)
    assert list(diff.shape) == [batch_size, nloc, nall, 3]
    # nf x nloc x nall
    rr = torch.linalg.norm(diff, dim=-1)
    # if central atom has two zero distances, sorting sometimes can not exclude itself
    rr -= torch.eye(nloc, nall, dtype=rr.dtype, device=rr.device).unsqueeze(0)
    rr, nlist = torch.sort(rr, dim=-1)
    # drop the central atom itself: nf x nloc x (nall-1)
    rr = rr[:, :, 1:]
    nlist = nlist[:, :, 1:]
    # nf x nloc x nsel
    nnei = rr.shape[2]
    if nsel <= nnei:
        rr = rr[:, :, :nsel]
        nlist = nlist[:, :, :nsel]
    else:
        # Pad up to nsel entries. The padded distances (1 + rcut) lie beyond
        # the cut-off, so the padded slots are masked with -1 below.
        # The padding is created with rr's own dtype so that the
        # concatenation never has to mix floating-point precisions.
        rr = torch.cat(
            [
                rr,
                torch.ones(
                    [batch_size, nloc, nsel - nnei], dtype=rr.dtype, device=rr.device
                )
                + rcut,
            ],
            dim=-1,
        )
        nlist = torch.cat(
            [
                nlist,
                torch.ones(
                    [batch_size, nloc, nsel - nnei], dtype=nlist.dtype, device=rr.device
                ),
            ],
            dim=-1,
        )
    assert list(nlist.shape) == [batch_size, nloc, nsel]
    # mask neighbors beyond the cut-off and all neighbors of virtual atoms
    nlist = torch.where(
        torch.logical_or((rr > rcut), is_vir[:, :nloc, None]), -1, nlist
    )

    if distinguish_types:
        return nlist_distinguish_types(nlist, atype, sel)
    else:
        return nlist
+
+    """
+    nf, nloc, nnei = nlist.shape
+    ret_nlist = []
+    # nloc x nall
+    tmp_atype = torch.tile(atype.unsqueeze(1), [1, nloc, 1])
+    mask = nlist == -1
+    # nloc x s(nsel)
+    tnlist = torch.gather(
+        tmp_atype,
+        2,
+        nlist.masked_fill(mask, 0),
+    )
+    tnlist = tnlist.masked_fill(mask, -1)
+    snsel = tnlist.shape[2]
+    for ii, ss in enumerate(sel):
+        # nloc x s(nsel)
+        # to int because bool cannot be sort on GPU
+        pick_mask = (tnlist == ii).to(torch.int32)
+        # nloc x s(nsel), stable sort, nearer neighbors first
+        pick_mask, imap = torch.sort(pick_mask, dim=-1, descending=True, stable=True)
+        # nloc x s(nsel)
+        inlist = torch.gather(nlist, 2, imap)
+        inlist = inlist.masked_fill(~(pick_mask.to(torch.bool)), -1)
+        # nloc x nsel[ii]
+        ret_nlist.append(torch.split(inlist, [ss, snsel - ss], dim=-1)[0])
+    return torch.concat(ret_nlist, dim=-1)
+
+
+# build_neighbor_list = torch.vmap(
+#   build_neighbor_list_lower,
+#   in_dims=(0,0,None,None,None),
+#   out_dims=(0),
+# )
+
+
+def get_multiple_nlist_key(
+    rcut: float,
+    nsel: int,
+) -> str:
+    return str(rcut) + "_" + str(nsel)
+
+
+def build_multiple_neighbor_list(
+    coord: torch.Tensor,
+    nlist: torch.Tensor,
+    rcuts: List[float],
+    nsels: List[int],
+) -> Dict[str, torch.Tensor]:
+    """Input one neighbor list, and produce multiple neighbor lists with
+    different cutoff radius and numbers of selection out of it. The
+    required rcuts and nsels should be smaller or equal to the input nlist.
+
+    Parameters
+    ----------
+    coord : torch.Tensor
+        extended coordinates of shape [batch_size, nall x 3]
+    nlist : torch.Tensor
+        Neighbor list of shape [batch_size, nloc, nsel], the neighbors
+        should be stored in an ascending order.
+    rcuts : List[float]
+        list of cut-off radius in ascending order.
+    nsels : List[int]
+        maximal number of neighbors in ascending order.
+
+    Returns
+    -------
+    nlist_dict : Dict[str, torch.Tensor]
+        A dict of nlists, key given by get_multiple_nlist_key(rc, nsel)
+        value being the corresponding nlist.
+
+    """
+    assert len(rcuts) == len(nsels)
+    if len(rcuts) == 0:
+        return {}
+    nb, nloc, nsel = nlist.shape
+    if nsel < nsels[-1]:
+        pad = -1 * torch.ones(
+            [nb, nloc, nsels[-1] - nsel],
+            dtype=nlist.dtype,
+            device=nlist.device,
+        )
+        # nb x nloc x nsel
+        nlist = torch.cat([nlist, pad], dim=-1)
+        nsel = nsels[-1]
+    # nb x nall x 3
+    coord1 = coord.view(nb, -1, 3)
+    # nb x nloc x 3
+    coord0 = coord1[:, :nloc, :]
+    nlist_mask = nlist == -1
+    # nb x (nloc x nsel) x 3
+    index = (
+        nlist.masked_fill(nlist_mask, 0)
+        .view(nb, nloc * nsel)
+        .unsqueeze(-1)
+        .expand(-1, -1, 3)
+    )
+    # nb x nloc x nsel x 3
+    coord2 = torch.gather(coord1, dim=1, index=index).view(nb, nloc, nsel, 3)
+    # nb x nloc x nsel x 3
+    diff = coord2 - coord0[:, :, None, :]
+    # nb x nloc x nsel
+    rr = torch.linalg.norm(diff, dim=-1)
+    # masked_fill is out-of-place: rebind so padded (-1) slots really get inf
+    rr = rr.masked_fill(nlist_mask, float("inf"))
+    nlist0 = nlist
+    ret = {}
+    for rc, ns in zip(rcuts[::-1], nsels[::-1]):
+        nlist0 = nlist0[:, :, :ns].masked_fill(rr[:, :, :ns] > rc, -1)
+        ret[get_multiple_nlist_key(rc, ns)] = nlist0
+    return ret
+
+
+def extend_coord_with_ghosts(
+    coord: torch.Tensor,
+    atype: torch.Tensor,
+    cell: Optional[torch.Tensor],
+    rcut: float,
+    cell_cpu: Optional[torch.Tensor] = None,
+):
+    """Extend the coordinates of the atoms by appending peridoc images.
+    The number of images is large enough to ensure all the neighbors
+    within rcut are appended.
+
+    Parameters
+    ----------
+    coord : torch.Tensor
+        original coordinates of shape [-1, nloc*3].
+    atype : torch.Tensor
+        atom type of shape [-1, nloc].
+    cell : torch.Tensor
+        simulation cell tensor of shape [-1, 9].
+    rcut : float
+        the cutoff radius
+    cell_cpu : torch.Tensor
+        cell on cpu for performance
+
+    Returns
+    -------
+    extended_coord: torch.Tensor
+        extended coordinates of shape [-1, nall*3].
+    extended_atype: torch.Tensor
+        extended atom type of shape [-1, nall].
+ index_mapping: torch.Tensor + maping extended index to the local index + + """ + device = coord.device + nf, nloc = atype.shape + aidx = torch.tile(torch.arange(nloc, device=device).unsqueeze(0), [nf, 1]) + if cell is None: + nall = nloc + extend_coord = coord.clone() + extend_atype = atype.clone() + extend_aidx = aidx.clone() + else: + coord = coord.view([nf, nloc, 3]) + cell = cell.view([nf, 3, 3]) + cell_cpu = cell_cpu.view([nf, 3, 3]) if cell_cpu is not None else cell + # nf x 3 + to_face = to_face_distance(cell_cpu) + # nf x 3 + # *2: ghost copies on + and - directions + # +1: central cell + nbuff = torch.ceil(rcut / to_face).to(torch.long) + # 3 + nbuff = torch.max(nbuff, dim=0, keepdim=False).values + nbuff_cpu = nbuff.cpu() + xi = torch.arange(-nbuff_cpu[0], nbuff_cpu[0] + 1, 1, device="cpu") + yi = torch.arange(-nbuff_cpu[1], nbuff_cpu[1] + 1, 1, device="cpu") + zi = torch.arange(-nbuff_cpu[2], nbuff_cpu[2] + 1, 1, device="cpu") + eye_3 = torch.eye(3, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device="cpu") + xyz = xi.view(-1, 1, 1, 1) * eye_3[0] + xyz = xyz + yi.view(1, -1, 1, 1) * eye_3[1] + xyz = xyz + zi.view(1, 1, -1, 1) * eye_3[2] + xyz = xyz.view(-1, 3) + xyz = xyz.to(device=device, non_blocking=True) + # ns x 3 + shift_idx = xyz[torch.argsort(torch.norm(xyz, dim=1))] + ns, _ = shift_idx.shape + nall = ns * nloc + # nf x ns x 3 + shift_vec = torch.einsum("sd,fdk->fsk", shift_idx, cell) + # nf x ns x nloc x 3 + extend_coord = coord[:, None, :, :] + shift_vec[:, :, None, :] + # nf x ns x nloc + extend_atype = torch.tile(atype.unsqueeze(-2), [1, ns, 1]) + # nf x ns x nloc + extend_aidx = torch.tile(aidx.unsqueeze(-2), [1, ns, 1]) + return ( + extend_coord.reshape([nf, nall * 3]).to(device), + extend_atype.view([nf, nall]).to(device), + extend_aidx.view([nf, nall]).to(device), + ) diff --git a/deepmd/pt/utils/plugin.py b/deepmd/pt/utils/plugin.py new file mode 100644 index 0000000000..aa901c06e8 --- /dev/null +++ b/deepmd/pt/utils/plugin.py @@ -0,0 +1,16 @@ 
+# SPDX-License-Identifier: LGPL-3.0-or-later +"""Base of plugin systems.""" + +from deepmd.utils.plugin import ( + Plugin, + PluginVariant, + VariantABCMeta, + VariantMeta, +) + +__all__ = [ + "Plugin", + "VariantMeta", + "VariantABCMeta", + "PluginVariant", +] diff --git a/deepmd/pt/utils/preprocess.py b/deepmd/pt/utils/preprocess.py new file mode 100644 index 0000000000..ed46292f84 --- /dev/null +++ b/deepmd/pt/utils/preprocess.py @@ -0,0 +1,305 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging +from typing import ( + Union, +) + +import torch + +from deepmd.pt.utils import ( + env, +) + +log = logging.getLogger(__name__) + + +class Region3D: + def __init__(self, boxt): + """Construct a simulation box.""" + boxt = boxt.reshape([3, 3]) + self.boxt = boxt # convert physical coordinates to internal ones + self.rec_boxt = torch.linalg.inv( + self.boxt + ) # convert internal coordinates to physical ones + + self.volume = torch.linalg.det(self.boxt) # compute the volume + + # boxt = boxt.permute(1, 0) + c_yz = torch.cross(boxt[1], boxt[2]) + self._h2yz = self.volume / torch.linalg.norm(c_yz) + c_zx = torch.cross(boxt[2], boxt[0]) + self._h2zx = self.volume / torch.linalg.norm(c_zx) + c_xy = torch.cross(boxt[0], boxt[1]) + self._h2xy = self.volume / torch.linalg.norm(c_xy) + + def phys2inter(self, coord): + """Convert physical coordinates to internal ones.""" + return coord @ self.rec_boxt + + def inter2phys(self, coord): + """Convert internal coordinates to physical ones.""" + return coord @ self.boxt + + def get_face_distance(self): + """Return face distinces to each surface of YZ, ZX, XY.""" + return torch.stack([self._h2yz, self._h2zx, self._h2xy]) + + +def normalize_coord(coord, region: Region3D, nloc: int): + """Move outer atoms into region by mirror. 
+ + Args: + - coord: shape is [nloc*3] + """ + tmp_coord = coord.clone() + inter_cood = torch.remainder(region.phys2inter(tmp_coord), 1.0) + tmp_coord = region.inter2phys(inter_cood) + return tmp_coord + + +def compute_serial_cid(cell_offset, ncell): + """Tell the sequential cell ID in its 3D space. + + Args: + - cell_offset: shape is [3] + - ncell: shape is [3] + """ + cell_offset[:, 0] *= ncell[1] * ncell[2] + cell_offset[:, 1] *= ncell[2] + return cell_offset.sum(-1) + + +def compute_pbc_shift(cell_offset, ncell): + """Tell shift count to move the atom into region.""" + shift = torch.zeros_like(cell_offset) + shift = shift + (cell_offset < 0) * -( + torch.div(cell_offset, ncell, rounding_mode="floor") + ) + shift = shift + (cell_offset >= ncell) * -( + torch.div((cell_offset - ncell), ncell, rounding_mode="floor") + 1 + ) + assert torch.all(cell_offset + shift * ncell >= 0) + assert torch.all(cell_offset + shift * ncell < ncell) + return shift + + +def build_inside_clist(coord, region: Region3D, ncell): + """Build cell list on atoms inside region. 
+ + Args: + - coord: shape is [nloc*3] + - ncell: shape is [3] + """ + loc_ncell = int(torch.prod(ncell)) # num of local cells + nloc = coord.numel() // 3 # num of local atoms + inter_cell_size = 1.0 / ncell + + inter_cood = region.phys2inter(coord.view(-1, 3)) + cell_offset = torch.floor(inter_cood / inter_cell_size).to(torch.long) + # numerical error brought by conversion from phys to inter back and force + # may lead to negative value + cell_offset[cell_offset < 0] = 0 + delta = cell_offset - ncell + a2c = compute_serial_cid(cell_offset, ncell) # cell id of atoms + arange = torch.arange(0, loc_ncell, 1) + cellid = a2c == arange.unsqueeze(-1) # one hot cellid + c2a = cellid.nonzero() + lst = [] + cnt = 0 + bincount = torch.bincount(a2c, minlength=loc_ncell) + for i in range(loc_ncell): + n = bincount[i] + lst.append(c2a[cnt : cnt + n, 1]) + cnt += n + return a2c, lst + + +def append_neighbors(coord, region: Region3D, atype, rcut: float): + """Make ghost atoms who are valid neighbors. + + Args: + - coord: shape is [nloc*3] + - atype: shape is [nloc] + """ + to_face = region.get_face_distance() + + # compute num and size of local cells + ncell = torch.floor(to_face / rcut).to(torch.long) + ncell[ncell == 0] = 1 + cell_size = to_face / ncell + ngcell = ( + torch.floor(rcut / cell_size).to(torch.long) + 1 + ) # num of cells out of local, which contain ghost atoms + + # add ghost atoms + a2c, c2a = build_inside_clist(coord, region, ncell) + xi = torch.arange(-ngcell[0], ncell[0] + ngcell[0], 1) + yi = torch.arange(-ngcell[1], ncell[1] + ngcell[1], 1) + zi = torch.arange(-ngcell[2], ncell[2] + ngcell[2], 1) + xyz = xi.view(-1, 1, 1, 1) * torch.tensor([1, 0, 0], dtype=torch.long) + xyz = xyz + yi.view(1, -1, 1, 1) * torch.tensor([0, 1, 0], dtype=torch.long) + xyz = xyz + zi.view(1, 1, -1, 1) * torch.tensor([0, 0, 1], dtype=torch.long) + xyz = xyz.view(-1, 3) + mask_a = (xyz >= 0).all(dim=-1) + mask_b = (xyz < ncell).all(dim=-1) + mask = ~torch.logical_and(mask_a, 
mask_b)
+    xyz = xyz[mask]  # cell coord
+    shift = compute_pbc_shift(xyz, ncell)
+    coord_shift = region.inter2phys(shift.to(env.GLOBAL_PT_FLOAT_PRECISION))
+    mirrored = shift * ncell + xyz
+    cid = compute_serial_cid(mirrored, ncell)
+
+    n_atoms = coord.shape[0]
+    aid = [c2a[ci] + i * n_atoms for i, ci in enumerate(cid)]
+    aid = torch.cat(aid)
+    tmp = torch.div(aid, n_atoms, rounding_mode="trunc")
+    aid = aid % n_atoms
+    tmp_coord = coord[aid] - coord_shift[tmp]
+    tmp_atype = atype[aid]
+
+    # merge local and ghost atoms
+    merged_coord = torch.cat([coord, tmp_coord])
+    merged_coord_shift = torch.cat([torch.zeros_like(coord), coord_shift[tmp]])
+    merged_atype = torch.cat([atype, tmp_atype])
+    merged_mapping = torch.cat([torch.arange(atype.numel()), aid])
+    return merged_coord_shift, merged_atype, merged_mapping
+
+
+def build_neighbor_list(
+    nloc: int, coord, atype, rcut: float, sec, mapping, type_split=True, min_check=False
+):
+    """For each atom inside region, build its neighbor list.
+
+    Args:
+    - coord: shape is [nall*3]
+    - atype: shape is [nall]
+    - min_check: raise RuntimeError if two atoms are closer than 1e-6
+
+    Returns nlist, nlist_loc, nlist_type, each [nloc, sec[-1]], padded with -1.
+    """
+    coord = coord.float()
+    coord_l = coord.view(-1, 1, 3)[:nloc]
+    coord_r = coord.view(1, -1, 3)
+    distance = coord_l - coord_r
+    distance = torch.linalg.norm(distance, dim=-1)
+    DISTANCE_INF = distance.max().detach() + rcut
+    distance[:nloc, :nloc] += torch.eye(nloc, dtype=torch.bool) * DISTANCE_INF
+    if min_check:
+        if distance.min().abs() < 1e-6:
+            raise RuntimeError("Atom dist too close!")
+    if not type_split:
+        sec = sec[-1:]
+    nlist = torch.zeros((nloc, sec[-1].item())).long() - 1
+    nlist_loc = torch.zeros((nloc, sec[-1].item())).long() - 1
+    nlist_type = torch.zeros((nloc, sec[-1].item())).long() - 1
+    for i, nnei in enumerate(sec):
+        if i > 0:
+            nnei = nnei - sec[i - 1]
+        if not type_split:
+            tmp = distance
+        else:
+            mask = atype.unsqueeze(0) == i
+            tmp = distance + (~mask) * DISTANCE_INF
+        if tmp.shape[1] >= nnei:
+            _sorted, indices = torch.topk(tmp, nnei, dim=1, largest=False)
+        else:
+            # when nnei > nall
+            indices = torch.zeros((nloc, nnei)).long() - 1
+            _sorted = torch.ones((nloc, nnei)).long() * DISTANCE_INF
+            _sorted_nnei, indices_nnei = torch.topk(
+                tmp, tmp.shape[1], dim=1, largest=False
+            )
+            _sorted[:, : tmp.shape[1]] = _sorted_nnei
+            indices[:, : tmp.shape[1]] = indices_nnei
+        mask = (_sorted < rcut).to(torch.long)
+        indices_loc = mapping[indices]
+        indices = indices * mask + -1 * (1 - mask)  # -1 for padding
+        indices_loc = indices_loc * mask + -1 * (1 - mask)  # -1 for padding
+        if i == 0:
+            start = 0
+        else:
+            start = sec[i - 1]
+        end = min(sec[i], start + indices.shape[1])
+        nlist[:, start:end] = indices[:, :nnei]
+        nlist_loc[:, start:end] = indices_loc[:, :nnei]
+        nlist_type[:, start:end] = atype[indices[:, :nnei]] * mask + -1 * (1 - mask)
+    return nlist, nlist_loc, nlist_type
+
+
+def compute_smooth_weight(distance, rmin: float, rmax: float):
+    """Compute smooth weight for descriptor elements."""
+    if rmin >= rmax:
+        raise ValueError("rmin should be less than rmax.")
+    min_mask = distance <= rmin
+    max_mask = distance >= rmax
+    mid_mask = torch.logical_not(torch.logical_or(min_mask, max_mask))
+    uu = (distance - rmin) / (rmax - rmin)
+    vv = uu * uu * uu * (-6 * uu * uu + 15 * uu - 10) + 1
+    return vv * mid_mask + min_mask
+
+
+def make_env_mat(
+    coord,
+    atype,
+    region,
+    rcut: Union[float, list],
+    sec,
+    pbc=True,
+    type_split=True,
+    min_check=False,
+):
+    """Based on atom coordinates, return environment matrix.
+ + Returns + ------- + nlist: nlist, [nloc, nnei] + merged_coord_shift: shift on nall atoms, [nall, 3] + merged_mapping: mapping from nall index to nloc index, [nall] + """ + # move outer atoms into cell + hybrid = isinstance(rcut, list) + _rcut = rcut + if hybrid: + _rcut = max(rcut) + if pbc: + merged_coord_shift, merged_atype, merged_mapping = append_neighbors( + coord, region, atype, _rcut + ) + merged_coord = coord[merged_mapping] - merged_coord_shift + if merged_coord.shape[0] <= coord.shape[0]: + log.warning("No ghost atom is added for system ") + else: + merged_coord_shift = torch.zeros_like(coord) + merged_atype = atype.clone() + merged_mapping = torch.arange(atype.numel()) + merged_coord = coord.clone() + + # build nlist + if not hybrid: + nlist, nlist_loc, nlist_type = build_neighbor_list( + coord.shape[0], + merged_coord, + merged_atype, + rcut, + sec, + merged_mapping, + type_split=type_split, + min_check=min_check, + ) + else: + nlist, nlist_loc, nlist_type = [], [], [] + for ii, single_rcut in enumerate(rcut): + nlist_tmp, nlist_loc_tmp, nlist_type_tmp = build_neighbor_list( + coord.shape[0], + merged_coord, + merged_atype, + single_rcut, + sec[ii], + merged_mapping, + type_split=type_split, + min_check=min_check, + ) + nlist.append(nlist_tmp) + nlist_loc.append(nlist_loc_tmp) + nlist_type.append(nlist_type_tmp) + return nlist, nlist_loc, nlist_type, merged_coord_shift, merged_mapping diff --git a/deepmd/pt/utils/region.py b/deepmd/pt/utils/region.py new file mode 100644 index 0000000000..9d811acb9b --- /dev/null +++ b/deepmd/pt/utils/region.py @@ -0,0 +1,116 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import torch + + +def phys2inter( + coord: torch.Tensor, + cell: torch.Tensor, +) -> torch.Tensor: + """Convert physical coordinates to internal(direct) coordinates. + + Parameters + ---------- + coord : torch.Tensor + physical coordinates of shape [*, na, 3]. + cell : torch.Tensor + simulation cell tensor of shape [*, 3, 3]. 
+ + Returns + ------- + inter_coord: torch.Tensor + the internal coordinates + + """ + rec_cell, _ = torch.linalg.inv_ex(cell) + return torch.matmul(coord, rec_cell) + + +def inter2phys( + coord: torch.Tensor, + cell: torch.Tensor, +) -> torch.Tensor: + """Convert internal(direct) coordinates to physical coordinates. + + Parameters + ---------- + coord : torch.Tensor + internal coordinates of shape [*, na, 3]. + cell : torch.Tensor + simulation cell tensor of shape [*, 3, 3]. + + Returns + ------- + phys_coord: torch.Tensor + the physical coordinates + + """ + return torch.matmul(coord, cell) + + +def to_face_distance( + cell: torch.Tensor, +) -> torch.Tensor: + """Compute the to-face-distance of the simulation cell. + + Parameters + ---------- + cell : torch.Tensor + simulation cell tensor of shape [*, 3, 3]. + + Returns + ------- + dist: torch.Tensor + the to face distances of shape [*, 3] + + """ + cshape = cell.shape + dist = b_to_face_distance(cell.view([-1, 3, 3])) + return dist.view(list(cshape[:-2]) + [3]) # noqa:RUF005 + + +def _to_face_distance(cell): + volume = torch.linalg.det(cell) + c_yz = torch.cross(cell[1], cell[2]) + _h2yz = volume / torch.linalg.norm(c_yz) + c_zx = torch.cross(cell[2], cell[0]) + _h2zx = volume / torch.linalg.norm(c_zx) + c_xy = torch.cross(cell[0], cell[1]) + _h2xy = volume / torch.linalg.norm(c_xy) + return torch.stack([_h2yz, _h2zx, _h2xy]) + + +def b_to_face_distance(cell): + volume = torch.linalg.det(cell) + c_yz = torch.cross(cell[:, 1], cell[:, 2], dim=-1) + _h2yz = volume / torch.linalg.norm(c_yz, dim=-1) + c_zx = torch.cross(cell[:, 2], cell[:, 0], dim=-1) + _h2zx = volume / torch.linalg.norm(c_zx, dim=-1) + c_xy = torch.cross(cell[:, 0], cell[:, 1], dim=-1) + _h2xy = volume / torch.linalg.norm(c_xy, dim=-1) + return torch.stack([_h2yz, _h2zx, _h2xy], dim=1) + + +# b_to_face_distance = torch.vmap( +# _to_face_distance, in_dims=(0), out_dims=(0)) + + +def normalize_coord( + coord: torch.Tensor, + cell: torch.Tensor, +) -> 
torch.Tensor: + """Apply PBC according to the atomic coordinates. + + Parameters + ---------- + coord : torch.Tensor + orignal coordinates of shape [*, na, 3]. + + Returns + ------- + wrapped_coord: torch.Tensor + wrapped coordinates of shape [*, na, 3]. + + """ + icoord = phys2inter(coord, cell) + icoord = torch.remainder(icoord, 1.0) + return inter2phys(icoord, cell) diff --git a/deepmd/pt/utils/serialization.py b/deepmd/pt/utils/serialization.py new file mode 100644 index 0000000000..c99ddbb3c6 --- /dev/null +++ b/deepmd/pt/utils/serialization.py @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json + +import torch + +from deepmd.pt.model.model import ( + get_model, +) +from deepmd.pt.model.model.model import ( + BaseModel, +) +from deepmd.pt.train.wrapper import ( + ModelWrapper, +) + + +def serialize_from_file(model_file: str) -> dict: + """Serialize the model file to a dictionary. + + Parameters + ---------- + model_file : str + The model file to be serialized. + + Returns + ------- + dict + The serialized model data. 
+ """ + if model_file.endswith(".pth"): + saved_model = torch.jit.load(model_file, map_location="cpu") + model_def_script = json.loads(saved_model.model_def_script) + model = get_model(model_def_script) + model.load_state_dict(saved_model.state_dict()) + elif model_file.endswith(".pt"): + state_dict = torch.load(model_file, map_location="cpu") + if "model" in state_dict: + state_dict = state_dict["model"] + model_def_script = state_dict["_extra_state"]["model_params"] + model = get_model(model_def_script) + modelwrapper = ModelWrapper(model) + modelwrapper.load_state_dict(state_dict) + model = modelwrapper.model["Default"] + else: + raise ValueError("PyTorch backend only supports converting .pth or .pt file") + + model_dict = model.serialize() + data = { + "backend": "PyTorch", + "pt_version": torch.__version__, + "model": model_dict, + "model_def_script": model_def_script, + # TODO + "@variables": {}, + } + return data + + +def deserialize_to_file(model_file: str, data: dict) -> None: + """Deserialize the dictionary to a model file. + + Parameters + ---------- + model_file : str + The model file to be saved. + data : dict + The dictionary to be deserialized. + """ + if not model_file.endswith(".pth"): + raise ValueError("PyTorch backend only supports converting .pth file") + model = BaseModel.deserialize(data["model"]) + # JIT will happy in this way... 
+    model.model_def_script = json.dumps(data["model_def_script"])
+    model = torch.jit.script(model)
+    torch.jit.save(model, model_file)
diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py
new file mode 100644
index 0000000000..a29d98addc
--- /dev/null
+++ b/deepmd/pt/utils/stat.py
@@ -0,0 +1,254 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
+from typing import (
+    Callable,
+    List,
+    Optional,
+    Union,
+)
+
+import numpy as np
+import torch
+
+from deepmd.pt.utils import (
+    AtomExcludeMask,
+)
+from deepmd.pt.utils.auto_batch_size import (
+    AutoBatchSize,
+)
+from deepmd.pt.utils.utils import (
+    dict_to_device,
+    to_numpy_array,
+    to_torch_tensor,
+)
+from deepmd.utils.out_stat import (
+    compute_stats_from_redu,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+
+log = logging.getLogger(__name__)
+
+
+def make_stat_input(datasets, dataloaders, nbatches):
+    """Pack data for statistics.
+
+    Args:
+    - datasets: A list of dataset to analyze; dataloaders[i] yields its batches.
+    - nbatches: Batch count for collecting stats.
+
+    Returns
+    -------
+    - a list of dicts, each of which contains data from a system
+    """
+    lst = []
+    log.info(f"Packing data for statistics from {len(datasets)} systems")
+    for i in range(len(datasets)):
+        sys_stat = {}
+        with torch.device("cpu"):
+            iterator = iter(dataloaders[i])
+            for _ in range(nbatches):
+                try:
+                    stat_data = next(iterator)
+                except StopIteration:
+                    iterator = iter(dataloaders[i])
+                    stat_data = next(iterator)
+                for dd in stat_data:
+                    if stat_data[dd] is None:
+                        sys_stat[dd] = None
+                    elif isinstance(stat_data[dd], torch.Tensor):
+                        if dd not in sys_stat:
+                            sys_stat[dd] = []
+                        sys_stat[dd].append(stat_data[dd])
+                    elif isinstance(stat_data[dd], np.float32):
+                        sys_stat[dd] = stat_data[dd]
+
+        # merge the per-batch tensor lists collected above into one tensor per key
+        for key in sys_stat:
+            if isinstance(sys_stat[key], np.float32):
+                pass
+            elif sys_stat[key] is None or sys_stat[key][0] is None:
+                sys_stat[key] = None
+            elif isinstance(sys_stat[key], list):
+                # test the stored value, not the stale loop variable `dd`
+                sys_stat[key] = torch.cat(sys_stat[key], dim=0)
+        dict_to_device(sys_stat)
+        lst.append(sys_stat)
+    return lst
+
+
+def restore_from_file(
+    stat_file_path: DPPath,
+    keys: List[str] = ["energy"],
+) -> Optional[dict]:
+    if stat_file_path is None:
+        return None
+    stat_files = [stat_file_path / f"bias_atom_{kk}" for kk in keys]
+    if any(not (ii.is_file()) for ii in stat_files):
+        return None
+    ret = {}
+
+    for kk in keys:
+        fp = stat_file_path / f"bias_atom_{kk}"
+        assert fp.is_file()
+        ret[kk] = fp.load_numpy()
+    return ret
+
+
+def save_to_file(
+    stat_file_path: DPPath,
+    results: dict,
+):
+    assert stat_file_path is not None
+    stat_file_path.mkdir(exist_ok=True, parents=True)
+    for kk, vv in results.items():
+        fp = stat_file_path / f"bias_atom_{kk}"
+        fp.save_numpy(vv)
+
+
+def compute_output_stats(
+    merged: Union[Callable[[], List[dict]], List[dict]],
+    ntypes: int,
+    keys: List[str] = ["energy"],
+    stat_file_path: Optional[DPPath] = None,
+    rcond: Optional[float] = None,
+    atom_ener: Optional[List[float]] = None,
+    model_forward:
Optional[Callable[..., torch.Tensor]] = None, +): + """ + Compute the output statistics (e.g. energy bias) for the fitting net from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + ntypes : int + The number of atom types. + stat_file_path : DPPath, optional + The path to the stat file. + rcond : float, optional + The condition number for the regression of atomic energy. + atom_ener : List[float], optional + Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descrptor should be set. + model_forward : Callable[..., torch.Tensor], optional + The wrapped forward function of atomic model. + If not None, the model will be utilized to generate the original energy prediction, + which will be subtracted from the energy label of the data. + The difference will then be used to calculate the delta complement energy bias for each type. 
+ """ + bias_atom_e = restore_from_file(stat_file_path, keys) + + if bias_atom_e is None: + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged + outputs = {kk: [item[kk] for item in sampled] for kk in keys} + data_mixed_type = "real_natoms_vec" in sampled[0] + natoms_key = "natoms" if not data_mixed_type else "real_natoms_vec" + for system in sampled: + if "atom_exclude_types" in system: + type_mask = AtomExcludeMask( + ntypes, system["atom_exclude_types"] + ).get_type_mask() + system[natoms_key][:, 2:] *= type_mask.unsqueeze(0) + input_natoms = [item[natoms_key] for item in sampled] + # shape: (nframes, ndim) + merged_output = {kk: to_numpy_array(torch.cat(outputs[kk])) for kk in keys} + # shape: (nframes, ntypes) + merged_natoms = to_numpy_array(torch.cat(input_natoms)[:, 2:]) + if atom_ener is not None and len(atom_ener) > 0: + assigned_atom_ener = np.array( + [ee if ee is not None else np.nan for ee in atom_ener] + ) + else: + assigned_atom_ener = None + if model_forward is None: + # only use statistics result + # [0]: take the first otuput (mean) of compute_stats_from_redu + bias_atom_e = { + kk: compute_stats_from_redu( + merged_output[kk], + merged_natoms, + assigned_bias=assigned_atom_ener, + rcond=rcond, + )[0] + for kk in keys + } + else: + # subtract the model bias and output the delta bias + auto_batch_size = AutoBatchSize() + model_predict = {kk: [] for kk in keys} + for system in sampled: + nframes = system["coord"].shape[0] + coord, atype, box, natoms = ( + system["coord"], + system["atype"], + system["box"], + system["natoms"], + ) + fparam = system.get("fparam", None) + aparam = system.get("aparam", None) + + def model_forward_auto_batch_size(*args, **kwargs): + return auto_batch_size.execute_all( + model_forward, + nframes, + system["atype"].shape[-1], + *args, + **kwargs, + ) + + sample_predict = model_forward_auto_batch_size( + coord, atype, box, fparam=fparam, aparam=aparam + ) + + for kk in keys: + 
model_predict[kk].append( + to_numpy_array( + torch.sum(sample_predict[kk], dim=1) # nf x nloc x odims + ) + ) + + model_predict = {kk: np.concatenate(model_predict[kk]) for kk in keys} + + bias_diff = {kk: merged_output[kk] - model_predict[kk] for kk in keys} + bias_atom_e = { + kk: compute_stats_from_redu( + bias_diff[kk], + merged_natoms, + assigned_bias=assigned_atom_ener, + rcond=rcond, + )[0] + for kk in keys + } + unbias_e = { + kk: model_predict[kk] + merged_natoms @ bias_atom_e[kk] for kk in keys + } + atom_numbs = merged_natoms.sum(-1) + for kk in keys: + rmse_ae = np.sqrt( + np.mean( + np.square( + (unbias_e[kk].ravel() - merged_output[kk].ravel()) + / atom_numbs + ) + ) + ) + log.info( + f"RMSE of {kk} per atom after linear regression is: {rmse_ae} in the unit of {kk}." + ) + + if stat_file_path is not None: + save_to_file(stat_file_path, bias_atom_e) + + ret = {kk: to_torch_tensor(bias_atom_e[kk]) for kk in keys} + + return ret diff --git a/deepmd/pt/utils/update_sel.py b/deepmd/pt/utils/update_sel.py new file mode 100644 index 0000000000..8c2d0699f2 --- /dev/null +++ b/deepmd/pt/utils/update_sel.py @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Type, +) + +from deepmd.pt.utils.neighbor_stat import ( + NeighborStat, +) +from deepmd.utils.update_sel import ( + BaseUpdateSel, +) + + +class UpdateSel(BaseUpdateSel): + @property + def neighbor_stat(self) -> Type[NeighborStat]: + return NeighborStat + + def hook(self, min_nbor_dist, max_nbor_size): + # TODO: save to the model in UpdateSel.hook + pass diff --git a/deepmd/pt/utils/utils.py b/deepmd/pt/utils/utils.py new file mode 100644 index 0000000000..d1ef089e49 --- /dev/null +++ b/deepmd/pt/utils/utils.py @@ -0,0 +1,111 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Optional, + overload, +) + +import ml_dtypes +import numpy as np +import torch +import torch.nn.functional as F + +from deepmd.dpmodel.common import PRECISION_DICT as 
NP_PRECISION_DICT + +from .env import ( + DEVICE, +) +from .env import PRECISION_DICT as PT_PRECISION_DICT + + +class ActivationFn(torch.nn.Module): + def __init__(self, activation: Optional[str]): + super().__init__() + self.activation: str = activation if activation is not None else "linear" + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Returns the tensor after applying activation function corresponding to `activation`.""" + # See jit supported types: https://pytorch.org/docs/stable/jit_language_reference.html#supported-type + + if self.activation.lower() == "relu": + return F.relu(x) + elif self.activation.lower() == "gelu" or self.activation.lower() == "gelu_tf": + return F.gelu(x, approximate="tanh") + elif self.activation.lower() == "tanh": + return torch.tanh(x) + elif self.activation.lower() == "relu6": + return F.relu6(x) + elif self.activation.lower() == "softplus": + return F.softplus(x) + elif self.activation.lower() == "sigmoid": + return torch.sigmoid(x) + elif self.activation.lower() == "linear" or self.activation.lower() == "none": + return x + else: + raise RuntimeError(f"activation function {self.activation} not supported") + + +@overload +def to_numpy_array(xx: torch.Tensor) -> np.ndarray: ... + + +@overload +def to_numpy_array(xx: None) -> None: ... + + +def to_numpy_array( + xx, +): + if xx is None: + return None + assert xx is not None + # Create a reverse mapping of PT_PRECISION_DICT + reverse_precision_dict = {v: k for k, v in PT_PRECISION_DICT.items()} + # Use the reverse mapping to find keys with the desired value + prec = reverse_precision_dict.get(xx.dtype, None) + prec = NP_PRECISION_DICT.get(prec, None) + if prec is None: + raise ValueError(f"unknown precision {xx.dtype}") + if xx.dtype == torch.bfloat16: + # https://github.com/pytorch/pytorch/issues/109873 + xx = xx.float() + return xx.detach().cpu().numpy().astype(prec) + + +@overload +def to_torch_tensor(xx: np.ndarray) -> torch.Tensor: ... 
+
+
+@overload
+def to_torch_tensor(xx: None) -> None: ...
+
+
+def to_torch_tensor(
+    xx,
+):
+    if xx is None:
+        return None
+    assert xx is not None
+    # Create a reverse mapping of NP_PRECISION_DICT
+    reverse_precision_dict = {v: k for k, v in NP_PRECISION_DICT.items()}
+    # Use the reverse mapping to find keys with the desired value
+    prec = reverse_precision_dict.get(xx.dtype.type, None)
+    prec = PT_PRECISION_DICT.get(prec, None)
+    if prec is None:
+        raise ValueError(f"unknown precision {xx.dtype}")
+    if xx.dtype == ml_dtypes.bfloat16:
+        # https://github.com/pytorch/pytorch/issues/109873
+        xx = xx.astype(np.float32)
+    return torch.tensor(xx, dtype=prec, device=DEVICE)
+
+
+def dict_to_device(sample_dict):
+    """Move every value of `sample_dict` to DEVICE, modifying the dict in place."""
+    for key in sample_dict:
+        if isinstance(sample_dict[key], list):
+            sample_dict[key] = [item.to(DEVICE) for item in sample_dict[key]]
+        elif isinstance(sample_dict[key], np.float32):  # lists have no .to()
+            sample_dict[key] = (
+                torch.ones(1, dtype=torch.float32, device=DEVICE) * sample_dict[key]
+            )
+        elif sample_dict[key] is not None:
+            sample_dict[key] = sample_dict[key].to(DEVICE)
diff --git a/deepmd/__about__.py b/deepmd/tf/__about__.py
similarity index 100%
rename from deepmd/__about__.py
rename to deepmd/tf/__about__.py
diff --git a/deepmd/tf/__init__.py b/deepmd/tf/__init__.py
new file mode 100644
index 0000000000..65aa03b39e
--- /dev/null
+++ b/deepmd/tf/__init__.py
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Root of the deepmd package, exposes all public classes and submodules."""
+
+try:
+    from importlib import (
+        metadata,
+    )
+except ImportError:  # for Python<3.8
+    import importlib_metadata as metadata
+
+import deepmd.tf.utils.network as network
+
+from .
import ( + cluster, + descriptor, + fit, + loss, + nvnmd, + utils, +) +from .env import ( + set_mkl, +) +from .infer import ( + DeepEval, + DeepPotential, +) +from .infer.data_modifier import ( + DipoleChargeModifier, +) + +set_mkl() + +try: + from deepmd._version import version as __version__ +except ImportError: + from .__about__ import ( + __version__, + ) + +# load third-party plugins +try: + eps = metadata.entry_points(group="deepmd") +except TypeError: + eps = metadata.entry_points().get("deepmd", []) +for ep in eps: + ep.load() + +__all__ = [ + "__version__", + "descriptor", + "fit", + "loss", + "utils", + "cluster", + "network", + "DeepEval", + "DeepPotential", + "DipoleChargeModifier", + "nvnmd", +] diff --git a/deepmd/tf/__main__.py b/deepmd/tf/__main__.py new file mode 100644 index 0000000000..6026b1c269 --- /dev/null +++ b/deepmd/tf/__main__.py @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Package dp entry point.""" + +from .entrypoints.main import ( + main, +) + +if __name__ == "__main__": + main() diff --git a/deepmd/tf/calculator.py b/deepmd/tf/calculator.py new file mode 100644 index 0000000000..5fc4b59f5f --- /dev/null +++ b/deepmd/tf/calculator.py @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.calculator import ( + DP, +) + +__all__ = [ + "DP", +] diff --git a/deepmd/cluster/__init__.py b/deepmd/tf/cluster/__init__.py similarity index 74% rename from deepmd/cluster/__init__.py rename to deepmd/tf/cluster/__init__.py index 3c15778fe5..6735ce92f4 100644 --- a/deepmd/cluster/__init__.py +++ b/deepmd/tf/cluster/__init__.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Module that reads node resources, auto detects if running local or on SLURM.""" -import os from typing import ( List, Optional, @@ -9,7 +8,6 @@ ) from .local import get_resource as get_local_res -from .slurm import get_resource as get_slurm_res __all__ = ["get_resource"] @@ -22,7 +20,4 @@ def get_resource() -> Tuple[str, 
List[str], Optional[List[int]]]: Tuple[str, List[str], Optional[List[int]]] nodename, nodelist, and gpus """ - if "SLURM_JOB_NODELIST" in os.environ: - return get_slurm_res() - else: - return get_local_res() + return get_local_res() diff --git a/deepmd/cluster/local.py b/deepmd/tf/cluster/local.py similarity index 92% rename from deepmd/cluster/local.py rename to deepmd/tf/cluster/local.py index 3c12c9dc85..60961a0d65 100644 --- a/deepmd/cluster/local.py +++ b/deepmd/tf/cluster/local.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Get local GPU resources.""" -import socket import subprocess as sp import sys from typing import ( @@ -10,9 +9,12 @@ Tuple, ) -from deepmd.env import ( +from deepmd.tf.env import ( tf, ) +from deepmd.utils.hostlist import ( + get_host_names, +) __all__ = ["get_gpus", "get_resource"] @@ -57,7 +59,6 @@ def get_resource() -> Tuple[str, List[str], Optional[List[int]]]: Tuple[str, List[str], Optional[List[int]]] nodename, nodelist, and gpus """ - nodename = socket.gethostname() - nodelist = [nodename] + nodename, nodelist = get_host_names() gpus = get_gpus() return nodename, nodelist, gpus diff --git a/deepmd/tf/common.py b/deepmd/tf/common.py new file mode 100644 index 0000000000..5f2d0d882e --- /dev/null +++ b/deepmd/tf/common.py @@ -0,0 +1,291 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Collection of functions and classes used throughout the whole package.""" + +import warnings +from functools import ( + wraps, +) +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Union, +) + +import tensorflow +from tensorflow.python.framework import ( + tensor_util, +) + +from deepmd.common import ( + VALID_ACTIVATION, + VALID_PRECISION, + add_data_requirement, + data_requirement, + expand_sys_str, + get_np_precision, + j_loader, + j_must_have, + make_default_mesh, + select_idx_map, +) +from deepmd.tf.env import ( + GLOBAL_TF_FLOAT_PRECISION, + op_module, + tf, +) + +if TYPE_CHECKING: + from deepmd.common import ( + 
_ACTIVATION, + _PRECISION, + ) + +__all__ = [ + # from deepmd.common + "data_requirement", + "add_data_requirement", + "select_idx_map", + "make_default_mesh", + "j_must_have", + "j_loader", + "expand_sys_str", + "get_np_precision", + # from self + "PRECISION_DICT", + "gelu", + "gelu_tf", + "ACTIVATION_FN_DICT", + "get_activation_func", + "get_precision", + "safe_cast_tensor", + "cast_precision", + "clear_session", +] + +# define constants +PRECISION_DICT = { + "default": GLOBAL_TF_FLOAT_PRECISION, + "float16": tf.float16, + "float32": tf.float32, + "float64": tf.float64, + "bfloat16": tf.bfloat16, +} +assert VALID_PRECISION.issubset(PRECISION_DICT.keys()) + + +def gelu(x: tf.Tensor) -> tf.Tensor: + """Gaussian Error Linear Unit. + + This is a smoother version of the RELU, implemented by custom operator. + + Parameters + ---------- + x : tf.Tensor + float Tensor to perform activation + + Returns + ------- + tf.Tensor + `x` with the GELU activation applied + + References + ---------- + Original paper + https://arxiv.org/abs/1606.08415 + """ + return op_module.gelu_custom(x) + + +def gelu_tf(x: tf.Tensor) -> tf.Tensor: + """Gaussian Error Linear Unit. + + This is a smoother version of the RELU, implemented by TF. + + Parameters + ---------- + x : tf.Tensor + float Tensor to perform activation + + Returns + ------- + tf.Tensor + `x` with the GELU activation applied + + References + ---------- + Original paper + https://arxiv.org/abs/1606.08415 + """ + + def gelu_wrapper(x): + try: + return tensorflow.nn.gelu(x, approximate=True) + except AttributeError: + warnings.warn( + "TensorFlow does not provide an implementation of gelu, please upgrade your TensorFlow version. Fallback to the custom gelu operator." 
+ ) + return op_module.gelu_custom(x) + + return (lambda x: gelu_wrapper(x))(x) + + +ACTIVATION_FN_DICT = { + "relu": tf.nn.relu, + "relu6": tf.nn.relu6, + "softplus": tf.nn.softplus, + "sigmoid": tf.sigmoid, + "tanh": tf.nn.tanh, + "gelu": gelu, + "gelu_tf": gelu_tf, + "linear": lambda x: x, + "none": lambda x: x, +} +assert VALID_ACTIVATION.issubset(ACTIVATION_FN_DICT.keys()) + + +def get_activation_func( + activation_fn: Union["_ACTIVATION", None], +) -> Callable[[tf.Tensor], tf.Tensor]: + """Get activation function callable based on string name. + + Parameters + ---------- + activation_fn : _ACTIVATION + one of the defined activation functions + + Returns + ------- + Callable[[tf.Tensor], tf.Tensor] + corresponding TF callable + + Raises + ------ + RuntimeError + if unknown activation function is specified + """ + if activation_fn is None: + activation_fn = "none" + assert activation_fn is not None + if activation_fn.lower() not in ACTIVATION_FN_DICT: + raise RuntimeError(f"{activation_fn} is not a valid activation function") + return ACTIVATION_FN_DICT[activation_fn.lower()] + + +def get_precision(precision: "_PRECISION") -> Any: + """Convert str to TF DType constant. + + Parameters + ---------- + precision : _PRECISION + one of the allowed precisions + + Returns + ------- + tf.python.framework.dtypes.DType + appropriate TF constant + + Raises + ------ + RuntimeError + if supplied precision string does not have a corresponding TF constant + """ + if precision not in PRECISION_DICT: + raise RuntimeError(f"{precision} is not a valid precision") + return PRECISION_DICT[precision] + + +def safe_cast_tensor( + input: tf.Tensor, from_precision: tf.DType, to_precision: tf.DType +) -> tf.Tensor: + """Convert a Tensor from a precision to another precision. + + If input is not a Tensor or without the specific precision, the method will not + cast it. 
+ + Parameters + ---------- + input : tf.Tensor + input tensor + from_precision : tf.DType + Tensor data type that is casted from + to_precision : tf.DType + Tensor data type that casts to + + Returns + ------- + tf.Tensor + casted Tensor + """ + if tensor_util.is_tensor(input) and input.dtype == from_precision: + return tf.cast(input, to_precision) + return input + + +def cast_precision(func: Callable) -> Callable: + """A decorator that casts and casts back the input + and output tensor of a method. + + The decorator should be used on an instance method. + + The decorator will do the following thing: + (1) It casts input Tensors from `GLOBAL_TF_FLOAT_PRECISION` + to precision defined by property `precision`. + (2) It casts output Tensors from `precision` to + `GLOBAL_TF_FLOAT_PRECISION`. + (3) It checks inputs and outputs and only casts when + input or output is a Tensor and its dtype matches + `GLOBAL_TF_FLOAT_PRECISION` and `precision`, respectively. + If it does not match (e.g. it is an integer), the decorator + will do nothing on it. + + Returns + ------- + Callable + a decorator that casts and casts back the input and + output tensor of a method + + Examples + -------- + >>> class A: + ... @property + ... def precision(self): + ... return tf.float32 + ... + ... @cast_precision + ... def f(self, x: tf.Tensor, y: tf.Tensor) -> tf.Tensor: + ... 
return x**2 + y + """ + + @wraps(func) + def wrapper(self, *args, **kwargs): + # only convert tensors + returned_tensor = func( + self, + *[ + safe_cast_tensor(vv, GLOBAL_TF_FLOAT_PRECISION, self.precision) + for vv in args + ], + **{ + kk: safe_cast_tensor(vv, GLOBAL_TF_FLOAT_PRECISION, self.precision) + for kk, vv in kwargs.items() + }, + ) + if isinstance(returned_tensor, tuple): + return tuple( + safe_cast_tensor(vv, self.precision, GLOBAL_TF_FLOAT_PRECISION) + for vv in returned_tensor + ) + else: + return safe_cast_tensor( + returned_tensor, self.precision, GLOBAL_TF_FLOAT_PRECISION + ) + + return wrapper + + +def clear_session(): + """Reset all state generated by DeePMD-kit.""" + tf.reset_default_graph() + # TODO: remove this line when data_requirement is not a global variable + data_requirement.clear() diff --git a/deepmd/descriptor/__init__.py b/deepmd/tf/descriptor/__init__.py similarity index 100% rename from deepmd/descriptor/__init__.py rename to deepmd/tf/descriptor/__init__.py diff --git a/deepmd/descriptor/descriptor.py b/deepmd/tf/descriptor/descriptor.py similarity index 88% rename from deepmd/descriptor/descriptor.py rename to deepmd/tf/descriptor/descriptor.py index bd731004cb..82b09c95fb 100644 --- a/deepmd/descriptor/descriptor.py +++ b/deepmd/tf/descriptor/descriptor.py @@ -4,7 +4,6 @@ ) from typing import ( Any, - Callable, Dict, List, Optional, @@ -13,17 +12,22 @@ import numpy as np -from deepmd.env import ( +from deepmd.common import ( + j_get_type, +) +from deepmd.tf.env import ( GLOBAL_TF_FLOAT_PRECISION, tf, ) -from deepmd.utils import ( - Plugin, +from deepmd.tf.utils import ( PluginVariant, ) +from deepmd.utils.plugin import ( + make_plugin_registry, +) -class Descriptor(PluginVariant): +class Descriptor(PluginVariant, make_plugin_registry("descriptor")): r"""The abstract class for descriptors. All specific descriptors should be based on this class. 
@@ -32,9 +36,9 @@ class Descriptor(PluginVariant): Examples -------- - >>> descript = Descriptor(type="se_e2_a", rcut=6., rcut_smth=0.5, sel=[50]) + >>> descript = Descriptor(type="se_e2_a", rcut=6.0, rcut_smth=0.5, sel=[50]) >>> type(descript) - + Notes ----- @@ -42,44 +46,9 @@ class Descriptor(PluginVariant): that can be called by other classes. """ - __plugins = Plugin() - - @staticmethod - def register(key: str) -> Callable: - """Register a descriptor plugin. - - Parameters - ---------- - key : str - the key of a descriptor - - Returns - ------- - Descriptor - the registered descriptor - - Examples - -------- - >>> @Descriptor.register("some_descrpt") - class SomeDescript(Descriptor): - pass - """ - return Descriptor.__plugins.register(key) - - @classmethod - def get_class_by_input(cls, input: dict): - try: - descrpt_type = input["type"] - except KeyError: - raise KeyError("the type of descriptor should be set by `type`") - if descrpt_type in Descriptor.__plugins.plugins: - return Descriptor.__plugins.plugins[descrpt_type] - else: - raise RuntimeError("Unknown descriptor type: " + descrpt_type) - def __new__(cls, *args, **kwargs): if cls is Descriptor: - cls = cls.get_class_by_input(kwargs) + cls = cls.get_class_by_type(j_get_type(kwargs, cls.__name__)) return super().__new__(cls) @abstractmethod @@ -133,9 +102,6 @@ def get_dim_rot_mat_1(self) -> int: int the first dimension of the rotation matrix """ - # TODO: I think this method should be implemented as it's called by dipole and - # polar fitting network. However, currently not all descriptors have this - # method. raise NotImplementedError def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: @@ -152,8 +118,6 @@ def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: sel_r : list[int] The number of neighbors with only radial information """ - # TODO: I think this method should be implemented as it's called by energy - # model. 
However, se_ar and hybrid doesn't have this method. raise NotImplementedError @abstractmethod @@ -174,18 +138,18 @@ def compute_input_stats( ---------- data_coord : list[np.ndarray] The coordinates. Can be generated by - :meth:`deepmd.model.model_stat.make_stat_input` + :meth:`deepmd.tf.model.model_stat.make_stat_input` data_box : list[np.ndarray] The box. Can be generated by - :meth:`deepmd.model.model_stat.make_stat_input` + :meth:`deepmd.tf.model.model_stat.make_stat_input` data_atype : list[np.ndarray] - The atom types. Can be generated by :meth:`deepmd.model.model_stat.make_stat_input` + The atom types. Can be generated by :meth:`deepmd.tf.model.model_stat.make_stat_input` natoms_vec : list[np.ndarray] The vector for the number of atoms of the system and different types of - atoms. Can be generated by :meth:`deepmd.model.model_stat.make_stat_input` + atoms. Can be generated by :meth:`deepmd.tf.model.model_stat.make_stat_input` mesh : list[np.ndarray] The mesh for neighbor searching. Can be generated by - :meth:`deepmd.model.model_stat.make_stat_input` + :meth:`deepmd.tf.model.model_stat.make_stat_input` input_dict : dict[str, list[np.ndarray]] Dictionary for additional input **kwargs @@ -507,5 +471,45 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict): The local data refer to the current class """ # call subprocess - cls = cls.get_class_by_input(local_jdata) + cls = cls.get_class_by_type(j_get_type(local_jdata, cls.__name__)) return cls.update_sel(global_jdata, local_jdata) + + @classmethod + def deserialize(cls, data: dict, suffix: str = "") -> "Descriptor": + """Deserialize the model. + + There is no suffix in a native DP model, but it is important + for the TF backend. 
+ + Parameters + ---------- + data : dict + The serialized data + suffix : str, optional + Name suffix to identify this descriptor + + Returns + ------- + Descriptor + The deserialized descriptor + """ + if cls is Descriptor: + return Descriptor.get_class_by_type( + j_get_type(data, cls.__name__) + ).deserialize(data, suffix=suffix) + raise NotImplementedError("Not implemented in class %s" % cls.__name__) + + def serialize(self, suffix: str = "") -> dict: + """Serialize the model. + + There is no suffix in a native DP model, but it is important + for the TF backend. + + Returns + ------- + dict + The serialized data + suffix : str, optional + Name suffix to identify this descriptor + """ + raise NotImplementedError("Not implemented in class %s" % type(self).__name__) diff --git a/deepmd/descriptor/hybrid.py b/deepmd/tf/descriptor/hybrid.py similarity index 87% rename from deepmd/descriptor/hybrid.py rename to deepmd/tf/descriptor/hybrid.py index 5ee5ec884b..4e7eaa2c92 100644 --- a/deepmd/descriptor/hybrid.py +++ b/deepmd/tf/descriptor/hybrid.py @@ -1,26 +1,32 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( + Any, + Dict, List, Optional, Tuple, + Union, ) import numpy as np -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_TF_FLOAT_PRECISION, tf, ) -from deepmd.utils.spin import ( +from deepmd.tf.utils.spin import ( Spin, ) +from deepmd.utils.version import ( + check_version_compatibility, +) -# from deepmd.descriptor import DescrptLocFrame -# from deepmd.descriptor import DescrptSeA -# from deepmd.descriptor import DescrptSeT -# from deepmd.descriptor import DescrptSeAEbd -# from deepmd.descriptor import DescrptSeAEf -# from deepmd.descriptor import DescrptSeR +# from deepmd.tf.descriptor import DescrptLocFrame +# from deepmd.tf.descriptor import DescrptSeA +# from deepmd.tf.descriptor import DescrptSeT +# from deepmd.tf.descriptor import DescrptSeAEbd +# from deepmd.tf.descriptor import DescrptSeAEf +# from deepmd.tf.descriptor import 
DescrptSeR from .descriptor import ( Descriptor, ) @@ -32,13 +38,14 @@ class DescrptHybrid(Descriptor): Parameters ---------- - list : list + list : List[Union[Descriptor, Dict[str, Any]]] Build a descriptor from the concatenation of the list of descriptors. + The descriptor can be either an object or a dictionary. """ def __init__( self, - list: list, + list: List[Union[Descriptor, Dict[str, Any]]], multi_task: bool = False, ntypes: Optional[int] = None, spin: Optional[Spin] = None, @@ -146,15 +153,15 @@ def compute_input_stats( Parameters ---------- data_coord - The coordinates. Can be generated by deepmd.model.make_stat_input + The coordinates. Can be generated by deepmd.tf.model.make_stat_input data_box - The box. Can be generated by deepmd.model.make_stat_input + The box. Can be generated by deepmd.tf.model.make_stat_input data_atype - The atom types. Can be generated by deepmd.model.make_stat_input + The atom types. Can be generated by deepmd.tf.model.make_stat_input natoms_vec - The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.model.make_stat_input + The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.tf.model.make_stat_input mesh - The mesh for neighbor searching. Can be generated by deepmd.model.make_stat_input + The mesh for neighbor searching. 
Can be generated by deepmd.tf.model.make_stat_input input_dict Dictionary for additional input mixed_type @@ -434,3 +441,30 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict): for sub_jdata in local_jdata["list"] ] return local_jdata_cpy + + def serialize(self, suffix: str = "") -> dict: + return { + "@class": "Descriptor", + "type": "hybrid", + "@version": 1, + "list": [ + descrpt.serialize(suffix=f"{suffix}_{idx}") + for idx, descrpt in enumerate(self.descrpt_list) + ], + } + + @classmethod + def deserialize(cls, data: dict, suffix: str = "") -> "DescrptHybrid": + data = data.copy() + class_name = data.pop("@class") + assert class_name == "Descriptor" + class_type = data.pop("type") + assert class_type == "hybrid" + check_version_compatibility(data.pop("@version"), 1, 1) + obj = cls( + list=[ + Descriptor.deserialize(ii, suffix=f"{suffix}_{idx}") + for idx, ii in enumerate(data["list"]) + ], + ) + return obj diff --git a/deepmd/descriptor/loc_frame.py b/deepmd/tf/descriptor/loc_frame.py similarity index 95% rename from deepmd/descriptor/loc_frame.py rename to deepmd/tf/descriptor/loc_frame.py index 0765be55f8..ee414fc0bb 100644 --- a/deepmd/descriptor/loc_frame.py +++ b/deepmd/tf/descriptor/loc_frame.py @@ -7,17 +7,17 @@ import numpy as np -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_NP_FLOAT_PRECISION, GLOBAL_TF_FLOAT_PRECISION, default_tf_session_config, op_module, tf, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( get_tensor_by_name_from_graph, ) -from deepmd.utils.sess import ( +from deepmd.tf.utils.sess import ( run_sess, ) @@ -35,11 +35,11 @@ class DescrptLocFrame(Descriptor): ---------- rcut The cut-off radius - sel_a : list[str] + sel_a : list[int] The length of the list should be the same as the number of atom types in the system. `sel_a[i]` gives the selected number of type-i neighbors. The full relative coordinates of the neighbors are used by the descriptor. 
- sel_r : list[str] + sel_r : list[int] The length of the list should be the same as the number of atom types in the system. `sel_r[i]` gives the selected number of type-i neighbors. Only relative distance of the neighbors are used by the descriptor. @@ -168,15 +168,15 @@ def compute_input_stats( Parameters ---------- data_coord - The coordinates. Can be generated by deepmd.model.make_stat_input + The coordinates. Can be generated by deepmd.tf.model.make_stat_input data_box - The box. Can be generated by deepmd.model.make_stat_input + The box. Can be generated by deepmd.tf.model.make_stat_input data_atype - The atom types. Can be generated by deepmd.model.make_stat_input + The atom types. Can be generated by deepmd.tf.model.make_stat_input natoms_vec - The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.model.make_stat_input + The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.tf.model.make_stat_input mesh - The mesh for neighbor searching. Can be generated by deepmd.model.make_stat_input + The mesh for neighbor searching. 
Can be generated by deepmd.tf.model.make_stat_input input_dict Dictionary for additional input **kwargs @@ -343,7 +343,10 @@ def prod_force_virial( tf.summary.histogram("net_derivative", net_deriv) net_deriv_reshape = tf.reshape( net_deriv, - [np.cast["int64"](-1), natoms[0] * np.cast["int64"](self.ndescrpt)], + [ + np.asarray(-1, dtype=np.int64), + natoms[0] * np.asarray(self.ndescrpt, dtype=np.int64), + ], ) force = op_module.prod_force( net_deriv_reshape, diff --git a/deepmd/tf/descriptor/se.py b/deepmd/tf/descriptor/se.py new file mode 100644 index 0000000000..4232503464 --- /dev/null +++ b/deepmd/tf/descriptor/se.py @@ -0,0 +1,325 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import re +from typing import ( + List, + Set, + Tuple, +) + +from deepmd.dpmodel.utils.network import ( + EmbeddingNet, + NetworkCollection, +) +from deepmd.tf.env import ( + EMBEDDING_NET_PATTERN, + tf, +) +from deepmd.tf.utils.graph import ( + get_embedding_net_variables_from_graph_def, + get_tensor_by_name_from_graph, +) +from deepmd.tf.utils.update_sel import ( + UpdateSel, +) + +from .descriptor import ( + Descriptor, +) + + +class DescrptSe(Descriptor): + """A base class for smooth version of descriptors. + + Notes + ----- + All of these descriptors have an environmental matrix and an + embedding network (:meth:`deepmd.tf.utils.network.embedding_net`), so + they can share some similar methods without defining them twice. + + Attributes + ---------- + embedding_net_variables : dict + initial embedding network variables + descrpt_reshape : tf.Tensor + the reshaped descriptor + descrpt_deriv : tf.Tensor + the descriptor derivative + rij : tf.Tensor + distances between two atoms + nlist : tf.Tensor + the neighbor list + + """ + + def _identity_tensors(self, suffix: str = "") -> None: + """Identify tensors which are expected to be stored and restored. 
+ + Notes + ----- + These tensors will be identified: + self.descrpt_reshape : o_rmat + self.descrpt_deriv : o_rmat_deriv + self.rij : o_rij + self.nlist : o_nlist + Thus, this method should be called during building the descriptor and + after these tensors are initialized. + + Parameters + ---------- + suffix : str + The suffix of the scope + """ + self.descrpt_reshape = tf.identity(self.descrpt_reshape, name="o_rmat" + suffix) + self.descrpt_deriv = tf.identity( + self.descrpt_deriv, name="o_rmat_deriv" + suffix + ) + self.rij = tf.identity(self.rij, name="o_rij" + suffix) + self.nlist = tf.identity(self.nlist, name="o_nlist" + suffix) + + def get_tensor_names(self, suffix: str = "") -> Tuple[str]: + """Get names of tensors. + + Parameters + ---------- + suffix : str + The suffix of the scope + + Returns + ------- + Tuple[str] + Names of tensors + """ + return ( + f"o_rmat{suffix}:0", + f"o_rmat_deriv{suffix}:0", + f"o_rij{suffix}:0", + f"o_nlist{suffix}:0", + ) + + def pass_tensors_from_frz_model( + self, + descrpt_reshape: tf.Tensor, + descrpt_deriv: tf.Tensor, + rij: tf.Tensor, + nlist: tf.Tensor, + ): + """Pass the descrpt_reshape tensor as well as descrpt_deriv tensor from the frz graph_def. + + Parameters + ---------- + descrpt_reshape + The passed descrpt_reshape tensor + descrpt_deriv + The passed descrpt_deriv tensor + rij + The passed rij tensor + nlist + The passed nlist tensor + """ + self.rij = rij + self.nlist = nlist + self.descrpt_deriv = descrpt_deriv + self.descrpt_reshape = descrpt_reshape + + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + suffix: str = "", + ) -> None: + """Init the embedding net variables with the given dict. 
+ + Parameters + ---------- + graph : tf.Graph + The input frozen model graph + graph_def : tf.GraphDef + The input frozen model graph_def + suffix : str, optional + The suffix of the scope + """ + self.embedding_net_variables = get_embedding_net_variables_from_graph_def( + graph_def, suffix=suffix + ) + self.davg = get_tensor_by_name_from_graph( + graph, "descrpt_attr%s/t_avg" % suffix + ) + self.dstd = get_tensor_by_name_from_graph( + graph, "descrpt_attr%s/t_std" % suffix + ) + + @property + def precision(self) -> tf.DType: + """Precision of filter network.""" + return self.filter_precision + + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. + + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + # default behavior is to update sel which is a list + local_jdata_cpy = local_jdata.copy() + return UpdateSel().update_one_sel(global_jdata, local_jdata_cpy, False) + + def serialize_network( + self, + ntypes: int, + ndim: int, + in_dim: int, + neuron: List[int], + activation_function: str, + resnet_dt: bool, + variables: dict, + excluded_types: Set[Tuple[int, int]] = set(), + suffix: str = "", + ) -> dict: + """Serialize network. 
+ + Parameters + ---------- + ntypes : int + The number of types + ndim : int + The dimension of elements + in_dim : int + The input dimension + neuron : List[int] + The neuron list + activation_function : str + The activation function + resnet_dt : bool + Whether to use resnet + variables : dict + The input variables + excluded_types : Set[Tuple[int, int]], optional + The excluded types + suffix : str, optional + The suffix of the scope + + Returns + ------- + dict + The converted network data + """ + embeddings = NetworkCollection( + ntypes=ntypes, + ndim=ndim, + network_type="embedding_network", + ) + if ndim == 2: + for type_i, type_j in excluded_types: + # initialize an empty network for the excluded types + embeddings[(type_i, type_j)] = EmbeddingNet( + in_dim=in_dim, + neuron=neuron, + activation_function=activation_function, + resnet_dt=resnet_dt, + precision=self.precision.name, + ) + embeddings[(type_j, type_i)] = EmbeddingNet( + in_dim=in_dim, + neuron=neuron, + activation_function=activation_function, + resnet_dt=resnet_dt, + precision=self.precision.name, + ) + embeddings[(type_i, type_j)].clear() + embeddings[(type_j, type_i)].clear() + + if suffix != "": + embedding_net_pattern = ( + EMBEDDING_NET_PATTERN.replace("/(idt)", suffix + "/(idt)") + .replace("/(bias)", suffix + "/(bias)") + .replace("/(matrix)", suffix + "/(matrix)") + ) + else: + embedding_net_pattern = EMBEDDING_NET_PATTERN + for key, value in variables.items(): + m = re.search(embedding_net_pattern, key) + m = [mm for mm in m.groups() if mm is not None] + typei = m[0] + typej = "_".join(m[3:]) if len(m[3:]) else "all" + layer_idx = int(m[2]) - 1 + weight_name = m[1] + if ndim == 0: + network_idx = () + elif ndim == 1: + network_idx = (int(typej),) + elif ndim == 2: + network_idx = (int(typei), int(typej)) + else: + raise ValueError(f"Invalid ndim: {ndim}") + if embeddings[network_idx] is None: + # initialize the network if it is not initialized + embeddings[network_idx] = EmbeddingNet( 
+ in_dim=in_dim, + neuron=neuron, + activation_function=activation_function, + resnet_dt=resnet_dt, + precision=self.precision.name, + ) + assert embeddings[network_idx] is not None + if weight_name == "idt": + value = value.ravel() + embeddings[network_idx][layer_idx][weight_name] = value + return embeddings.serialize() + + @classmethod + def deserialize_network(cls, data: dict, suffix: str = "") -> dict: + """Deserialize network. + + Parameters + ---------- + data : dict + The input network data + suffix : str, optional + The suffix of the scope + + Returns + ------- + variables : dict + The input variables + """ + embedding_net_variables = {} + embeddings = NetworkCollection.deserialize(data) + for ii in range(embeddings.ntypes**embeddings.ndim): + net_idx = [] + rest_ii = ii + for _ in range(embeddings.ndim): + net_idx.append(rest_ii % embeddings.ntypes) + rest_ii //= embeddings.ntypes + net_idx = tuple(net_idx) + if embeddings.ndim in (0, 1): + key0 = "all" + key1 = f"_{ii}" + elif embeddings.ndim == 2: + key0 = f"{net_idx[0]}" + key1 = f"_{net_idx[1]}" + else: + raise ValueError(f"Invalid ndim: {embeddings.ndim}") + network = embeddings[net_idx] + assert network is not None + for layer_idx, layer in enumerate(network.layers): + embedding_net_variables[ + f"filter_type_{key0}{suffix}/matrix_{layer_idx + 1}{key1}" + ] = layer.w + embedding_net_variables[ + f"filter_type_{key0}{suffix}/bias_{layer_idx + 1}{key1}" + ] = layer.b + if layer.idt is not None: + embedding_net_variables[ + f"filter_type_{key0}{suffix}/idt_{layer_idx + 1}{key1}" + ] = layer.idt.reshape(1, -1) + else: + # prevent keyError + embedding_net_variables[ + f"filter_type_{key0}{suffix}/idt_{layer_idx + 1}{key1}" + ] = 0.0 + return embedding_net_variables diff --git a/deepmd/descriptor/se_a.py b/deepmd/tf/descriptor/se_a.py similarity index 90% rename from deepmd/descriptor/se_a.py rename to deepmd/tf/descriptor/se_a.py index 721bb0d534..7b22b3efd2 100644 --- a/deepmd/descriptor/se_a.py +++ 
b/deepmd/tf/descriptor/se_a.py @@ -7,20 +7,23 @@ import numpy as np -from deepmd.common import ( +from deepmd.dpmodel.utils.env_mat import ( + EnvMat, +) +from deepmd.tf.common import ( cast_precision, get_activation_func, get_np_precision, get_precision, ) -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_NP_FLOAT_PRECISION, GLOBAL_TF_FLOAT_PRECISION, default_tf_session_config, op_module, tf, ) -from deepmd.nvnmd.descriptor.se_a import ( +from deepmd.tf.nvnmd.descriptor.se_a import ( build_davg_dstd, build_op_descriptor, check_switch_range, @@ -28,40 +31,43 @@ filter_GR2D, filter_lower_R42GR, ) -from deepmd.nvnmd.utils.config import ( +from deepmd.tf.nvnmd.utils.config import ( nvnmd_cfg, ) -from deepmd.utils.compress import ( +from deepmd.tf.utils.compress import ( get_extra_side_embedding_net_variable, get_two_side_type_embedding, get_type_embedding, make_data, ) -from deepmd.utils.errors import ( +from deepmd.tf.utils.errors import ( GraphWithoutTensorError, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( get_extra_embedding_net_suffix, get_extra_embedding_net_variables_from_graph_def, get_pattern_nodes_from_graph_def, get_tensor_by_name_from_graph, ) -from deepmd.utils.network import ( +from deepmd.tf.utils.network import ( embedding_net, embedding_net_rand_seed_shift, ) -from deepmd.utils.sess import ( +from deepmd.tf.utils.sess import ( run_sess, ) -from deepmd.utils.spin import ( +from deepmd.tf.utils.spin import ( Spin, ) -from deepmd.utils.tabulate import ( +from deepmd.tf.utils.tabulate import ( DPTabulate, ) -from deepmd.utils.type_embed import ( +from deepmd.tf.utils.type_embed import ( embed_atom_type, ) +from deepmd.utils.version import ( + check_version_compatibility, +) from .descriptor import ( Descriptor, @@ -112,7 +118,7 @@ class DescrptSeA(DescrptSe): :math:`\mathcal{G}^i_< \in \mathbb{R}^{N \times M_2}` takes first :math:`M_2` columns of :math:`\mathcal{G}^i`. 
The equation of embedding network :math:`\mathcal{N}` can be found at - :meth:`deepmd.utils.network.embedding_net`. + :meth:`deepmd.tf.utils.network.embedding_net`. Parameters ---------- @@ -120,7 +126,7 @@ class DescrptSeA(DescrptSe): The cut-off radius :math:`r_c` rcut_smth From where the environment matrix should be smoothed :math:`r_s` - sel : list[str] + sel : list[int] sel[i] specifies the maxmum number of type i atoms in the cut-off radius neuron : list[int] Number of neurons in each hidden layers of the embedding net :math:`\mathcal{N}` @@ -148,6 +154,8 @@ class DescrptSeA(DescrptSe): Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed multi_task If the model has multi fitting nets to train. + env_protection: float + Protection parameter to prevent division by zero errors during environment matrix calculations. References ---------- @@ -161,7 +169,7 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: List[str], + sel: List[int], neuron: List[int] = [24, 48, 96], axis_neuron: int = 8, resnet_dt: bool = False, @@ -176,6 +184,7 @@ def __init__( multi_task: bool = False, spin: Optional[Spin] = None, stripped_type_embedding: bool = False, + env_protection: float = 0.0, # not implement!! **kwargs, ) -> None: """Constructor.""" @@ -183,6 +192,8 @@ def __init__( raise RuntimeError( f"rcut_smth ({rcut_smth:f}) should be no more than rcut ({rcut:f})!" 
) + if env_protection != 0.0: + raise NotImplementedError("env_protection != 0.0 is not supported.") self.sel_a = sel self.rcut_r = rcut self.rcut_r_smth = rcut_smth @@ -195,9 +206,12 @@ def __init__( self.trainable = trainable self.compress_activation_fn = get_activation_func(activation_function) self.filter_activation_fn = get_activation_func(activation_function) + self.activation_function_name = activation_function self.filter_precision = get_precision(precision) self.filter_np_precision = get_np_precision(precision) + self.orig_exclude_types = exclude_types self.exclude_types = set() + self.env_protection = env_protection for tt in exclude_types: assert len(tt) == 2 self.exclude_types.add((tt[0], tt[1])) @@ -333,15 +347,15 @@ def compute_input_stats( Parameters ---------- data_coord - The coordinates. Can be generated by deepmd.model.make_stat_input + The coordinates. Can be generated by deepmd.tf.model.make_stat_input data_box - The box. Can be generated by deepmd.model.make_stat_input + The box. Can be generated by deepmd.tf.model.make_stat_input data_atype - The atom types. Can be generated by deepmd.model.make_stat_input + The atom types. Can be generated by deepmd.tf.model.make_stat_input natoms_vec - The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.model.make_stat_input + The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.tf.model.make_stat_input mesh - The mesh for neighbor searching. Can be generated by deepmd.model.make_stat_input + The mesh for neighbor searching. 
Can be generated by deepmd.tf.model.make_stat_input input_dict Dictionary for additional input **kwargs @@ -708,7 +722,10 @@ def prod_force_virial( tf.summary.histogram("net_derivative", net_deriv) net_deriv_reshape = tf.reshape( net_deriv, - [np.cast["int64"](-1), natoms[0] * np.cast["int64"](self.ndescrpt)], + [ + np.asarray(-1, dtype=np.int64), + natoms[0] * np.asarray(self.ndescrpt, dtype=np.int64), + ], ) force = op_module.prod_force_se_a( net_deriv_reshape, @@ -1342,3 +1359,109 @@ def explicit_ntypes(self) -> bool: if self.stripped_type_embedding: return True return False + + @classmethod + def deserialize(cls, data: dict, suffix: str = ""): + """Deserialize the model. + + Parameters + ---------- + data : dict + The serialized data + + Returns + ------- + Model + The deserialized model + """ + if cls is not DescrptSeA: + raise NotImplementedError("Not implemented in class %s" % cls.__name__) + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("@class", None) + data.pop("type", None) + embedding_net_variables = cls.deserialize_network( + data.pop("embeddings"), suffix=suffix + ) + data.pop("env_mat") + variables = data.pop("@variables") + descriptor = cls(**data) + descriptor.embedding_net_variables = embedding_net_variables + descriptor.davg = variables["davg"].reshape( + descriptor.ntypes, descriptor.ndescrpt + ) + descriptor.dstd = variables["dstd"].reshape( + descriptor.ntypes, descriptor.ndescrpt + ) + return descriptor + + def serialize(self, suffix: str = "") -> dict: + """Serialize the model. 
+ + Parameters + ---------- + suffix : str, optional + The suffix of the scope + + Returns + ------- + dict + The serialized data + """ + if type(self) is not DescrptSeA: + raise NotImplementedError( + "Not implemented in class %s" % self.__class__.__name__ + ) + if self.stripped_type_embedding: + raise NotImplementedError( + "stripped_type_embedding is unsupported by the native model" + ) + if (self.original_sel != self.sel_a).any(): + raise NotImplementedError( + "Adjusting sel is unsupported by the native model" + ) + if self.embedding_net_variables is None: + raise RuntimeError("init_variables must be called before serialize") + if self.spin is not None: + raise NotImplementedError("spin is unsupported") + assert self.davg is not None + assert self.dstd is not None + # TODO: tf: handle type embedding in DescrptSeA.serialize + # not sure how to handle type embedding - type embedding is not a model parameter, + # but instead a part of the input data. Maybe the interface should be refactored... 
+ + return { + "@class": "Descriptor", + "type": "se_e2_a", + "@version": 1, + "rcut": self.rcut_r, + "rcut_smth": self.rcut_r_smth, + "sel": self.sel_a, + "neuron": self.filter_neuron, + "axis_neuron": self.n_axis_neuron, + "resnet_dt": self.filter_resnet_dt, + "trainable": self.trainable, + "type_one_side": self.type_one_side, + "exclude_types": list(self.orig_exclude_types), + "env_protection": self.env_protection, + "set_davg_zero": self.set_davg_zero, + "activation_function": self.activation_function_name, + "precision": self.filter_precision.name, + "embeddings": self.serialize_network( + ntypes=self.ntypes, + ndim=(1 if self.type_one_side else 2), + in_dim=1, + neuron=self.filter_neuron, + activation_function=self.activation_function_name, + resnet_dt=self.filter_resnet_dt, + variables=self.embedding_net_variables, + excluded_types=self.exclude_types, + suffix=suffix, + ), + "env_mat": EnvMat(self.rcut_r, self.rcut_r_smth).serialize(), + "@variables": { + "davg": self.davg.reshape(self.ntypes, self.nnei_a, 4), + "dstd": self.dstd.reshape(self.ntypes, self.nnei_a, 4), + }, + "spin": self.spin, + } diff --git a/deepmd/descriptor/se_a_ebd.py b/deepmd/tf/descriptor/se_a_ebd.py similarity index 99% rename from deepmd/descriptor/se_a_ebd.py rename to deepmd/tf/descriptor/se_a_ebd.py index 4816ec1569..f252bf114c 100644 --- a/deepmd/descriptor/se_a_ebd.py +++ b/deepmd/tf/descriptor/se_a_ebd.py @@ -6,15 +6,15 @@ import numpy as np -from deepmd.common import ( +from deepmd.tf.common import ( add_data_requirement, ) -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_TF_FLOAT_PRECISION, op_module, tf, ) -from deepmd.utils.network import ( +from deepmd.tf.utils.network import ( embedding_net, one_layer, ) @@ -38,7 +38,7 @@ class DescrptSeAEbd(DescrptSeA): The cut-off radius rcut_smth From where the environment matrix should be smoothed - sel : list[str] + sel : list[int] sel[i] specifies the maxmum number of type i atoms in the cut-off radius neuron : 
list[int] Number of neurons in each hidden layers of the embedding net @@ -74,7 +74,7 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: List[str], + sel: List[int], neuron: List[int] = [24, 48, 96], axis_neuron: int = 8, resnet_dt: bool = False, diff --git a/deepmd/descriptor/se_a_ebd_v2.py b/deepmd/tf/descriptor/se_a_ebd_v2.py similarity index 96% rename from deepmd/descriptor/se_a_ebd_v2.py rename to deepmd/tf/descriptor/se_a_ebd_v2.py index c6e3cebc71..0d2acbc9d5 100644 --- a/deepmd/descriptor/se_a_ebd_v2.py +++ b/deepmd/tf/descriptor/se_a_ebd_v2.py @@ -5,7 +5,7 @@ Optional, ) -from deepmd.utils.spin import ( +from deepmd.tf.utils.spin import ( Spin, ) @@ -31,7 +31,7 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: List[str], + sel: List[int], neuron: List[int] = [24, 48, 96], axis_neuron: int = 8, resnet_dt: bool = False, diff --git a/deepmd/descriptor/se_a_ef.py b/deepmd/tf/descriptor/se_a_ef.py similarity index 97% rename from deepmd/descriptor/se_a_ef.py rename to deepmd/tf/descriptor/se_a_ef.py index 32a62b48f3..f1201d30fb 100644 --- a/deepmd/descriptor/se_a_ef.py +++ b/deepmd/tf/descriptor/se_a_ef.py @@ -7,17 +7,17 @@ import numpy as np -from deepmd.common import ( +from deepmd.tf.common import ( add_data_requirement, ) -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_NP_FLOAT_PRECISION, GLOBAL_TF_FLOAT_PRECISION, default_tf_session_config, op_module, tf, ) -from deepmd.utils.sess import ( +from deepmd.tf.utils.sess import ( run_sess, ) @@ -42,7 +42,7 @@ class DescrptSeAEf(DescrptSe): The cut-off radius rcut_smth From where the environment matrix should be smoothed - sel : list[str] + sel : list[int] sel[i] specifies the maxmum number of type i atoms in the cut-off radius neuron : list[int] Number of neurons in each hidden layers of the embedding net @@ -74,7 +74,7 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: List[str], + sel: List[int], neuron: List[int] = [24, 48, 96], axis_neuron: int = 8, resnet_dt: 
bool = False, @@ -180,15 +180,15 @@ def compute_input_stats( Parameters ---------- data_coord - The coordinates. Can be generated by deepmd.model.make_stat_input + The coordinates. Can be generated by deepmd.tf.model.make_stat_input data_box - The box. Can be generated by deepmd.model.make_stat_input + The box. Can be generated by deepmd.tf.model.make_stat_input data_atype - The atom types. Can be generated by deepmd.model.make_stat_input + The atom types. Can be generated by deepmd.tf.model.make_stat_input natoms_vec - The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.model.make_stat_input + The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.tf.model.make_stat_input mesh - The mesh for neighbor searching. Can be generated by deepmd.model.make_stat_input + The mesh for neighbor searching. Can be generated by deepmd.tf.model.make_stat_input input_dict Dictionary for additional input **kwargs @@ -305,7 +305,7 @@ def __init__( op, rcut: float, rcut_smth: float, - sel: List[str], + sel: List[int], neuron: List[int] = [24, 48, 96], axis_neuron: int = 8, resnet_dt: bool = False, diff --git a/deepmd/descriptor/se_a_mask.py b/deepmd/tf/descriptor/se_a_mask.py similarity index 95% rename from deepmd/descriptor/se_a_mask.py rename to deepmd/tf/descriptor/se_a_mask.py index cc2e6b4fc8..d1ae5d7bad 100644 --- a/deepmd/descriptor/se_a_mask.py +++ b/deepmd/tf/descriptor/se_a_mask.py @@ -10,18 +10,18 @@ import numpy as np -from deepmd.common import ( +from deepmd.tf.common import ( get_activation_func, get_precision, ) -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_NP_FLOAT_PRECISION, GLOBAL_TF_FLOAT_PRECISION, default_tf_session_config, op_module, tf, ) -from deepmd.utils.network import ( +from deepmd.tf.utils.network import ( embedding_net_rand_seed_shift, ) @@ -73,7 +73,7 @@ class DescrptSeAMask(DescrptSeA): :math:`\mathcal{G}^i_< \in \mathbb{R}^{N 
\times M_2}` takes first :math:`M_2` columns of :math:`\mathcal{G}^i`. The equation of embedding network :math:`\mathcal{N}` can be found at - :meth:`deepmd.utils.network.embedding_net`. + :meth:`deepmd.tf.utils.network.embedding_net`. Specially for descriptor se_a_mask is a concise implementation of se_a. The difference is that se_a_mask only considered a non-pbc system. And accept a mask matrix to indicate the atom i in frame j is a real atom or not. @@ -82,7 +82,7 @@ class DescrptSeAMask(DescrptSeA): Parameters ---------- - sel : list[str] + sel : list[int] sel[i] specifies the maxmum number of type i atoms in the neighbor list. neuron : list[int] Number of neurons in each hidden layers of the embedding net :math:`\mathcal{N}` @@ -117,7 +117,7 @@ class DescrptSeAMask(DescrptSeA): def __init__( self, - sel: List[str], + sel: List[int], neuron: List[int] = [24, 48, 96], axis_neuron: int = 8, resnet_dt: bool = False, @@ -235,24 +235,23 @@ def compute_input_stats( Parameters ---------- data_coord - The coordinates. Can be generated by deepmd.model.make_stat_input + The coordinates. Can be generated by deepmd.tf.model.make_stat_input data_box - The box. Can be generated by deepmd.model.make_stat_input + The box. Can be generated by deepmd.tf.model.make_stat_input data_atype - The atom types. Can be generated by deepmd.model.make_stat_input + The atom types. Can be generated by deepmd.tf.model.make_stat_input natoms_vec - The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.model.make_stat_input + The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.tf.model.make_stat_input mesh - The mesh for neighbor searching. Can be generated by deepmd.model.make_stat_input + The mesh for neighbor searching. Can be generated by deepmd.tf.model.make_stat_input input_dict Dictionary for additional input **kwargs Additional keyword arguments. 
""" - """ - TODO: Since not all input atoms are real in se_a_mask, - statistics should be reimplemented for se_a_mask descriptor. - """ + # TODO: implement compute_input_stats for DescrptSeAMask + # Since not all input atoms are real in se_a_mask, + # statistics should be reimplemented for se_a_mask descriptor. self.davg = None self.dstd = None diff --git a/deepmd/descriptor/se_atten.py b/deepmd/tf/descriptor/se_atten.py similarity index 96% rename from deepmd/descriptor/se_atten.py rename to deepmd/tf/descriptor/se_atten.py index 1ceda23065..51e34e9b08 100644 --- a/deepmd/descriptor/se_atten.py +++ b/deepmd/tf/descriptor/se_atten.py @@ -12,11 +12,11 @@ Version, ) -from deepmd.common import ( +from deepmd.tf.common import ( cast_precision, get_np_precision, ) -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_NP_FLOAT_PRECISION, GLOBAL_TF_FLOAT_PRECISION, TF_VERSION, @@ -24,7 +24,7 @@ op_module, tf, ) -from deepmd.nvnmd.descriptor.se_atten import ( +from deepmd.tf.nvnmd.descriptor.se_atten import ( build_davg_dstd, build_op_descriptor, check_switch_range, @@ -32,31 +32,34 @@ filter_GR2D, filter_lower_R42GR, ) -from deepmd.nvnmd.utils.config import ( +from deepmd.tf.nvnmd.utils.config import ( nvnmd_cfg, ) -from deepmd.utils.compress import ( +from deepmd.tf.utils.compress import ( get_extra_side_embedding_net_variable, get_two_side_type_embedding, make_data, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( get_attention_layer_variables_from_graph_def, get_extra_embedding_net_suffix, get_extra_embedding_net_variables_from_graph_def, get_pattern_nodes_from_graph_def, get_tensor_by_name_from_graph, ) -from deepmd.utils.network import ( +from deepmd.tf.utils.network import ( embedding_net, one_layer, ) -from deepmd.utils.sess import ( +from deepmd.tf.utils.sess import ( run_sess, ) -from deepmd.utils.tabulate import ( +from deepmd.tf.utils.tabulate import ( DPTabulate, ) +from deepmd.tf.utils.update_sel import ( + UpdateSel, +) from 
.descriptor import ( Descriptor, @@ -68,6 +71,7 @@ log = logging.getLogger(__name__) +@Descriptor.register("dpa1") @Descriptor.register("se_atten") class DescrptSeAtten(DescrptSeA): r"""Smooth version descriptor with attention. @@ -78,7 +82,7 @@ class DescrptSeAtten(DescrptSeA): The cut-off radius :math:`r_c` rcut_smth From where the environment matrix should be smoothed :math:`r_s` - sel : list[str] + sel : int sel[i] specifies the maxmum number of type i atoms in the cut-off radius neuron : list[int] Number of neurons in each hidden layers of the embedding net :math:`\mathcal{N}` @@ -152,6 +156,16 @@ def __init__( multi_task: bool = False, stripped_type_embedding: bool = False, smooth_type_embdding: bool = False, + # not implemented + post_ln=True, + ffn=False, + ffn_embed_dim=1024, + scaling_factor=1.0, + head_num=1, + normalize=True, + temperature=None, + return_rot=False, + concat_output_tebd: bool = True, **kwargs, ) -> None: if not set_davg_zero and not (stripped_type_embedding and smooth_type_embdding): @@ -159,6 +173,24 @@ def __init__( "Set 'set_davg_zero' False in descriptor 'se_atten' " "may cause unexpected incontinuity during model inference!" 
) + if not post_ln: + raise NotImplementedError("post_ln is not supported.") + if ffn: + raise NotImplementedError("ffn is not supported.") + if ffn_embed_dim != 1024: + raise NotImplementedError("ffn_embed_dim is not supported.") + if scaling_factor != 1.0: + raise NotImplementedError("scaling_factor is not supported.") + if head_num != 1: + raise NotImplementedError("head_num is not supported.") + if not normalize: + raise NotImplementedError("normalize is not supported.") + if temperature is not None: + raise NotImplementedError("temperature is not supported.") + if return_rot: + raise NotImplementedError("return_rot is not supported.") + if not concat_output_tebd: + raise NotImplementedError("concat_output_tebd is not supported.") DescrptSeA.__init__( self, rcut, @@ -269,16 +301,16 @@ def compute_input_stats( Parameters ---------- data_coord - The coordinates. Can be generated by deepmd.model.make_stat_input + The coordinates. Can be generated by deepmd.tf.model.make_stat_input data_box - The box. Can be generated by deepmd.model.make_stat_input + The box. Can be generated by deepmd.tf.model.make_stat_input data_atype - The atom types. Can be generated by deepmd.model.make_stat_input + The atom types. Can be generated by deepmd.tf.model.make_stat_input natoms_vec The vector for the number of atoms of the system and different types of atoms. If mixed_type is True, this para is blank. See real_natoms_vec. mesh - The mesh for neighbor searching. Can be generated by deepmd.model.make_stat_input + The mesh for neighbor searching. 
Can be generated by deepmd.tf.model.make_stat_input input_dict Dictionary for additional input mixed_type @@ -990,6 +1022,7 @@ def _attention_layers( input_xyz = tf.keras.layers.LayerNormalization( beta_initializer=tf.constant_initializer(self.beta[i]), gamma_initializer=tf.constant_initializer(self.gamma[i]), + dtype=self.filter_precision, )(input_xyz) # input_xyz = self._feedforward(input_xyz, outputs_size[-1], self.att_n) return input_xyz @@ -1339,7 +1372,7 @@ def build_type_exclude_mask( Notes ----- This method has the similiar way to build the type exclude mask as - :meth:`deepmd.descriptor.descriptor.Descriptor.build_type_exclude_mask`. + :meth:`deepmd.tf.descriptor.descriptor.Descriptor.build_type_exclude_mask`. The mathmatical expression has been explained in that method. The difference is that the attention descriptor has provided the type of the neighbors (idx_j) that is not in order, so we use it from an extra @@ -1373,7 +1406,7 @@ def build_type_exclude_mask( See Also -------- - deepmd.descriptor.descriptor.Descriptor.build_type_exclude_mask + deepmd.tf.descriptor.descriptor.Descriptor.build_type_exclude_mask """ # generate a mask # op returns ntypes when the neighbor doesn't exist, so we need to add 1 @@ -1424,9 +1457,5 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict): local_jdata : dict The local data refer to the current class """ - from deepmd.entrypoints.train import ( - update_one_sel, - ) - local_jdata_cpy = local_jdata.copy() - return update_one_sel(global_jdata, local_jdata_cpy, True) + return UpdateSel().update_one_sel(global_jdata, local_jdata_cpy, True) diff --git a/deepmd/descriptor/se_atten_v2.py b/deepmd/tf/descriptor/se_atten_v2.py similarity index 99% rename from deepmd/descriptor/se_atten_v2.py rename to deepmd/tf/descriptor/se_atten_v2.py index 0e1a70262f..784e02d84d 100644 --- a/deepmd/descriptor/se_atten_v2.py +++ b/deepmd/tf/descriptor/se_atten_v2.py @@ -25,7 +25,7 @@ class DescrptSeAttenV2(DescrptSeAtten): The cut-off 
radius :math:`r_c` rcut_smth From where the environment matrix should be smoothed :math:`r_s` - sel : list[str] + sel : int sel[i] specifies the maxmum number of type i atoms in the cut-off radius neuron : list[int] Number of neurons in each hidden layers of the embedding net :math:`\mathcal{N}` diff --git a/deepmd/descriptor/se_r.py b/deepmd/tf/descriptor/se_r.py similarity index 83% rename from deepmd/descriptor/se_r.py rename to deepmd/tf/descriptor/se_r.py index ae926c339f..1443914aab 100644 --- a/deepmd/descriptor/se_r.py +++ b/deepmd/tf/descriptor/se_r.py @@ -7,34 +7,40 @@ import numpy as np -from deepmd.common import ( +from deepmd.dpmodel.utils.env_mat import ( + EnvMat, +) +from deepmd.tf.common import ( cast_precision, get_activation_func, get_precision, ) -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_NP_FLOAT_PRECISION, GLOBAL_TF_FLOAT_PRECISION, default_tf_session_config, op_module, tf, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( get_tensor_by_name_from_graph, ) -from deepmd.utils.network import ( +from deepmd.tf.utils.network import ( embedding_net, embedding_net_rand_seed_shift, ) -from deepmd.utils.sess import ( +from deepmd.tf.utils.sess import ( run_sess, ) -from deepmd.utils.spin import ( +from deepmd.tf.utils.spin import ( Spin, ) -from deepmd.utils.tabulate import ( +from deepmd.tf.utils.tabulate import ( DPTabulate, ) +from deepmd.utils.version import ( + check_version_compatibility, +) from .descriptor import ( Descriptor, @@ -57,7 +63,7 @@ class DescrptSeR(DescrptSe): The cut-off radius rcut_smth From where the environment matrix should be smoothed - sel : list[str] + sel : list[int] sel[i] specifies the maxmum number of type i atoms in the cut-off radius neuron : list[int] Number of neurons in each hidden layers of the embedding net @@ -85,7 +91,7 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: List[str], + sel: List[int], neuron: List[int] = [24, 48, 96], resnet_dt: bool = False, 
trainable: bool = True, @@ -98,6 +104,7 @@ def __init__( uniform_seed: bool = False, multi_task: bool = False, spin: Optional[Spin] = None, + env_protection: float = 0.0, # not implement!! **kwargs, ) -> None: """Constructor.""" @@ -105,6 +112,8 @@ def __init__( raise RuntimeError( f"rcut_smth ({rcut_smth:f}) should be no more than rcut ({rcut:f})!" ) + if env_protection != 0.0: + raise NotImplementedError("env_protection != 0.0 is not supported.") self.sel_r = sel self.rcut = rcut self.rcut_smth = rcut_smth @@ -115,9 +124,11 @@ def __init__( self.seed_shift = embedding_net_rand_seed_shift(self.filter_neuron) self.trainable = trainable self.filter_activation_fn = get_activation_func(activation_function) + self.activation_function_name = activation_function self.filter_precision = get_precision(precision) - exclude_types = exclude_types + self.orig_exclude_types = exclude_types self.exclude_types = set() + self.env_protection = env_protection for tt in exclude_types: assert len(tt) == 2 self.exclude_types.add((tt[0], tt[1])) @@ -235,15 +246,15 @@ def compute_input_stats( Parameters ---------- data_coord - The coordinates. Can be generated by deepmd.model.make_stat_input + The coordinates. Can be generated by deepmd.tf.model.make_stat_input data_box - The box. Can be generated by deepmd.model.make_stat_input + The box. Can be generated by deepmd.tf.model.make_stat_input data_atype - The atom types. Can be generated by deepmd.model.make_stat_input + The atom types. Can be generated by deepmd.tf.model.make_stat_input natoms_vec - The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.model.make_stat_input + The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.tf.model.make_stat_input mesh - The mesh for neighbor searching. Can be generated by deepmd.model.make_stat_input + The mesh for neighbor searching. 
Can be generated by deepmd.tf.model.make_stat_input input_dict Dictionary for additional input **kwargs @@ -500,7 +511,10 @@ def prod_force_virial( tf.summary.histogram("net_derivative", net_deriv) net_deriv_reshape = tf.reshape( net_deriv, - [np.cast["int64"](-1), natoms[0] * np.cast["int64"](self.ndescrpt)], + [ + np.asarray(-1, dtype=np.int64), + natoms[0] * np.asarray(self.ndescrpt, dtype=np.int64), + ], ) force = op_module.prod_force_se_r( net_deriv_reshape, self.descrpt_deriv, self.nlist, natoms @@ -695,3 +709,97 @@ def _filter_r( result = tf.reduce_mean(xyz_scatter, axis=1) * res_rescale return result + + @classmethod + def deserialize(cls, data: dict, suffix: str = ""): + """Deserialize the model. + + Parameters + ---------- + data : dict + The serialized data + + Returns + ------- + Model + The deserialized model + """ + if cls is not DescrptSeR: + raise NotImplementedError("Not implemented in class %s" % cls.__name__) + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 1, 1) + embedding_net_variables = cls.deserialize_network( + data.pop("embeddings"), suffix=suffix + ) + data.pop("env_mat") + variables = data.pop("@variables") + descriptor = cls(**data) + descriptor.embedding_net_variables = embedding_net_variables + descriptor.davg = variables["davg"].reshape( + descriptor.ntypes, descriptor.ndescrpt + ) + descriptor.dstd = variables["dstd"].reshape( + descriptor.ntypes, descriptor.ndescrpt + ) + return descriptor + + def serialize(self, suffix: str = "") -> dict: + """Serialize the model. 
+ + Parameters + ---------- + suffix : str, optional + The suffix of the scope + + Returns + ------- + dict + The serialized data + """ + if type(self) is not DescrptSeR: + raise NotImplementedError( + "Not implemented in class %s" % self.__class__.__name__ + ) + if self.embedding_net_variables is None: + raise RuntimeError("init_variables must be called before serialize") + if self.spin is not None: + raise NotImplementedError("spin is unsupported") + assert self.davg is not None + assert self.dstd is not None + # TODO: tf: handle type embedding in DescrptSeR.serialize + # not sure how to handle type embedding - type embedding is not a model parameter, + # but instead a part of the input data. Maybe the interface should be refactored... + return { + "@class": "Descriptor", + "type": "se_r", + "@version": 1, + "rcut": self.rcut, + "rcut_smth": self.rcut_smth, + "sel": self.sel_r, + "neuron": self.filter_neuron, + "resnet_dt": self.filter_resnet_dt, + "trainable": self.trainable, + "type_one_side": self.type_one_side, + "exclude_types": list(self.orig_exclude_types), + "env_protection": self.env_protection, + "set_davg_zero": self.set_davg_zero, + "activation_function": self.activation_function_name, + "precision": self.filter_precision.name, + "embeddings": self.serialize_network( + ntypes=self.ntypes, + ndim=(1 if self.type_one_side else 2), + in_dim=1, + neuron=self.filter_neuron, + activation_function=self.activation_function_name, + resnet_dt=self.filter_resnet_dt, + variables=self.embedding_net_variables, + excluded_types=self.exclude_types, + suffix=suffix, + ), + "env_mat": EnvMat(self.rcut, self.rcut_smth).serialize(), + "@variables": { + "davg": self.davg.reshape(self.ntypes, self.nnei_r, 1), + "dstd": self.dstd.reshape(self.ntypes, self.nnei_r, 1), + }, + "spin": self.spin, + } diff --git a/deepmd/descriptor/se_t.py b/deepmd/tf/descriptor/se_t.py similarity index 97% rename from deepmd/descriptor/se_t.py rename to deepmd/tf/descriptor/se_t.py index 
d0c9fcbc2e..4f6cda6c9c 100644 --- a/deepmd/descriptor/se_t.py +++ b/deepmd/tf/descriptor/se_t.py @@ -7,29 +7,29 @@ import numpy as np -from deepmd.common import ( +from deepmd.tf.common import ( cast_precision, get_activation_func, get_precision, ) -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_NP_FLOAT_PRECISION, GLOBAL_TF_FLOAT_PRECISION, default_tf_session_config, op_module, tf, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( get_tensor_by_name_from_graph, ) -from deepmd.utils.network import ( +from deepmd.tf.utils.network import ( embedding_net, embedding_net_rand_seed_shift, ) -from deepmd.utils.sess import ( +from deepmd.tf.utils.sess import ( run_sess, ) -from deepmd.utils.tabulate import ( +from deepmd.tf.utils.tabulate import ( DPTabulate, ) @@ -56,7 +56,7 @@ class DescrptSeT(DescrptSe): The cut-off radius rcut_smth From where the environment matrix should be smoothed - sel : list[str] + sel : list[int] sel[i] specifies the maxmum number of type i atoms in the cut-off radius neuron : list[int] Number of neurons in each hidden layers of the embedding net @@ -81,7 +81,7 @@ def __init__( self, rcut: float, rcut_smth: float, - sel: List[str], + sel: List[int], neuron: List[int] = [24, 48, 96], resnet_dt: bool = False, trainable: bool = True, @@ -225,15 +225,15 @@ def compute_input_stats( Parameters ---------- data_coord - The coordinates. Can be generated by deepmd.model.make_stat_input + The coordinates. Can be generated by deepmd.tf.model.make_stat_input data_box - The box. Can be generated by deepmd.model.make_stat_input + The box. Can be generated by deepmd.tf.model.make_stat_input data_atype - The atom types. Can be generated by deepmd.model.make_stat_input + The atom types. Can be generated by deepmd.tf.model.make_stat_input natoms_vec - The vector for the number of atoms of the system and different types of atoms. 
Can be generated by deepmd.model.make_stat_input + The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.tf.model.make_stat_input mesh - The mesh for neighbor searching. Can be generated by deepmd.model.make_stat_input + The mesh for neighbor searching. Can be generated by deepmd.tf.model.make_stat_input input_dict Dictionary for additional input **kwargs @@ -513,7 +513,10 @@ def prod_force_virial( [net_deriv] = tf.gradients(atom_ener, self.descrpt_reshape) net_deriv_reshape = tf.reshape( net_deriv, - [np.cast["int64"](-1), natoms[0] * np.cast["int64"](self.ndescrpt)], + [ + np.asarray(-1, dtype=np.int64), + natoms[0] * np.asarray(self.ndescrpt, dtype=np.int64), + ], ) force = op_module.prod_force_se_a( net_deriv_reshape, diff --git a/deepmd/tf/entrypoints/__init__.py b/deepmd/tf/entrypoints/__init__.py new file mode 100644 index 0000000000..9c3a8b31e1 --- /dev/null +++ b/deepmd/tf/entrypoints/__init__.py @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Submodule that contains all the DeePMD-Kit entry point scripts.""" + +from ..infer.model_devi import ( + make_model_devi, +) +from .compress import ( + compress, +) +from .convert import ( + convert, +) +from .doc import ( + doc_train_input, +) +from .freeze import ( + freeze, +) +from .gui import ( + start_dpgui, +) +from .neighbor_stat import ( + neighbor_stat, +) +from .test import ( + test, +) + +# import `train` as `train_dp` to avoid the conflict of the +# module name `train` and the function name `train` +from .train import train as train_dp +from .transfer import ( + transfer, +) + +__all__ = [ + "doc_train_input", + "freeze", + "test", + "train_dp", + "transfer", + "compress", + "doc_train_input", + "make_model_devi", + "convert", + "neighbor_stat", + "start_dpgui", +] diff --git a/deepmd/entrypoints/compress.py b/deepmd/tf/entrypoints/compress.py similarity index 93% rename from deepmd/entrypoints/compress.py rename to 
deepmd/tf/entrypoints/compress.py index 61d6dfcb44..1f2bbc93a0 100644 --- a/deepmd/entrypoints/compress.py +++ b/deepmd/tf/entrypoints/compress.py @@ -8,34 +8,35 @@ Optional, ) -from deepmd.common import ( +from deepmd.tf.common import ( j_loader, ) -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_ENER_FLOAT_PRECISION, tf, ) -from deepmd.utils.argcheck import ( +from deepmd.tf.utils.argcheck import ( normalize, ) -from deepmd.utils.compat import ( +from deepmd.tf.utils.compat import ( update_deepmd_input, ) -from deepmd.utils.errors import ( +from deepmd.tf.utils.errors import ( GraphTooLargeError, GraphWithoutTensorError, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( get_tensor_by_name_from_graph, load_graph_def, ) +from deepmd.tf.utils.update_sel import ( + UpdateSel, +) from .freeze import ( freeze, ) from .train import ( - get_min_nbor_dist, - get_rcut, train, ) @@ -115,7 +116,10 @@ def compress( log.info("stage 0: compute the min_nbor_dist") jdata = j_loader(training_script) jdata = update_deepmd_input(jdata) - t_min_nbor_dist = get_min_nbor_dist(jdata, get_rcut(jdata)) + update_sel = UpdateSel() + t_min_nbor_dist = update_sel.get_min_nbor_dist( + jdata, update_sel.get_rcut(jdata) + ) _check_compress_type(graph) diff --git a/deepmd/entrypoints/convert.py b/deepmd/tf/entrypoints/convert.py similarity index 97% rename from deepmd/entrypoints/convert.py rename to deepmd/tf/entrypoints/convert.py index bea047ba72..17c8667362 100644 --- a/deepmd/entrypoints/convert.py +++ b/deepmd/tf/entrypoints/convert.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from deepmd.utils.convert import ( +from deepmd.tf.utils.convert import ( convert_10_to_21, convert_012_to_21, convert_12_to_21, diff --git a/deepmd/tf/entrypoints/doc.py b/deepmd/tf/entrypoints/doc.py new file mode 100644 index 0000000000..941f989109 --- /dev/null +++ b/deepmd/tf/entrypoints/doc.py @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later 
+from deepmd.entrypoints.doc import ( + doc_train_input, +) + +__all__ = ["doc_train_input"] diff --git a/deepmd/entrypoints/freeze.py b/deepmd/tf/entrypoints/freeze.py similarity index 93% rename from deepmd/entrypoints/freeze.py rename to deepmd/tf/entrypoints/freeze.py index 22f3cb80b4..3d2a609797 100755 --- a/deepmd/entrypoints/freeze.py +++ b/deepmd/tf/entrypoints/freeze.py @@ -12,6 +12,9 @@ from os.path import ( abspath, ) +from pathlib import ( + Path, +) from typing import ( List, Optional, @@ -21,22 +24,22 @@ import google.protobuf.message # load grad of force module -import deepmd.op # noqa: F401 -from deepmd.env import ( +import deepmd.tf.op # noqa: F401 +from deepmd.tf.env import ( FITTING_NET_PATTERN, REMOVE_SUFFIX_DICT, tf, ) -from deepmd.nvnmd.entrypoints.freeze import ( +from deepmd.tf.nvnmd.entrypoints.freeze import ( save_weight, ) -from deepmd.utils.errors import ( +from deepmd.tf.utils.errors import ( GraphTooLargeError, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( get_pattern_nodes_from_graph_def, ) -from deepmd.utils.sess import ( +from deepmd.tf.utils.sess import ( run_sess, ) @@ -149,10 +152,8 @@ def _modify_model_suffix(output_graph_def, out_suffix, freeze_type): else: jdata["training"]["training_data"] = {} log.warning( - "The fitting net {} has no training data in input script, resulting in " - "untrained frozen model, and cannot be compressed directly! ".format( - out_suffix - ) + f"The fitting net {out_suffix} has no training data in input script, resulting in " + "untrained frozen model, and cannot be compressed directly! 
" ) # loss if "loss_dict" in jdata: @@ -356,13 +357,21 @@ def freeze_graph( output_node = _make_node_names( freeze_type, modifier, out_suffix=out_suffix, node_names=node_names ) + # see #3334 + optional_node = [ + "train_attr/min_nbor_dist", + "fitting_attr/aparam_nall", + "spin_attr/ntypes_spin", + ] different_set = set(output_node) - set(input_node) if different_set: - log.warning( - "The following nodes are not in the graph: %s. " - "Skip freezeing these nodes. You may be freezing " - "a checkpoint generated by an old version." % different_set - ) + different_set -= set(optional_node) + if different_set: + log.warning( + "The following nodes are not in the graph: %s. " + "Skip freezeing these nodes. You may be freezing " + "a checkpoint generated by an old version." % different_set + ) # use intersection as output list output_node = list(set(output_node) & set(input_node)) log.info(f"The following nodes will be frozen: {output_node}") @@ -479,7 +488,7 @@ def freeze( Parameters ---------- checkpoint_folder : str - location of the folder with model + location of either the folder with checkpoint or the checkpoint prefix output : str output file name node_names : Optional[str], optional @@ -492,8 +501,11 @@ def freeze( other arguments """ # We retrieve our checkpoint fullpath - checkpoint = tf.train.get_checkpoint_state(checkpoint_folder) - input_checkpoint = checkpoint.model_checkpoint_path + if Path(checkpoint_folder).is_dir(): + checkpoint = tf.train.get_checkpoint_state(checkpoint_folder) + input_checkpoint = checkpoint.model_checkpoint_path + else: + input_checkpoint = checkpoint_folder # expand the output file to full path output_graph = abspath(output) diff --git a/deepmd/tf/entrypoints/gui.py b/deepmd/tf/entrypoints/gui.py new file mode 100644 index 0000000000..ffeee29f7d --- /dev/null +++ b/deepmd/tf/entrypoints/gui.py @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.entrypoints.gui import ( + start_dpgui, +) + +__all__ = 
["start_dpgui"] diff --git a/deepmd/entrypoints/ipi.py b/deepmd/tf/entrypoints/ipi.py similarity index 95% rename from deepmd/entrypoints/ipi.py rename to deepmd/tf/entrypoints/ipi.py index da287ff3de..1631a35c2e 100644 --- a/deepmd/entrypoints/ipi.py +++ b/deepmd/tf/entrypoints/ipi.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Use dp_ipi inside the Python package.""" + import os import subprocess import sys @@ -7,7 +8,7 @@ List, ) -from deepmd.lmp import ( +from deepmd.tf.lmp import ( get_op_dir, ) diff --git a/deepmd/tf/entrypoints/main.py b/deepmd/tf/entrypoints/main.py new file mode 100644 index 0000000000..493e5b7aa4 --- /dev/null +++ b/deepmd/tf/entrypoints/main.py @@ -0,0 +1,92 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""DeePMD-Kit entry point module.""" + +import argparse +from pathlib import ( + Path, +) +from typing import ( + List, + Optional, + Union, +) + +from deepmd.backend.suffix import ( + format_model_suffix, +) +from deepmd.main import ( + get_ll, + main_parser, + parse_args, +) +from deepmd.tf.common import ( + clear_session, +) +from deepmd.tf.entrypoints import ( + compress, + convert, + freeze, + train_dp, + transfer, +) +from deepmd.tf.loggers import ( + set_log_handles, +) +from deepmd.tf.nvnmd.entrypoints.train import ( + train_nvnmd, +) + +__all__ = ["main", "parse_args", "get_ll", "main_parser"] + + +def main(args: Optional[Union[List[str], argparse.Namespace]] = None): + """DeePMD-Kit entry point. 
+ + Parameters + ---------- + args : List[str] or argparse.Namespace, optional + list of command line arguments, used to avoid calling from the subprocess, + as it is quite slow to import tensorflow; if Namespace is given, it will + be used directly + + Raises + ------ + RuntimeError + if no command was input + """ + if args is not None: + clear_session() + + if not isinstance(args, argparse.Namespace): + args = parse_args(args=args) + + # do not set log handles for None, it is useless + # log handles for train will be set separatelly + # when the use of MPI will be determined in `RunOptions` + if args.command not in (None, "train"): + set_log_handles(args.log_level, Path(args.log_path) if args.log_path else None) + + dict_args = vars(args) + + if args.command == "train": + train_dp(**dict_args) + elif args.command == "freeze": + dict_args["output"] = format_model_suffix( + dict_args["output"], preferred_backend=args.backend, strict_prefer=True + ) + freeze(**dict_args) + elif args.command == "transfer": + transfer(**dict_args) + elif args.command == "compress": + compress(**dict_args) + elif args.command == "convert-from": + convert(**dict_args) + elif args.command == "train-nvnmd": # nvnmd + train_nvnmd(**dict_args) + elif args.command is None: + pass + else: + raise RuntimeError(f"unknown command {args.command}") + + if args is not None: + clear_session() diff --git a/deepmd/tf/entrypoints/neighbor_stat.py b/deepmd/tf/entrypoints/neighbor_stat.py new file mode 100644 index 0000000000..5d31cdd179 --- /dev/null +++ b/deepmd/tf/entrypoints/neighbor_stat.py @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.entrypoints.neighbor_stat import ( + neighbor_stat, +) + +__all__ = ["neighbor_stat"] diff --git a/deepmd/tf/entrypoints/test.py b/deepmd/tf/entrypoints/test.py new file mode 100644 index 0000000000..8b4ca64179 --- /dev/null +++ b/deepmd/tf/entrypoints/test.py @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from 
deepmd.entrypoints.test import ( + test, +) + +__all__ = ["test"] diff --git a/deepmd/entrypoints/train.py b/deepmd/tf/entrypoints/train.py similarity index 55% rename from deepmd/entrypoints/train.py rename to deepmd/tf/entrypoints/train.py index 227aa13644..e573423fc3 100755 --- a/deepmd/entrypoints/train.py +++ b/deepmd/tf/entrypoints/train.py @@ -13,53 +13,41 @@ Optional, ) -from deepmd.common import ( - data_requirement, - expand_sys_str, +from deepmd.tf.common import ( j_loader, j_must_have, ) -from deepmd.env import ( - GLOBAL_ENER_FLOAT_PRECISION, +from deepmd.tf.env import ( reset_default_tf_session_config, tf, ) -from deepmd.infer.data_modifier import ( +from deepmd.tf.infer.data_modifier import ( DipoleChargeModifier, ) -from deepmd.model.model import ( +from deepmd.tf.model.model import ( Model, ) -from deepmd.train.run_options import ( - BUILD, - CITATION, - WELCOME, +from deepmd.tf.train.run_options import ( RunOptions, ) -from deepmd.train.trainer import ( +from deepmd.tf.train.trainer import ( DPTrainer, ) -from deepmd.utils import random as dp_random -from deepmd.utils.argcheck import ( +from deepmd.tf.utils import random as dp_random +from deepmd.tf.utils.argcheck import ( normalize, ) -from deepmd.utils.compat import ( +from deepmd.tf.utils.compat import ( update_deepmd_input, ) -from deepmd.utils.data_system import ( - DeepmdDataSystem, -) -from deepmd.utils.finetune import ( +from deepmd.tf.utils.finetune import ( replace_model_params_with_pretrained_model, ) -from deepmd.utils.multi_init import ( +from deepmd.tf.utils.multi_init import ( replace_model_params_with_frz_multi_model, ) -from deepmd.utils.neighbor_stat import ( - NeighborStat, -) -from deepmd.utils.path import ( - DPPath, +from deepmd.utils.data_system import ( + get_data, ) __all__ = ["train"] @@ -159,9 +147,6 @@ def train( dtype=tf.string, ) - for message in WELCOME + CITATION + BUILD: - log.info(message) - run_opt.print_resource_summary() if origin_type_map is not None: 
jdata["model"]["origin_type_map"] = origin_type_map @@ -291,53 +276,6 @@ def _do_work(jdata: Dict[str, Any], run_opt: RunOptions, is_compress: bool = Fal log.info("finished compressing") -def get_data(jdata: Dict[str, Any], rcut, type_map, modifier, multi_task_mode=False): - systems = j_must_have(jdata, "systems") - if isinstance(systems, str): - systems = expand_sys_str(systems) - elif isinstance(systems, list): - systems = systems.copy() - help_msg = "Please check your setting for data systems" - # check length of systems - if len(systems) == 0: - msg = "cannot find valid a data system" - log.fatal(msg) - raise OSError(msg, help_msg) - # rougly check all items in systems are valid - for ii in systems: - ii = DPPath(ii) - if not ii.is_dir(): - msg = f"dir {ii} is not a valid dir" - log.fatal(msg) - raise OSError(msg, help_msg) - if not (ii / "type.raw").is_file(): - msg = f"dir {ii} is not a valid data system dir" - log.fatal(msg) - raise OSError(msg, help_msg) - - batch_size = j_must_have(jdata, "batch_size") - sys_probs = jdata.get("sys_probs", None) - auto_prob = jdata.get("auto_prob", "prob_sys_size") - optional_type_map = not multi_task_mode - - data = DeepmdDataSystem( - systems=systems, - batch_size=batch_size, - test_size=1, # to satisfy the old api - shuffle_test=True, # to satisfy the old api - rcut=rcut, - type_map=type_map, - optional_type_map=optional_type_map, - modifier=modifier, - trn_all_set=True, # sample from all sets - sys_probs=sys_probs, - auto_prob_style=auto_prob, - ) - data.add_dict(data_requirement) - - return data - - def get_modifier(modi_data=None): modifier: Optional[DipoleChargeModifier] if modi_data is not None: @@ -356,154 +294,6 @@ def get_modifier(modi_data=None): return modifier -def get_rcut(jdata): - if jdata["model"].get("type") == "pairwise_dprc": - return max( - jdata["model"]["qm_model"]["descriptor"]["rcut"], - jdata["model"]["qmmm_model"]["descriptor"]["rcut"], - ) - descrpt_data = jdata["model"]["descriptor"] - 
rcut_list = [] - if descrpt_data["type"] == "hybrid": - for ii in descrpt_data["list"]: - rcut_list.append(ii["rcut"]) - else: - rcut_list.append(descrpt_data["rcut"]) - return max(rcut_list) - - -def get_type_map(jdata): - return jdata["model"].get("type_map", None) - - -def get_nbor_stat(jdata, rcut, one_type: bool = False): - # it seems that DeepmdDataSystem does not need rcut - # it's not clear why there is an argument... - # max_rcut = get_rcut(jdata) - max_rcut = rcut - type_map = get_type_map(jdata) - - if type_map and len(type_map) == 0: - type_map = None - multi_task_mode = "data_dict" in jdata["training"] - if not multi_task_mode: - train_data = get_data( - jdata["training"]["training_data"], max_rcut, type_map, None - ) - train_data.get_batch() - else: - assert ( - type_map is not None - ), "Data stat in multi-task mode must have available type_map! " - train_data = None - for systems in jdata["training"]["data_dict"]: - tmp_data = get_data( - jdata["training"]["data_dict"][systems]["training_data"], - max_rcut, - type_map, - None, - ) - tmp_data.get_batch() - assert tmp_data.get_type_map(), f"In multi-task mode, 'type_map.raw' must be defined in data systems {systems}! 
" - if train_data is None: - train_data = tmp_data - else: - train_data.system_dirs += tmp_data.system_dirs - train_data.data_systems += tmp_data.data_systems - train_data.natoms += tmp_data.natoms - train_data.natoms_vec += tmp_data.natoms_vec - train_data.default_mesh += tmp_data.default_mesh - data_ntypes = train_data.get_ntypes() - if type_map is not None: - map_ntypes = len(type_map) - else: - map_ntypes = data_ntypes - ntypes = max([map_ntypes, data_ntypes]) - - neistat = NeighborStat(ntypes, rcut, one_type=one_type) - - min_nbor_dist, max_nbor_size = neistat.get_stat(train_data) - - # moved from traier.py as duplicated - # TODO: this is a simple fix but we should have a clear - # architecture to call neighbor stat - tf.constant( - min_nbor_dist, - name="train_attr/min_nbor_dist", - dtype=GLOBAL_ENER_FLOAT_PRECISION, - ) - tf.constant(max_nbor_size, name="train_attr/max_nbor_size", dtype=tf.int32) - return min_nbor_dist, max_nbor_size - - -def get_sel(jdata, rcut, one_type: bool = False): - _, max_nbor_size = get_nbor_stat(jdata, rcut, one_type=one_type) - return max_nbor_size - - -def get_min_nbor_dist(jdata, rcut): - min_nbor_dist, _ = get_nbor_stat(jdata, rcut) - return min_nbor_dist - - -def parse_auto_sel(sel): - if not isinstance(sel, str): - return False - words = sel.split(":") - if words[0] == "auto": - return True - else: - return False - - -def parse_auto_sel_ratio(sel): - if not parse_auto_sel(sel): - raise RuntimeError(f"invalid auto sel format {sel}") - else: - words = sel.split(":") - if len(words) == 1: - ratio = 1.1 - elif len(words) == 2: - ratio = float(words[1]) - else: - raise RuntimeError(f"invalid auto sel format {sel}") - return ratio - - -def wrap_up_4(xx): - return 4 * ((int(xx) + 3) // 4) - - -def update_one_sel(jdata, descriptor, one_type: bool = False): - rcut = descriptor["rcut"] - tmp_sel = get_sel( - jdata, - rcut, - one_type=one_type, - ) - sel = descriptor["sel"] - if isinstance(sel, int): - # convert to list and finnally 
convert back to int - sel = [sel] - if parse_auto_sel(descriptor["sel"]): - ratio = parse_auto_sel_ratio(descriptor["sel"]) - descriptor["sel"] = sel = [int(wrap_up_4(ii * ratio)) for ii in tmp_sel] - else: - # sel is set by user - for ii, (tt, dd) in enumerate(zip(tmp_sel, sel)): - if dd and tt > dd: - # we may skip warning for sel=0, where the user is likely - # to exclude such type in the descriptor - log.warning( - "sel of type %d is not enough! The expected value is " - "not less than %d, but you set it to %d. The accuracy" - " of your model may get worse." % (ii, tt, dd) - ) - if one_type: - descriptor["sel"] = sel = sum(sel) - return descriptor - - def update_sel(jdata): log.info( "Calculate neighbor statistics... (add --skip-neighbor-stat to skip this step)" diff --git a/deepmd/entrypoints/transfer.py b/deepmd/tf/entrypoints/transfer.py similarity index 99% rename from deepmd/entrypoints/transfer.py rename to deepmd/tf/entrypoints/transfer.py index 535b32ec09..7c90c77de8 100644 --- a/deepmd/entrypoints/transfer.py +++ b/deepmd/tf/entrypoints/transfer.py @@ -11,7 +11,7 @@ import numpy as np -from deepmd.env import ( +from deepmd.tf.env import ( TRANSFER_PATTERN, tf, ) diff --git a/deepmd/tf/env.py b/deepmd/tf/env.py new file mode 100644 index 0000000000..c7873b951c --- /dev/null +++ b/deepmd/tf/env.py @@ -0,0 +1,456 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Module that sets tensorflow working environment and exports inportant constants.""" + +import ctypes +import os +import platform +from importlib import ( + import_module, + reload, +) +from pathlib import ( + Path, +) +from typing import ( + TYPE_CHECKING, + Any, +) + +import numpy as np +from packaging.version import ( + Version, +) + +from deepmd.env import ( + GLOBAL_CONFIG, + GLOBAL_ENER_FLOAT_PRECISION, + GLOBAL_NP_FLOAT_PRECISION, + SHARED_LIB_DIR, + SHARED_LIB_MODULE, +) +from deepmd.env import get_default_nthreads as get_tf_default_nthreads +from deepmd.env import ( + global_float_prec, 
+) +from deepmd.env import set_default_nthreads as set_tf_default_nthreads +from deepmd.env import ( + set_env_if_empty, +) + +if TYPE_CHECKING: + from types import ( + ModuleType, + ) + + +def dlopen_library(module: str, filename: str): + """Dlopen a library from a module. + + Parameters + ---------- + module : str + The module name. + filename : str + The library filename pattern. + """ + try: + m = import_module(module) + except ModuleNotFoundError: + pass + else: + libs = sorted(Path(m.__path__[0]).glob(filename)) + # hope that there is only one version installed... + if len(libs): + ctypes.CDLL(str(libs[0].absolute())) + + +# dlopen pip cuda library before tensorflow +if platform.system() == "Linux": + dlopen_library("nvidia.cuda_runtime.lib", "libcudart.so*") + dlopen_library("nvidia.cublas.lib", "libcublasLt.so*") + dlopen_library("nvidia.cublas.lib", "libcublas.so*") + dlopen_library("nvidia.cufft.lib", "libcufft.so*") + dlopen_library("nvidia.curand.lib", "libcurand.so*") + dlopen_library("nvidia.cusolver.lib", "libcusolver.so*") + dlopen_library("nvidia.cusparse.lib", "libcusparse.so*") + dlopen_library("nvidia.cudnn.lib", "libcudnn.so*") + +# keras 3 is incompatible with tf.compat.v1 +# https://keras.io/getting_started/#tensorflow--keras-2-backwards-compatibility +os.environ["TF_USE_LEGACY_KERAS"] = "1" +# import tensorflow v1 compatability +try: + import tensorflow.compat.v1 as tf + + tf.disable_v2_behavior() +except ImportError: + import tensorflow as tf +try: + import tensorflow.compat.v2 as tfv2 +except ImportError: + tfv2 = None + +__all__ = [ + "GLOBAL_CONFIG", + "GLOBAL_TF_FLOAT_PRECISION", + "GLOBAL_NP_FLOAT_PRECISION", + "GLOBAL_ENER_FLOAT_PRECISION", + "global_float_prec", + "global_cvt_2_tf_float", + "global_cvt_2_ener_float", + "MODEL_VERSION", + "SHARED_LIB_DIR", + "SHARED_LIB_MODULE", + "default_tf_session_config", + "reset_default_tf_session_config", + "op_module", + "op_grads_module", + "TRANSFER_PATTERN", + "FITTING_NET_PATTERN", + 
"EMBEDDING_NET_PATTERN", + "TYPE_EMBEDDING_PATTERN", + "ATTENTION_LAYER_PATTERN", + "REMOVE_SUFFIX_DICT", + "TF_VERSION", + "tf_py_version", +] + + +# Python library version +try: + tf_py_version = tf.version.VERSION +except AttributeError: + tf_py_version = tf.__version__ + +# subpatterns: +# \1: type of centeral atom +# \2: weight name +# \3: layer index +# The rest: types of neighbor atoms +# IMPORTANT: the order is critical to match the pattern +EMBEDDING_NET_PATTERN = str( + r"filter_type_(\d+)/(matrix)_(\d+)_(\d+)|" + r"filter_type_(\d+)/(bias)_(\d+)_(\d+)|" + r"filter_type_(\d+)/(idt)_(\d+)_(\d+)|" + r"filter_type_(all)/(matrix)_(\d+)_(\d+)_(\d+)|" + r"filter_type_(all)/(matrix)_(\d+)_(\d+)|" + r"filter_type_(all)/(matrix)_(\d+)|" + r"filter_type_(all)/(bias)_(\d+)_(\d+)_(\d+)|" + r"filter_type_(all)/(bias)_(\d+)_(\d+)|" + r"filter_type_(all)/(bias)_(\d+)|" + r"filter_type_(all)/(idt)_(\d+)_(\d+)|" + r"filter_type_(all)/(idt)_(\d+)|" +)[:-1] + +# subpatterns: +# \1: layer index or "final" +# \2: type of centeral atom, optional +# the last: weight name +FITTING_NET_PATTERN = str( + r"layer_(\d+)/(matrix)|" + r"layer_(\d+)_type_(\d+)/(matrix)|" + r"layer_(\d+)/(bias)|" + r"layer_(\d+)_type_(\d+)/(bias)|" + r"layer_(\d+)/(idt)|" + r"layer_(\d+)_type_(\d+)/(idt)|" + r"(final)_layer/(matrix)|" + r"(final)_layer_type_(\d+)/(matrix)|" + r"(final)_layer/(bias)|" + r"(final)_layer_type_(\d+)/(bias)|" + # TODO: supporting extracting parameters for shared layers + # not sure how to parse for shared layers... 
+ # layer_name + r"share_.+_type_\d/matrix|" + r"share_.+_type_\d/bias|" + r"share_.+_type_\d/idt|" + r"share_.+/matrix|" + r"share_.+/bias|" + r"share_.+/idt|" +)[:-1] + +# subpatterns: +# \1: weight name +# \2: layer index +TYPE_EMBEDDING_PATTERN = str( + r"type_embed_net/(matrix)_(\d+)|" + r"type_embed_net/(bias)_(\d+)|" + r"type_embed_net/(idt)_(\d+)|" +)[:-1] + +ATTENTION_LAYER_PATTERN = str( + r"attention_layer_\d+/c_query/matrix|" + r"attention_layer_\d+/c_query/bias|" + r"attention_layer_\d+/c_key/matrix|" + r"attention_layer_\d+/c_key/bias|" + r"attention_layer_\d+/c_value/matrix|" + r"attention_layer_\d+/c_value/bias|" + r"attention_layer_\d+/c_out/matrix|" + r"attention_layer_\d+/c_out/bias|" + r"attention_layer_\d+/layer_normalization/beta|" + r"attention_layer_\d+/layer_normalization/gamma|" + r"attention_layer_\d+/layer_normalization_\d+/beta|" + r"attention_layer_\d+/layer_normalization_\d+/gamma|" +) + +TRANSFER_PATTERN = ( + EMBEDDING_NET_PATTERN + + FITTING_NET_PATTERN + + TYPE_EMBEDDING_PATTERN + + str( + r"descrpt_attr/t_avg|" + r"descrpt_attr/t_std|" + r"fitting_attr/t_fparam_avg|" + r"fitting_attr/t_fparam_istd|" + r"fitting_attr/t_aparam_avg|" + r"fitting_attr/t_aparam_istd|" + r"model_attr/t_tab_info|" + r"model_attr/t_tab_data|" + ) +) + +REMOVE_SUFFIX_DICT = { + "model_attr/sel_type_{}": "model_attr/sel_type", + "model_attr/output_dim_{}": "model_attr/output_dim", + "_{}/": "/", + # when atom_ener is set + "_{}_1/": "_1/", + "o_energy_{}": "o_energy", + "o_force_{}": "o_force", + "o_virial_{}": "o_virial", + "o_atom_energy_{}": "o_atom_energy", + "o_atom_virial_{}": "o_atom_virial", + "o_dipole_{}": "o_dipole", + "o_global_dipole_{}": "o_global_dipole", + "o_polar_{}": "o_polar", + "o_global_polar_{}": "o_global_polar", + "o_rmat_{}": "o_rmat", + "o_rmat_deriv_{}": "o_rmat_deriv", + "o_nlist_{}": "o_nlist", + "o_rij_{}": "o_rij", + "o_dm_force_{}": "o_dm_force", + "o_dm_virial_{}": "o_dm_virial", + "o_dm_av_{}": "o_dm_av", + "o_wfc_{}": 
"o_wfc", +} + + +def set_mkl(): + """Tuning MKL for the best performance. + + References + ---------- + TF overview + https://www.tensorflow.org/guide/performance/overview + + Fixing an issue in numpy built by MKL + https://github.com/ContinuumIO/anaconda-issues/issues/11367 + https://github.com/numpy/numpy/issues/12374 + + check whether the numpy is built by mkl, see + https://github.com/numpy/numpy/issues/14751 + """ + try: + is_mkl = ( + np.show_config("dicts") + .get("Build Dependencies", {}) + .get("blas", {}) + .get("name", "") + .lower() + .startswith("mkl") + ) + except TypeError: + is_mkl = "mkl_rt" in np.__config__.get_info("blas_mkl_info").get( + "libraries", [] + ) + if is_mkl: + set_env_if_empty("KMP_BLOCKTIME", "0") + set_env_if_empty("KMP_AFFINITY", "granularity=fine,verbose,compact,1,0") + reload(np) + + +def get_tf_session_config() -> Any: + """Configure tensorflow session. + + Returns + ------- + Any + session configure object + """ + set_tf_default_nthreads() + intra, inter = get_tf_default_nthreads() + if int(os.environ.get("DP_JIT", 0)): + set_env_if_empty("TF_XLA_FLAGS", "--tf_xla_auto_jit=2") + # pip cuda package + if platform.system() == "Linux": + try: + m = import_module("nvidia.cuda_nvcc") + except ModuleNotFoundError: + pass + else: + cuda_data_dir = str(Path(m.__file__).parent.absolute()) + set_env_if_empty( + "XLA_FLAGS", "--xla_gpu_cuda_data_dir=" + cuda_data_dir + ) + config = tf.ConfigProto( + gpu_options=tf.GPUOptions(allow_growth=True), + intra_op_parallelism_threads=intra, + inter_op_parallelism_threads=inter, + ) + if Version(tf_py_version) >= Version("1.15") and int( + os.environ.get("DP_AUTO_PARALLELIZATION", 0) + ): + config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel" + return config + + +default_tf_session_config = get_tf_session_config() + + +def reset_default_tf_session_config(cpu_only: bool): + """Limit tensorflow session to CPU or not. 
+ + Parameters + ---------- + cpu_only : bool + If enabled, no GPU device is visible to the TensorFlow Session. + """ + global default_tf_session_config + if cpu_only: + default_tf_session_config.device_count["GPU"] = 0 + else: + if "GPU" in default_tf_session_config.device_count: + del default_tf_session_config.device_count["GPU"] + + +def get_module(module_name: str) -> "ModuleType": + """Load force module. + + Returns + ------- + ModuleType + loaded force module + + Raises + ------ + FileNotFoundError + if module is not found in directory + """ + if platform.system() == "Windows": + ext = ".dll" + prefix = "" + # elif platform.system() == "Darwin": + # ext = ".dylib" + else: + ext = ".so" + prefix = "lib" + + module_file = (SHARED_LIB_DIR / (prefix + module_name)).with_suffix(ext).resolve() + + if not module_file.is_file(): + raise FileNotFoundError(f"module {module_name} does not exist") + else: + try: + module = tf.load_op_library(str(module_file)) + except tf.errors.NotFoundError as e: + # check CXX11_ABI_FLAG is compatiblity + # see https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html + # ABI should be the same + if "CXX11_ABI_FLAG" in tf.__dict__: + tf_cxx11_abi_flag = tf.CXX11_ABI_FLAG + else: + tf_cxx11_abi_flag = tf.sysconfig.CXX11_ABI_FLAG + if TF_CXX11_ABI_FLAG != tf_cxx11_abi_flag: + raise RuntimeError( + "This deepmd-kit package was compiled with " + "CXX11_ABI_FLAG=%d, but TensorFlow runtime was compiled " + "with CXX11_ABI_FLAG=%d. These two library ABIs are " + "incompatible and thus an error is raised when loading %s. " + "You need to rebuild deepmd-kit against this TensorFlow " + "runtime." 
+ % ( + TF_CXX11_ABI_FLAG, + tf_cxx11_abi_flag, + module_name, + ) + ) from e + + # different versions may cause incompatibility + # see #406, #447, #557, #774, and #796 for example + # throw a message if versions are different + if TF_VERSION != tf_py_version: + raise RuntimeError( + "The version of TensorFlow used to compile this " + f"deepmd-kit package is {TF_VERSION}, but the version of TensorFlow " + f"runtime you are using is {tf_py_version}. These two versions are " + f"incompatible and thus an error is raised when loading {module_name}. " + f"You need to install TensorFlow {TF_VERSION}, or rebuild deepmd-kit " + f"against TensorFlow {tf_py_version}.\nIf you are using a wheel from " + "pypi, you may consider to install deepmd-kit execuating " + "`pip install deepmd-kit --no-binary deepmd-kit` " + "instead." + ) from e + error_message = ( + "This deepmd-kit package is inconsitent with TensorFlow " + f"Runtime, thus an error is raised when loading {module_name}. " + "You need to rebuild deepmd-kit against this TensorFlow " + "runtime." + ) + if TF_CXX11_ABI_FLAG == 1: + # #1791 + error_message += ( + "\nWARNING: devtoolset on RHEL6 and RHEL7 does not support _GLIBCXX_USE_CXX11_ABI=1. " + "See https://bugzilla.redhat.com/show_bug.cgi?id=1546704" + ) + raise RuntimeError(error_message) from e + return module + + +if GLOBAL_CONFIG["enable_tensorflow"] == "0": + raise RuntimeError( + "TensorFlow backend is not built. To enable it, " + "set the environmental variable DP_ENABLE_TENSORFLOW=1." 
+ ) +MODEL_VERSION = GLOBAL_CONFIG["model_version"] +TF_VERSION = GLOBAL_CONFIG["tf_version"] +TF_CXX11_ABI_FLAG = int(GLOBAL_CONFIG["tf_cxx11_abi_flag"]) + +op_module = get_module("deepmd_op") +op_grads_module = get_module("op_grads") +# prevent OOM when using with other backends +# tf.config doesn't work for unclear reason +set_env_if_empty("TF_FORCE_GPU_ALLOW_GROWTH", "true", verbose=False) + +# FLOAT_PREC +GLOBAL_TF_FLOAT_PRECISION = tf.dtypes.as_dtype(GLOBAL_NP_FLOAT_PRECISION) + + +def global_cvt_2_tf_float(xx: tf.Tensor) -> tf.Tensor: + """Cast tensor to globally set TF precision. + + Parameters + ---------- + xx : tf.Tensor + input tensor + + Returns + ------- + tf.Tensor + output tensor cast to `GLOBAL_TF_FLOAT_PRECISION` + """ + return tf.cast(xx, GLOBAL_TF_FLOAT_PRECISION) + + +def global_cvt_2_ener_float(xx: tf.Tensor) -> tf.Tensor: + """Cast tensor to globally set energy precision. + + Parameters + ---------- + xx : tf.Tensor + input tensor + + Returns + ------- + tf.Tensor + output tensor cast to `GLOBAL_ENER_FLOAT_PRECISION` + """ + return tf.cast(xx, GLOBAL_ENER_FLOAT_PRECISION) diff --git a/deepmd/fit/__init__.py b/deepmd/tf/fit/__init__.py similarity index 100% rename from deepmd/fit/__init__.py rename to deepmd/tf/fit/__init__.py diff --git a/deepmd/fit/dipole.py b/deepmd/tf/fit/dipole.py similarity index 77% rename from deepmd/fit/dipole.py rename to deepmd/tf/fit/dipole.py index 312bcc9bf1..f98d52c7bd 100644 --- a/deepmd/fit/dipole.py +++ b/deepmd/tf/fit/dipole.py @@ -6,30 +6,33 @@ import numpy as np -from deepmd.common import ( +from deepmd.tf.common import ( cast_precision, get_activation_func, get_precision, ) -from deepmd.env import ( +from deepmd.tf.env import ( tf, ) -from deepmd.fit.fitting import ( +from deepmd.tf.fit.fitting import ( Fitting, ) -from deepmd.loss.loss import ( +from deepmd.tf.loss.loss import ( Loss, ) -from deepmd.loss.tensor import ( +from deepmd.tf.loss.tensor import ( TensorLoss, ) -from deepmd.utils.graph import ( 
+from deepmd.tf.utils.graph import ( get_fitting_net_variables_from_graph_def, ) -from deepmd.utils.network import ( +from deepmd.tf.utils.network import ( one_layer, one_layer_rand_seed_shift, ) +from deepmd.utils.version import ( + check_version_compatibility, +) @Fitting.register("dipole") @@ -38,8 +41,12 @@ class DipoleFittingSeA(Fitting): Parameters ---------- - descrpt : tf.Tensor - The descrptor + ntypes + The ntypes of the descrptor :math:`\mathcal{D}` + dim_descrpt + The dimension of the descrptor :math:`\mathcal{D}` + embedding_width + The rotation matrix dimension of the descrptor :math:`\mathcal{D}` neuron : List[int] Number of neurons in each hidden layer of the fitting net resnet_dt : bool @@ -55,11 +62,16 @@ class DipoleFittingSeA(Fitting): The precision of the embedding net parameters. Supported options are |PRECISION| uniform_seed Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed + mixed_types : bool + If true, use a uniform fitting net for all atom types, otherwise use + different fitting nets for different atom types. 
""" def __init__( self, - descrpt: tf.Tensor, + ntypes: int, + dim_descrpt: int, + embedding_width: int, neuron: List[int] = [120, 120, 120], resnet_dt: bool = True, sel_type: Optional[List[int]] = None, @@ -67,11 +79,12 @@ def __init__( activation_function: str = "tanh", precision: str = "default", uniform_seed: bool = False, + mixed_types: bool = False, **kwargs, ) -> None: """Constructor.""" - self.ntypes = descrpt.get_ntypes() - self.dim_descrpt = descrpt.get_dim_out() + self.ntypes = ntypes + self.dim_descrpt = dim_descrpt self.n_neuron = neuron self.resnet_dt = resnet_dt self.sel_type = sel_type @@ -83,13 +96,15 @@ def __init__( self.seed = seed self.uniform_seed = uniform_seed self.seed_shift = one_layer_rand_seed_shift() + self.activation_function_name = activation_function self.fitting_activation_fn = get_activation_func(activation_function) self.fitting_precision = get_precision(precision) - self.dim_rot_mat_1 = descrpt.get_dim_rot_mat_1() + self.dim_rot_mat_1 = embedding_width self.dim_rot_mat = self.dim_rot_mat_1 * 3 self.useBN = False self.fitting_net_variables = None self.mixed_prec = None + self.mixed_types = mixed_types def get_sel_type(self) -> int: """Get selected type.""" @@ -99,6 +114,7 @@ def get_out_size(self) -> int: """Get the output size. 
Should be 3.""" return 3 + @cast_precision def _build_lower(self, start_index, natoms, inputs, rot_mat, suffix="", reuse=None): # cut-out inputs inputs_i = tf.slice(inputs, [0, start_index, 0], [-1, natoms, -1]) @@ -162,7 +178,6 @@ def _build_lower(self, start_index, natoms, inputs, rot_mat, suffix="", reuse=No final_layer = tf.reshape(final_layer, [tf.shape(inputs)[0], natoms, 3]) return final_layer - @cast_precision def build( self, input_d: tf.Tensor, @@ -205,8 +220,12 @@ def build( start_index = 0 inputs = tf.reshape(input_d, [-1, natoms[0], self.dim_descrpt]) rot_mat = tf.reshape(rot_mat, [-1, natoms[0], self.dim_rot_mat]) + if nframes is None: + nframes = tf.shape(inputs)[0] - if type_embedding is not None: + if self.mixed_types or type_embedding is not None: + # keep old behavior + self.mixed_types = True nloc_mask = tf.reshape( tf.tile(tf.repeat(self.sel_mask, natoms[2:]), [nframes]), [nframes, -1] ) @@ -218,13 +237,30 @@ def build( self.nloc_masked = tf.shape( tf.reshape(self.atype_nloc_masked, [nframes, -1]) )[1] + + if type_embedding is not None: atype_embed = tf.nn.embedding_lookup(type_embedding, self.atype_nloc_masked) else: atype_embed = None self.atype_embed = atype_embed + if atype_embed is not None: + inputs = tf.reshape( + tf.reshape(inputs, [nframes, natoms[0], self.dim_descrpt])[nloc_mask], + [-1, self.dim_descrpt], + ) + rot_mat = tf.reshape( + tf.reshape(rot_mat, [nframes, natoms[0], self.dim_rot_mat_1 * 3])[ + nloc_mask + ], + [-1, self.dim_rot_mat_1, 3], + ) + atype_embed = tf.cast(atype_embed, self.fitting_precision) + type_shape = atype_embed.get_shape().as_list() + inputs = tf.concat([inputs, atype_embed], axis=1) + self.dim_descrpt = self.dim_descrpt + type_shape[1] - if atype_embed is None: + if not self.mixed_types: count = 0 outs_list = [] for type_i in range(self.ntypes): @@ -245,20 +281,6 @@ def build( count += 1 outs = tf.concat(outs_list, axis=1) else: - inputs = tf.reshape( - tf.reshape(inputs, [nframes, natoms[0], 
self.dim_descrpt])[nloc_mask], - [-1, self.dim_descrpt], - ) - rot_mat = tf.reshape( - tf.reshape(rot_mat, [nframes, natoms[0], self.dim_rot_mat_1 * 3])[ - nloc_mask - ], - [-1, self.dim_rot_mat_1, 3], - ) - atype_embed = tf.cast(atype_embed, self.fitting_precision) - type_shape = atype_embed.get_shape().as_list() - inputs = tf.concat([inputs, atype_embed], axis=1) - self.dim_descrpt = self.dim_descrpt + type_shape[1] inputs = tf.reshape(inputs, [nframes, self.nloc_masked, self.dim_descrpt]) rot_mat = tf.reshape( rot_mat, [nframes, self.nloc_masked, self.dim_rot_mat_1 * 3] @@ -327,3 +349,63 @@ def get_loss(self, loss: dict, lr) -> Loss: tensor_size=3, label_name="dipole", ) + + def serialize(self, suffix: str) -> dict: + """Serialize the model. + + Returns + ------- + dict + The serialized data + """ + data = { + "@class": "Fitting", + "type": "dipole", + "@version": 1, + "var_name": "dipole", + "ntypes": self.ntypes, + "dim_descrpt": self.dim_descrpt, + "embedding_width": self.dim_rot_mat_1, + "mixed_types": self.mixed_types, + "dim_out": 3, + "neuron": self.n_neuron, + "resnet_dt": self.resnet_dt, + "activation_function": self.activation_function_name, + "precision": self.fitting_precision.name, + "exclude_types": [], + "nets": self.serialize_network( + ntypes=self.ntypes, + ndim=0 if self.mixed_types else 1, + in_dim=self.dim_descrpt, + out_dim=self.dim_rot_mat_1, + neuron=self.n_neuron, + activation_function=self.activation_function_name, + resnet_dt=self.resnet_dt, + variables=self.fitting_net_variables, + suffix=suffix, + ), + } + return data + + @classmethod + def deserialize(cls, data: dict, suffix: str): + """Deserialize the model. 
+ + Parameters + ---------- + data : dict + The serialized data + + Returns + ------- + Model + The deserialized model + """ + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 1, 1) + fitting = cls(**data) + fitting.fitting_net_variables = cls.deserialize_network( + data["nets"], + suffix=suffix, + ) + return fitting diff --git a/deepmd/fit/dos.py b/deepmd/tf/fit/dos.py similarity index 83% rename from deepmd/fit/dos.py rename to deepmd/tf/fit/dos.py index bbf7d39a09..7989752e5a 100644 --- a/deepmd/fit/dos.py +++ b/deepmd/tf/fit/dos.py @@ -7,42 +7,48 @@ import numpy as np -from deepmd.common import ( +from deepmd.tf.common import ( add_data_requirement, cast_precision, get_activation_func, get_precision, ) -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_TF_FLOAT_PRECISION, tf, ) -from deepmd.fit.fitting import ( +from deepmd.tf.fit.fitting import ( Fitting, ) -from deepmd.loss.dos import ( +from deepmd.tf.loss.dos import ( DOSLoss, ) -from deepmd.loss.loss import ( +from deepmd.tf.loss.loss import ( Loss, ) -from deepmd.nvnmd.fit.ener import ( +from deepmd.tf.nvnmd.fit.ener import ( one_layer_nvnmd, ) -from deepmd.nvnmd.utils.config import ( +from deepmd.tf.nvnmd.utils.config import ( nvnmd_cfg, ) -from deepmd.utils.errors import ( +from deepmd.tf.utils.errors import ( GraphWithoutTensorError, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( get_fitting_net_variables_from_graph_def, get_tensor_by_name_from_graph, ) -from deepmd.utils.network import one_layer as one_layer_deepmd -from deepmd.utils.network import ( +from deepmd.tf.utils.network import one_layer as one_layer_deepmd +from deepmd.tf.utils.network import ( one_layer_rand_seed_shift, ) +from deepmd.utils.out_stat import ( + compute_stats_from_redu, +) +from deepmd.utils.version import ( + check_version_compatibility, +) log = logging.getLogger(__name__) @@ -54,8 +60,10 @@ class DOSFitting(Fitting): Parameters ---------- - descrpt - The 
descrptor :math:`\mathcal{D}` + ntypes + The ntypes of the descrptor :math:`\mathcal{D}` + dim_descrpt + The dimension of the descrptor :math:`\mathcal{D}` neuron Number of neurons :math:`N` in each hidden layer of the fitting net resnet_dt @@ -87,11 +95,15 @@ class DOSFitting(Fitting): use_aparam_as_mask: bool, optional If True, the atomic parameters will be used as a mask that determines the atom is real/virtual. And the aparam will not be used as the atomic parameters for embedding. + mixed_types : bool + If true, use a uniform fitting net for all atom types, otherwise use + different fitting nets for different atom types. """ def __init__( self, - descrpt: tf.Tensor, + ntypes: int, + dim_descrpt: int, neuron: List[int] = [120, 120, 120], resnet_dt: bool = True, numb_fparam: int = 0, @@ -105,12 +117,13 @@ def __init__( uniform_seed: bool = False, layer_name: Optional[List[Optional[str]]] = None, use_aparam_as_mask: bool = False, + mixed_types: bool = False, **kwargs, ) -> None: """Constructor.""" # model param - self.ntypes = descrpt.get_ntypes() - self.dim_descrpt = descrpt.get_dim_out() + self.ntypes = ntypes + self.dim_descrpt = dim_descrpt self.use_aparam_as_mask = use_aparam_as_mask self.numb_fparam = numb_fparam @@ -124,6 +137,7 @@ def __init__( self.seed = seed self.uniform_seed = uniform_seed self.seed_shift = one_layer_rand_seed_shift() + self.activation_function = activation_function self.fitting_activation_fn = get_activation_func(activation_function) self.fitting_precision = get_precision(precision) self.trainable = trainable @@ -142,16 +156,16 @@ def __init__( add_data_requirement( "fparam", self.numb_fparam, atomic=False, must=True, high_prec=False ) - self.fparam_avg = None - self.fparam_std = None - self.fparam_inv_std = None + self.fparam_avg = None + self.fparam_std = None + self.fparam_inv_std = None if self.numb_aparam > 0: add_data_requirement( "aparam", self.numb_aparam, atomic=True, must=True, high_prec=False ) - self.aparam_avg = None - 
self.aparam_std = None - self.aparam_inv_std = None + self.aparam_avg = None + self.aparam_std = None + self.aparam_inv_std = None self.fitting_net_variables = None self.mixed_prec = None @@ -161,6 +175,7 @@ def __init__( assert ( len(self.layer_name) == len(self.n_neuron) + 1 ), "length of layer_name should be that of n_neuron + 1" + self.mixed_types = mixed_types def get_numb_fparam(self) -> int: """Get the number of frame parameters.""" @@ -225,8 +240,10 @@ def _compute_output_stats(self, all_stat, rcond=1e-3, mixed_type=False): sys_tynatom = np.reshape(sys_tynatom, [nsys, -1]) sys_tynatom = sys_tynatom[:, 2:] - dos_shift, resd, rank, s_value = np.linalg.lstsq( - sys_tynatom, sys_dos, rcond=rcond + dos_shift, _ = compute_stats_from_redu( + sys_dos, + sys_tynatom, + rcond=rcond, ) return dos_shift @@ -492,13 +509,22 @@ def build( tf.slice(atype_nall, [0, 0], [-1, natoms[0]]), [-1] ) ## lammps will make error if type_embedding is not None: + # keep old behavior + self.mixed_types = True atype_embed = tf.nn.embedding_lookup(type_embedding, self.atype_nloc) else: atype_embed = None self.atype_embed = atype_embed + if atype_embed is not None: + atype_embed = tf.cast(atype_embed, GLOBAL_TF_FLOAT_PRECISION) + type_shape = atype_embed.get_shape().as_list() + inputs = tf.concat( + [tf.reshape(inputs, [-1, self.dim_descrpt]), atype_embed], axis=1 + ) + self.dim_descrpt = self.dim_descrpt + type_shape[1] - if atype_embed is None: + if not self.mixed_types: start_index = 0 outs_list = [] for type_i in range(self.ntypes): @@ -516,7 +542,11 @@ def build( final_layer = tf.reshape( final_layer, - [tf.shape(inputs)[0] * self.numb_dos, natoms[2 + type_i]], + [ + tf.shape(inputs)[0], + natoms[2 + type_i], + self.numb_dos, + ], ) outs_list.append(final_layer) start_index += natoms[2 + type_i] @@ -525,13 +555,6 @@ def build( outs = tf.concat(outs_list, axis=1) # with type embedding else: - atype_embed = tf.cast(atype_embed, GLOBAL_TF_FLOAT_PRECISION) - type_shape = 
atype_embed.get_shape().as_list() - inputs = tf.concat( - [tf.reshape(inputs, [-1, self.dim_descrpt]), atype_embed], axis=1 - ) - original_dim_descrpt = self.dim_descrpt - self.dim_descrpt = self.dim_descrpt + type_shape[1] inputs = tf.reshape(inputs, [-1, natoms[0], self.dim_descrpt]) final_layer = self._build_lower( 0, @@ -545,7 +568,8 @@ def build( ) outs = tf.reshape( - final_layer, [tf.shape(inputs)[0] * self.numb_dos, natoms[0]] + final_layer, + [tf.shape(inputs)[0], natoms[0], self.numb_dos], ) # add bias # self.atom_ener_before = outs @@ -557,7 +581,7 @@ def build( # self.atom_ener_after = outs tf.summary.histogram("fitting_net_output", outs) - return tf.reshape(outs, [-1]) + return outs def init_variables( self, @@ -636,3 +660,81 @@ def get_loss(self, loss: dict, lr) -> Loss: return DOSLoss( **loss, starter_learning_rate=lr.start_lr(), numb_dos=self.get_numb_dos() ) + + @classmethod + def deserialize(cls, data: dict, suffix: str = ""): + """Deserialize the model. + + Parameters + ---------- + data : dict + The serialized data + + Returns + ------- + Model + The deserialized model + """ + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 1, 1) + data["numb_dos"] = data.pop("dim_out") + fitting = cls(**data) + fitting.fitting_net_variables = cls.deserialize_network( + data["nets"], + suffix=suffix, + ) + fitting.bias_dos = data["@variables"]["bias_atom_e"] + if fitting.numb_fparam > 0: + fitting.fparam_avg = data["@variables"]["fparam_avg"] + fitting.fparam_inv_std = data["@variables"]["fparam_inv_std"] + if fitting.numb_aparam > 0: + fitting.aparam_avg = data["@variables"]["aparam_avg"] + fitting.aparam_inv_std = data["@variables"]["aparam_inv_std"] + return fitting + + def serialize(self, suffix: str = "") -> dict: + """Serialize the model. 
+ + Returns + ------- + dict + The serialized data + """ + data = { + "@class": "Fitting", + "type": "dos", + "@version": 1, + "var_name": "dos", + "ntypes": self.ntypes, + "dim_descrpt": self.dim_descrpt, + "mixed_types": self.mixed_types, + "dim_out": self.numb_dos, + "neuron": self.n_neuron, + "resnet_dt": self.resnet_dt, + "numb_fparam": self.numb_fparam, + "numb_aparam": self.numb_aparam, + "rcond": self.rcond, + "trainable": self.trainable, + "activation_function": self.activation_function, + "precision": self.fitting_precision.name, + "exclude_types": [], + "nets": self.serialize_network( + ntypes=self.ntypes, + ndim=0 if self.mixed_types else 1, + in_dim=self.dim_descrpt + self.numb_fparam + self.numb_aparam, + out_dim=self.numb_dos, + neuron=self.n_neuron, + activation_function=self.activation_function, + resnet_dt=self.resnet_dt, + variables=self.fitting_net_variables, + suffix=suffix, + ), + "@variables": { + "bias_atom_e": self.bias_dos, + "fparam_avg": self.fparam_avg, + "fparam_inv_std": self.fparam_inv_std, + "aparam_avg": self.aparam_avg, + "aparam_inv_std": self.aparam_inv_std, + }, + } + return data diff --git a/deepmd/fit/ener.py b/deepmd/tf/fit/ener.py similarity index 84% rename from deepmd/fit/ener.py rename to deepmd/tf/fit/ener.py index 4c15e57124..d38d0416af 100644 --- a/deepmd/fit/ener.py +++ b/deepmd/tf/fit/ener.py @@ -1,57 +1,70 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging from typing import ( + TYPE_CHECKING, List, Optional, ) import numpy as np -from deepmd.common import ( +from deepmd.tf.common import ( add_data_requirement, cast_precision, get_activation_func, get_precision, ) -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_TF_FLOAT_PRECISION, global_cvt_2_tf_float, tf, ) -from deepmd.fit.fitting import ( +from deepmd.tf.fit.fitting import ( Fitting, ) -from deepmd.infer import ( +from deepmd.tf.infer import ( DeepPotential, ) -from deepmd.loss.ener import ( +from deepmd.tf.loss.ener import ( 
EnerDipoleLoss, EnerSpinLoss, EnerStdLoss, ) -from deepmd.loss.loss import ( +from deepmd.tf.loss.loss import ( Loss, ) -from deepmd.nvnmd.fit.ener import ( +from deepmd.tf.nvnmd.fit.ener import ( one_layer_nvnmd, ) -from deepmd.nvnmd.utils.config import ( +from deepmd.tf.nvnmd.utils.config import ( nvnmd_cfg, ) -from deepmd.utils.errors import ( +from deepmd.tf.utils.errors import ( GraphWithoutTensorError, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( get_fitting_net_variables_from_graph_def, get_tensor_by_name_from_graph, ) -from deepmd.utils.network import one_layer as one_layer_deepmd -from deepmd.utils.network import ( +from deepmd.tf.utils.network import one_layer as one_layer_deepmd +from deepmd.tf.utils.network import ( one_layer_rand_seed_shift, ) -from deepmd.utils.spin import ( +from deepmd.tf.utils.spin import ( Spin, ) +from deepmd.utils.finetune import ( + change_energy_bias_lower, +) +from deepmd.utils.out_stat import ( + compute_stats_from_redu, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +if TYPE_CHECKING: + pass log = logging.getLogger(__name__) @@ -91,8 +104,10 @@ class EnerFitting(Fitting): Parameters ---------- - descrpt - The descrptor :math:`\mathcal{D}` + ntypes + The ntypes of the descrptor :math:`\mathcal{D}` + dim_descrpt + The dimension of the descrptor :math:`\mathcal{D}` neuron Number of neurons :math:`N` in each hidden layer of the fitting net resnet_dt @@ -126,11 +141,15 @@ class EnerFitting(Fitting): use_aparam_as_mask: bool, optional If True, the atomic parameters will be used as a mask that determines the atom is real/virtual. And the aparam will not be used as the atomic parameters for embedding. + mixed_types : bool + If true, use a uniform fitting net for all atom types, otherwise use + different fitting nets for different atom types. 
""" def __init__( self, - descrpt: tf.Tensor, + ntypes: int, + dim_descrpt: int, neuron: List[int] = [120, 120, 120], resnet_dt: bool = True, numb_fparam: int = 0, @@ -146,12 +165,13 @@ def __init__( layer_name: Optional[List[Optional[str]]] = None, use_aparam_as_mask: bool = False, spin: Optional[Spin] = None, + mixed_types: bool = False, **kwargs, ) -> None: """Constructor.""" # model param - self.ntypes = descrpt.get_ntypes() - self.dim_descrpt = descrpt.get_dim_out() + self.ntypes = ntypes + self.dim_descrpt = dim_descrpt self.use_aparam_as_mask = use_aparam_as_mask # args = ()\ # .add('numb_fparam', int, default = 0)\ @@ -176,6 +196,7 @@ def __init__( self.ntypes_spin = self.spin.get_ntypes_spin() if self.spin is not None else 0 self.seed_shift = one_layer_rand_seed_shift() self.tot_ener_zero = tot_ener_zero + self.activation_function_name = activation_function self.fitting_activation_fn = get_activation_func(activation_function) self.fitting_precision = get_precision(precision) self.trainable = trainable @@ -188,7 +209,7 @@ def __init__( ), "length of trainable should be that of n_neuron + 1" self.atom_ener = [] self.atom_ener_v = atom_ener - for at, ae in enumerate(atom_ener): + for at, ae in enumerate(atom_ener if atom_ener is not None else []): if ae is not None: self.atom_ener.append( tf.constant(ae, GLOBAL_TF_FLOAT_PRECISION, name="atom_%d_ener" % at) @@ -202,16 +223,16 @@ def __init__( add_data_requirement( "fparam", self.numb_fparam, atomic=False, must=True, high_prec=False ) - self.fparam_avg = None - self.fparam_std = None - self.fparam_inv_std = None + self.fparam_avg = None + self.fparam_std = None + self.fparam_inv_std = None if self.numb_aparam > 0: add_data_requirement( "aparam", self.numb_aparam, atomic=True, must=True, high_prec=False ) - self.aparam_avg = None - self.aparam_std = None - self.aparam_inv_std = None + self.aparam_avg = None + self.aparam_std = None + self.aparam_inv_std = None self.fitting_net_variables = None self.mixed_prec = 
None @@ -221,6 +242,7 @@ def __init__( assert ( len(self.layer_name) == len(self.n_neuron) + 1 ), "length of layer_name should be that of n_neuron + 1" + self.mixed_types = mixed_types def get_numb_fparam(self) -> int: """Get the number of frame parameters.""" @@ -284,21 +306,17 @@ def _compute_output_stats(self, all_stat, rcond=1e-3, mixed_type=False): # In this situation, we directly use these assigned energies instead of computing stats. # This will make the loss decrease quickly assigned_atom_ener = np.array( - [ee for ee in self.atom_ener_v if ee is not None] + [ee if ee is not None else np.nan for ee in self.atom_ener_v] ) - assigned_ener_idx = [ - ii for ii, ee in enumerate(self.atom_ener_v) if ee is not None - ] - # np.dot out size: nframe - sys_ener -= np.dot(sys_tynatom[:, assigned_ener_idx], assigned_atom_ener) - sys_tynatom[:, assigned_ener_idx] = 0.0 - energy_shift, resd, rank, s_value = np.linalg.lstsq( - sys_tynatom, sys_ener, rcond=rcond + else: + assigned_atom_ener = None + energy_shift, _ = compute_stats_from_redu( + sys_ener.reshape(-1, 1), + sys_tynatom, + assigned_bias=assigned_atom_ener, + rcond=rcond, ) - if len(self.atom_ener) > 0: - for ii in assigned_ener_idx: - energy_shift[ii] = self.atom_ener_v[ii] - return energy_shift + return energy_shift.ravel() def compute_input_stats(self, all_stat: dict, protection: float = 1e-2) -> None: """Compute the input statistics. 
@@ -572,6 +590,8 @@ def build( ) else: inputs_zero = tf.zeros_like(inputs, dtype=GLOBAL_TF_FLOAT_PRECISION) + else: + inputs_zero = None if bias_atom_e is not None: assert len(bias_atom_e) == self.ntypes @@ -615,13 +635,29 @@ def build( ): type_embedding = nvnmd_cfg.map["t_ebd"] if type_embedding is not None: + # keep old behavior + self.mixed_types = True atype_embed = tf.nn.embedding_lookup(type_embedding, self.atype_nloc) else: atype_embed = None self.atype_embed = atype_embed + original_dim_descrpt = self.dim_descrpt + if atype_embed is not None: + atype_embed = tf.cast(atype_embed, GLOBAL_TF_FLOAT_PRECISION) + type_shape = atype_embed.get_shape().as_list() + inputs = tf.concat( + [tf.reshape(inputs, [-1, self.dim_descrpt]), atype_embed], axis=1 + ) + self.dim_descrpt = self.dim_descrpt + type_shape[1] + if len(self.atom_ener): + assert inputs_zero is not None + inputs_zero = tf.concat( + [tf.reshape(inputs_zero, [-1, original_dim_descrpt]), atype_embed], + axis=1, + ) - if atype_embed is None: + if not self.mixed_types: start_index = 0 outs_list = [] for type_i in range(ntypes_atom): @@ -660,13 +696,6 @@ def build( outs = tf.concat(outs_list, axis=1) # with type embedding else: - atype_embed = tf.cast(atype_embed, GLOBAL_TF_FLOAT_PRECISION) - type_shape = atype_embed.get_shape().as_list() - inputs = tf.concat( - [tf.reshape(inputs, [-1, self.dim_descrpt]), atype_embed], axis=1 - ) - original_dim_descrpt = self.dim_descrpt - self.dim_descrpt = self.dim_descrpt + type_shape[1] inputs = tf.reshape(inputs, [-1, natoms[0], self.dim_descrpt]) final_layer = self._build_lower( 0, @@ -680,10 +709,6 @@ def build( ) if len(self.atom_ener): # remove contribution in vacuum - inputs_zero = tf.concat( - [tf.reshape(inputs_zero, [-1, original_dim_descrpt]), atype_embed], - axis=1, - ) inputs_zero = tf.reshape(inputs_zero, [-1, natoms[0], self.dim_descrpt]) zero_layer = self._build_lower( 0, @@ -780,109 +805,21 @@ def change_energy_bias( frozen_model, origin_type_map, 
full_type_map, - bias_shift="delta", + bias_adjust_mode="change-by-statistic", ntest=10, ) -> None: - """Change the energy bias according to the input data and the pretrained model. - - Parameters - ---------- - data : DeepmdDataSystem - The training data. - frozen_model : str - The path file of frozen model. - origin_type_map : list - The original type_map in dataset, they are targets to change the energy bias. - full_type_map : str - The full type_map in pretrained model - bias_shift : str - The mode for changing energy bias : ['delta', 'statistic'] - 'delta' : perform predictions on energies of target dataset, - and do least sqaure on the errors to obtain the target shift as bias. - 'statistic' : directly use the statistic energy bias in the target dataset. - ntest : int - The number of test samples in a system to change the energy bias. - """ - type_numbs = [] - energy_ground_truth = [] - energy_predict = [] - sorter = np.argsort(full_type_map) - idx_type_map = sorter[ - np.searchsorted(full_type_map, origin_type_map, sorter=sorter) - ] - mixed_type = data.mixed_type - numb_type = len(full_type_map) dp = None - if bias_shift == "delta": + if bias_adjust_mode == "change-by-statistic": # init model dp = DeepPotential(frozen_model) - for sys in data.data_systems: - test_data = sys.get_test() - nframes = test_data["box"].shape[0] - numb_test = min(nframes, ntest) - if mixed_type: - atype = test_data["type"][:numb_test].reshape([numb_test, -1]) - else: - atype = test_data["type"][0] - assert np.array( - [i in idx_type_map for i in list(set(atype.reshape(-1)))] - ).all(), "Some types are not in 'type_map'!" 
- energy_ground_truth.append( - test_data["energy"][:numb_test].reshape([numb_test, 1]) - ) - if mixed_type: - type_numbs.append( - np.array( - [(atype == i).sum(axis=-1) for i in idx_type_map], - dtype=np.int32, - ).T - ) - else: - type_numbs.append( - np.tile( - np.bincount(atype, minlength=numb_type)[idx_type_map], - (numb_test, 1), - ) - ) - if bias_shift == "delta": - coord = test_data["coord"][:numb_test].reshape([numb_test, -1]) - if sys.pbc: - box = test_data["box"][:numb_test] - else: - box = None - ret = dp.eval(coord, box, atype, mixed_type=mixed_type) - energy_predict.append(ret[0].reshape([numb_test, 1])) - type_numbs = np.concatenate(type_numbs) - energy_ground_truth = np.concatenate(energy_ground_truth) - old_bias = self.bias_atom_e[idx_type_map] - if bias_shift == "delta": - energy_predict = np.concatenate(energy_predict) - bias_diff = energy_ground_truth - energy_predict - delta_bias = np.linalg.lstsq(type_numbs, bias_diff, rcond=None)[0] - unbias_e = energy_predict + type_numbs @ delta_bias - atom_numbs = type_numbs.sum(-1) - rmse_ae = np.sqrt( - np.mean( - np.square( - (unbias_e.ravel() - energy_ground_truth.ravel()) / atom_numbs - ) - ) - ) - self.bias_atom_e[idx_type_map] += delta_bias.reshape(-1) - log.info( - f"RMSE of atomic energy after linear regression is: {rmse_ae} eV/atom." 
- ) - elif bias_shift == "statistic": - statistic_bias = np.linalg.lstsq( - type_numbs, energy_ground_truth, rcond=None - )[0] - self.bias_atom_e[idx_type_map] = statistic_bias.reshape(-1) - else: - raise RuntimeError("Unknown bias_shift mode: " + bias_shift) - log.info( - "Change energy bias of {} from {} to {}.".format( - str(origin_type_map), str(old_bias), str(self.bias_atom_e[idx_type_map]) - ) + self.bias_atom_e = change_energy_bias_lower( + data, + dp, + origin_type_map, + full_type_map, + self.bias_atom_e, + bias_adjust_mode=bias_adjust_mode, + ntest=ntest, ) def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None: @@ -921,3 +858,84 @@ def get_loss(self, loss: dict, lr) -> Loss: return EnerSpinLoss(**loss, use_spin=self.spin.use_spin) else: raise RuntimeError("unknown loss type") + + @classmethod + def deserialize(cls, data: dict, suffix: str = ""): + """Deserialize the model. + + Parameters + ---------- + data : dict + The serialized data + + Returns + ------- + Model + The deserialized model + """ + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 1, 1) + fitting = cls(**data) + fitting.fitting_net_variables = cls.deserialize_network( + data["nets"], + suffix=suffix, + ) + fitting.bias_atom_e = data["@variables"]["bias_atom_e"].ravel() + if fitting.numb_fparam > 0: + fitting.fparam_avg = data["@variables"]["fparam_avg"] + fitting.fparam_inv_std = data["@variables"]["fparam_inv_std"] + if fitting.numb_aparam > 0: + fitting.aparam_avg = data["@variables"]["aparam_avg"] + fitting.aparam_inv_std = data["@variables"]["aparam_inv_std"] + return fitting + + def serialize(self, suffix: str = "") -> dict: + """Serialize the model. 
+ + Returns + ------- + dict + The serialized data + """ + data = { + "@class": "Fitting", + "type": "ener", + "@version": 1, + "var_name": "energy", + "ntypes": self.ntypes, + "dim_descrpt": self.dim_descrpt, + "mixed_types": self.mixed_types, + "dim_out": 1, + "neuron": self.n_neuron, + "resnet_dt": self.resnet_dt, + "numb_fparam": self.numb_fparam, + "numb_aparam": self.numb_aparam, + "rcond": self.rcond, + "tot_ener_zero": self.tot_ener_zero, + "trainable": self.trainable, + "atom_ener": self.atom_ener_v, + "activation_function": self.activation_function_name, + "precision": self.fitting_precision.name, + "layer_name": self.layer_name, + "use_aparam_as_mask": self.use_aparam_as_mask, + "spin": self.spin, + "exclude_types": [], + "nets": self.serialize_network( + ntypes=self.ntypes, + ndim=0 if self.mixed_types else 1, + in_dim=self.dim_descrpt + self.numb_fparam + self.numb_aparam, + neuron=self.n_neuron, + activation_function=self.activation_function_name, + resnet_dt=self.resnet_dt, + variables=self.fitting_net_variables, + suffix=suffix, + ), + "@variables": { + "bias_atom_e": self.bias_atom_e.reshape(-1, 1), + "fparam_avg": self.fparam_avg, + "fparam_inv_std": self.fparam_inv_std, + "aparam_avg": self.aparam_avg, + "aparam_inv_std": self.aparam_inv_std, + }, + } + return data diff --git a/deepmd/tf/fit/fitting.py b/deepmd/tf/fit/fitting.py new file mode 100644 index 0000000000..0f73230bc8 --- /dev/null +++ b/deepmd/tf/fit/fitting.py @@ -0,0 +1,255 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import re +from abc import ( + abstractmethod, +) +from typing import ( + List, + Optional, +) + +from deepmd.common import ( + j_get_type, +) +from deepmd.dpmodel.utils.network import ( + FittingNet, + NetworkCollection, +) +from deepmd.tf.env import ( + FITTING_NET_PATTERN, + tf, +) +from deepmd.tf.loss.loss import ( + Loss, +) +from deepmd.tf.utils import ( + PluginVariant, +) +from deepmd.utils.plugin import ( + make_plugin_registry, +) + + +class 
Fitting(PluginVariant, make_plugin_registry("fitting")): + def __new__(cls, *args, **kwargs): + if cls is Fitting: + cls = cls.get_class_by_type(j_get_type(kwargs, cls.__name__)) + return super().__new__(cls) + + @property + def precision(self) -> tf.DType: + """Precision of fitting network.""" + return self.fitting_precision + + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + suffix: str = "", + ) -> None: + """Init the fitting net variables with the given dict. + + Parameters + ---------- + graph : tf.Graph + The input frozen model graph + graph_def : tf.GraphDef + The input frozen model graph_def + suffix : str + suffix to name scope + + Notes + ----- + This method is called by others when the fitting supported initialization from the given variables. + """ + raise NotImplementedError( + "Fitting %s doesn't support initialization from the given variables!" + % type(self).__name__ + ) + + @abstractmethod + def get_loss(self, loss: dict, lr) -> Loss: + """Get the loss function. + + Parameters + ---------- + loss : dict + the loss dict + lr : LearningRateExp + the learning rate + + Returns + ------- + Loss + the loss function + """ + + @classmethod + def deserialize(cls, data: dict, suffix: str = "") -> "Fitting": + """Deserialize the fitting. + + There is no suffix in a native DP model, but it is important + for the TF backend. + + Parameters + ---------- + data : dict + The serialized data + suffix : str, optional + Name suffix to identify this fitting + + Returns + ------- + Fitting + The deserialized fitting + """ + if cls is Fitting: + return Fitting.get_class_by_type( + j_get_type(data, cls.__name__) + ).deserialize(data, suffix=suffix) + raise NotImplementedError("Not implemented in class %s" % cls.__name__) + + def serialize(self, suffix: str = "") -> dict: + """Serialize the fitting. + + There is no suffix in a native DP model, but it is important + for the TF backend. 
+ + Returns + ------- + dict + The serialized data + suffix : str, optional + Name suffix to identify this fitting + """ + raise NotImplementedError("Not implemented in class %s" % self.__name__) + + def serialize_network( + self, + ntypes: int, + ndim: int, + in_dim: int, + neuron: List[int], + activation_function: str, + resnet_dt: bool, + variables: dict, + out_dim: Optional[int] = 1, + suffix: str = "", + ) -> dict: + """Serialize network. + + Parameters + ---------- + ntypes : int + The number of types + ndim : int + The dimension of elements + in_dim : int + The input dimension + neuron : List[int] + The neuron list + activation_function : str + The activation function + resnet_dt : bool + Whether to use resnet + variables : dict + The input variables + suffix : str, optional + The suffix of the scope + out_dim : int, optional + The output dimension + + Returns + ------- + dict + The converted network data + """ + fittings = NetworkCollection( + ntypes=ntypes, + ndim=ndim, + network_type="fitting_network", + ) + if suffix != "": + fitting_net_pattern = ( + FITTING_NET_PATTERN.replace("/(idt)", suffix + "/(idt)") + .replace("/(bias)", suffix + "/(bias)") + .replace("/(matrix)", suffix + "/(matrix)") + ) + else: + fitting_net_pattern = FITTING_NET_PATTERN + for key, value in variables.items(): + m = re.search(fitting_net_pattern, key) + m = [mm for mm in m.groups() if mm is not None] + layer_idx = int(m[0]) if m[0] != "final" else len(neuron) + weight_name = m[-1] + if ndim == 0: + network_idx = () + elif ndim == 1: + network_idx = (int(m[1]),) + else: + raise ValueError(f"Invalid ndim: {ndim}") + if fittings[network_idx] is None: + # initialize the network if it is not initialized + fittings[network_idx] = FittingNet( + in_dim=in_dim, + out_dim=out_dim, + neuron=neuron, + activation_function=activation_function, + resnet_dt=resnet_dt, + precision=self.precision.name, + bias_out=True, + ) + assert fittings[network_idx] is not None + if weight_name == "idt": + 
value = value.ravel() + fittings[network_idx][layer_idx][weight_name] = value + return fittings.serialize() + + @classmethod + def deserialize_network(cls, data: dict, suffix: str = "") -> dict: + """Deserialize network. + + Parameters + ---------- + data : dict + The input network data + suffix : str, optional + The suffix of the scope + + Returns + ------- + variables : dict + The input variables + """ + fitting_net_variables = {} + fittings = NetworkCollection.deserialize(data) + for ii in range(fittings.ntypes**fittings.ndim): + net_idx = [] + rest_ii = ii + for _ in range(fittings.ndim): + net_idx.append(rest_ii % fittings.ntypes) + rest_ii //= fittings.ntypes + net_idx = tuple(net_idx) + if fittings.ndim == 0: + key = "" + elif fittings.ndim == 1: + key = "_type_" + str(net_idx[0]) + else: + raise ValueError(f"Invalid ndim: {fittings.ndim}") + network = fittings[net_idx] + assert network is not None + for layer_idx, layer in enumerate(network.layers): + if layer_idx == len(network.layers) - 1: + layer_name = "final_layer" + else: + layer_name = f"layer_{layer_idx}" + fitting_net_variables[f"{layer_name}{key}{suffix}/matrix"] = layer.w + fitting_net_variables[f"{layer_name}{key}{suffix}/bias"] = layer.b + if layer.idt is not None: + fitting_net_variables[f"{layer_name}{key}{suffix}/idt"] = ( + layer.idt.reshape(1, -1) + ) + else: + # prevent keyError + fitting_net_variables[f"{layer_name}{key}{suffix}/idt"] = 0.0 + return fitting_net_variables diff --git a/deepmd/fit/polar.py b/deepmd/tf/fit/polar.py similarity index 83% rename from deepmd/fit/polar.py rename to deepmd/tf/fit/polar.py index 8f6631866c..473b57ff54 100644 --- a/deepmd/fit/polar.py +++ b/deepmd/tf/fit/polar.py @@ -7,33 +7,37 @@ import numpy as np -from deepmd.common import ( +from deepmd.tf.common import ( cast_precision, get_activation_func, get_precision, ) -from deepmd.descriptor import ( +from deepmd.tf.descriptor import ( DescrptSeA, ) -from deepmd.env import ( +from deepmd.tf.env import ( + 
GLOBAL_TF_FLOAT_PRECISION, tf, ) -from deepmd.fit.fitting import ( +from deepmd.tf.fit.fitting import ( Fitting, ) -from deepmd.loss.loss import ( +from deepmd.tf.loss.loss import ( Loss, ) -from deepmd.loss.tensor import ( +from deepmd.tf.loss.tensor import ( TensorLoss, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( get_fitting_net_variables_from_graph_def, ) -from deepmd.utils.network import ( +from deepmd.tf.utils.network import ( one_layer, one_layer_rand_seed_shift, ) +from deepmd.utils.version import ( + check_version_compatibility, +) @Fitting.register("polar") @@ -42,8 +46,12 @@ class PolarFittingSeA(Fitting): Parameters ---------- - descrpt : tf.Tensor - The descrptor + ntypes + The ntypes of the descrptor :math:`\mathcal{D}` + dim_descrpt + The dimension of the descrptor :math:`\mathcal{D}` + embedding_width + The rotation matrix dimension of the descrptor :math:`\mathcal{D}` neuron : List[int] Number of neurons in each hidden layer of the fitting net resnet_dt : bool @@ -65,11 +73,16 @@ class PolarFittingSeA(Fitting): The precision of the embedding net parameters. Supported options are |PRECISION| uniform_seed Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed + mixed_types : bool + If true, use a uniform fitting net for all atom types, otherwise use + different fitting nets for different atom types. 
""" def __init__( self, - descrpt: tf.Tensor, + ntypes: int, + dim_descrpt: int, + embedding_width: int, neuron: List[int] = [120, 120, 120], resnet_dt: bool = True, sel_type: Optional[List[int]] = None, @@ -81,11 +94,12 @@ def __init__( activation_function: str = "tanh", precision: str = "default", uniform_seed: bool = False, + mixed_types: bool = False, **kwargs, ) -> None: """Constructor.""" - self.ntypes = descrpt.get_ntypes() - self.dim_descrpt = descrpt.get_dim_out() + self.ntypes = ntypes + self.dim_descrpt = dim_descrpt self.n_neuron = neuron self.resnet_dt = resnet_dt self.sel_type = sel_type @@ -96,6 +110,7 @@ def __init__( # self.diag_shift = diag_shift self.shift_diag = shift_diag self.scale = scale + self.activation_function_name = activation_function self.fitting_activation_fn = get_activation_func(activation_function) self.fitting_precision = get_precision(precision) if self.sel_type is None: @@ -104,7 +119,19 @@ def __init__( [ii in self.sel_type for ii in range(self.ntypes)], dtype=bool ) if self.scale is None: - self.scale = [1.0 for ii in range(self.ntypes)] + self.scale = np.array([1.0 for ii in range(self.ntypes)]) + else: + if isinstance(self.scale, list): + assert ( + len(self.scale) == ntypes + ), "Scale should be a list of length ntypes." + elif isinstance(self.scale, float): + self.scale = [self.scale for _ in range(ntypes)] + else: + raise ValueError( + "Scale must be a list of float of length ntypes or a float." 
+ ) + self.scale = np.array(self.scale) # if self.diag_shift is None: # self.diag_shift = [0.0 for ii in range(self.ntypes)] if not isinstance(self.sel_type, list): @@ -115,14 +142,12 @@ def __init__( ) # self.ntypes x 1, store the average diagonal value # if type(self.diag_shift) is not list: # self.diag_shift = [self.diag_shift] - if not isinstance(self.scale, list): - self.scale = [self.scale for ii in range(self.ntypes)] - self.scale = np.array(self.scale) - self.dim_rot_mat_1 = descrpt.get_dim_rot_mat_1() + self.dim_rot_mat_1 = embedding_width self.dim_rot_mat = self.dim_rot_mat_1 * 3 self.useBN = False self.fitting_net_variables = None self.mixed_prec = None + self.mixed_types = mixed_types def get_sel_type(self) -> List[int]: """Get selected atom types.""" @@ -132,16 +157,14 @@ def get_out_size(self) -> int: """Get the output size. Should be 9.""" return 9 - def compute_input_stats(self, all_stat, protection=1e-2): - """Compute the input statistics. + def compute_output_stats(self, all_stat): + """Compute the output statistics. Parameters ---------- all_stat Dictionary of inputs. 
can be prepared by model.make_stat_input - protection - Divided-by-zero protection """ if "polarizability" not in all_stat.keys(): self.avgeig = np.zeros([9]) @@ -166,6 +189,7 @@ def compute_input_stats(self, all_stat, protection=1e-2): mean_polar = np.zeros([len(self.sel_type), 9]) sys_matrix, polar_bias = [], [] for ss in range(len(all_stat["type"])): + nframes = all_stat["type"][ss].shape[0] atom_has_polar = [ w for w in all_stat["type"][ss][0] if (w in self.sel_type) ] # select atom with polar @@ -176,7 +200,7 @@ def compute_input_stats(self, all_stat, protection=1e-2): index_lis = [ index for index, w in enumerate(atom_has_polar) - if atom_has_polar[index] == self.sel_type[itype] + if w == self.sel_type[itype] ] # select index in this type sys_matrix.append(np.zeros((1, len(self.sel_type)))) @@ -184,10 +208,11 @@ def compute_input_stats(self, all_stat, protection=1e-2): polar_bias.append( np.sum( - all_stat["atomic_polarizability"][ss].reshape((-1, 9))[ - index_lis - ], - axis=0, + all_stat["atomic_polarizability"][ss].reshape( + nframes, len(atom_has_polar), -1 + )[:, index_lis, :] + / nframes, + axis=(0, 1), ).reshape((1, 9)) ) else: # No atomic polar in this system, so it should have global polar @@ -211,7 +236,9 @@ def compute_input_stats(self, all_stat, protection=1e-2): sys_matrix[-1][0, itype] = len(index_lis) # add polar_bias - polar_bias.append(all_stat["polarizability"][ss].reshape((1, 9))) + polar_bias.append( + np.mean(all_stat["polarizability"][ss], axis=0).reshape((1, 9)) + ) matrix, bias = ( np.concatenate(sys_matrix, axis=0), @@ -223,6 +250,7 @@ def compute_input_stats(self, all_stat, protection=1e-2): np.diagonal(atom_polar[itype].reshape((3, 3))) ) + @cast_precision def _build_lower(self, start_index, natoms, inputs, rot_mat, suffix="", reuse=None): # cut-out inputs inputs_i = tf.slice( @@ -331,7 +359,6 @@ def _build_lower(self, start_index, natoms, inputs, rot_mat, suffix="", reuse=No final_layer = tf.reshape(final_layer, 
[tf.shape(inputs)[0], natoms, 3, 3]) return final_layer - @cast_precision def build( self, input_d: tf.Tensor, @@ -374,8 +401,12 @@ def build( start_index = 0 inputs = tf.reshape(input_d, [-1, self.dim_descrpt * natoms[0]]) rot_mat = tf.reshape(rot_mat, [-1, self.dim_rot_mat * natoms[0]]) + if nframes is None: + nframes = tf.shape(inputs)[0] - if type_embedding is not None: + if self.mixed_types or type_embedding is not None: + # keep old behavior + self.mixed_types = True # nframes x nloc nloc_mask = tf.reshape( tf.tile(tf.repeat(self.sel_mask, natoms[2:]), [nframes]), [nframes, -1] @@ -404,13 +435,28 @@ def build( self.nloc_masked = tf.shape( tf.reshape(self.atype_nloc_masked, [nframes, -1]) )[1] + + if type_embedding is not None: atype_embed = tf.nn.embedding_lookup(type_embedding, self.atype_nloc_masked) else: atype_embed = None self.atype_embed = atype_embed + if atype_embed is not None: + inputs = tf.reshape( + tf.reshape(inputs, [nframes, natoms[0], self.dim_descrpt])[nloc_mask], + [-1, self.dim_descrpt], + ) + rot_mat = tf.reshape( + tf.reshape(rot_mat, [nframes, natoms[0], self.dim_rot_mat])[nloc_mask], + [-1, self.dim_rot_mat * self.nloc_masked], + ) + atype_embed = tf.cast(atype_embed, self.fitting_precision) + type_shape = atype_embed.get_shape().as_list() + inputs = tf.concat([inputs, atype_embed], axis=1) + self.dim_descrpt = self.dim_descrpt + type_shape[1] - if atype_embed is None: + if not self.mixed_types: count = 0 outs_list = [] for type_i in range(self.ntypes): @@ -431,7 +477,7 @@ def build( final_layer = final_layer + self.constant_matrix[sel_type_idx] * tf.eye( 3, batch_shape=[tf.shape(inputs)[0], natoms[2 + type_i]], - dtype=self.fitting_precision, + dtype=GLOBAL_TF_FLOAT_PRECISION, ) start_index += natoms[2 + type_i] @@ -440,18 +486,6 @@ def build( count += 1 outs = tf.concat(outs_list, axis=1) else: - inputs = tf.reshape( - tf.reshape(inputs, [nframes, natoms[0], self.dim_descrpt])[nloc_mask], - [-1, self.dim_descrpt], - ) - rot_mat = 
tf.reshape( - tf.reshape(rot_mat, [nframes, natoms[0], self.dim_rot_mat])[nloc_mask], - [-1, self.dim_rot_mat * self.nloc_masked], - ) - atype_embed = tf.cast(atype_embed, self.fitting_precision) - type_shape = atype_embed.get_shape().as_list() - inputs = tf.concat([inputs, atype_embed], axis=1) - self.dim_descrpt = self.dim_descrpt + type_shape[1] inputs = tf.reshape(inputs, [-1, self.dim_descrpt * self.nloc_masked]) final_layer = self._build_lower( 0, self.nloc_masked, inputs, rot_mat, suffix=suffix, reuse=reuse @@ -461,7 +495,7 @@ def build( if self.shift_diag: final_layer += tf.expand_dims( tf.expand_dims(constant_matrix, -1), -1 - ) * tf.eye(3, batch_shape=[1, 1], dtype=self.fitting_precision) + ) * tf.eye(3, batch_shape=[1, 1], dtype=GLOBAL_TF_FLOAT_PRECISION) outs = final_layer tf.summary.histogram("fitting_net_output", outs) @@ -509,6 +543,71 @@ def get_loss(self, loss: dict, lr) -> Loss: label_name="polarizability", ) + def serialize(self, suffix: str) -> dict: + """Serialize the model. 
@classmethod
def deserialize(cls, data: dict, suffix: str):
    """Rebuild a fitting object from its serialized form.

    Parameters
    ----------
    data : dict
        The serialized data; the input dict itself is left untouched
    suffix : str
        The suffix of the scope, forwarded to ``deserialize_network``

    Returns
    -------
    Model
        The deserialized model
    """
    # work on a shallow copy so that popping the version does not leak out
    payload = data.copy()
    version = payload.pop("@version", 1)
    # versions 1 to 2 are accepted, to allow PT version.
    check_version_compatibility(version, 2, 1)
    instance = cls(**payload)
    instance.fitting_net_variables = cls.deserialize_network(
        payload["nets"],
        suffix=suffix,
    )
    return instance
diff --git a/deepmd/tf/infer/__init__.py b/deepmd/tf/infer/__init__.py new file mode 100644 index 0000000000..9ef9c0d348 --- /dev/null +++ b/deepmd/tf/infer/__init__.py @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Submodule containing all the implemented potentials.""" + +from typing import ( + TYPE_CHECKING, +) + +from deepmd.infer import ( + DeepPotential, +) + +from .data_modifier import ( + DipoleChargeModifier, +) +from .deep_dipole import ( + DeepDipole, +) +from .deep_dos import ( + DeepDOS, +) +from .deep_eval import ( + DeepEval, +) +from .deep_polar import ( + DeepGlobalPolar, + DeepPolar, +) +from .deep_pot import ( + DeepPot, +) +from .deep_wfc import ( + DeepWFC, +) +from .ewald_recp import ( + EwaldRecp, +) +from .model_devi import ( + calc_model_devi, +) + +if TYPE_CHECKING: + from deepmd.infer.deep_eval import ( + DeepEval, + ) + +__all__ = [ + "DeepPotential", + "DeepDipole", + "DeepEval", + "DeepGlobalPolar", + "DeepPolar", + "DeepPot", + "DeepDOS", + "DeepWFC", + "DipoleChargeModifier", + "EwaldRecp", + "calc_model_devi", +] diff --git a/deepmd/infer/data_modifier.py b/deepmd/tf/infer/data_modifier.py similarity index 98% rename from deepmd/infer/data_modifier.py rename to deepmd/tf/infer/data_modifier.py index 62c4b879e9..ccd072673d 100644 --- a/deepmd/infer/data_modifier.py +++ b/deepmd/tf/infer/data_modifier.py @@ -7,26 +7,24 @@ import numpy as np -import deepmd.op # noqa: F401 -from deepmd.common import ( +import deepmd.tf.op # noqa: F401 +from deepmd.tf.common import ( make_default_mesh, select_idx_map, ) -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_TF_FLOAT_PRECISION, op_module, tf, ) -from deepmd.infer.deep_dipole import ( - DeepDipole, -) -from deepmd.infer.ewald_recp import ( +from deepmd.tf.infer.deep_dipole import DeepDipoleOld as DeepDipole +from deepmd.tf.infer.ewald_recp import ( EwaldRecp, ) -from deepmd.utils.data import ( +from deepmd.tf.utils.data import ( DeepmdData, ) -from 
class DeepDipoleOld(DeepTensor):
    """Dipole tensor evaluator, used for DipoleChargeModifier only.

    Parameters
    ----------
    model_file : Path
        The name of the frozen model file.
    load_prefix: str
        The prefix in the load computational graph
    default_tf_graph : bool
        If uses the default tf graph, otherwise build a new tf graph for evaluation
    input_map : dict, optional
        The input map for tf.import_graph_def. Only work with default tf graph
    neighbor_list : ase.neighborlist.NeighborList, optional
        The neighbor list object. If None, then build the native neighbor list.

    Warnings
    --------
    For developers: the `DeepTensor` initializer must be called last, after
    `self.tensors` has been modified, because it uses the data in the
    `self.tensors` dict. Do not change the order!
    """

    def __init__(
        self,
        model_file: "Path",
        load_prefix: str = "load",
        default_tf_graph: bool = False,
        input_map: Optional[dict] = None,
        neighbor_list=None,
    ) -> None:
        # output tensor of this model type
        output_tensors = {"t_tensor": "o_dipole:0"}
        # build a new dict rather than updating in place, so that the
        # attribute moves from the class to the instance namespace
        self.tensors = dict(output_tensors, **self.tensors)

        DeepTensor.__init__(
            self,
            model_file,
            load_prefix=load_prefix,
            default_tf_graph=default_tf_graph,
            input_map=input_map,
            neighbor_list=neighbor_list,
        )

    def get_dim_fparam(self) -> int:
        """Frame parameters are not supported by this model type."""
        raise NotImplementedError("This model type does not support this attribute")

    def get_dim_aparam(self) -> int:
        """Atomic parameters are not supported by this model type."""
        raise NotImplementedError("This model type does not support this attribute")
DeepPot, +) +from deepmd.infer.deep_wfc import ( + DeepWFC, +) +from deepmd.tf.env import ( + MODEL_VERSION, + default_tf_session_config, + tf, +) +from deepmd.tf.utils.batch_size import ( + AutoBatchSize, +) +from deepmd.tf.utils.sess import ( + run_sess, +) + +if TYPE_CHECKING: + from pathlib import ( + Path, + ) + + from deepmd.infer.deep_eval import DeepEval as DeepEvalWrapper + + +class DeepEval(DeepEvalBackend): + """TensorFlow backend implementation for DeepEval. + + Parameters + ---------- + model_file : Path + The name of the frozen model file. + output_def : ModelOutputDef + The output definition of the model. + *args : list + Positional arguments. + load_prefix: str + The prefix in the load computational graph + default_tf_graph : bool + If uses the default tf graph, otherwise build a new tf graph for evaluation + auto_batch_size : bool or int or AutomaticBatchSize, default: False + If True, automatic batch size will be used. If int, it will be used + as the initial batch size. + input_map : dict, optional + The input map for tf.import_graph_def. Only work with default tf graph + neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional + The ASE neighbor list class to produce the neighbor list. If None, the + neighbor list will be built natively in the model. + **kwargs : dict + Keyword arguments. 
def __init__(
    self,
    model_file: "Path",
    output_def: ModelOutputDef,
    *args: list,
    load_prefix: str = "load",
    default_tf_graph: bool = False,
    auto_batch_size: Union[bool, int, AutoBatchSize] = False,
    input_map: Optional[dict] = None,
    neighbor_list=None,
    **kwargs: dict,
):
    """Load the frozen graph and cache its tensors and constant attributes.

    Raises
    ------
    RuntimeError
        If ``_graph_compatable`` reports that the model stored in the graph
        is not compatible with the current code.
    TypeError
        If ``auto_batch_size`` is not a bool, an int or an AutoBatchSize.
    """
    self.graph = self._load_graph(
        model_file,
        prefix=load_prefix,
        default_tf_graph=default_tf_graph,
        input_map=input_map,
    )
    self.load_prefix = load_prefix

    # graph_compatable should be called after graph and prefix are set
    if not self._graph_compatable():
        # the pieces are adjacent string literals: each one needs its own
        # separating space, otherwise the words run together
        raise RuntimeError(
            f"model in graph (version {self.model_version}) is incompatible "
            f"with the model (version {MODEL_VERSION}) supported by the current code. "
            "See https://deepmd.rtfd.io/compatability/ for details."
        )

    # set default to False, as subclasses may not support
    # (bool is tested first because bool is a subclass of int)
    if isinstance(auto_batch_size, bool):
        if auto_batch_size:
            self.auto_batch_size = AutoBatchSize()
        else:
            self.auto_batch_size = None
    elif isinstance(auto_batch_size, int):
        # an int is the initial batch size
        self.auto_batch_size = AutoBatchSize(auto_batch_size)
    elif isinstance(auto_batch_size, AutoBatchSize):
        self.auto_batch_size = auto_batch_size
    else:
        raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize")

    self.neighbor_list = neighbor_list

    self.output_def = output_def
    self._init_tensors()
    self._init_attr()
    # optional inputs are stored as None when the graph does not define them
    self.has_efield = self.tensors["efield"] is not None
    self.has_fparam = self.tensors["fparam"] is not None
    self.has_aparam = self.tensors["aparam"] is not None
    self.has_spin = self.ntypes_spin > 0
    self.modifier_type = None

    # looks ugly...
    # NOTE(review): modifier_type was set to None just above, so this branch
    # cannot run as written; it is kept unchanged -- confirm whether the
    # modifier type should be read from the graph instead.
    if self.modifier_type == "dipole_charge":
        from deepmd.tf.infer.data_modifier import (
            DipoleChargeModifier,
        )

        t_mdl_name = self._get_tensor("modifier_attr/mdl_name:0")
        t_mdl_charge_map = self._get_tensor("modifier_attr/mdl_charge_map:0")
        t_sys_charge_map = self._get_tensor("modifier_attr/sys_charge_map:0")
        t_ewald_h = self._get_tensor("modifier_attr/ewald_h:0")
        t_ewald_beta = self._get_tensor("modifier_attr/ewald_beta:0")
        [mdl_name, mdl_charge_map, sys_charge_map, ewald_h, ewald_beta] = run_sess(
            self.sess,
            [
                t_mdl_name,
                t_mdl_charge_map,
                t_sys_charge_map,
                t_ewald_h,
                t_ewald_beta,
            ],
        )
        # the string attributes come back as bytes; the charge maps are
        # whitespace-separated integers
        mdl_name = mdl_name.decode("UTF-8")
        mdl_charge_map = [int(ii) for ii in mdl_charge_map.decode("UTF-8").split()]
        sys_charge_map = [int(ii) for ii in sys_charge_map.decode("UTF-8").split()]
        self.dm = DipoleChargeModifier(
            mdl_name,
            mdl_charge_map,
            sys_charge_map,
            ewald_h=ewald_h,
            ewald_beta=ewald_beta,
        )
def _init_attr(self):
    """Evaluate the constant attribute tensors and store them on the instance.

    ``ntypes``, ``rcut`` and the type map are always read. Each optional
    attribute falls back to a default when its tensor is stored as None.
    """
    self.ntypes, self.rcut, raw_tmap = run_sess(
        self.sess,
        [self.tensors[key] for key in ("ntypes", "rcut", "tmap")],
    )

    # (attribute name == tensor key, value used when the tensor is absent)
    optional_defaults = (
        ("ntypes_spin", 0),
        ("dfparam", 0),
        ("daparam", 0),
        ("sel_type", None),
        ("numb_dos", 0),
    )
    for attr_name, fallback in optional_defaults:
        tensor = self.tensors[attr_name]
        if tensor is None:
            setattr(self, attr_name, fallback)
        else:
            setattr(self, attr_name, run_sess(self.sess, [tensor])[0])

    # the type map is stored as one whitespace-separated byte string
    self.tmap = raw_tmap.decode("utf-8").split()
def _graph_compatable(self) -> bool:
    """Tell whether the model version of the graph is supported.

    The graph is accepted when its major version equals the major version
    of ``MODEL_VERSION`` and its minor version is not newer.

    Returns
    -------
    bool
        If the model stored in the graph file is compatable with the current code
    """
    graph_fields = self.model_version.split(".")
    graph_major = int(graph_fields[0])
    graph_minor = int(graph_fields[1])
    code_fields = MODEL_VERSION.split(".")
    code_major = int(code_fields[0])
    code_minor = int(code_fields[1])
    return graph_major == code_major and graph_minor <= code_minor
@staticmethod
def sort_input(
    coord: np.ndarray,
    atom_type: np.ndarray,
    sel_atoms: Optional[List[int]] = None,
):
    """Reorder the atoms so that they are grouped by ascending type.

    The ordering is derived from the first frame of ``atom_type`` and is
    applied to all frames. Atoms of the same type keep their input order.

    Parameters
    ----------
    coord
        The coordinates of atoms.
        Must be reshapeable to [nframes, natoms, 3]
    atom_type
        The type of atoms, indexed as [nframes, natoms]
    sel_atoms
        The selected atoms by type

    Returns
    -------
    coord_out
        The coordinates after sorting, of shape [nframes, natoms * 3]
    atom_type_out
        The atom types after sorting
    idx_map
        The index mapping from the input to the output.
        For example coord_out = coord[:,idx_map,:]
    sel_atom_type
        The sorted types of the selected atoms.
        Same object as atom_type_out if sel_atoms is None
    sel_idx_map
        The index mapping from the selected atoms to sorted selected atoms.
        Same object as idx_map if sel_atoms is None
    """
    natoms = atom_type.shape[1]
    first_frame = atom_type[0]
    has_selection = sel_atoms is not None

    if has_selection:
        # the selection mask refers to the atoms in their input order
        picked = np.zeros(natoms, dtype=bool)
        for sel_type in sel_atoms:
            picked |= first_frame == sel_type
        picked_types = atom_type[:, picked]

    # a stable sort keeps the input order among atoms of the same type
    order = np.argsort(first_frame, kind="stable")
    nframes = coord.shape[0]
    sorted_coord = coord.reshape([nframes, -1, 3])[:, order, :].reshape([nframes, -1])
    sorted_types = atom_type[:, order]

    if not has_selection:
        return sorted_coord, sorted_types, order, sorted_types, order

    sel_order = np.argsort(picked_types[0], kind="stable")
    return sorted_coord, sorted_types, order, picked_types[:, sel_order], sel_order
def build_neighbor_list(
    self,
    coords: np.ndarray,
    cell: Optional[np.ndarray],
    atype: np.ndarray,
    imap: np.ndarray,
    neighbor_list,
):
    """Make the mesh with neighbor list for a single frame.

    Neighbors reached through a non-zero cell offset are appended as ghost
    atoms after the local atoms, and the neighbor indexes are rewritten to
    point at those ghost atoms.

    Parameters
    ----------
    coords : np.ndarray
        The coordinates of atoms. Should be of shape [natoms, 3]
    cell : Optional[np.ndarray]
        The cell of the system. Should be of shape [3, 3].
        If None, the system is treated as non-periodic.
    atype : np.ndarray
        The type of atoms. Should be of shape [natoms]
    imap : np.ndarray
        The index map of atoms. Should be of shape [natoms]
    neighbor_list : ase.neighborlist.NewPrimitiveNeighborList
        ASE neighbor list. The following method or attribute will be
        used/set: bothways, self_interaction, update, build, first_neigh,
        pair_second, offset_vec.

    Returns
    -------
    natoms_vec : np.ndarray
        The number of atoms. This tensor has the length of Ntypes + 2
        natoms[0]: nloc
        natoms[1]: nall
        natoms[i]: 2 <= i < Ntypes+2, number of type i atoms for nloc
    coords : np.ndarray
        The coordinates of atoms, including ghost atoms. Of shape [nall, 3]
    atype : np.ndarray
        The type of atoms, including ghost atoms. Of shape [nall]
    mesh : np.ndarray
        The mesh in nei_mode=4.
    imap : np.ndarray
        The index map of atoms. Of shape [nall]
    ghost_map : np.ndarray
        The index map of ghost atoms. Of shape [nghost]
    """
    pbc = np.repeat(cell is not None, 3)
    if cell is None:
        # Non-periodic input: a placeholder cell keeps the array operations
        # below well defined. Identity is the same placeholder that the
        # feed-dict preparation uses for non-periodic systems.
        cell = np.eye(3)
    else:
        cell = np.asarray(cell).reshape(3, 3)
    positions = coords.reshape(-1, 3)
    neighbor_list.bothways = True
    neighbor_list.self_interaction = False
    if neighbor_list.update(pbc, cell, positions):
        neighbor_list.build(pbc, cell, positions)
    # copy, because pair_second is modified in place below
    first_neigh = neighbor_list.first_neigh.copy()
    pair_second = neighbor_list.pair_second.copy()
    offset_vec = neighbor_list.offset_vec.copy()

    # get out-of-box neighbors: pairs with any non-zero cell offset
    out_mask = np.any(offset_vec != 0, axis=1)
    out_idx = pair_second[out_mask]
    out_offset = offset_vec[out_mask]
    # ghost position = position of the original atom shifted by the offset
    out_coords = positions[out_idx] + out_offset.dot(cell)
    atype = np.array(atype, dtype=int).reshape(-1)
    out_atype = atype[out_idx]

    nloc = positions.shape[0]
    nghost = out_idx.size
    all_coords = np.concatenate((positions, out_coords), axis=0)
    all_atype = np.concatenate((atype, out_atype), axis=0)
    # convert neighbor indexes: every out-of-box pair points to its own ghost
    # (boolean indexing copies, so ghost_map keeps the pre-rewrite indexes)
    ghost_map = pair_second[out_mask]
    pair_second[out_mask] = np.arange(nloc, nloc + nghost)

    # get the mesh: 16 header entries, then ilist, numnei and jlist
    mesh = np.zeros(16 + nloc * 2 + pair_second.size, dtype=int)
    mesh[0] = nloc
    # ilist
    mesh[16 : 16 + nloc] = np.arange(nloc)
    # numnei
    mesh[16 + nloc : 16 + nloc * 2] = first_neigh[1:] - first_neigh[:-1]
    # jlist
    mesh[16 + nloc * 2 :] = pair_second

    # natoms_vec: per-type counts are taken over the local atoms only
    natoms_vec = np.zeros(self.ntypes + 2).astype(int)
    natoms_vec[0] = nloc
    natoms_vec[1] = nloc + nghost
    for ii in range(self.ntypes):
        natoms_vec[ii + 2] = np.count_nonzero(atype == ii)
    # imap append ghost atoms
    imap = np.concatenate((imap, np.arange(nloc, nloc + nghost)))
    return natoms_vec, all_coords, all_atype, mesh, imap, ghost_map
+ + Parameters + ---------- + inner_func : Callable + the method to be wrapped + numb_test : int + number of tests + natoms : int + number of atoms + + Returns + ------- + Callable + the wrapper + """ + if self.auto_batch_size is not None: + + def eval_func(*args, **kwargs): + return self.auto_batch_size.execute_all( + inner_func, numb_test, natoms, *args, **kwargs + ) + + else: + eval_func = inner_func + return eval_func + + def _get_natoms_and_nframes( + self, + coords: np.ndarray, + atom_types: Union[List[int], np.ndarray], + ) -> Tuple[int, int]: + natoms = len(atom_types[0]) + if natoms == 0: + assert coords.size == 0 + else: + coords = np.reshape(np.array(coords), [-1, natoms * 3]) + nframes = coords.shape[0] + return natoms, nframes + + def eval( + self, + coords: np.ndarray, + cells: np.ndarray, + atom_types: np.ndarray, + atomic: bool = False, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + efield: Optional[np.ndarray] = None, + **kwargs: Dict[str, Any], + ) -> Dict[str, np.ndarray]: + """Evaluate the energy, force and virial by using this DP. + + Parameters + ---------- + coords + The coordinates of atoms. + The array should be of size nframes x natoms x 3 + cells + The cell of the region. + If None then non-PBC is assumed, otherwise using PBC. + The array should be of size nframes x 9 + atom_types + The atom types + The list should contain natoms ints + atomic + Calculate the atomic energy and virial + fparam + The frame parameter. + The array can be of size : + - nframes x dim_fparam. + - dim_fparam. Then all frames are assumed to be provided with the same fparam. + aparam + The atomic parameter + The array can be of size : + - nframes x natoms x dim_aparam. + - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam. + - dim_aparam. Then all frames and atoms are provided with the same aparam. + efield + The external field on atoms. 
+ The array should be of size nframes x natoms x 3 + **kwargs + Other parameters + + Returns + ------- + output_dict : dict + The output of the evaluation. The keys are the names of the output + variables, and the values are the corresponding output arrays. + """ + # reshape coords before getting shape + natoms, numb_test = self._get_natoms_and_nframes( + coords, + atom_types, + ) + output = self._eval_func(self._eval_inner, numb_test, natoms)( + coords, + cells, + atom_types, + fparam=fparam, + aparam=aparam, + atomic=atomic, + efield=efield, + ) + if not isinstance(output, tuple): + output = (output,) + + output_dict = { + odef.name: oo for oo, odef in zip(output, self.output_def.var_defs.values()) + } + # ugly!! + if self.modifier_type is not None and isinstance(self.model_type, DeepPot): + if atomic: + raise RuntimeError("modifier does not support atomic modification") + me, mf, mv = self.dm.eval(coords, cells, atom_types) + output = list(output) # tuple to list + e, f, v = output[:3] + output_dict["energy_redu"] += me.reshape(e.shape) + output_dict["energy_deri_r"] += mf.reshape(f.shape) + output_dict["energy_deri_c_redu"] += mv.reshape(v.shape) + return output_dict + + def _prepare_feed_dict( + self, + coords, + cells, + atom_types, + fparam=None, + aparam=None, + efield=None, + ): + # standardize the shape of inputs + natoms, nframes = self._get_natoms_and_nframes( + coords, + atom_types, + ) + atom_types = np.array(atom_types, dtype=int).reshape([nframes, natoms]) + coords = np.reshape(np.array(coords), [nframes, natoms * 3]) + if cells is None: + pbc = False + # make cells to work around the requirement of pbc + cells = np.tile(np.eye(3), [nframes, 1]).reshape([nframes, 9]) + else: + pbc = True + cells = np.array(cells).reshape([nframes, 9]) + + if self.has_fparam: + assert fparam is not None + fparam = np.array(fparam) + if self.has_aparam: + assert aparam is not None + aparam = np.array(aparam) + if self.has_efield: + assert ( + efield is not None + ), 
"you are using a model with external field, parameter efield should be provided" + efield = np.array(efield) + + # reshape the inputs + if self.has_fparam: + fdim = self.get_dim_fparam() + if fparam.size == nframes * fdim: + fparam = np.reshape(fparam, [nframes, fdim]) + elif fparam.size == fdim: + fparam = np.tile(fparam.reshape([-1]), [nframes, 1]) + else: + raise RuntimeError( + "got wrong size of frame param, should be either %d x %d or %d" + % (nframes, fdim, fdim) + ) + if self.has_aparam: + fdim = self.get_dim_aparam() + if aparam.size == nframes * natoms * fdim: + aparam = np.reshape(aparam, [nframes, natoms * fdim]) + elif aparam.size == natoms * fdim: + aparam = np.tile(aparam.reshape([-1]), [nframes, 1]) + elif aparam.size == fdim: + aparam = np.tile(aparam.reshape([-1]), [nframes, natoms]) + else: + raise RuntimeError( + "got wrong size of atomic param, should be either %d x %d x %d or %d x %d or %d" + % (nframes, natoms, fdim, natoms, fdim, fdim) + ) + + # sort inputs + coords, atom_types, imap, sel_at, sel_imap = self.sort_input( + coords, atom_types, sel_atoms=self.get_sel_type() + ) + if self.has_efield: + efield = np.reshape(efield, [nframes, natoms, 3]) + efield = efield[:, imap, :] + efield = np.reshape(efield, [nframes, natoms * 3]) + if self.has_aparam: + aparam = np.reshape(aparam, [nframes, natoms, fdim]) + aparam = aparam[:, imap, :] + aparam = np.reshape(aparam, [nframes, natoms * fdim]) + + # make natoms_vec and default_mesh + if self.neighbor_list is None: + natoms_vec = self.make_natoms_vec(atom_types) + assert natoms_vec[0] == natoms + mesh = make_default_mesh(pbc, not self._check_mixed_types(atom_types)) + ghost_map = None + else: + if nframes > 1: + raise NotImplementedError( + "neighbor_list does not support multiple frames" + ) + ( + natoms_vec, + coords, + atom_types, + mesh, + imap, + ghost_map, + ) = self.build_neighbor_list( + coords, + cells if cells is not None else None, + atom_types, + imap, + self.neighbor_list, + ) + + # 
evaluate + feed_dict_test = {} + feed_dict_test[self.tensors["natoms"]] = natoms_vec + feed_dict_test[self.tensors["type"]] = atom_types.reshape([-1]) + feed_dict_test[self.tensors["coord"]] = np.reshape(coords, [-1]) + + if len(self.tensors["box"].shape) == 1: + feed_dict_test[self.tensors["box"]] = np.reshape(cells, [-1]) + elif len(self.tensors["box"].shape) == 2: + feed_dict_test[self.tensors["box"]] = cells + else: + raise RuntimeError + if self.has_efield: + feed_dict_test[self.tensors["efield"]] = np.reshape(efield, [-1]) + feed_dict_test[self.tensors["mesh"]] = mesh + if self.has_fparam: + feed_dict_test[self.tensors["fparam"]] = np.reshape(fparam, [-1]) + if self.has_aparam: + feed_dict_test[self.tensors["aparam"]] = np.reshape(aparam, [-1]) + return feed_dict_test, imap, natoms_vec, ghost_map, sel_at, sel_imap + + def _eval_inner( + self, + coords, + cells, + atom_types, + fparam=None, + aparam=None, + efield=None, + **kwargs, + ): + natoms, nframes = self._get_natoms_and_nframes( + coords, + atom_types, + ) + ( + feed_dict_test, + imap, + natoms_vec, + ghost_map, + sel_at, + sel_imap, + ) = self._prepare_feed_dict( + coords, + cells, + atom_types, + fparam, + aparam, + efield, + ) + + nloc = natoms_vec[0] + nloc_sel = sel_at.shape[1] + nall = natoms_vec[1] + + t_out = list(self.output_tensors.values()) + + v_out = run_sess(self.sess, t_out, feed_dict=feed_dict_test) + + if nloc_sel == 0: + nloc_sel = nloc + sel_imap = imap + if self.has_spin: + ntypes_real = self.ntypes - self.ntypes_spin + natoms_real = sum( + [ + np.count_nonzero(np.array(atom_types[0]) == ii) + for ii in range(ntypes_real) + ] + ) + else: + natoms_real = nloc_sel + if ghost_map is not None: + # add the value of ghost atoms to real atoms + for ii, odef in enumerate(self.output_def.var_defs.values()): + # when the shape is nall + if odef.category in ( + OutputVariableCategory.DERV_R, + OutputVariableCategory.DERV_C, + ): + odef_shape = self._get_output_shape(odef, nframes, nall) + 
tmp_shape = [np.prod(odef_shape[:-2]), *odef_shape[-2:]] + v_out[ii] = np.reshape(v_out[ii], tmp_shape) + for jj in range(v_out[ii].shape[0]): + np.add.at(v_out[ii][jj], ghost_map, v_out[ii][jj, nloc:]) + + for ii, odef in enumerate(self.output_def.var_defs.values()): + if odef.category in ( + OutputVariableCategory.DERV_R, + OutputVariableCategory.DERV_C, + ): + odef_shape = self._get_output_shape(odef, nframes, nall) + tmp_shape = [np.prod(odef_shape[:-2]), *odef_shape[-2:]] + # reverse map of the outputs + v_out[ii] = self.reverse_map(np.reshape(v_out[ii], tmp_shape), imap) + v_out[ii] = np.reshape(v_out[ii], odef_shape) + if nloc < nall: + v_out[ii] = v_out[ii][:, :, :nloc] + elif odef.category == OutputVariableCategory.OUT: + odef_shape = self._get_output_shape(odef, nframes, natoms_real) + v_out[ii] = self.reverse_map( + np.reshape(v_out[ii], odef_shape), sel_imap[:natoms_real] + ) + if nloc_sel < nloc: + # convert shape from nsel to nloc + # sel_atoms was applied before sort; see sort_input + # do not consider mixed_types here (as it is never supported) + sel_mask = np.isin(atom_types[0], self.sel_type) + out_nsel = v_out[ii] + out_nloc = np.zeros( + (nframes, nloc, *out_nsel.shape[2:]), dtype=out_nsel.dtype + ) + out_nloc[:, sel_mask] = out_nsel + v_out[ii] = out_nloc + odef_shape = self._get_output_shape(odef, nframes, nloc) + v_out[ii] = np.reshape(v_out[ii], odef_shape) + elif odef.category in ( + OutputVariableCategory.REDU, + OutputVariableCategory.DERV_C_REDU, + ): + odef_shape = self._get_output_shape(odef, nframes, 0) + v_out[ii] = np.reshape(v_out[ii], odef_shape) + else: + raise RuntimeError("unknown category") + return tuple(v_out) + + def _get_output_shape(self, odef, nframes, natoms): + if odef.category == OutputVariableCategory.DERV_C_REDU: + # virial + return [nframes, *odef.shape[:-1], 9] + elif odef.category == OutputVariableCategory.REDU: + # energy + return [nframes, *odef.shape, 1] + elif odef.category == OutputVariableCategory.DERV_C: + 
# atom_virial + return [nframes, *odef.shape[:-1], natoms, 9] + elif odef.category == OutputVariableCategory.DERV_R: + # force + return [nframes, *odef.shape[:-1], natoms, 3] + elif odef.category == OutputVariableCategory.OUT: + # atom_energy, atom_tensor + # Something wrong here? + # return [nframes, *shape, natoms, 1] + return [nframes, natoms, *odef.shape, 1] + else: + raise RuntimeError("unknown category") + + def eval_descriptor( + self, + coords: np.ndarray, + cells: np.ndarray, + atom_types: np.ndarray, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + efield: Optional[np.ndarray] = None, + ) -> np.ndarray: + """Evaluate descriptors by using this DP. + + Parameters + ---------- + coords + The coordinates of atoms. + The array should be of size nframes x natoms x 3 + cells + The cell of the region. + If None then non-PBC is assumed, otherwise using PBC. + The array should be of size nframes x 9 + atom_types + The atom types + The list should contain natoms ints + fparam + The frame parameter. + The array can be of size : + - nframes x dim_fparam. + - dim_fparam. Then all frames are assumed to be provided with the same fparam. + aparam + The atomic parameter + The array can be of size : + - nframes x natoms x dim_aparam. + - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam. + - dim_aparam. Then all frames and atoms are provided with the same aparam. + efield + The external field on atoms. + The array should be of size nframes x natoms x 3 + + Returns + ------- + descriptor + Descriptors. 
+ """ + natoms, numb_test = self._get_natoms_and_nframes( + coords, + atom_types, + ) + descriptor = self._eval_func(self._eval_descriptor_inner, numb_test, natoms)( + coords, + cells, + atom_types, + fparam=fparam, + aparam=aparam, + efield=efield, + ) + return descriptor + + def _eval_descriptor_inner( + self, + coords: np.ndarray, + cells: np.ndarray, + atom_types: np.ndarray, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + efield: Optional[np.ndarray] = None, + ) -> np.ndarray: + natoms, nframes = self._get_natoms_and_nframes( + coords, + atom_types, + ) + ( + feed_dict_test, + imap, + natoms_vec, + ghost_map, + sel_at, + sel_imap, + ) = self._prepare_feed_dict( + coords, + cells, + atom_types, + fparam, + aparam, + efield, + ) + (descriptor,) = run_sess( + self.sess, [self.tensors["descriptor"]], feed_dict=feed_dict_test + ) + imap = imap[:natoms] + return self.reverse_map(np.reshape(descriptor, [nframes, natoms, -1]), imap) + + def get_numb_dos(self) -> int: + return self.numb_dos + + def get_has_efield(self) -> bool: + return self.has_efield + + +class DeepEvalOld: + # old class for DipoleChargeModifier only + """Common methods for DeepPot, DeepWFC, DeepPolar, ... + + Parameters + ---------- + model_file : Path + The name of the frozen model file. + load_prefix: str + The prefix in the load computational graph + default_tf_graph : bool + If uses the default tf graph, otherwise build a new tf graph for evaluation + auto_batch_size : bool or int or AutomaticBatchSize, default: False + If True, automatic batch size will be used. If int, it will be used + as the initial batch size. + input_map : dict, optional + The input map for tf.import_graph_def. Only work with default tf graph + neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional + The ASE neighbor list class to produce the neighbor list. If None, the + neighbor list will be built natively in the model. 
+ """ + + load_prefix: str # set by subclass + + def __init__( + self, + model_file: "Path", + load_prefix: str = "load", + default_tf_graph: bool = False, + auto_batch_size: Union[bool, int, AutoBatchSize] = False, + input_map: Optional[dict] = None, + neighbor_list=None, + ): + self.graph = self._load_graph( + model_file, + prefix=load_prefix, + default_tf_graph=default_tf_graph, + input_map=input_map, + ) + self.load_prefix = load_prefix + + # graph_compatable should be called after graph and prefix are set + if not self._graph_compatable(): + raise RuntimeError( + f"model in graph (version {self.model_version}) is incompatible " + f"with the model (version {MODEL_VERSION}) supported by the current code. " + "See https://deepmd.rtfd.io/compatability/ for details." + ) + + # set default to False, as subclasses may not support + if isinstance(auto_batch_size, bool): + if auto_batch_size: + self.auto_batch_size = AutoBatchSize() + else: + self.auto_batch_size = None + elif isinstance(auto_batch_size, int): + self.auto_batch_size = AutoBatchSize(auto_batch_size) + elif isinstance(auto_batch_size, AutoBatchSize): + self.auto_batch_size = auto_batch_size + else: + raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize") + + self.neighbor_list = neighbor_list + + @property + @lru_cache(maxsize=None) + def model_type(self) -> str: + """Get type of model. + + :type:str + """ + t_mt = self._get_tensor("model_attr/model_type:0") + [mt] = run_sess(self.sess, [t_mt], feed_dict={}) + return mt.decode("utf-8") + + @property + @lru_cache(maxsize=None) + def model_version(self) -> str: + """Get version of model. 
+ + Returns + ------- + str + version of model + """ + try: + t_mt = self._get_tensor("model_attr/model_version:0") + except KeyError: + # For deepmd-kit version 0.x - 1.x, set model version to 0.0 + return "0.0" + else: + [mt] = run_sess(self.sess, [t_mt], feed_dict={}) + return mt.decode("utf-8") + + @property + @lru_cache(maxsize=None) + def sess(self) -> tf.Session: + """Get TF session.""" + # start a tf session associated to the graph + return tf.Session(graph=self.graph, config=default_tf_session_config) + + def _graph_compatable(self) -> bool: + """Check the model compatability. + + Returns + ------- + bool + If the model stored in the graph file is compatable with the current code + """ + model_version_major = int(self.model_version.split(".")[0]) + model_version_minor = int(self.model_version.split(".")[1]) + MODEL_VERSION_MAJOR = int(MODEL_VERSION.split(".")[0]) + MODEL_VERSION_MINOR = int(MODEL_VERSION.split(".")[1]) + if (model_version_major != MODEL_VERSION_MAJOR) or ( + model_version_minor > MODEL_VERSION_MINOR + ): + return False + else: + return True + + def _get_tensor( + self, tensor_name: str, attr_name: Optional[str] = None + ) -> tf.Tensor: + """Get TF graph tensor and assign it to class namespace. 
+ + Parameters + ---------- + tensor_name : str + name of tensor to get + attr_name : Optional[str], optional + if specified, class attribute with this name will be created and tensor will + be assigned to it, by default None + + Returns + ------- + tf.Tensor + loaded tensor + """ + # do not use os.path.join as it doesn't work on Windows + tensor_path = "/".join((self.load_prefix, tensor_name)) + tensor = self.graph.get_tensor_by_name(tensor_path) + if attr_name: + setattr(self, attr_name, tensor) + return tensor + else: + return tensor + + @staticmethod + def _load_graph( + frozen_graph_filename: "Path", + prefix: str = "load", + default_tf_graph: bool = False, + input_map: Optional[dict] = None, + ): + # We load the protobuf file from the disk and parse it to retrieve the + # unserialized graph_def + with tf.gfile.GFile(str(frozen_graph_filename), "rb") as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString(f.read()) + + if default_tf_graph: + tf.import_graph_def( + graph_def, + input_map=input_map, + return_elements=None, + name=prefix, + producer_op_list=None, + ) + graph = tf.get_default_graph() + else: + # Then, we can use again a convenient built-in function to import + # a graph_def into the current default Graph + with tf.Graph().as_default() as graph: + tf.import_graph_def( + graph_def, + input_map=None, + return_elements=None, + name=prefix, + producer_op_list=None, + ) + + return graph + + @staticmethod + def sort_input( + coord: np.ndarray, + atom_type: np.ndarray, + sel_atoms: Optional[List[int]] = None, + mixed_type: bool = False, + ): + """Sort atoms in the system according their types. + + Parameters + ---------- + coord + The coordinates of atoms. + Should be of shape [nframes, natoms, 3] + atom_type + The type of atoms + Should be of shape [natoms] + sel_atoms + The selected atoms by type + mixed_type + Whether to perform the mixed_type mode. 
+ If True, the input data has the mixed_type format (see doc/model/train_se_atten.md), + in which frames in a system may have different natoms_vec(s), with the same nloc. + + Returns + ------- + coord_out + The coordinates after sorting + atom_type_out + The atom types after sorting + idx_map + The index mapping from the input to the output. + For example coord_out = coord[:,idx_map,:] + sel_atom_type + Only output if sel_atoms is not None + The sorted selected atom types + sel_idx_map + Only output if sel_atoms is not None + The index mapping from the selected atoms to sorted selected atoms. + """ + if mixed_type: + # mixed_type need not to resort + natoms = atom_type[0].size + idx_map = np.arange(natoms) + return coord, atom_type, idx_map + if sel_atoms is not None: + selection = [False] * np.size(atom_type) + for ii in sel_atoms: + selection += atom_type == ii + sel_atom_type = atom_type[selection] + natoms = atom_type.size + idx = np.arange(natoms) + idx_map = np.lexsort((idx, atom_type)) + nframes = coord.shape[0] + coord = coord.reshape([nframes, -1, 3]) + coord = np.reshape(coord[:, idx_map, :], [nframes, -1]) + atom_type = atom_type[idx_map] + if sel_atoms is not None: + sel_natoms = np.size(sel_atom_type) + sel_idx = np.arange(sel_natoms) + sel_idx_map = np.lexsort((sel_idx, sel_atom_type)) + sel_atom_type = sel_atom_type[sel_idx_map] + return coord, atom_type, idx_map, sel_atom_type, sel_idx_map + else: + return coord, atom_type, idx_map + + @staticmethod + def reverse_map(vec: np.ndarray, imap: List[int]) -> np.ndarray: + """Reverse mapping of a vector according to the index map. + + Parameters + ---------- + vec + Input vector. Be of shape [nframes, natoms, -1] + imap + Index map. Be of shape [natoms] + + Returns + ------- + vec_out + Reverse mapped vector. 
+ """ + ret = np.zeros(vec.shape) + # for idx,ii in enumerate(imap) : + # ret[:,ii,:] = vec[:,idx,:] + ret[:, imap, :] = vec + return ret + + def make_natoms_vec( + self, atom_types: np.ndarray, mixed_type: bool = False + ) -> np.ndarray: + """Make the natom vector used by deepmd-kit. + + Parameters + ---------- + atom_types + The type of atoms + mixed_type + Whether to perform the mixed_type mode. + If True, the input data has the mixed_type format (see doc/model/train_se_atten.md), + in which frames in a system may have different natoms_vec(s), with the same nloc. + + Returns + ------- + natoms + The number of atoms. This tensor has the length of Ntypes + 2 + natoms[0]: number of local atoms + natoms[1]: total number of atoms held by this processor + natoms[i]: 2 <= i < Ntypes+2, number of type i atoms + + """ + natoms_vec = np.zeros(self.ntypes + 2).astype(int) + if mixed_type: + natoms = atom_types[0].size + else: + natoms = atom_types.size + natoms_vec[0] = natoms + natoms_vec[1] = natoms + if mixed_type: + natoms_vec[2] = natoms + return natoms_vec + for ii in range(self.ntypes): + natoms_vec[ii + 2] = np.count_nonzero(atom_types == ii) + return natoms_vec + + def eval_typeebd(self) -> np.ndarray: + """Evaluate output of type embedding network by using this model. + + Returns + ------- + np.ndarray + The output of type embedding network. The shape is [ntypes, o_size], + where ntypes is the number of types, and o_size is the number of nodes + in the output layer. + + Raises + ------ + KeyError + If the model does not enable type embedding. + + See Also + -------- + deepmd.tf.utils.type_embed.TypeEmbedNet : The type embedding network. 
+ + Examples + -------- + Get the output of type embedding network of `graph.pb`: + + >>> from deepmd.tf.infer import DeepPotential + >>> dp = DeepPotential("graph.pb") + >>> dp.eval_typeebd() + """ + t_typeebd = self._get_tensor("t_typeebd:0") + [typeebd] = run_sess(self.sess, [t_typeebd], feed_dict={}) + return typeebd + + def build_neighbor_list( + self, + coords: np.ndarray, + cell: Optional[np.ndarray], + atype: np.ndarray, + imap: np.ndarray, + neighbor_list, + ): + """Make the mesh with neighbor list for a single frame. + + Parameters + ---------- + coords : np.ndarray + The coordinates of atoms. Should be of shape [natoms, 3] + cell : Optional[np.ndarray] + The cell of the system. Should be of shape [3, 3] + atype : np.ndarray + The type of atoms. Should be of shape [natoms] + imap : np.ndarray + The index map of atoms. Should be of shape [natoms] + neighbor_list : ase.neighborlist.NewPrimitiveNeighborList + ASE neighbor list. The following method or attribute will be + used/set: bothways, self_interaction, update, build, first_neigh, + pair_second, offset_vec. + + Returns + ------- + natoms_vec : np.ndarray + The number of atoms. This tensor has the length of Ntypes + 2 + natoms[0]: nloc + natoms[1]: nall + natoms[i]: 2 <= i < Ntypes+2, number of type i atoms for nloc + coords : np.ndarray + The coordinates of atoms, including ghost atoms. Should be of + shape [nframes, nall, 3] + atype : np.ndarray + The type of atoms, including ghost atoms. Should be of shape [nall] + mesh : np.ndarray + The mesh in nei_mode=4. + imap : np.ndarray + The index map of atoms. Should be of shape [nall] + ghost_map : np.ndarray + The index map of ghost atoms. 
Should be of shape [nghost] + """ + pbc = np.repeat(cell is not None, 3) + cell = cell.reshape(3, 3) + positions = coords.reshape(-1, 3) + neighbor_list.bothways = True + neighbor_list.self_interaction = False + if neighbor_list.update(pbc, cell, positions): + neighbor_list.build(pbc, cell, positions) + first_neigh = neighbor_list.first_neigh.copy() + pair_second = neighbor_list.pair_second.copy() + offset_vec = neighbor_list.offset_vec.copy() + # get out-of-box neighbors + out_mask = np.any(offset_vec != 0, axis=1) + out_idx = pair_second[out_mask] + out_offset = offset_vec[out_mask] + out_coords = positions[out_idx] + out_offset.dot(cell) + atype = np.array(atype, dtype=int) + out_atype = atype[out_idx] + + nloc = positions.shape[0] + nghost = out_idx.size + all_coords = np.concatenate((positions, out_coords), axis=0) + all_atype = np.concatenate((atype, out_atype), axis=0) + # convert neighbor indexes + ghost_map = pair_second[out_mask] + pair_second[out_mask] = np.arange(nloc, nloc + nghost) + # get the mesh + mesh = np.zeros(16 + nloc * 2 + pair_second.size, dtype=int) + mesh[0] = nloc + # ilist + mesh[16 : 16 + nloc] = np.arange(nloc) + # numnei + mesh[16 + nloc : 16 + nloc * 2] = first_neigh[1:] - first_neigh[:-1] + # jlist + mesh[16 + nloc * 2 :] = pair_second + + # natoms_vec + natoms_vec = np.zeros(self.ntypes + 2).astype(int) + natoms_vec[0] = nloc + natoms_vec[1] = nloc + nghost + for ii in range(self.ntypes): + natoms_vec[ii + 2] = np.count_nonzero(atype == ii) + # imap append ghost atoms + imap = np.concatenate((imap, np.arange(nloc, nloc + nghost))) + return natoms_vec, all_coords, all_atype, mesh, imap, ghost_map diff --git a/deepmd/tf/infer/deep_polar.py b/deepmd/tf/infer/deep_polar.py new file mode 100644 index 0000000000..c3d42fd537 --- /dev/null +++ b/deepmd/tf/infer/deep_polar.py @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.infer.deep_polar import ( + DeepGlobalPolar, + DeepPolar, +) + +__all__ = [ + "DeepPolar", 
+ "DeepGlobalPolar", +] diff --git a/deepmd/tf/infer/deep_pot.py b/deepmd/tf/infer/deep_pot.py new file mode 100644 index 0000000000..587a13996a --- /dev/null +++ b/deepmd/tf/infer/deep_pot.py @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.infer import ( + DeepPot, +) + +__all__ = ["DeepPot"] diff --git a/deepmd/tf/infer/deep_tensor.py b/deepmd/tf/infer/deep_tensor.py new file mode 100644 index 0000000000..59fdab7cd1 --- /dev/null +++ b/deepmd/tf/infer/deep_tensor.py @@ -0,0 +1,443 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + TYPE_CHECKING, + ClassVar, + Dict, + List, + Optional, + Tuple, +) + +import numpy as np + +from deepmd.tf.common import ( + make_default_mesh, +) +from deepmd.tf.infer.deep_eval import DeepEvalOld as DeepEval +from deepmd.tf.utils.sess import ( + run_sess, +) + +if TYPE_CHECKING: + from pathlib import ( + Path, + ) + + +class DeepTensor(DeepEval): + """Evaluates a tensor model. + + Parameters + ---------- + model_file: str + The name of the frozen model file. + load_prefix: str + The prefix in the load computational graph + default_tf_graph : bool + If uses the default tf graph, otherwise build a new tf graph for evaluation + input_map : dict, optional + The input map for tf.import_graph_def. Only work with default tf graph + neighbor_list : ase.neighborlist.NeighborList, optional + The neighbor list object. If None, then build the native neighbor list. 
+ """ + + tensors: ClassVar[Dict[str, str]] = { + # descriptor attrs + "t_ntypes": "descrpt_attr/ntypes:0", + "t_rcut": "descrpt_attr/rcut:0", + # model attrs + "t_tmap": "model_attr/tmap:0", + "t_sel_type": "model_attr/sel_type:0", + "t_ouput_dim": "model_attr/output_dim:0", + # inputs + "t_coord": "t_coord:0", + "t_type": "t_type:0", + "t_natoms": "t_natoms:0", + "t_box": "t_box:0", + "t_mesh": "t_mesh:0", + } + + def __init__( + self, + model_file: "Path", + load_prefix: str = "load", + default_tf_graph: bool = False, + input_map: Optional[dict] = None, + neighbor_list=None, + ) -> None: + """Constructor.""" + DeepEval.__init__( + self, + model_file, + load_prefix=load_prefix, + default_tf_graph=default_tf_graph, + input_map=input_map, + neighbor_list=neighbor_list, + ) + # check model type + model_type = self.tensors["t_tensor"][2:-2] + assert ( + self.model_type == model_type + ), f"expect {model_type} model but got {self.model_type}" + + # now load tensors to object attributes + for attr_name, tensor_name in self.tensors.items(): + self._get_tensor(tensor_name, attr_name) + + # load optional tensors if possible + optional_tensors = { + "t_global_tensor": f"o_global_{model_type}:0", + "t_force": "o_force:0", + "t_virial": "o_virial:0", + "t_atom_virial": "o_atom_virial:0", + } + try: + # first make sure these tensor all exists (but do not modify self attr) + for attr_name, tensor_name in optional_tensors.items(): + self._get_tensor(tensor_name) + # then put those into self.attrs + for attr_name, tensor_name in optional_tensors.items(): + self._get_tensor(tensor_name, attr_name) + except KeyError: + self._support_gfv = False + else: + self.tensors.update(optional_tensors) + self._support_gfv = True + + self._run_default_sess() + self.tmap = self.tmap.decode("UTF-8").split() + + def _run_default_sess(self): + [self.ntypes, self.rcut, self.tmap, self.tselt, self.output_dim] = run_sess( + self.sess, + [ + self.t_ntypes, + self.t_rcut, + self.t_tmap, + 
self.t_sel_type, + self.t_ouput_dim, + ], + ) + + def get_ntypes(self) -> int: + """Get the number of atom types of this model.""" + return self.ntypes + + def get_rcut(self) -> float: + """Get the cut-off radius of this model.""" + return self.rcut + + def get_type_map(self) -> List[str]: + """Get the type map (element name of the atom types) of this model.""" + return self.tmap + + def get_sel_type(self) -> List[int]: + """Get the selected atom types of this model.""" + return self.tselt + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this DP.""" + return self.dfparam + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this DP.""" + return self.daparam + + def eval( + self, + coords: np.ndarray, + cells: np.ndarray, + atom_types: List[int], + atomic: bool = True, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + efield: Optional[np.ndarray] = None, + mixed_type: bool = False, + ) -> np.ndarray: + """Evaluate the model. + + Parameters + ---------- + coords + The coordinates of atoms. + The array should be of size nframes x natoms x 3 + cells + The cell of the region. + If None then non-PBC is assumed, otherwise using PBC. + The array should be of size nframes x 9 + atom_types + The atom types + The list should contain natoms ints + atomic + If True (default), return the atomic tensor + Otherwise return the global tensor + fparam + Not used in this model + aparam + Not used in this model + efield + Not used in this model + mixed_type + Whether to perform the mixed_type mode. + If True, the input data has the mixed_type format (see doc/model/train_se_atten.md), + in which frames in a system may have different natoms_vec(s), with the same nloc. 
+ + Returns + ------- + tensor + The returned tensor + If atomic == False then of size nframes x output_dim + else of size nframes x natoms x output_dim + """ + # standarize the shape of inputs + if mixed_type: + natoms = atom_types[0].size + atom_types = np.array(atom_types, dtype=int).reshape([-1, natoms]) + else: + atom_types = np.array(atom_types, dtype=int).reshape([-1]) + natoms = atom_types.size + coords = np.reshape(np.array(coords), [-1, natoms * 3]) + nframes = coords.shape[0] + if cells is None: + pbc = False + cells = np.tile(np.eye(3), [nframes, 1]).reshape([nframes, 9]) + else: + pbc = True + cells = np.array(cells).reshape([nframes, 9]) + + # sort inputs + coords, atom_types, imap, sel_at, sel_imap = self.sort_input( + coords, atom_types, sel_atoms=self.get_sel_type(), mixed_type=mixed_type + ) + + # make natoms_vec and default_mesh + if self.neighbor_list is None: + natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type) + assert natoms_vec[0] == natoms + mesh = make_default_mesh(pbc, mixed_type) + else: + if nframes > 1: + raise NotImplementedError( + "neighbor_list does not support multiple frames" + ) + ( + natoms_vec, + coords, + atom_types, + mesh, + imap, + _, + ) = self.build_neighbor_list( + coords, + cells if cells is not None else None, + atom_types, + imap, + self.neighbor_list, + ) + + # evaluate + feed_dict_test = {} + feed_dict_test[self.t_natoms] = natoms_vec + if mixed_type: + feed_dict_test[self.t_type] = atom_types.reshape([-1]) + else: + feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape( + [-1] + ) + feed_dict_test[self.t_coord] = np.reshape(coords, [-1]) + feed_dict_test[self.t_box] = np.reshape(cells, [-1]) + feed_dict_test[self.t_mesh] = mesh + + if atomic: + assert ( + "global" not in self.model_type + ), f"cannot do atomic evaluation with model type {self.model_type}" + t_out = [self.t_tensor] + else: + assert ( + self._support_gfv or "global" in self.model_type + ), f"do not support global 
tensor evaluation with old {self.model_type} model" + t_out = [self.t_global_tensor if self._support_gfv else self.t_tensor] + v_out = run_sess(self.sess, t_out, feed_dict=feed_dict_test) + tensor = v_out[0] + + # reverse map of the outputs + if atomic: + tensor = np.array(tensor) + tensor = self.reverse_map( + np.reshape(tensor, [nframes, -1, self.output_dim]), sel_imap + ) + tensor = np.reshape(tensor, [nframes, len(sel_at), self.output_dim]) + else: + tensor = np.reshape(tensor, [nframes, self.output_dim]) + + return tensor + + def eval_full( + self, + coords: np.ndarray, + cells: np.ndarray, + atom_types: List[int], + atomic: bool = False, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + efield: Optional[np.ndarray] = None, + mixed_type: bool = False, + ) -> Tuple[np.ndarray, ...]: + """Evaluate the model with interface similar to the energy model. + Will return global tensor, component-wise force and virial + and optionally atomic tensor and atomic virial. + + Parameters + ---------- + coords + The coordinates of atoms. + The array should be of size nframes x natoms x 3 + cells + The cell of the region. + If None then non-PBC is assumed, otherwise using PBC. + The array should be of size nframes x 9 + atom_types + The atom types + The list should contain natoms ints + atomic + Whether to calculate atomic tensor and virial + fparam + Not used in this model + aparam + Not used in this model + efield + Not used in this model + mixed_type + Whether to perform the mixed_type mode. + If True, the input data has the mixed_type format (see doc/model/train_se_atten.md), + in which frames in a system may have different natoms_vec(s), with the same nloc. + + Returns + ------- + tensor + The global tensor. + shape: [nframes x nout] + force + The component-wise force (negative derivative) on each atom. + shape: [nframes x nout x natoms x 3] + virial + The component-wise virial of the tensor. 
+ shape: [nframes x nout x 9] + atom_tensor + The atomic tensor. Only returned when atomic == True + shape: [nframes x natoms x nout] + atom_virial + The atomic virial. Only returned when atomic == True + shape: [nframes x nout x natoms x 9] + """ + assert self._support_gfv, "do not support eval_full with old tensor model" + + # standardize the shape of inputs + if mixed_type: + natoms = atom_types[0].size + atom_types = np.array(atom_types, dtype=int).reshape([-1, natoms]) + else: + atom_types = np.array(atom_types, dtype=int).reshape([-1]) + natoms = atom_types.size + coords = np.reshape(np.array(coords), [-1, natoms * 3]) + nframes = coords.shape[0] + if cells is None: + pbc = False + cells = np.tile(np.eye(3), [nframes, 1]).reshape([nframes, 9]) + else: + pbc = True + cells = np.array(cells).reshape([nframes, 9]) + nout = self.output_dim + + # sort inputs + coords, atom_types, imap, sel_at, sel_imap = self.sort_input( + coords, atom_types, sel_atoms=self.get_sel_type(), mixed_type=mixed_type + ) + + # make natoms_vec and default_mesh + if self.neighbor_list is None: + natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type) + assert natoms_vec[0] == natoms + mesh = make_default_mesh(pbc, mixed_type) + ghost_map = None + else: + if nframes > 1: + raise NotImplementedError( + "neighbor_list does not support multiple frames" + ) + ( + natoms_vec, + coords, + atom_types, + mesh, + imap, + ghost_map, + ) = self.build_neighbor_list( + coords, + cells if cells is not None else None, + atom_types, + imap, + self.neighbor_list, + ) + + # evaluate + feed_dict_test = {} + feed_dict_test[self.t_natoms] = natoms_vec + if mixed_type: + feed_dict_test[self.t_type] = atom_types.reshape([-1]) + else: + feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape( + [-1] + ) + feed_dict_test[self.t_coord] = np.reshape(coords, [-1]) + feed_dict_test[self.t_box] = np.reshape(cells, [-1]) + feed_dict_test[self.t_mesh] = mesh + + t_out = [self.t_global_tensor, 
self.t_force, self.t_virial] + if atomic: + t_out += [self.t_tensor, self.t_atom_virial] + + v_out = self.sess.run(t_out, feed_dict=feed_dict_test) + gt = v_out[0] # global tensor + force = v_out[1] + virial = v_out[2] + if atomic: + at = v_out[3] # atom tensor + av = v_out[4] # atom virial + + nloc = natoms_vec[0] + nall = natoms_vec[1] + + if ghost_map is not None: + # add the value of ghost atoms to real atoms + force = np.reshape(force, [nframes * nout, -1, 3]) + for ii in range(nframes * nout): + np.add.at(force[ii], ghost_map, force[ii, nloc:]) + if atomic: + av = np.reshape(av, [nframes * nout, -1, 9]) + for ii in range(nframes * nout): + np.add.at(av[ii], ghost_map, av[ii, nloc:]) + + # please note here the shape are wrong! + force = self.reverse_map(np.reshape(force, [nframes * nout, nall, 3]), imap) + if atomic: + at = self.reverse_map( + np.reshape(at, [nframes, len(sel_at), nout]), sel_imap + ) + av = self.reverse_map(np.reshape(av, [nframes * nout, nall, 9]), imap) + + # make sure the shapes are correct here + gt = np.reshape(gt, [nframes, nout]) + force = np.reshape(force, [nframes, nout, nall, 3]) + if nloc < nall: + force = force[:, :, :nloc, :] + virial = np.reshape(virial, [nframes, nout, 9]) + if atomic: + at = np.reshape(at, [nframes, len(sel_at), self.output_dim]) + av = np.reshape(av, [nframes, nout, nall, 9]) + if nloc < nall: + av = av[:, :, :nloc, :] + return gt, force, virial, at, av + else: + return gt, force, virial diff --git a/deepmd/tf/infer/deep_wfc.py b/deepmd/tf/infer/deep_wfc.py new file mode 100644 index 0000000000..f7674bdde7 --- /dev/null +++ b/deepmd/tf/infer/deep_wfc.py @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.infer.deep_wfc import ( + DeepWFC, +) + +__all__ = [ + "DeepWFC", +] diff --git a/deepmd/infer/ewald_recp.py b/deepmd/tf/infer/ewald_recp.py similarity index 97% rename from deepmd/infer/ewald_recp.py rename to deepmd/tf/infer/ewald_recp.py index 429a3cdfd6..110188c34f 100644 --- 
a/deepmd/infer/ewald_recp.py +++ b/deepmd/tf/infer/ewald_recp.py @@ -5,13 +5,13 @@ import numpy as np -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_TF_FLOAT_PRECISION, default_tf_session_config, op_module, tf, ) -from deepmd.utils.sess import ( +from deepmd.tf.utils.sess import ( run_sess, ) diff --git a/deepmd/tf/infer/model_devi.py b/deepmd/tf/infer/model_devi.py new file mode 100644 index 0000000000..4ee979ac67 --- /dev/null +++ b/deepmd/tf/infer/model_devi.py @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.infer.model_devi import ( + calc_model_devi, + calc_model_devi_e, + calc_model_devi_f, + calc_model_devi_v, + make_model_devi, + write_model_devi_out, +) + +__all__ = [ + "make_model_devi", + "calc_model_devi", + "write_model_devi_out", + "calc_model_devi_e", + "calc_model_devi_f", + "calc_model_devi_v", +] diff --git a/deepmd/lmp.py b/deepmd/tf/lmp.py similarity index 99% rename from deepmd/lmp.py rename to deepmd/tf/lmp.py index 5238cd9935..b2e47308ed 100644 --- a/deepmd/lmp.py +++ b/deepmd/tf/lmp.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Register entry points for lammps-wheel.""" + import os import platform from importlib import ( @@ -17,7 +18,7 @@ Version, ) -from deepmd.env import ( +from deepmd.tf.env import ( SHARED_LIB_DIR, TF_VERSION, tf, diff --git a/deepmd_utils/loggers/__init__.py b/deepmd/tf/loggers/__init__.py similarity index 51% rename from deepmd_utils/loggers/__init__.py rename to deepmd/tf/loggers/__init__.py index 39aa76139d..d9227d3620 100644 --- a/deepmd_utils/loggers/__init__.py +++ b/deepmd/tf/loggers/__init__.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -"""Module taking care of logging duties.""" +"""Alias of deepmd.loggers for backward compatibility.""" -from .loggers import ( +from deepmd.loggers.loggers import ( set_log_handles, ) diff --git a/deepmd/tf/loggers/loggers.py b/deepmd/tf/loggers/loggers.py new file mode 100644 index 
0000000000..be948c9858 --- /dev/null +++ b/deepmd/tf/loggers/loggers.py @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Alias of deepmd.loggers.loggers for backward compatibility.""" + +from deepmd.loggers.loggers import ( + set_log_handles, +) + +__all__ = ["set_log_handles"] diff --git a/deepmd/loss/__init__.py b/deepmd/tf/loss/__init__.py similarity index 100% rename from deepmd/loss/__init__.py rename to deepmd/tf/loss/__init__.py diff --git a/deepmd/loss/dos.py b/deepmd/tf/loss/dos.py similarity index 98% rename from deepmd/loss/dos.py rename to deepmd/tf/loss/dos.py index 7d38f2b17a..763e75638f 100644 --- a/deepmd/loss/dos.py +++ b/deepmd/tf/loss/dos.py @@ -1,15 +1,15 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import numpy as np -from deepmd.common import ( +from deepmd.tf.common import ( add_data_requirement, ) -from deepmd.env import ( +from deepmd.tf.env import ( global_cvt_2_ener_float, global_cvt_2_tf_float, tf, ) -from deepmd.utils.sess import ( +from deepmd.tf.utils.sess import ( run_sess, ) diff --git a/deepmd/loss/ener.py b/deepmd/tf/loss/ener.py similarity index 99% rename from deepmd/loss/ener.py rename to deepmd/tf/loss/ener.py index d7f83f09e5..baa4aa3e02 100644 --- a/deepmd/loss/ener.py +++ b/deepmd/tf/loss/ener.py @@ -5,15 +5,15 @@ import numpy as np -from deepmd.common import ( +from deepmd.tf.common import ( add_data_requirement, ) -from deepmd.env import ( +from deepmd.tf.env import ( global_cvt_2_ener_float, global_cvt_2_tf_float, tf, ) -from deepmd.utils.sess import ( +from deepmd.tf.utils.sess import ( run_sess, ) @@ -120,7 +120,6 @@ def __init__( "atom_pref", 1, atomic=True, must=False, high_prec=False, repeat=3 ) # drdq: the partial derivative of atomic coordinates w.r.t. generalized coordinates - # TODO: could numb_generalized_coord decided from the training data? 
if self.has_gf > 0: add_data_requirement( "drdq", diff --git a/deepmd/loss/loss.py b/deepmd/tf/loss/loss.py similarity index 98% rename from deepmd/loss/loss.py rename to deepmd/tf/loss/loss.py index a719a08d81..327aea5230 100644 --- a/deepmd/loss/loss.py +++ b/deepmd/tf/loss/loss.py @@ -10,7 +10,7 @@ import numpy as np -from deepmd.env import ( +from deepmd.tf.env import ( tf, ) diff --git a/deepmd/loss/tensor.py b/deepmd/tf/loss/tensor.py similarity index 98% rename from deepmd/loss/tensor.py rename to deepmd/tf/loss/tensor.py index a40f95a18e..3be01d3871 100644 --- a/deepmd/loss/tensor.py +++ b/deepmd/tf/loss/tensor.py @@ -1,14 +1,14 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import numpy as np -from deepmd.common import ( +from deepmd.tf.common import ( add_data_requirement, ) -from deepmd.env import ( +from deepmd.tf.env import ( global_cvt_2_tf_float, tf, ) -from deepmd.utils.sess import ( +from deepmd.tf.utils.sess import ( run_sess, ) diff --git a/deepmd/model/__init__.py b/deepmd/tf/model/__init__.py similarity index 54% rename from deepmd/model/__init__.py rename to deepmd/tf/model/__init__.py index d366ca1441..1d100f2b09 100644 --- a/deepmd/model/__init__.py +++ b/deepmd/tf/model/__init__.py @@ -1,4 +1,17 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.tf.model.frozen import ( + FrozenModel, +) +from deepmd.tf.model.linear import ( + LinearEnergyModel, +) +from deepmd.tf.model.pairtab import ( + PairTabModel, +) +from deepmd.tf.model.pairwise_dprc import ( + PairwiseDPRc, +) + from .dos import ( DOSModel, ) @@ -23,4 +36,8 @@ "GlobalPolarModel", "PolarModel", "WFCModel", + "FrozenModel", + "LinearEnergyModel", + "PairTabModel", + "PairwiseDPRc", ] diff --git a/deepmd/model/dos.py b/deepmd/tf/model/dos.py similarity index 99% rename from deepmd/model/dos.py rename to deepmd/tf/model/dos.py index 22e291a0f0..265026b60a 100644 --- a/deepmd/model/dos.py +++ b/deepmd/tf/model/dos.py @@ -5,12 +5,12 @@ Union, ) -from deepmd.env import ( +from 
deepmd.tf.env import ( MODEL_VERSION, global_cvt_2_ener_float, tf, ) -from deepmd.utils.type_embed import ( +from deepmd.tf.utils.type_embed import ( TypeEmbedNet, ) diff --git a/deepmd/model/ener.py b/deepmd/tf/model/ener.py similarity index 97% rename from deepmd/model/ener.py rename to deepmd/tf/model/ener.py index 0d8d66b305..a493fe0517 100644 --- a/deepmd/model/ener.py +++ b/deepmd/tf/model/ener.py @@ -7,19 +7,19 @@ import numpy as np -from deepmd.env import ( +from deepmd.tf.env import ( MODEL_VERSION, global_cvt_2_ener_float, op_module, tf, ) -from deepmd.utils.data_system import ( +from deepmd.tf.utils.data_system import ( DeepmdDataSystem, ) -from deepmd.utils.spin import ( +from deepmd.tf.utils.spin import ( Spin, ) -from deepmd.utils.type_embed import ( +from deepmd.tf.utils.type_embed import ( TypeEmbedNet, ) @@ -486,7 +486,7 @@ def change_energy_bias( frozen_model: str, origin_type_map: list, full_type_map: str, - bias_shift: str = "delta", + bias_adjust_mode: str = "change-by-statistic", ) -> None: """Change the energy bias according to the input data and the pretrained model. @@ -500,17 +500,17 @@ def change_energy_bias( The original type_map in dataset, they are targets to change the energy bias. full_type_map : str The full type_map in pretrained model - bias_shift : str - The mode for changing energy bias : ['delta', 'statistic'] - 'delta' : perform predictions on energies of target dataset, + bias_adjust_mode : str + The mode for changing energy bias : ['change-by-statistic', 'set-by-statistic'] + 'change-by-statistic' : perform predictions on energies of target dataset, and do least sqaure on the errors to obtain the target shift as bias. - 'statistic' : directly use the statistic energy bias in the target dataset. + 'set-by-statistic' : directly use the statistic energy bias in the target dataset. 
""" self.fitting.change_energy_bias( data, frozen_model, origin_type_map, full_type_map, - bias_shift, + bias_adjust_mode, self.data_bias_nsample, ) diff --git a/deepmd/model/frozen.py b/deepmd/tf/model/frozen.py similarity index 68% rename from deepmd/model/frozen.py rename to deepmd/tf/model/frozen.py index 38f342ebec..86676bfe0b 100644 --- a/deepmd/model/frozen.py +++ b/deepmd/tf/model/frozen.py @@ -1,4 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import tempfile from enum import ( Enum, ) @@ -7,26 +10,37 @@ Union, ) -from deepmd.env import ( +from deepmd.entrypoints.convert_backend import ( + convert_backend, +) +from deepmd.infer.deep_pot import ( + DeepPot, +) +from deepmd.tf.env import ( GLOBAL_TF_FLOAT_PRECISION, MODEL_VERSION, tf, ) -from deepmd.fit.fitting import ( +from deepmd.tf.fit.fitting import ( Fitting, ) -from deepmd.infer import ( +from deepmd.tf.infer import ( DeepPotential, ) -from deepmd.loss.loss import ( +from deepmd.tf.loss.loss import ( Loss, ) +from deepmd.tf.utils.graph import ( + get_tensor_by_name_from_graph, + load_graph_def, +) from .model import ( Model, ) +@Model.register("frozen") class FrozenModel(Model): """Load model from a frozen model, which cannot be trained. @@ -39,8 +53,20 @@ class FrozenModel(Model): def __init__(self, model_file: str, **kwargs): super().__init__(**kwargs) self.model_file = model_file - self.model = DeepPotential(model_file) - self.model_type = self.model.model_type + if not model_file.endswith(".pb"): + # try to convert from other formats + with tempfile.NamedTemporaryFile( + suffix=".pb", dir=os.curdir, delete=False + ) as f: + convert_backend(INPUT=model_file, OUTPUT=f.name) + self.model_file = f.name + self.model = DeepPotential(self.model_file) + if isinstance(self.model, DeepPot): + self.model_type = "ener" + else: + raise NotImplementedError( + "This model type has not been implemented. " "Contribution is welcome!" 
+ ) def build( self, @@ -122,14 +148,26 @@ def build( ) if self.model_type == "ener": return { - "energy": tf.identity(self.model.t_energy, name="o_energy" + suffix), - "force": tf.identity(self.model.t_force, name="o_force" + suffix), - "virial": tf.identity(self.model.t_virial, name="o_virial" + suffix), + # must visit the backend class + "energy": tf.identity( + self.model.deep_eval.output_tensors["energy_redu"], + name="o_energy" + suffix, + ), + "force": tf.identity( + self.model.deep_eval.output_tensors["energy_derv_r"], + name="o_force" + suffix, + ), + "virial": tf.identity( + self.model.deep_eval.output_tensors["energy_derv_c_redu"], + name="o_virial" + suffix, + ), "atom_ener": tf.identity( - self.model.t_ae, name="o_atom_energy" + suffix + self.model.deep_eval.output_tensors["energy"], + name="o_atom_energy" + suffix, ), "atom_virial": tf.identity( - self.model.t_av, name="o_atom_virial" + suffix + self.model.deep_eval.output_tensors["energy_derv_c"], + name="o_atom_virial" + suffix, ), "coord": coord_, "atype": atype_, @@ -207,3 +245,19 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict): """ # we don't know how to compress it, so no neighbor statistics here return local_jdata + + def serialize(self, suffix: str = "") -> dict: + # try to recover the original model + # the current graph contains a prefix "load", + # so it cannot used to recover the original model + graph, graph_def = load_graph_def(self.model_file) + t_jdata = get_tensor_by_name_from_graph(graph, "train_attr/training_script") + jdata = json.loads(t_jdata) + model = Model(**jdata["model"]) + # important! 
must be called before serialize + model.init_variables(graph=graph, graph_def=graph_def) + return model.serialize() + + @classmethod + def deserialize(cls, data: dict, suffix: str = ""): + raise RuntimeError("Should not touch here.") diff --git a/deepmd/model/linear.py b/deepmd/tf/model/linear.py similarity index 98% rename from deepmd/model/linear.py rename to deepmd/tf/model/linear.py index 7c527fe9dc..ae1b0b5c78 100644 --- a/deepmd/model/linear.py +++ b/deepmd/tf/model/linear.py @@ -11,15 +11,15 @@ Union, ) -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_TF_FLOAT_PRECISION, MODEL_VERSION, tf, ) -from deepmd.fit.fitting import ( +from deepmd.tf.fit.fitting import ( Fitting, ) -from deepmd.loss.loss import ( +from deepmd.tf.loss.loss import ( Loss, ) @@ -54,7 +54,6 @@ def __init__(self, models: List[dict], weights: List[float], **kwargs): self.weights = [1 / len(models) for _ in range(len(models))] elif weights == "sum": self.weights = [1 for _ in range(len(models))] - # TODO: add more weights, for example, so-called committee models else: raise ValueError(f"Invalid weights {weights}") @@ -147,6 +146,7 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict): return local_jdata_cpy +@Model.register("linear_ener") class LinearEnergyModel(LinearModel): """Linear energy model make linear combinations of several existing energy models.""" diff --git a/deepmd/model/model.py b/deepmd/tf/model/model.py similarity index 78% rename from deepmd/model/model.py rename to deepmd/tf/model/model.py index 6117b4942d..76bcc6072b 100644 --- a/deepmd/model/model.py +++ b/deepmd/tf/model/model.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import copy from abc import ( ABC, abstractmethod, @@ -13,40 +14,61 @@ Union, ) -from deepmd.descriptor.descriptor import ( +from deepmd.common import ( + j_get_type, +) +from deepmd.tf.descriptor.descriptor import ( Descriptor, ) -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_TF_FLOAT_PRECISION, 
tf, ) -from deepmd.fit.fitting import ( +from deepmd.tf.fit.dipole import ( + DipoleFittingSeA, +) +from deepmd.tf.fit.dos import ( + DOSFitting, +) +from deepmd.tf.fit.ener import ( + EnerFitting, +) +from deepmd.tf.fit.fitting import ( Fitting, ) -from deepmd.loss.loss import ( +from deepmd.tf.fit.polar import ( + PolarFittingSeA, +) +from deepmd.tf.loss.loss import ( Loss, ) -from deepmd.utils.argcheck import ( +from deepmd.tf.utils.argcheck import ( type_embedding_args, ) -from deepmd.utils.data_system import ( +from deepmd.tf.utils.data_system import ( DeepmdDataSystem, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( load_graph_def, ) -from deepmd.utils.pair_tab import ( +from deepmd.tf.utils.pair_tab import ( PairTab, ) -from deepmd.utils.spin import ( +from deepmd.tf.utils.spin import ( Spin, ) -from deepmd.utils.type_embed import ( +from deepmd.tf.utils.type_embed import ( TypeEmbedNet, ) +from deepmd.utils.plugin import ( + make_plugin_registry, +) +from deepmd.utils.version import ( + check_version_compatibility, +) -class Model(ABC): +class Model(ABC, make_plugin_registry("model")): """Abstract base model. Parameters @@ -78,52 +100,10 @@ class Model(ABC): Compression information for internal use """ - @classmethod - def get_class_by_input(cls, input: dict): - """Get the class by input data. 
- - Parameters - ---------- - input : dict - The input data - """ - # infer model type by fitting_type - from deepmd.model.frozen import ( - FrozenModel, - ) - from deepmd.model.linear import ( - LinearEnergyModel, - ) - from deepmd.model.multi import ( - MultiModel, - ) - from deepmd.model.pairtab import ( - PairTabModel, - ) - from deepmd.model.pairwise_dprc import ( - PairwiseDPRc, - ) - - model_type = input.get("type", "standard") - if model_type == "standard": - return StandardModel - elif model_type == "multi": - return MultiModel - elif model_type == "pairwise_dprc": - return PairwiseDPRc - elif model_type == "frozen": - return FrozenModel - elif model_type == "linear_ener": - return LinearEnergyModel - elif model_type == "pairtab": - return PairTabModel - else: - raise ValueError(f"unknown model type: {model_type}") - def __new__(cls, *args, **kwargs): if cls is Model: # init model - cls = cls.get_class_by_input(kwargs) + cls = cls.get_class_by_type(kwargs.get("type", "standard")) return cls.__new__(cls, *args, **kwargs) return super().__new__(cls) @@ -428,7 +408,7 @@ def change_energy_bias( frozen_model: str, origin_type_map: list, full_type_map: str, - bias_shift: str = "delta", + bias_adjust_mode: str = "change-by-statistic", ) -> None: """Change the energy bias according to the input data and the pretrained model. @@ -442,11 +422,11 @@ def change_energy_bias( The original type_map in dataset, they are targets to change the energy bias. full_type_map : str The full type_map in pretrained model - bias_shift : str - The mode for changing energy bias : ['delta', 'statistic'] - 'delta' : perform predictions on energies of target dataset, + bias_adjust_mode : str + The mode for changing energy bias : ['change-by-statistic', 'set-by-statistic'] + 'change-by-statistic' : perform predictions on energies of target dataset, and do least sqaure on the errors to obtain the target shift as bias. 
- 'statistic' : directly use the statistic energy bias in the target dataset. + 'set-by-statistic' : directly use the statistic energy bias in the target dataset. """ raise RuntimeError("Not supported") @@ -515,7 +495,7 @@ def get_feed_dict( natoms[1]: total number of atoms held by this processor natoms[i]: 2 <= i < Ntypes+2, number of type i atoms box : tf.Tensor - The box. Can be generated by deepmd.model.make_stat_input + The box. Can be generated by deepmd.tf.model.make_stat_input mesh : tf.Tensor For historical reasons, only the length of the Tensor matters. if size of mesh == 6, pbc is assumed. @@ -562,10 +542,52 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict) -> dict: dict The updated local data """ - cls = cls.get_class_by_input(local_jdata) + cls = cls.get_class_by_type(local_jdata.get("type", "standard")) return cls.update_sel(global_jdata, local_jdata) + @classmethod + def deserialize(cls, data: dict, suffix: str = "") -> "Model": + """Deserialize the model. + + There is no suffix in a native DP model, but it is important + for the TF backend. + + Parameters + ---------- + data : dict + The serialized data + suffix : str, optional + Name suffix to identify this model + + Returns + ------- + Model + The deserialized Model + """ + if cls is Model: + return Model.get_class_by_type(data.get("type", "standard")).deserialize( + data, + suffix=suffix, + ) + raise NotImplementedError("Not implemented in class %s" % cls.__name__) + + def serialize(self, suffix: str = "") -> dict: + """Serialize the model. + There is no suffix in a native DP model, but it is important + for the TF backend. + + Returns + ------- + dict + The serialized data + suffix : str, optional + Name suffix to identify this model + """ + raise NotImplementedError("Not implemented in class %s" % type(self).__name__) + + +@Model.register("standard") class StandardModel(Model): """Standard model, which must contain a descriptor and a fitting. 
@@ -594,16 +616,23 @@ def __new__(cls, *args, **kwargs): ) if cls is StandardModel: - fitting_type = kwargs["fitting_net"]["type"] + if isinstance(kwargs["fitting_net"], dict): + fitting_type = Fitting.get_class_by_type( + j_get_type(kwargs["fitting_net"], cls.__name__) + ) + elif isinstance(kwargs["fitting_net"], Fitting): + fitting_type = type(kwargs["fitting_net"]) + else: + raise RuntimeError("get unknown fitting type when building model") # init model # infer model type by fitting_type - if fitting_type == "ener": + if issubclass(fitting_type, EnerFitting): cls = EnerModel - elif fitting_type == "dos": + elif issubclass(fitting_type, DOSFitting): cls = DOSModel - elif fitting_type == "dipole": + elif issubclass(fitting_type, DipoleFittingSeA): cls = DipoleModel - elif fitting_type == "polar": + elif issubclass(fitting_type, PolarFittingSeA): cls = PolarModel else: raise RuntimeError("get unknown fitting type when building model") @@ -631,7 +660,16 @@ def __init__( if isinstance(fitting_net, Fitting): self.fitting = fitting_net else: - self.fitting = Fitting(**fitting_net, descrpt=self.descrpt, spin=self.spin) + if fitting_net["type"] in ["dipole", "polar"]: + fitting_net["embedding_width"] = self.descrpt.get_dim_rot_mat_1() + self.fitting = Fitting( + **fitting_net, + descrpt=self.descrpt, + spin=self.spin, + ntypes=self.descrpt.get_ntypes(), + dim_descrpt=self.descrpt.get_dim_out(), + mixed_types=type_embedding is not None or self.descrpt.explicit_ntypes, + ) self.rcut = self.descrpt.get_rcut() self.ntypes = self.descrpt.get_ntypes() @@ -640,6 +678,7 @@ def __init__( self.typeebd = type_embedding elif type_embedding is not None: self.typeebd = TypeEmbedNet( + ntypes=self.ntypes, **type_embedding, padding=self.descrpt.explicit_ntypes, ) @@ -648,6 +687,7 @@ def __init__( default_args_dict = {i.name: i.default for i in default_args} default_args_dict["activation_function"] = None self.typeebd = TypeEmbedNet( + ntypes=self.ntypes, **default_args_dict, 
padding=True, ) @@ -724,3 +764,63 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict): global_jdata, local_jdata["descriptor"] ) return local_jdata_cpy + + @classmethod + def deserialize(cls, data: dict, suffix: str = "") -> "StandardModel": + """Deserialize the model. + + There is no suffix in a native DP model, but it is important + for the TF backend. + + Parameters + ---------- + data : dict + The serialized data + suffix : str, optional + Name suffix to identify this model + + Returns + ------- + StandardModel + The deserialized model + """ + data = copy.deepcopy(data) + check_version_compatibility(data.pop("@version", 1), 1, 1) + descriptor = Descriptor.deserialize(data.pop("descriptor"), suffix=suffix) + fitting = Fitting.deserialize(data.pop("fitting"), suffix=suffix) + data.pop("atom_exclude_types") + data.pop("pair_exclude_types") + return cls( + descriptor=descriptor, + fitting_net=fitting, + **data, + ) + + def serialize(self, suffix: str = "") -> dict: + """Serialize the model. + + There is no suffix in a native DP model, but it is important + for the TF backend. 
+ + Returns + ------- + dict + The serialized data + suffix : str, optional + Name suffix to identify this descriptor + """ + if self.typeebd is not None: + raise NotImplementedError("type embedding is not supported") + if self.spin is not None: + raise NotImplementedError("spin is not supported") + return { + "@class": "Model", + "type": "standard", + "@version": 1, + "type_map": self.type_map, + "descriptor": self.descrpt.serialize(suffix=suffix), + "fitting": self.fitting.serialize(suffix=suffix), + # not supported yet + "atom_exclude_types": [], + "pair_exclude_types": [], + } diff --git a/deepmd/model/model_stat.py b/deepmd/tf/model/model_stat.py similarity index 85% rename from deepmd/model/model_stat.py rename to deepmd/tf/model/model_stat.py index 933a634ce8..db70262d50 100644 --- a/deepmd/model/model_stat.py +++ b/deepmd/tf/model/model_stat.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Alias for backward compatibility.""" -from deepmd_utils.utils.model_stat import ( + +from deepmd.utils.model_stat import ( _make_all_stat_ref, make_stat_input, merge_sys_stat, diff --git a/deepmd/model/multi.py b/deepmd/tf/model/multi.py similarity index 96% rename from deepmd/model/multi.py rename to deepmd/tf/model/multi.py index 83b231c0e8..e49ad47ee3 100644 --- a/deepmd/model/multi.py +++ b/deepmd/tf/model/multi.py @@ -8,41 +8,41 @@ import numpy as np -from deepmd.descriptor.descriptor import ( +from deepmd.tf.descriptor.descriptor import ( Descriptor, ) -from deepmd.env import ( +from deepmd.tf.env import ( MODEL_VERSION, global_cvt_2_ener_float, op_module, tf, ) -from deepmd.fit import ( +from deepmd.tf.fit import ( DipoleFittingSeA, DOSFitting, EnerFitting, GlobalPolarFittingSeA, PolarFittingSeA, ) -from deepmd.fit.fitting import ( +from deepmd.tf.fit.fitting import ( Fitting, ) -from deepmd.loss.loss import ( +from deepmd.tf.loss.loss import ( Loss, ) -from deepmd.utils.argcheck import ( +from deepmd.tf.utils.argcheck import ( 
type_embedding_args, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( get_tensor_by_name_from_graph, ) -from deepmd.utils.pair_tab import ( +from deepmd.tf.utils.pair_tab import ( PairTab, ) -from deepmd.utils.spin import ( +from deepmd.tf.utils.spin import ( Spin, ) -from deepmd.utils.type_embed import ( +from deepmd.tf.utils.type_embed import ( TypeEmbedNet, ) @@ -55,6 +55,7 @@ ) +@Model.register("multi") class MultiModel(Model): """Multi-task model. @@ -133,15 +134,25 @@ def __init__( if isinstance(item_fitting_param, Fitting): fitting_dict[item] = item_fitting_param else: + if item_fitting_param["type"] in ["dipole", "polar"]: + item_fitting_param["embedding_width"] = ( + self.descrpt.get_dim_rot_mat_1() + ) fitting_dict[item] = Fitting( - **item_fitting_param, descrpt=self.descrpt, spin=self.spin + **item_fitting_param, + descrpt=self.descrpt, + spin=self.spin, + ntypes=self.descrpt.get_ntypes(), + dim_descrpt=self.descrpt.get_dim_out(), ) + self.ntypes = self.descrpt.get_ntypes() # type embedding if type_embedding is not None and isinstance(type_embedding, TypeEmbedNet): self.typeebd = type_embedding elif type_embedding is not None: self.typeebd = TypeEmbedNet( + ntypes=self.ntypes, **type_embedding, padding=self.descrpt.explicit_ntypes, ) @@ -150,6 +161,7 @@ def __init__( default_args_dict = {i.name: i.default for i in default_args} default_args_dict["activation_function"] = None self.typeebd = TypeEmbedNet( + ntypes=self.ntypes, **default_args_dict, padding=True, ) @@ -158,7 +170,6 @@ def __init__( # descriptor self.rcut = self.descrpt.get_rcut() - self.ntypes = self.descrpt.get_ntypes() # fitting self.fitting_dict = fitting_dict self.numb_fparam_dict = { diff --git a/deepmd/model/pairtab.py b/deepmd/tf/model/pairtab.py similarity index 96% rename from deepmd/model/pairtab.py rename to deepmd/tf/model/pairtab.py index 38934818e6..3cc1114f81 100644 --- a/deepmd/model/pairtab.py +++ b/deepmd/tf/model/pairtab.py @@ -10,27 +10,31 @@ 
import numpy as np -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_TF_FLOAT_PRECISION, MODEL_VERSION, global_cvt_2_ener_float, op_module, tf, ) -from deepmd.fit.fitting import ( +from deepmd.tf.fit.fitting import ( Fitting, ) -from deepmd.loss.loss import ( +from deepmd.tf.loss.loss import ( Loss, ) -from deepmd.model.model import ( +from deepmd.tf.model.model import ( Model, ) -from deepmd.utils.pair_tab import ( +from deepmd.tf.utils.pair_tab import ( PairTab, ) +from deepmd.tf.utils.update_sel import ( + UpdateSel, +) +@Model.register("pairtab") class PairTabModel(Model): """Pairwise tabulation energy model. @@ -280,9 +284,5 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict) -> dict: dict The updated local data """ - from deepmd.entrypoints.train import ( - update_one_sel, - ) - local_jdata_cpy = local_jdata.copy() - return update_one_sel(global_jdata, local_jdata_cpy, True) + return UpdateSel().update_one_sel(global_jdata, local_jdata_cpy, True) diff --git a/deepmd/model/pairwise_dprc.py b/deepmd/tf/model/pairwise_dprc.py similarity index 96% rename from deepmd/model/pairwise_dprc.py rename to deepmd/tf/model/pairwise_dprc.py index f74571febb..92e943d486 100644 --- a/deepmd/model/pairwise_dprc.py +++ b/deepmd/tf/model/pairwise_dprc.py @@ -6,33 +6,37 @@ Union, ) -from deepmd.common import ( +from deepmd.tf.common import ( add_data_requirement, make_default_mesh, ) -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_TF_FLOAT_PRECISION, MODEL_VERSION, op_module, tf, ) -from deepmd.loss.loss import ( +from deepmd.tf.loss.loss import ( Loss, ) -from deepmd.model.model import ( +from deepmd.tf.model.model import ( Model, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( load_graph_def, ) -from deepmd.utils.spin import ( +from deepmd.tf.utils.spin import ( Spin, ) -from deepmd.utils.type_embed import ( +from deepmd.tf.utils.type_embed import ( TypeEmbedNet, ) +from deepmd.tf.utils.update_sel import ( + 
UpdateSel, +) +@Model.register("pairwise_dprc") class PairwiseDPRc(Model): """Pairwise Deep Potential - Range Correction.""" @@ -73,11 +77,13 @@ def __init__( compress=compress, **kwargs, ) + self.ntypes = len(type_map) # type embedding if isinstance(type_embedding, TypeEmbedNet): self.typeebd = type_embedding else: self.typeebd = TypeEmbedNet( + ntypes=self.ntypes, **type_embedding, # must use se_atten, so it must be True padding=True, @@ -96,7 +102,6 @@ def __init__( compress=compress, ) add_data_requirement("aparam", 1, atomic=True, must=True, high_prec=False) - self.ntypes = len(type_map) self.rcut = max(self.qm_model.get_rcut(), self.qmmm_model.get_rcut()) def build( @@ -377,7 +382,7 @@ def get_feed_dict( natoms[1]: total number of atoms held by this processor natoms[i]: 2 <= i < Ntypes+2, number of type i atoms box : tf.Tensor - The box. Can be generated by deepmd.model.make_stat_input + The box. Can be generated by deepmd.tf.model.make_stat_input mesh : tf.Tensor For historical reasons, only the length of the Tensor matters. if size of mesh == 6, pbc is assumed. 
@@ -412,13 +417,9 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict): local_jdata : dict The local data refer to the current class """ - from deepmd.entrypoints.train import ( - get_min_nbor_dist, - ) - # do not update sel; only find min distance # rcut is not important here - get_min_nbor_dist(global_jdata, 6.0) + UpdateSel().get_min_nbor_dist(global_jdata, 6.0) return local_jdata diff --git a/deepmd/model/tensor.py b/deepmd/tf/model/tensor.py similarity index 98% rename from deepmd/model/tensor.py rename to deepmd/tf/model/tensor.py index 6a21e085f3..b232f40b13 100644 --- a/deepmd/model/tensor.py +++ b/deepmd/tf/model/tensor.py @@ -5,11 +5,11 @@ Union, ) -from deepmd.env import ( +from deepmd.tf.env import ( MODEL_VERSION, tf, ) -from deepmd.utils.type_embed import ( +from deepmd.tf.utils.type_embed import ( TypeEmbedNet, ) @@ -86,7 +86,7 @@ def data_stat(self, data): all_stat = make_stat_input(data, self.data_stat_nbatch, merge_sys=False) m_all_stat = merge_sys_stat(all_stat) self._compute_input_stat(m_all_stat, protection=self.data_stat_protect) - self._compute_output_stat(all_stat) + self._compute_output_stat(m_all_stat) def _compute_input_stat(self, all_stat, protection=1e-2): self.descrpt.compute_input_stats( diff --git a/deepmd/nvnmd/__init__.py b/deepmd/tf/nvnmd/__init__.py similarity index 100% rename from deepmd/nvnmd/__init__.py rename to deepmd/tf/nvnmd/__init__.py diff --git a/deepmd/nvnmd/data/__init__.py b/deepmd/tf/nvnmd/data/__init__.py similarity index 100% rename from deepmd/nvnmd/data/__init__.py rename to deepmd/tf/nvnmd/data/__init__.py diff --git a/deepmd/nvnmd/data/data.py b/deepmd/tf/nvnmd/data/data.py similarity index 100% rename from deepmd/nvnmd/data/data.py rename to deepmd/tf/nvnmd/data/data.py diff --git a/deepmd/nvnmd/descriptor/__init__.py b/deepmd/tf/nvnmd/descriptor/__init__.py similarity index 100% rename from deepmd/nvnmd/descriptor/__init__.py rename to deepmd/tf/nvnmd/descriptor/__init__.py diff --git 
a/deepmd/nvnmd/descriptor/se_a.py b/deepmd/tf/nvnmd/descriptor/se_a.py similarity index 98% rename from deepmd/nvnmd/descriptor/se_a.py rename to deepmd/tf/nvnmd/descriptor/se_a.py index 816f17cfa3..cc90df7a5c 100644 --- a/deepmd/nvnmd/descriptor/se_a.py +++ b/deepmd/tf/nvnmd/descriptor/se_a.py @@ -3,7 +3,7 @@ import numpy as np -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_NP_FLOAT_PRECISION, GLOBAL_TF_FLOAT_PRECISION, op_module, @@ -11,16 +11,16 @@ ) # -from deepmd.nvnmd.utils.config import ( +from deepmd.tf.nvnmd.utils.config import ( nvnmd_cfg, ) -from deepmd.nvnmd.utils.weight import ( +from deepmd.tf.nvnmd.utils.weight import ( get_normalize, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( get_tensor_by_name_from_graph, ) -from deepmd.utils.network import ( +from deepmd.tf.utils.network import ( embedding_net, ) diff --git a/deepmd/nvnmd/descriptor/se_atten.py b/deepmd/tf/nvnmd/descriptor/se_atten.py similarity index 98% rename from deepmd/nvnmd/descriptor/se_atten.py rename to deepmd/tf/nvnmd/descriptor/se_atten.py index cfffb8a90b..474f6995cf 100644 --- a/deepmd/nvnmd/descriptor/se_atten.py +++ b/deepmd/tf/nvnmd/descriptor/se_atten.py @@ -3,20 +3,20 @@ import numpy as np -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_NP_FLOAT_PRECISION, op_module, tf, ) # -from deepmd.nvnmd.utils.config import ( +from deepmd.tf.nvnmd.utils.config import ( nvnmd_cfg, ) -from deepmd.nvnmd.utils.weight import ( +from deepmd.tf.nvnmd.utils.weight import ( get_normalize, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( get_tensor_by_name_from_graph, ) diff --git a/deepmd/nvnmd/entrypoints/__init__.py b/deepmd/tf/nvnmd/entrypoints/__init__.py similarity index 100% rename from deepmd/nvnmd/entrypoints/__init__.py rename to deepmd/tf/nvnmd/entrypoints/__init__.py diff --git a/deepmd/nvnmd/entrypoints/freeze.py b/deepmd/tf/nvnmd/entrypoints/freeze.py similarity index 96% rename from 
deepmd/nvnmd/entrypoints/freeze.py rename to deepmd/tf/nvnmd/entrypoints/freeze.py index e56a0c2130..2a2b8d9179 100644 --- a/deepmd/nvnmd/entrypoints/freeze.py +++ b/deepmd/tf/nvnmd/entrypoints/freeze.py @@ -1,13 +1,13 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: LGPL-3.0-or-later -from deepmd.env import ( +from deepmd.tf.env import ( tf, ) -from deepmd.nvnmd.utils.fio import ( +from deepmd.tf.nvnmd.utils.fio import ( FioDic, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( get_tensor_by_name_from_graph, ) diff --git a/deepmd/nvnmd/entrypoints/mapt.py b/deepmd/tf/nvnmd/entrypoints/mapt.py similarity index 98% rename from deepmd/nvnmd/entrypoints/mapt.py rename to deepmd/tf/nvnmd/entrypoints/mapt.py index 1299d7a74e..7401234e35 100644 --- a/deepmd/nvnmd/entrypoints/mapt.py +++ b/deepmd/tf/nvnmd/entrypoints/mapt.py @@ -6,30 +6,30 @@ import numpy as np -from deepmd.env import ( +from deepmd.tf.env import ( op_module, tf, ) -from deepmd.nvnmd.data.data import ( +from deepmd.tf.nvnmd.data.data import ( jdata_deepmd_input_v0, jdata_sys, ) -from deepmd.nvnmd.utils.config import ( +from deepmd.tf.nvnmd.utils.config import ( nvnmd_cfg, ) -from deepmd.nvnmd.utils.fio import ( +from deepmd.tf.nvnmd.utils.fio import ( FioDic, ) -from deepmd.nvnmd.utils.network import ( +from deepmd.tf.nvnmd.utils.network import ( get_sess, ) -from deepmd.nvnmd.utils.weight import ( +from deepmd.tf.nvnmd.utils.weight import ( get_filter_type_weight, get_filter_weight, get_normalize, get_type_embedding_weight, ) -from deepmd.utils.sess import ( +from deepmd.tf.utils.sess import ( run_sess, ) diff --git a/deepmd/nvnmd/entrypoints/train.py b/deepmd/tf/nvnmd/entrypoints/train.py similarity index 94% rename from deepmd/nvnmd/entrypoints/train.py rename to deepmd/tf/nvnmd/entrypoints/train.py index 6e14b6f865..18c644a7f6 100644 --- a/deepmd/nvnmd/entrypoints/train.py +++ b/deepmd/tf/nvnmd/entrypoints/train.py @@ -5,28 +5,28 @@ Optional, ) -from 
deepmd.entrypoints.freeze import ( +from deepmd.tf.entrypoints.freeze import ( freeze, ) -from deepmd.entrypoints.train import ( +from deepmd.tf.entrypoints.train import ( train, ) -from deepmd.env import ( +from deepmd.tf.env import ( tf, ) -from deepmd.nvnmd.data.data import ( +from deepmd.tf.nvnmd.data.data import ( jdata_deepmd_input_v0, ) -from deepmd.nvnmd.entrypoints.mapt import ( +from deepmd.tf.nvnmd.entrypoints.mapt import ( mapt, ) -from deepmd.nvnmd.entrypoints.wrap import ( +from deepmd.tf.nvnmd.entrypoints.wrap import ( wrap, ) -from deepmd.nvnmd.utils.config import ( +from deepmd.tf.nvnmd.utils.config import ( nvnmd_cfg, ) -from deepmd.nvnmd.utils.fio import ( +from deepmd.tf.nvnmd.utils.fio import ( FioDic, ) diff --git a/deepmd/nvnmd/entrypoints/wrap.py b/deepmd/tf/nvnmd/entrypoints/wrap.py similarity index 98% rename from deepmd/nvnmd/entrypoints/wrap.py rename to deepmd/tf/nvnmd/entrypoints/wrap.py index 1ba2ed7384..f2be8352e2 100644 --- a/deepmd/nvnmd/entrypoints/wrap.py +++ b/deepmd/tf/nvnmd/entrypoints/wrap.py @@ -6,32 +6,32 @@ import numpy as np -from deepmd.env import ( +from deepmd.tf.env import ( op_module, tf, ) -from deepmd.nvnmd.data.data import ( +from deepmd.tf.nvnmd.data.data import ( jdata_deepmd_input_v0, jdata_sys, ) -from deepmd.nvnmd.utils.config import ( +from deepmd.tf.nvnmd.utils.config import ( nvnmd_cfg, ) -from deepmd.nvnmd.utils.encode import ( +from deepmd.tf.nvnmd.utils.encode import ( Encode, ) -from deepmd.nvnmd.utils.fio import ( +from deepmd.tf.nvnmd.utils.fio import ( FioBin, FioTxt, ) -from deepmd.nvnmd.utils.network import ( +from deepmd.tf.nvnmd.utils.network import ( get_sess, ) -from deepmd.nvnmd.utils.weight import ( +from deepmd.tf.nvnmd.utils.weight import ( get_fitnet_weight, get_type_weight, ) -from deepmd.utils.sess import ( +from deepmd.tf.utils.sess import ( run_sess, ) diff --git a/deepmd/nvnmd/fit/__init__.py b/deepmd/tf/nvnmd/fit/__init__.py similarity index 100% rename from 
deepmd/nvnmd/fit/__init__.py rename to deepmd/tf/nvnmd/fit/__init__.py diff --git a/deepmd/nvnmd/fit/ener.py b/deepmd/tf/nvnmd/fit/ener.py similarity index 58% rename from deepmd/nvnmd/fit/ener.py rename to deepmd/tf/nvnmd/fit/ener.py index 1f316a2145..20adda395c 100644 --- a/deepmd/nvnmd/fit/ener.py +++ b/deepmd/tf/nvnmd/fit/ener.py @@ -1,12 +1,12 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_TF_FLOAT_PRECISION, tf, ) -from deepmd.nvnmd.utils.config import ( +from deepmd.tf.nvnmd.utils.config import ( nvnmd_cfg, ) -from deepmd.nvnmd.utils.network import one_layer as one_layer_nvnmd +from deepmd.tf.nvnmd.utils.network import one_layer as one_layer_nvnmd __all__ = [ "GLOBAL_TF_FLOAT_PRECISION", diff --git a/deepmd/nvnmd/utils/__init__.py b/deepmd/tf/nvnmd/utils/__init__.py similarity index 100% rename from deepmd/nvnmd/utils/__init__.py rename to deepmd/tf/nvnmd/utils/__init__.py diff --git a/deepmd/nvnmd/utils/argcheck.py b/deepmd/tf/nvnmd/utils/argcheck.py similarity index 73% rename from deepmd/nvnmd/utils/argcheck.py rename to deepmd/tf/nvnmd/utils/argcheck.py index 2b9362efb0..1f10a1c03e 100644 --- a/deepmd/nvnmd/utils/argcheck.py +++ b/deepmd/tf/nvnmd/utils/argcheck.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Alias for backward compatibility.""" -from deepmd_utils.utils.argcheck_nvnmd import ( + +from deepmd.utils.argcheck_nvnmd import ( nvnmd_args, ) diff --git a/deepmd/nvnmd/utils/config.py b/deepmd/tf/nvnmd/utils/config.py similarity index 99% rename from deepmd/nvnmd/utils/config.py rename to deepmd/tf/nvnmd/utils/config.py index 5bfd9ea54f..15998069b3 100644 --- a/deepmd/nvnmd/utils/config.py +++ b/deepmd/tf/nvnmd/utils/config.py @@ -3,7 +3,7 @@ import numpy as np -from deepmd.nvnmd.data.data import ( +from deepmd.tf.nvnmd.data.data import ( NVNMD_CITATION, NVNMD_WELCOME, jdata_config_v0, @@ -17,10 +17,10 @@ jdata_deepmd_input_v1_ni128, jdata_deepmd_input_v1_ni256, ) 
-from deepmd.nvnmd.utils.fio import ( +from deepmd.tf.nvnmd.utils.fio import ( FioDic, ) -from deepmd.nvnmd.utils.op import ( +from deepmd.tf.nvnmd.utils.op import ( r2s, ) diff --git a/deepmd/nvnmd/utils/encode.py b/deepmd/tf/nvnmd/utils/encode.py similarity index 99% rename from deepmd/nvnmd/utils/encode.py rename to deepmd/tf/nvnmd/utils/encode.py index 55f4efd52e..21398fbf23 100644 --- a/deepmd/nvnmd/utils/encode.py +++ b/deepmd/tf/nvnmd/utils/encode.py @@ -3,7 +3,7 @@ import numpy as np -from deepmd.nvnmd.data.data import ( +from deepmd.tf.nvnmd.data.data import ( jdata_sys, ) diff --git a/deepmd/nvnmd/utils/fio.py b/deepmd/tf/nvnmd/utils/fio.py similarity index 100% rename from deepmd/nvnmd/utils/fio.py rename to deepmd/tf/nvnmd/utils/fio.py diff --git a/deepmd/nvnmd/utils/network.py b/deepmd/tf/nvnmd/utils/network.py similarity index 98% rename from deepmd/nvnmd/utils/network.py rename to deepmd/tf/nvnmd/utils/network.py index f0c357eabe..76c80ed4e7 100644 --- a/deepmd/nvnmd/utils/network.py +++ b/deepmd/tf/nvnmd/utils/network.py @@ -3,18 +3,18 @@ import numpy as np -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_TF_FLOAT_PRECISION, op_module, tf, ) -from deepmd.nvnmd.utils.config import ( +from deepmd.tf.nvnmd.utils.config import ( nvnmd_cfg, ) -from deepmd.nvnmd.utils.weight import ( +from deepmd.tf.nvnmd.utils.weight import ( get_constant_initializer, ) -from deepmd.utils.network import ( +from deepmd.tf.utils.network import ( variable_summaries, ) diff --git a/deepmd/nvnmd/utils/op.py b/deepmd/tf/nvnmd/utils/op.py similarity index 100% rename from deepmd/nvnmd/utils/op.py rename to deepmd/tf/nvnmd/utils/op.py diff --git a/deepmd/nvnmd/utils/weight.py b/deepmd/tf/nvnmd/utils/weight.py similarity index 98% rename from deepmd/nvnmd/utils/weight.py rename to deepmd/tf/nvnmd/utils/weight.py index cc5ab15219..7a60712455 100644 --- a/deepmd/nvnmd/utils/weight.py +++ b/deepmd/tf/nvnmd/utils/weight.py @@ -1,10 +1,10 @@ # SPDX-License-Identifier: 
LGPL-3.0-or-later import logging -from deepmd.env import ( +from deepmd.tf.env import ( tf, ) -from deepmd.nvnmd.utils.config import ( +from deepmd.tf.nvnmd.utils.config import ( nvnmd_cfg, ) diff --git a/deepmd/op/__init__.py b/deepmd/tf/op/__init__.py similarity index 96% rename from deepmd/op/__init__.py rename to deepmd/tf/op/__init__.py index 9cdfec70cc..421ef0b123 100644 --- a/deepmd/op/__init__.py +++ b/deepmd/tf/op/__init__.py @@ -8,7 +8,7 @@ ) NOT_LOADABLE = ("__init__.py",) -PACKAGE_BASE = "deepmd.op" +PACKAGE_BASE = "deepmd.tf.op" log = logging.getLogger(__name__) diff --git a/deepmd/op/_add_flt_nvnmd_grad.py b/deepmd/tf/op/_add_flt_nvnmd_grad.py similarity index 90% rename from deepmd/op/_add_flt_nvnmd_grad.py rename to deepmd/tf/op/_add_flt_nvnmd_grad.py index 105ec1ec6d..3bea39fcec 100644 --- a/deepmd/op/_add_flt_nvnmd_grad.py +++ b/deepmd/tf/op/_add_flt_nvnmd_grad.py @@ -5,7 +5,7 @@ ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_module, ) diff --git a/deepmd/op/_copy_flt_nvnmd_grad.py b/deepmd/tf/op/_copy_flt_nvnmd_grad.py similarity index 91% rename from deepmd/op/_copy_flt_nvnmd_grad.py rename to deepmd/tf/op/_copy_flt_nvnmd_grad.py index 09c4a72324..401acba22c 100644 --- a/deepmd/op/_copy_flt_nvnmd_grad.py +++ b/deepmd/tf/op/_copy_flt_nvnmd_grad.py @@ -5,7 +5,7 @@ ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_module, ) diff --git a/deepmd/op/_dotmul_flt_nvnmd_grad.py b/deepmd/tf/op/_dotmul_flt_nvnmd_grad.py similarity index 95% rename from deepmd/op/_dotmul_flt_nvnmd_grad.py rename to deepmd/tf/op/_dotmul_flt_nvnmd_grad.py index 0f786a6d38..8a4ffb2d0c 100644 --- a/deepmd/op/_dotmul_flt_nvnmd_grad.py +++ b/deepmd/tf/op/_dotmul_flt_nvnmd_grad.py @@ -5,7 +5,7 @@ ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_module, tf, ) diff --git a/deepmd/op/_flt_nvnmd_grad.py b/deepmd/tf/op/_flt_nvnmd_grad.py similarity index 90% rename from deepmd/op/_flt_nvnmd_grad.py rename to 
deepmd/tf/op/_flt_nvnmd_grad.py index 0dd67c2c57..b0fbaea11d 100644 --- a/deepmd/op/_flt_nvnmd_grad.py +++ b/deepmd/tf/op/_flt_nvnmd_grad.py @@ -5,7 +5,7 @@ ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_module, ) diff --git a/deepmd/op/_gelu.py b/deepmd/tf/op/_gelu.py similarity index 97% rename from deepmd/op/_gelu.py rename to deepmd/tf/op/_gelu.py index 6768ac10b3..04ae124f70 100644 --- a/deepmd/op/_gelu.py +++ b/deepmd/tf/op/_gelu.py @@ -1,12 +1,13 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: LGPL-3.0-or-later """First-order derivatives and second-order derivatives for gelu function.""" + import tensorflow from tensorflow.python.framework import ( ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_module, ) diff --git a/deepmd/op/_map_flt_nvnmd_grad.py b/deepmd/tf/op/_map_flt_nvnmd_grad.py similarity index 97% rename from deepmd/op/_map_flt_nvnmd_grad.py rename to deepmd/tf/op/_map_flt_nvnmd_grad.py index 3e5749e74c..46f258cafe 100644 --- a/deepmd/op/_map_flt_nvnmd_grad.py +++ b/deepmd/tf/op/_map_flt_nvnmd_grad.py @@ -5,7 +5,7 @@ ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_module, tf, ) diff --git a/deepmd/op/_matmul_fitnet_nvnmd_grad.py b/deepmd/tf/op/_matmul_fitnet_nvnmd_grad.py similarity index 94% rename from deepmd/op/_matmul_fitnet_nvnmd_grad.py rename to deepmd/tf/op/_matmul_fitnet_nvnmd_grad.py index bab3905c5a..f8d566bd39 100644 --- a/deepmd/op/_matmul_fitnet_nvnmd_grad.py +++ b/deepmd/tf/op/_matmul_fitnet_nvnmd_grad.py @@ -5,7 +5,7 @@ ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_module, tf, ) diff --git a/deepmd/op/_matmul_flt2fix_nvnmd.py b/deepmd/tf/op/_matmul_flt2fix_nvnmd.py similarity index 97% rename from deepmd/op/_matmul_flt2fix_nvnmd.py rename to deepmd/tf/op/_matmul_flt2fix_nvnmd.py index db9af761de..319fb90ec8 100644 --- a/deepmd/op/_matmul_flt2fix_nvnmd.py +++ b/deepmd/tf/op/_matmul_flt2fix_nvnmd.py @@ -5,7 +5,7 @@ ops, ) -from deepmd.env import ( +from 
deepmd.tf.env import ( op_module, tf, ) diff --git a/deepmd/op/_matmul_flt_nvnmd_grad.py b/deepmd/tf/op/_matmul_flt_nvnmd_grad.py similarity index 97% rename from deepmd/op/_matmul_flt_nvnmd_grad.py rename to deepmd/tf/op/_matmul_flt_nvnmd_grad.py index 1e3ed74c91..6493794b00 100644 --- a/deepmd/op/_matmul_flt_nvnmd_grad.py +++ b/deepmd/tf/op/_matmul_flt_nvnmd_grad.py @@ -5,7 +5,7 @@ ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_module, tf, ) diff --git a/deepmd/op/_mul_flt_nvnmd_grad.py b/deepmd/tf/op/_mul_flt_nvnmd_grad.py similarity index 96% rename from deepmd/op/_mul_flt_nvnmd_grad.py rename to deepmd/tf/op/_mul_flt_nvnmd_grad.py index c50baf8c12..d05daa7dfa 100644 --- a/deepmd/op/_mul_flt_nvnmd_grad.py +++ b/deepmd/tf/op/_mul_flt_nvnmd_grad.py @@ -5,7 +5,7 @@ ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_module, tf, ) diff --git a/deepmd/op/_prod_force_grad.py b/deepmd/tf/op/_prod_force_grad.py similarity index 95% rename from deepmd/op/_prod_force_grad.py rename to deepmd/tf/op/_prod_force_grad.py index ffa34a8126..449901c137 100644 --- a/deepmd/op/_prod_force_grad.py +++ b/deepmd/tf/op/_prod_force_grad.py @@ -6,7 +6,7 @@ ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_grads_module, ) diff --git a/deepmd/op/_prod_force_se_a_grad.py b/deepmd/tf/op/_prod_force_se_a_grad.py similarity index 95% rename from deepmd/op/_prod_force_se_a_grad.py rename to deepmd/tf/op/_prod_force_se_a_grad.py index b58b819ee1..d732803bad 100644 --- a/deepmd/op/_prod_force_se_a_grad.py +++ b/deepmd/tf/op/_prod_force_se_a_grad.py @@ -6,7 +6,7 @@ ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_grads_module, ) diff --git a/deepmd/op/_prod_force_se_a_mask_grad.py b/deepmd/tf/op/_prod_force_se_a_mask_grad.py similarity index 95% rename from deepmd/op/_prod_force_se_a_mask_grad.py rename to deepmd/tf/op/_prod_force_se_a_mask_grad.py index d5ef829da2..a7f2d72b16 100644 --- a/deepmd/op/_prod_force_se_a_mask_grad.py +++ 
b/deepmd/tf/op/_prod_force_se_a_mask_grad.py @@ -6,7 +6,7 @@ ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_grads_module, ) diff --git a/deepmd/op/_prod_force_se_r_grad.py b/deepmd/tf/op/_prod_force_se_r_grad.py similarity index 93% rename from deepmd/op/_prod_force_se_r_grad.py rename to deepmd/tf/op/_prod_force_se_r_grad.py index 254e2e331a..4ec65b31f2 100644 --- a/deepmd/op/_prod_force_se_r_grad.py +++ b/deepmd/tf/op/_prod_force_se_r_grad.py @@ -6,7 +6,7 @@ ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_grads_module, ) diff --git a/deepmd/op/_prod_virial_grad.py b/deepmd/tf/op/_prod_virial_grad.py similarity index 95% rename from deepmd/op/_prod_virial_grad.py rename to deepmd/tf/op/_prod_virial_grad.py index 4a946f3ba8..7fe245ed6b 100644 --- a/deepmd/op/_prod_virial_grad.py +++ b/deepmd/tf/op/_prod_virial_grad.py @@ -6,7 +6,7 @@ ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_grads_module, ) diff --git a/deepmd/op/_prod_virial_se_a_grad.py b/deepmd/tf/op/_prod_virial_se_a_grad.py similarity index 95% rename from deepmd/op/_prod_virial_se_a_grad.py rename to deepmd/tf/op/_prod_virial_se_a_grad.py index 0e738f86b3..c95d3b58e2 100644 --- a/deepmd/op/_prod_virial_se_a_grad.py +++ b/deepmd/tf/op/_prod_virial_se_a_grad.py @@ -6,7 +6,7 @@ ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_grads_module, ) diff --git a/deepmd/op/_prod_virial_se_r_grad.py b/deepmd/tf/op/_prod_virial_se_r_grad.py similarity index 94% rename from deepmd/op/_prod_virial_se_r_grad.py rename to deepmd/tf/op/_prod_virial_se_r_grad.py index a943b35670..8f51310c8c 100644 --- a/deepmd/op/_prod_virial_se_r_grad.py +++ b/deepmd/tf/op/_prod_virial_se_r_grad.py @@ -6,7 +6,7 @@ ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_grads_module, ) diff --git a/deepmd/op/_quantize_nvnmd_grad.py b/deepmd/tf/op/_quantize_nvnmd_grad.py similarity index 93% rename from deepmd/op/_quantize_nvnmd_grad.py rename to 
deepmd/tf/op/_quantize_nvnmd_grad.py index 2ef282fa78..f1d99dc18d 100644 --- a/deepmd/op/_quantize_nvnmd_grad.py +++ b/deepmd/tf/op/_quantize_nvnmd_grad.py @@ -5,7 +5,7 @@ ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_module, ) diff --git a/deepmd/op/_soft_min_force_grad.py b/deepmd/tf/op/_soft_min_force_grad.py similarity index 95% rename from deepmd/op/_soft_min_force_grad.py rename to deepmd/tf/op/_soft_min_force_grad.py index ae9cf882c8..cd18f3e186 100644 --- a/deepmd/op/_soft_min_force_grad.py +++ b/deepmd/tf/op/_soft_min_force_grad.py @@ -6,7 +6,7 @@ ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_grads_module, ) diff --git a/deepmd/op/_soft_min_virial_grad.py b/deepmd/tf/op/_soft_min_virial_grad.py similarity index 95% rename from deepmd/op/_soft_min_virial_grad.py rename to deepmd/tf/op/_soft_min_virial_grad.py index 56b828b12c..4d4f4790dd 100644 --- a/deepmd/op/_soft_min_virial_grad.py +++ b/deepmd/tf/op/_soft_min_virial_grad.py @@ -6,7 +6,7 @@ ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_grads_module, ) diff --git a/deepmd/op/_tabulate_grad.py b/deepmd/tf/op/_tabulate_grad.py similarity index 97% rename from deepmd/op/_tabulate_grad.py rename to deepmd/tf/op/_tabulate_grad.py index 8ad8908d7e..667981ef9f 100644 --- a/deepmd/op/_tabulate_grad.py +++ b/deepmd/tf/op/_tabulate_grad.py @@ -6,11 +6,11 @@ ops, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_module, ) -# from deepmd.DescrptSeATabulate import last_layer_size +# from deepmd.tf.DescrptSeATabulate import last_layer_size @ops.RegisterGradient("TabulateFusion") diff --git a/deepmd/op/_tanh4_flt_nvnmd_grad.py b/deepmd/tf/op/_tanh4_flt_nvnmd_grad.py similarity index 97% rename from deepmd/op/_tanh4_flt_nvnmd_grad.py rename to deepmd/tf/op/_tanh4_flt_nvnmd_grad.py index 45d7366545..04d1724d0b 100644 --- a/deepmd/op/_tanh4_flt_nvnmd_grad.py +++ b/deepmd/tf/op/_tanh4_flt_nvnmd_grad.py @@ -5,7 +5,7 @@ ops, ) -from deepmd.env import ( +from 
deepmd.tf.env import ( tf, ) diff --git a/deepmd/tf/train/__init__.py b/deepmd/tf/train/__init__.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/deepmd/tf/train/__init__.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/deepmd/train/run_options.py b/deepmd/tf/train/run_options.py similarity index 69% rename from deepmd/train/run_options.py rename to deepmd/tf/train/run_options.py index 451632949e..b835d63852 100644 --- a/deepmd/train/run_options.py +++ b/deepmd/tf/train/run_options.py @@ -16,63 +16,63 @@ Version, ) -from deepmd.cluster import ( +from deepmd.tf.cluster import ( get_resource, ) -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_CONFIG, TF_VERSION, - get_tf_default_nthreads, - global_float_prec, tf, ) -from deepmd.loggers import ( +from deepmd.tf.loggers import ( set_log_handles, ) +from deepmd.utils.summary import SummaryPrinter as BaseSummaryPrinter if TYPE_CHECKING: import horovod.tensorflow as HVD __all__ = [ - "WELCOME", - "CITATION", - "BUILD", "RunOptions", ] log = logging.getLogger(__name__) -# http://patorjk.com/software/taag. Font:Big" -WELCOME = ( - r" _____ _____ __ __ _____ _ _ _ ", - r"| __ \ | __ \ | \/ || __ \ | | (_)| | ", - r"| | | | ___ ___ | |__) || \ / || | | | ______ | | __ _ | |_ ", - r"| | | | / _ \ / _ \| ___/ | |\/| || | | ||______|| |/ /| || __|", - r"| |__| || __/| __/| | | | | || |__| | | < | || |_ ", - r"|_____/ \___| \___||_| |_| |_||_____/ |_|\_\|_| \__|", -) +class SummaryPrinter(BaseSummaryPrinter): + """Summary printer for TensorFlow.""" -CITATION = ( - "Please read and cite:", - "Wang, Zhang, Han and E, Comput.Phys.Comm. 228, 178-184 (2018)", - "Zeng et al, J. Chem. 
Phys., 159, 054801 (2023)", - "See https://deepmd.rtfd.io/credits/ for details.", -) + def __init__(self, compute_device: str, ngpus: int) -> None: + super().__init__() + self.compute_device = compute_device + self.ngpus = ngpus -_sep = "\n " -BUILD = ( - f"installed to: {GLOBAL_CONFIG['install_prefix']}", - f"source : {GLOBAL_CONFIG['git_summ']}", - f"source brach: {GLOBAL_CONFIG['git_branch']}", - f"source commit: {GLOBAL_CONFIG['git_hash']}", - f"source commit at: {GLOBAL_CONFIG['git_date']}", - f"build float prec: {global_float_prec}", - f"build variant: {GLOBAL_CONFIG['dp_variant']}", - f"build with tf inc: {GLOBAL_CONFIG['tf_include_dir']}", - f"build with tf lib: {GLOBAL_CONFIG['tf_libs'].replace(';', _sep)}", -) + def is_built_with_cuda(self) -> bool: + """Check if the backend is built with CUDA.""" + return tf.test.is_built_with_cuda() + + def is_built_with_rocm(self) -> bool: + """Check if the backend is built with ROCm.""" + return tf.test.is_built_with_rocm() + + def get_compute_device(self) -> str: + """Get Compute device.""" + return self.compute_device + + def get_ngpus(self) -> int: + """Get the number of GPUs.""" + return self.ngpus + + def get_backend_info(self) -> dict: + """Get backend information.""" + return { + "Backend": "TensorFlow", + "TF ver": tf.version.GIT_VERSION, + "build with TF ver": TF_VERSION, + "build with TF inc": GLOBAL_CONFIG["tf_include_dir"].replace(";", "\n"), + "build with TF lib": GLOBAL_CONFIG["tf_libs"].replace(";", "\n"), + } class RunOptions: @@ -148,25 +148,7 @@ def is_chief(self): def print_resource_summary(self): """Print build and current running cluster configuration summary.""" - log.info("---Summary of the training---------------------------------------") - if self.is_distrib: - log.info("distributed") - log.info(f"world size: {self.world_size}") - log.info(f"my rank: {self.my_rank}") - log.info(f"node list: {self.nodelist}") - log.info(f"running on: {self.nodename}") - log.info(f"computing device: 
{self.my_device}") - if tf.test.is_built_with_cuda(): - env_value = os.environ.get("CUDA_VISIBLE_DEVICES", "unset") - log.info(f"CUDA_VISIBLE_DEVICES: {env_value}") - if hasattr(tf.test, "is_built_with_rocm") and tf.test.is_built_with_rocm(): - env_value = os.environ.get("HIP_VISIBLE_DEVICES", "unset") - log.info(f"HIP_VISIBLE_DEVICES: {env_value}") - log.info(f"Count of visible GPU: {len(self.gpus or [])}") - intra, inter = get_tf_default_nthreads() - log.info(f"num_intra_threads: {intra:d}") - log.info(f"num_inter_threads: {inter:d}") - log.info("-----------------------------------------------------------------") + SummaryPrinter(self.my_device, len(self.gpus or []))() def _setup_logger( self, diff --git a/deepmd/train/trainer.py b/deepmd/tf/train/trainer.py similarity index 93% rename from deepmd/train/trainer.py rename to deepmd/tf/train/trainer.py index 3b81740a93..931cf87246 100644 --- a/deepmd/train/trainer.py +++ b/deepmd/tf/train/trainer.py @@ -1,9 +1,7 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: LGPL-3.0-or-later -import glob import logging import os -import platform import shutil import time from typing import ( @@ -21,13 +19,20 @@ ) # load grad of force module -import deepmd.op # noqa: F401 +import deepmd.tf.op # noqa: F401 from deepmd.common import ( + symlink_prefix_files, +) +from deepmd.loggers.training import ( + format_training_message, + format_training_message_per_task, +) +from deepmd.tf.common import ( data_requirement, get_precision, j_must_have, ) -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_ENER_FLOAT_PRECISION, GLOBAL_TF_FLOAT_PRECISION, TF_VERSION, @@ -35,38 +40,38 @@ tf, tfv2, ) -from deepmd.fit.ener import ( +from deepmd.tf.fit.ener import ( EnerFitting, ) -from deepmd.model import ( +from deepmd.tf.model import ( MultiModel, ) -from deepmd.model.model import ( +from deepmd.tf.model.model import ( Model, ) -from deepmd.utils import random as dp_random -from deepmd.utils.data_system import ( +from 
deepmd.tf.utils import random as dp_random +from deepmd.tf.utils.data_system import ( DeepmdDataSystem, ) -from deepmd.utils.errors import ( +from deepmd.tf.utils.errors import ( GraphTooLargeError, GraphWithoutTensorError, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( get_tensor_by_name_from_graph, load_graph_def, ) -from deepmd.utils.learning_rate import ( +from deepmd.tf.utils.learning_rate import ( LearningRateExp, ) -from deepmd.utils.sess import ( +from deepmd.tf.utils.sess import ( run_sess, ) log = logging.getLogger(__name__) # nvnmd -from deepmd.nvnmd.utils.config import ( +from deepmd.tf.nvnmd.utils.config import ( nvnmd_cfg, ) @@ -159,6 +164,7 @@ def get_lr_and_coef(lr_param): self.disp_freq = tr_data.get("disp_freq", 1000) self.save_freq = tr_data.get("save_freq", 1000) self.save_ckpt = tr_data.get("save_ckpt", "model.ckpt") + self.max_ckpt_keep = tr_data.get("max_ckpt_keep", 5) self.display_in_training = tr_data.get("disp_training", True) self.timing_in_training = tr_data.get("time_training", True) self.profiling = self.run_opt.is_chief and tr_data.get("profiling", False) @@ -230,9 +236,7 @@ def build(self, data=None, stop_batch=0, origin_type_map=None, suffix=""): if data[fitting_key].mixed_type: assert isinstance( self.fitting[fitting_key], EnerFitting - ), "Data for fitting net {} in mixed_type format must use ener fitting!".format( - fitting_key - ) + ), f"Data for fitting net {fitting_key} in mixed_type format must use ener fitting!" 
if self.numb_fparam_dict[fitting_key] > 0: log.info( "fitting net %s training with %d frame parameter(s)" @@ -292,8 +296,6 @@ def build(self, data=None, stop_batch=0, origin_type_map=None, suffix=""): ) # neighbor_stat is moved to train.py as duplicated - # TODO: this is a simple fix but we should have a clear - # architecture to call neighbor stat else: self.model.enable_compression() @@ -493,7 +495,9 @@ def _init_session(self): # Initializes or restore global variables init_op = tf.global_variables_initializer() if self.run_opt.is_chief: - self.saver = tf.train.Saver(save_relative_paths=True) + self.saver = tf.train.Saver( + save_relative_paths=True, max_to_keep=self.max_ckpt_keep + ) if self.run_opt.init_mode == "init_from_scratch": log.info("initialize model from scratch") run_sess(self.sess, init_op) @@ -773,8 +777,10 @@ def train(self, train_data=None, valid_data=None): test_time = toc - tic wall_time = toc - wall_time_tic log.info( - "batch %7d training time %.2f s, testing time %.2f s, total wall time %.2f s" - % (cur_batch, train_time, test_time, wall_time) + format_training_message( + batch=cur_batch, + wall_time=wall_time, + ) ) # the first training time is not accurate if cur_batch > self.disp_freq or stop_batch < 2 * self.disp_freq: @@ -830,19 +836,7 @@ def save_checkpoint(self, cur_batch: int): ) from e # make symlinks from prefix with step to that without step to break nothing # get all checkpoint files - original_files = glob.glob(ckpt_prefix + ".*") - for ori_ff in original_files: - new_ff = self.save_ckpt + ori_ff[len(ckpt_prefix) :] - try: - # remove old one - os.remove(new_ff) - except OSError: - pass - if platform.system() != "Windows": - # by default one does not have access to create symlink on Windows - os.symlink(os.path.relpath(ori_ff, os.path.dirname(new_ff)), new_ff) - else: - shutil.copyfile(ori_ff, new_ff) + symlink_prefix_files(ckpt_prefix, self.save_ckpt) log.info("saved checkpoint %s" % self.save_ckpt) def get_feed_dict(self, batch, 
is_training): @@ -970,6 +964,23 @@ def print_on_training( for k in train_results.keys(): print_str += prop_fmt % (train_results[k]) print_str += " %8.1e\n" % cur_lr + log.info( + format_training_message_per_task( + batch=cur_batch, + task_name="trn", + rmse=train_results, + learning_rate=cur_lr, + ) + ) + if valid_results is not None: + log.info( + format_training_message_per_task( + batch=cur_batch, + task_name="val", + rmse=valid_results, + learning_rate=None, + ) + ) else: for fitting_key in train_results: if valid_results[fitting_key] is not None: @@ -985,6 +996,23 @@ def print_on_training( for k in train_results[fitting_key].keys(): print_str += prop_fmt % (train_results[fitting_key][k]) print_str += " %8.1e\n" % cur_lr_dict[fitting_key] + log.info( + format_training_message_per_task( + batch=cur_batch, + task_name=f"{fitting_key}_trn", + rmse=train_results[fitting_key], + learning_rate=cur_lr_dict[fitting_key], + ) + ) + if valid_results is not None: + log.info( + format_training_message_per_task( + batch=cur_batch, + task_name=f"{fitting_key}_val", + rmse=valid_results[fitting_key], + learning_rate=None, + ) + ) fp.write(print_str) fp.flush() @@ -1054,10 +1082,7 @@ def _init_from_frz_model(self): except FileNotFoundError as e: # throw runtime error if there's no frozen model raise RuntimeError( - "The input frozen model {} ({}) does not exist! Please check the path of the frozen model. ".format( - self.run_opt.init_frz_model, - os.path.abspath(self.run_opt.init_frz_model), - ) + f"The input frozen model {self.run_opt.init_frz_model} ({os.path.abspath(self.run_opt.init_frz_model)}) does not exist! Please check the path of the frozen model. 
" ) from e # get the model type from the frozen model(self.run_opt.init_frz_model) try: @@ -1089,7 +1114,7 @@ def _init_from_ckpt(self, ckpt_meta: str): self.ckpt_meta = ckpt_meta def _init_from_pretrained_model( - self, data, origin_type_map=None, bias_shift="delta" + self, data, origin_type_map=None, bias_adjust_mode="change-by-statistic" ): """Init the embedding net variables with the given frozen model. @@ -1099,21 +1124,19 @@ def _init_from_pretrained_model( The training data. origin_type_map : list The original type_map in dataset, they are targets to change the energy bias. - bias_shift : str - The mode for changing energy bias : ['delta', 'statistic'] - 'delta' : perform predictions on energies of target dataset, + bias_adjust_mode : str + The mode for changing energy bias : ['change-by-statistic', 'set-by-statistic'] + 'change-by-statistic' : perform predictions on energies of target dataset, and do least sqaure on the errors to obtain the target shift as bias. - 'statistic' : directly use the statistic energy bias in the target dataset. + 'set-by-statistic' : directly use the statistic energy bias in the target dataset. """ try: graph, graph_def = load_graph_def(self.run_opt.finetune) except FileNotFoundError as e: # throw runtime error if there's no frozen model raise RuntimeError( - "The input frozen pretrained model {} ({}) does not exist! " - "Please check the path of the frozen pretrained model. ".format( - self.run_opt.finetune, os.path.abspath(self.run_opt.finetune) - ) + f"The input frozen pretrained model {self.run_opt.finetune} ({os.path.abspath(self.run_opt.finetune)}) does not exist! " + "Please check the path of the frozen pretrained model. " ) from e # get the model type from the frozen model(self.run_opt.finetune) try: @@ -1132,15 +1155,19 @@ def _init_from_pretrained_model( ), "Compressed models are not supported for finetuning!" 
self.model.init_variables(graph, graph_def, model_type=self.model_type) log.info( - "Changing energy bias in pretrained model for types {}... " - "(this step may take long time)".format(str(origin_type_map)) + f"Changing energy bias in pretrained model for types {origin_type_map!s}... " + "(this step may take long time)" ) self._change_energy_bias( - data, self.run_opt.finetune, origin_type_map, bias_shift + data, self.run_opt.finetune, origin_type_map, bias_adjust_mode ) def _change_energy_bias( - self, data, frozen_model, origin_type_map, bias_shift="delta" + self, + data, + frozen_model, + origin_type_map, + bias_adjust_mode="change-by-statistic", ): full_type_map = data.get_type_map() self.model.change_energy_bias( @@ -1148,7 +1175,7 @@ def _change_energy_bias( frozen_model, origin_type_map, full_type_map, - bias_shift=bias_shift, + bias_adjust_mode=bias_adjust_mode, ) diff --git a/deepmd/tf/utils/__init__.py b/deepmd/tf/utils/__init__.py new file mode 100644 index 0000000000..7d1e7e67d0 --- /dev/null +++ b/deepmd/tf/utils/__init__.py @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +# +from .data import ( + DeepmdData, +) +from .data_system import ( + DeepmdDataSystem, +) +from .learning_rate import ( + LearningRateExp, +) +from .pair_tab import ( + PairTab, +) +from .plugin import ( + Plugin, + PluginVariant, +) + +__all__ = [ + "DeepmdData", + "DeepmdDataSystem", + "LearningRateExp", + "PairTab", + "Plugin", + "PluginVariant", +] diff --git a/deepmd/tf/utils/argcheck.py b/deepmd/tf/utils/argcheck.py new file mode 100644 index 0000000000..caec33c319 --- /dev/null +++ b/deepmd/tf/utils/argcheck.py @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Alias for backward compatibility.""" + +from deepmd.utils.argcheck import ( + gen_args, + gen_doc, + gen_json, + list_to_doc, + normalize, + type_embedding_args, +) + +__all__ = [ + "list_to_doc", + "normalize", + "gen_doc", + "gen_json", + "gen_args", + "type_embedding_args", +] diff 
--git a/deepmd/tf/utils/batch_size.py b/deepmd/tf/utils/batch_size.py new file mode 100644 index 0000000000..33f1ec0da0 --- /dev/null +++ b/deepmd/tf/utils/batch_size.py @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from packaging.version import ( + Version, +) + +from deepmd.tf.env import ( + TF_VERSION, + tf, +) +from deepmd.tf.utils.errors import ( + OutOfMemoryError, +) +from deepmd.utils.batch_size import AutoBatchSize as AutoBatchSizeBase + + +class AutoBatchSize(AutoBatchSizeBase): + def is_gpu_available(self) -> bool: + """Check if GPU is available. + + Returns + ------- + bool + True if GPU is available + """ + return bool( + Version(TF_VERSION) >= Version("1.14") + and tf.config.experimental.get_visible_devices("GPU") + ) or tf.test.is_gpu_available() + + def is_oom_error(self, e: Exception) -> bool: + """Check if the exception is an OOM error. + + Parameters + ---------- + e : Exception + Exception + """ + return isinstance(e, (tf.errors.ResourceExhaustedError, OutOfMemoryError)) diff --git a/deepmd/tf/utils/compat.py b/deepmd/tf/utils/compat.py new file mode 100644 index 0000000000..e80a366b83 --- /dev/null +++ b/deepmd/tf/utils/compat.py @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Alias for backward compatibility.""" + +from deepmd.utils.compat import ( + convert_input_v0_v1, + convert_input_v1_v2, + deprecate_numb_test, + update_deepmd_input, +) + +__all__ = [ + "convert_input_v0_v1", + "convert_input_v1_v2", + "deprecate_numb_test", + "update_deepmd_input", +] diff --git a/deepmd/utils/compress.py b/deepmd/tf/utils/compress.py similarity index 98% rename from deepmd/utils/compress.py rename to deepmd/tf/utils/compress.py index 7a79dec520..0bce633573 100644 --- a/deepmd/utils/compress.py +++ b/deepmd/tf/utils/compress.py @@ -3,10 +3,10 @@ import numpy as np -from deepmd.env import ( +from deepmd.tf.env import ( tf, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import (
get_pattern_nodes_from_graph_def, get_tensor_by_name_from_graph, ) diff --git a/deepmd/utils/convert.py b/deepmd/tf/utils/convert.py similarity index 99% rename from deepmd/utils/convert.py rename to deepmd/tf/utils/convert.py index 13e07f0885..625f54a9a0 100644 --- a/deepmd/utils/convert.py +++ b/deepmd/tf/utils/convert.py @@ -14,10 +14,10 @@ ) from packaging.version import parse as parse_version -from deepmd import ( +from deepmd.tf import ( __version__, ) -from deepmd.env import ( +from deepmd.tf.env import ( tf, ) diff --git a/deepmd/tf/utils/data.py b/deepmd/tf/utils/data.py new file mode 100644 index 0000000000..54130c18f4 --- /dev/null +++ b/deepmd/tf/utils/data.py @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Alias for backward compatibility.""" + +from deepmd.utils.data import ( + DeepmdData, +) + +__all__ = [ + "DeepmdData", +] diff --git a/deepmd/tf/utils/data_system.py b/deepmd/tf/utils/data_system.py new file mode 100644 index 0000000000..da0cce28e8 --- /dev/null +++ b/deepmd/tf/utils/data_system.py @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Alias for backward compatibility.""" + +from deepmd.utils.data_system import ( + DeepmdDataSystem, + prob_sys_size_ext, + process_sys_probs, +) + +__all__ = [ + "DeepmdDataSystem", + "process_sys_probs", + "prob_sys_size_ext", +] diff --git a/deepmd/tf/utils/errors.py b/deepmd/tf/utils/errors.py new file mode 100644 index 0000000000..5f7291c7ce --- /dev/null +++ b/deepmd/tf/utils/errors.py @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.utils.errors import ( + OutOfMemoryError, +) + + +class GraphTooLargeError(Exception): + """The graph is too large, exceeding protobuf's hard limit of 2GB.""" + + +class GraphWithoutTensorError(Exception): + pass + + +__all__ = [ + "OutOfMemoryError", + "GraphTooLargeError", + "GraphWithoutTensorError", +] diff --git a/deepmd/tf/utils/finetune.py b/deepmd/tf/utils/finetune.py new file mode 100644 index 
0000000000..3d11130ba7 --- /dev/null +++ b/deepmd/tf/utils/finetune.py @@ -0,0 +1,109 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import logging +from typing import ( + Any, + Dict, +) + +from deepmd.tf.utils.errors import ( + GraphWithoutTensorError, +) +from deepmd.tf.utils.graph import ( + get_tensor_by_name, +) + +log = logging.getLogger(__name__) + + +def replace_model_params_with_pretrained_model( + jdata: Dict[str, Any], pretrained_model: str +): + """Replace the model params in input script according to pretrained model. + + Parameters + ---------- + jdata : Dict[str, Any] + input script + pretrained_model : str + filename of the pretrained model + """ + # Get the input script from the pretrained model + try: + t_jdata = get_tensor_by_name(pretrained_model, "train_attr/training_script") + except GraphWithoutTensorError as e: + raise RuntimeError( + "The input frozen pretrained model: %s has no training script, " + "which is not supported to perform finetuning. " + "Please use the model pretrained with v2.1.5 or higher version of DeePMD-kit." + % pretrained_model + ) from e + pretrained_jdata = json.loads(t_jdata) + + # Check the model type + assert ( + pretrained_jdata["model"]["descriptor"]["type"] + in [ + "se_atten", + "se_atten_v2", + ] + and pretrained_jdata["model"]["fitting_net"]["type"] in ["ener"] + ), "The finetune process only supports models pretrained with 'se_atten' or 'se_atten_v2' descriptor and 'ener' fitting_net!" + + # Check the type map + pretrained_type_map = pretrained_jdata["model"]["type_map"] + cur_type_map = jdata["model"].get("type_map", []) + out_line_type = [] + for i in cur_type_map: + if i not in pretrained_type_map: + out_line_type.append(i) + assert not out_line_type, ( + f"{out_line_type!s} type(s) not contained in the pretrained model! " + "Please choose another suitable one." + ) + if cur_type_map != pretrained_type_map: + log.info( + f"Change the type_map from {cur_type_map!s} to {pretrained_type_map!s}."
+ ) + jdata["model"]["type_map"] = pretrained_type_map + + # Change model configurations + log.info("Change the model configurations according to the pretrained one...") + for config_key in ["type_embedding", "descriptor", "fitting_net"]: + if ( + config_key not in jdata["model"].keys() + and config_key in pretrained_jdata["model"].keys() + ): + log.info( + "Add the '{}' from pretrained model: {}.".format( + config_key, str(pretrained_jdata["model"][config_key]) + ) + ) + jdata["model"][config_key] = pretrained_jdata["model"][config_key] + elif ( + config_key == "type_embedding" + and config_key in jdata["model"].keys() + and config_key not in pretrained_jdata["model"].keys() + ): + # 'type_embedding' can be omitted using 'se_atten' descriptor, and the activation_function will be None. + cur_para = jdata["model"].pop(config_key) + if "trainable" in cur_para and not cur_para["trainable"]: + jdata["model"][config_key] = { + "trainable": False, + "activation_function": "None", + } + log.info("The type_embeddings from pretrained model will be frozen.") + elif ( + config_key in jdata["model"].keys() + and config_key in pretrained_jdata["model"].keys() + and jdata["model"][config_key] != pretrained_jdata["model"][config_key] + ): + target_para = pretrained_jdata["model"][config_key] + cur_para = jdata["model"][config_key] + # TODO: keep some params that are irrelevant to model structures (need to discuss) + if "trainable" in cur_para.keys(): + target_para["trainable"] = cur_para["trainable"] + log.info(f"Change the '{config_key}' from {cur_para!s} to {target_para!s}.") + jdata["model"][config_key] = target_para + + return jdata, cur_type_map diff --git a/deepmd/utils/graph.py b/deepmd/tf/utils/graph.py similarity index 73% rename from deepmd/utils/graph.py rename to deepmd/tf/utils/graph.py index ad4ee0224a..a6e2ab7422 100644 --- a/deepmd/utils/graph.py +++ b/deepmd/tf/utils/graph.py @@ -7,22 +7,21 @@ import numpy as np -from deepmd.env import ( +from deepmd.tf.env 
import ( ATTENTION_LAYER_PATTERN, EMBEDDING_NET_PATTERN, FITTING_NET_PATTERN, TYPE_EMBEDDING_PATTERN, tf, ) -from deepmd.utils.errors import ( +from deepmd.tf.utils.errors import ( GraphWithoutTensorError, ) -from deepmd.utils.sess import ( +from deepmd.tf.utils.sess import ( run_sess, ) -# TODO (JZ): I think in this file we can merge some duplicated lines into one method... def load_graph_def(model_file: str) -> Tuple[tf.Graph, tf.GraphDef]: """Load graph as well as the graph_def from the frozen model(model_file). @@ -99,30 +98,6 @@ def get_tensor_by_name(model_file: str, tensor_name: str) -> tf.Tensor: return get_tensor_by_name_from_graph(graph, tensor_name) -def get_tensor_by_type(node, data_type: np.dtype) -> tf.Tensor: - """Get the tensor value within the given node according to the input data_type. - - Parameters - ---------- - node - The given tensorflow graph node - data_type - The data type of the node - - Returns - ------- - tf.Tensor - The tensor value of the given node - """ - if data_type == np.float64: - tensor = np.array(node.double_val) - elif data_type == np.float32: - tensor = np.array(node.float_val) - else: - raise RuntimeError("model compression does not support the half precision") - return tensor - - def get_pattern_nodes_from_graph_def(graph_def: tf.GraphDef, pattern: str) -> Dict: """Get the pattern nodes with the given tf.GraphDef object. 
@@ -166,9 +141,9 @@ def get_embedding_net_nodes_from_graph_def( # embedding_net_pattern = f"filter_type_\d+{suffix}/matrix_\d+_\d+|filter_type_\d+{suffix}/bias_\d+_\d+|filter_type_\d+{suffix}/idt_\d+_\d+|filter_type_all{suffix}/matrix_\d+_\d+|filter_type_all{suffix}/matrix_\d+_\d+_\d+|filter_type_all{suffix}/bias_\d+_\d+|filter_type_all{suffix}/bias_\d+_\d+_\d+|filter_type_all{suffix}/idt_\d+_\d+" if suffix != "": embedding_net_pattern = ( - EMBEDDING_NET_PATTERN.replace("/idt", suffix + "/idt") - .replace("/bias", suffix + "/bias") - .replace("/matrix", suffix + "/matrix") + EMBEDDING_NET_PATTERN.replace("/(idt)", suffix + "/(idt)") + .replace("/(bias)", suffix + "/(bias)") + .replace("/(matrix)", suffix + "/(matrix)") ) else: embedding_net_pattern = EMBEDDING_NET_PATTERN @@ -176,10 +151,6 @@ def get_embedding_net_nodes_from_graph_def( embedding_net_nodes = get_pattern_nodes_from_graph_def( graph_def, embedding_net_pattern ) - for key in embedding_net_nodes.keys(): - assert ( - key.find("bias") > 0 or key.find("matrix") > 0 - ), "currently, only support weight matrix and bias matrix at the tabulation op!" 
return embedding_net_nodes @@ -219,22 +190,10 @@ def get_embedding_net_variables_from_graph_def( Dict The embedding net variables within the given tf.GraphDef object """ - embedding_net_variables = {} embedding_net_nodes = get_embedding_net_nodes_from_graph_def( graph_def, suffix=suffix ) - for item in embedding_net_nodes: - node = embedding_net_nodes[item] - dtype = tf.as_dtype(node.dtype).as_numpy_dtype - tensor_shape = tf.TensorShape(node.tensor_shape).as_list() - if (len(tensor_shape) != 1) or (tensor_shape[0] != 1): - tensor_value = np.frombuffer( - node.tensor_content, dtype=tf.as_dtype(node.dtype).as_numpy_dtype - ) - else: - tensor_value = get_tensor_by_type(node, dtype) - embedding_net_variables[item] = np.reshape(tensor_value, tensor_shape) - return embedding_net_variables + return convert_tensor_to_ndarray_in_dict(embedding_net_nodes) def get_extra_embedding_net_suffix(type_one_side: bool): @@ -273,16 +232,7 @@ def get_variables_from_graph_def_as_numpy_array(graph_def: tf.GraphDef, pattern: The numpy array of the variable """ node = get_pattern_nodes_from_graph_def(graph_def, pattern)[pattern] - dtype = tf.as_dtype(node.dtype).as_numpy_dtype - tensor_shape = tf.TensorShape(node.tensor_shape).as_list() - if (len(tensor_shape) != 1) or (tensor_shape[0] != 1): - tensor_value = np.frombuffer( - node.tensor_content, - dtype=tf.as_dtype(node.dtype).as_numpy_dtype, - ) - else: - tensor_value = get_tensor_by_type(node, dtype) - return np.reshape(tensor_value, tensor_shape) + return tf.make_ndarray(node) def get_extra_embedding_net_variables_from_graph_def( @@ -312,13 +262,13 @@ def get_extra_embedding_net_variables_from_graph_def( extra_embedding_net_variables = {} for i in range(1, layer_size + 1): matrix_pattern = f"filter_type_all{suffix}/matrix_{i}{extra_suffix}" - extra_embedding_net_variables[ - matrix_pattern - ] = get_variables_from_graph_def_as_numpy_array(graph_def, matrix_pattern) + extra_embedding_net_variables[matrix_pattern] = ( + 
get_variables_from_graph_def_as_numpy_array(graph_def, matrix_pattern) + ) bias_pattern = f"filter_type_all{suffix}/bias_{i}{extra_suffix}" - extra_embedding_net_variables[ - bias_pattern - ] = get_variables_from_graph_def_as_numpy_array(graph_def, bias_pattern) + extra_embedding_net_variables[bias_pattern] = ( + get_variables_from_graph_def_as_numpy_array(graph_def, bias_pattern) + ) return extra_embedding_net_variables @@ -360,9 +310,9 @@ def get_fitting_net_nodes_from_graph_def( """ if suffix != "": fitting_net_pattern = ( - FITTING_NET_PATTERN.replace("/idt", suffix + "/idt") - .replace("/bias", suffix + "/bias") - .replace("/matrix", suffix + "/matrix") + FITTING_NET_PATTERN.replace("/(idt)", suffix + "/(idt)") + .replace("/(bias)", suffix + "/(bias)") + .replace("/(matrix)", suffix + "/(matrix)") ) else: fitting_net_pattern = FITTING_NET_PATTERN @@ -408,20 +358,8 @@ def get_fitting_net_variables_from_graph_def( Dict The fitting net variables within the given tf.GraphDef object """ - fitting_net_variables = {} fitting_net_nodes = get_fitting_net_nodes_from_graph_def(graph_def, suffix=suffix) - for item in fitting_net_nodes: - node = fitting_net_nodes[item] - dtype = tf.as_dtype(node.dtype).as_numpy_dtype - tensor_shape = tf.TensorShape(node.tensor_shape).as_list() - if (len(tensor_shape) != 1) or (tensor_shape[0] != 1): - tensor_value = np.frombuffer( - node.tensor_content, dtype=tf.as_dtype(node.dtype).as_numpy_dtype - ) - else: - tensor_value = get_tensor_by_type(node, dtype) - fitting_net_variables[item] = np.reshape(tensor_value, tensor_shape) - return fitting_net_variables + return convert_tensor_to_ndarray_in_dict(fitting_net_nodes) def get_fitting_net_variables(model_file: str, suffix: str = "") -> Dict: @@ -462,9 +400,9 @@ def get_type_embedding_net_nodes_from_graph_def( """ if suffix != "": type_embedding_net_pattern = ( - TYPE_EMBEDDING_PATTERN.replace("/idt", suffix + "/idt") - .replace("/bias", suffix + "/bias") - .replace("/matrix", suffix + 
"/matrix") + TYPE_EMBEDDING_PATTERN.replace("/(idt)", suffix + "/(idt)") + .replace("/(bias)", suffix + "/(bias)") + .replace("/(matrix)", suffix + "/(matrix)") ) else: type_embedding_net_pattern = TYPE_EMBEDDING_PATTERN @@ -492,22 +430,10 @@ def get_type_embedding_net_variables_from_graph_def( Dict The embedding net variables within the given tf.GraphDef object """ - type_embedding_net_variables = {} type_embedding_net_nodes = get_type_embedding_net_nodes_from_graph_def( graph_def, suffix=suffix ) - for item in type_embedding_net_nodes: - node = type_embedding_net_nodes[item] - dtype = tf.as_dtype(node.dtype).as_numpy_dtype - tensor_shape = tf.TensorShape(node.tensor_shape).as_list() - if (len(tensor_shape) != 1) or (tensor_shape[0] != 1): - tensor_value = np.frombuffer( - node.tensor_content, dtype=tf.as_dtype(node.dtype).as_numpy_dtype - ) - else: - tensor_value = get_tensor_by_type(node, dtype) - type_embedding_net_variables[item] = np.reshape(tensor_value, tensor_shape) - return type_embedding_net_variables + return convert_tensor_to_ndarray_in_dict(type_embedding_net_nodes) def get_attention_layer_nodes_from_graph_def( @@ -561,19 +487,27 @@ def get_attention_layer_variables_from_graph_def( Dict The attention layer variables within the given tf.GraphDef object """ - attention_layer_variables = {} attention_layer_net_nodes = get_attention_layer_nodes_from_graph_def( graph_def, suffix=suffix ) - for item in attention_layer_net_nodes: - node = attention_layer_net_nodes[item] - dtype = tf.as_dtype(node.dtype).as_numpy_dtype - tensor_shape = tf.TensorShape(node.tensor_shape).as_list() - if (len(tensor_shape) != 1) or (tensor_shape[0] != 1): - tensor_value = np.frombuffer( - node.tensor_content, dtype=tf.as_dtype(node.dtype).as_numpy_dtype - ) - else: - tensor_value = get_tensor_by_type(node, dtype) - attention_layer_variables[item] = np.reshape(tensor_value, tensor_shape) - return attention_layer_variables + return 
convert_tensor_to_ndarray_in_dict(attention_layer_net_nodes) + + +def convert_tensor_to_ndarray_in_dict( + tensor_dict: Dict[str, tf.Tensor], +) -> Dict[str, np.ndarray]: + """Convert tensor to ndarray in dict. + + Parameters + ---------- + tensor_dict : Dict[str, tf.Tensor] + The input tensor dict + + Returns + ------- + Dict[str, np.ndarray] + The converted tensor dict + """ + for key in tensor_dict: + tensor_dict[key] = tf.make_ndarray(tensor_dict[key]) + return tensor_dict diff --git a/deepmd/utils/learning_rate.py b/deepmd/tf/utils/learning_rate.py similarity index 99% rename from deepmd/utils/learning_rate.py rename to deepmd/tf/utils/learning_rate.py index 5bec5120cd..519bf20bd0 100644 --- a/deepmd/utils/learning_rate.py +++ b/deepmd/tf/utils/learning_rate.py @@ -5,7 +5,7 @@ import numpy as np -from deepmd.env import ( +from deepmd.tf.env import ( tf, ) diff --git a/deepmd/utils/multi_init.py b/deepmd/tf/utils/multi_init.py similarity index 95% rename from deepmd/utils/multi_init.py rename to deepmd/tf/utils/multi_init.py index 6c070dc67e..aafa9461b0 100644 --- a/deepmd/utils/multi_init.py +++ b/deepmd/tf/utils/multi_init.py @@ -6,10 +6,10 @@ Dict, ) -from deepmd.utils.errors import ( +from deepmd.tf.utils.errors import ( GraphWithoutTensorError, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( get_tensor_by_name, ) @@ -59,9 +59,7 @@ def replace_model_params_with_frz_multi_model( ) if cur_type_map != pretrained_type_map: log.info( - "Change the type_map from {} to {}.".format( - str(cur_type_map), str(pretrained_type_map) - ) + f"Change the type_map from {cur_type_map!s} to {pretrained_type_map!s}." 
) jdata["model"]["type_map"] = pretrained_type_map @@ -166,7 +164,7 @@ def replace_model_params_with_frz_multi_model( def _change_sub_config(jdata: Dict[str, Any], src_jdata: Dict[str, Any], sub_key: str): target_para = src_jdata[sub_key] cur_para = jdata[sub_key] - # keep some params that are irrelevant to model structures (need to discuss) TODO + # TODO: keep some params that are irrelevant to model structures (need to discuss) if "trainable" in cur_para.keys(): target_para["trainable"] = cur_para["trainable"] log.info(f"Change the '{sub_key}' from {cur_para!s} to {target_para!s}.") diff --git a/deepmd/tf/utils/neighbor_stat.py b/deepmd/tf/utils/neighbor_stat.py new file mode 100644 index 0000000000..f668d4a4da --- /dev/null +++ b/deepmd/tf/utils/neighbor_stat.py @@ -0,0 +1,277 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging +from typing import ( + Iterator, + Optional, + Tuple, +) + +import numpy as np + +from deepmd.tf.env import ( + GLOBAL_NP_FLOAT_PRECISION, + GLOBAL_TF_FLOAT_PRECISION, + default_tf_session_config, + tf, +) +from deepmd.tf.utils.batch_size import ( + AutoBatchSize, +) +from deepmd.tf.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.tf.utils.nlist import ( + extend_coord_with_ghosts, +) +from deepmd.tf.utils.sess import ( + run_sess, +) +from deepmd.utils.neighbor_stat import NeighborStat as BaseNeighborStat + +log = logging.getLogger(__name__) + + +class NeighborStatOP: + """Class for getting neighbor statics data information. + + Parameters + ---------- + ntypes + The num of atom types + rcut + The cut-off radius + mixed_types : bool, optional + If True, treat neighbors of all types as a single type. 
+ """ + + def __init__( + self, + ntypes: int, + rcut: float, + mixed_types: bool, + ) -> None: + super().__init__() + self.rcut = rcut + self.ntypes = ntypes + self.mixed_types = mixed_types + + def build( + self, + coord: tf.Tensor, + atype: tf.Tensor, + cell: tf.Tensor, + pbc: tf.Tensor, + ) -> Tuple[tf.Tensor, tf.Tensor]: + """Calculate the nearest neighbor distance between atoms, maximum nbor size of + atoms and the output data range of the environment matrix. + + Parameters + ---------- + coord + The coordinates of atoms. + atype + The atom types. + cell + The cell. + + Returns + ------- + tf.Tensor + The minimal squared distance between two atoms, in the shape of (nframes,) + tf.Tensor + The maximal number of neighbors + """ + # generated by GitHub Copilot, converted from PT codes + nframes = tf.shape(coord)[0] + coord = tf.reshape(coord, [nframes, -1, 3]) + nloc = tf.shape(coord)[1] + coord = tf.reshape(coord, [nframes, nloc * 3]) + extend_coord, extend_atype, _ = extend_coord_with_ghosts( + coord, atype, cell, self.rcut, pbc + ) + + coord1 = tf.reshape(extend_coord, [nframes, -1]) + nall = tf.shape(coord1)[1] // 3 + coord0 = coord1[:, : nloc * 3] + diff = ( + tf.reshape(coord1, [nframes, -1, 3])[:, None, :, :] + - tf.reshape(coord0, [nframes, -1, 3])[:, :, None, :] + ) + # shape of diff: nframes, nloc, nall, 3 + # remove the diagonal elements + mask = tf.eye(nloc, nall, dtype=tf.bool) + # expand mask + mask = tf.tile(mask[None, :, :], [nframes, 1, 1]) + # expand inf + inf_mask = tf.constant( + float("inf"), dtype=GLOBAL_TF_FLOAT_PRECISION, shape=[1, 1, 1] + ) + inf_mask = tf.tile(inf_mask, [nframes, nloc, nall]) + # virtual type (<0) are not counted + virtual_type_mask_i = tf.tile(tf.less(atype, 0)[:, :, None], [1, 1, nall]) + virtual_type_mask_j = tf.tile( + tf.less(extend_atype, 0)[:, None, :], [1, nloc, 1] + ) + mask = mask | virtual_type_mask_i | virtual_type_mask_j + rr2 = tf.reduce_sum(tf.square(diff), axis=-1) + rr2 = tf.where(mask, inf_mask, rr2) + 
min_rr2 = tf.reduce_min(rr2, axis=(1, 2)) + # count the number of neighbors + if not self.mixed_types: + mask = rr2 < self.rcut**2 + nnei = [] + for ii in range(self.ntypes): + nnei.append( + tf.reduce_sum( + tf.cast( + mask & (tf.equal(extend_atype, ii))[:, None, :], tf.int32 + ), + axis=-1, + ) + ) + # shape: nframes, nloc, ntypes + nnei = tf.stack(nnei, axis=-1) + else: + mask = rr2 < self.rcut**2 + # virtual types (<0) are not counted + nnei = tf.reshape( + tf.reduce_sum( + tf.cast( + mask & tf.greater_equal(extend_atype, 0)[:, None, :], tf.int32 + ), + axis=-1, + ), + [nframes, nloc, 1], + ) + # nnei: nframes, nloc, ntypes + # virtual type i (<0) are not counted + nnei = tf.where( + tf.tile( + tf.less(atype, 0)[:, :, None], + [1, 1, self.ntypes if not self.mixed_types else 1], + ), + tf.zeros_like(nnei, dtype=tf.int32), + nnei, + ) + max_nnei = tf.reduce_max(nnei, axis=1) + return min_rr2, max_nnei + + +class NeighborStat(BaseNeighborStat): + """Class for getting training data information. + + It loads data from DeepmdData object, and measures the data info, including neareest nbor distance between atoms, max nbor size of atoms and the output data range of the environment matrix. + + Parameters + ---------- + ntypes + The num of atom types + rcut + The cut-off radius + mixed_type : bool, optional, default=False + Treat all types as a single type. + """ + + def __init__( + self, + ntypes: int, + rcut: float, + mixed_type: bool = False, + ) -> None: + """Constructor.""" + super().__init__(ntypes, rcut, mixed_type) + self.auto_batch_size = AutoBatchSize() + self.neighbor_stat = NeighborStatOP(ntypes, rcut, mixed_type) + self.place_holders = {} + with tf.Graph().as_default() as sub_graph: + self.op = self.build() + self.sub_sess = tf.Session(graph=sub_graph, config=default_tf_session_config) + + def build(self) -> Tuple[tf.Tensor, tf.Tensor]: + """Build the graph. 
+ + Returns + ------- + tf.Tensor + The minimal squared distance between two atoms, in the shape of (nframes,) + tf.Tensor + The maximal number of neighbors + """ + for ii in ["coord", "box"]: + self.place_holders[ii] = tf.placeholder( + GLOBAL_NP_FLOAT_PRECISION, [None, None], name="t_" + ii + ) + self.place_holders["type"] = tf.placeholder( + tf.int32, [None, None], name="t_type" + ) + self.place_holders["pbc"] = tf.placeholder(tf.bool, [], name="t_pbc") + ret = self.neighbor_stat.build( + self.place_holders["coord"], + self.place_holders["type"], + self.place_holders["box"], + self.place_holders["pbc"], + ) + return ret + + def iterator( + self, data: DeepmdDataSystem + ) -> Iterator[Tuple[np.ndarray, float, str]]: + """Produce data. + + Parameters + ---------- + data + The data system + + Yields + ------ + np.ndarray + The maximal number of neighbors + float + The squared minimal distance between two atoms + str + The directory of the data system + """ + for ii in range(len(data.system_dirs)): + for jj in data.data_systems[ii].dirs: + data_set = data.data_systems[ii] + data_set_data = data_set._load_set(jj) + minrr2, max_nnei = self.auto_batch_size.execute_all( + self._execute, + data_set_data["coord"].shape[0], + data_set.get_natoms(), + data_set_data["coord"], + data_set_data["type"], + data_set_data["box"], + data_set.pbc, + ) + yield np.max(max_nnei, axis=0), np.min(minrr2), jj + + def _execute( + self, + coord: np.ndarray, + atype: np.ndarray, + box: Optional[np.ndarray], + pbc: bool, + ): + """Execute the operation. + + Parameters + ---------- + coord + The coordinates of atoms. + atype + The atom types. + box + The box. + pbc + Whether the box is periodic. 
+ """ + feed_dict = { + self.place_holders["coord"]: coord, + self.place_holders["type"]: atype, + self.place_holders["box"]: box, + self.place_holders["pbc"]: pbc, + } + minrr2, max_nnei = run_sess(self.sub_sess, self.op, feed_dict=feed_dict) + return minrr2, max_nnei diff --git a/deepmd/utils/network.py b/deepmd/tf/utils/network.py similarity index 99% rename from deepmd/utils/network.py rename to deepmd/tf/utils/network.py index 36d8c42f82..fb8e89c737 100644 --- a/deepmd/utils/network.py +++ b/deepmd/tf/utils/network.py @@ -1,10 +1,10 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import numpy as np -from deepmd.common import ( +from deepmd.tf.common import ( get_precision, ) -from deepmd.env import ( +from deepmd.tf.env import ( GLOBAL_TF_FLOAT_PRECISION, tf, ) diff --git a/deepmd/tf/utils/nlist.py b/deepmd/tf/utils/nlist.py new file mode 100644 index 0000000000..87032c3e1d --- /dev/null +++ b/deepmd/tf/utils/nlist.py @@ -0,0 +1,103 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.tf.env import ( + GLOBAL_TF_FLOAT_PRECISION, + tf, +) +from deepmd.tf.utils.region import ( + to_face_distance, +) + + +def extend_coord_with_ghosts( + coord: tf.Tensor, + atype: tf.Tensor, + cell: tf.Tensor, + rcut: float, + pbc: tf.Tensor, +): + """Extend the coordinates of the atoms by appending peridoc images. + The number of images is large enough to ensure all the neighbors + within rcut are appended. + + Parameters + ---------- + coord : tf.Tensor + original coordinates of shape [-1, nloc*3]. + atype : tf.Tensor + atom type of shape [-1, nloc]. + cell : tf.Tensor + simulation cell tensor of shape [-1, 9]. + rcut : float + the cutoff radius + pbc : tf.Tensor + whether the simulation cell is periodic or not + + Returns + ------- + extended_coord: tf.Tensor + extended coordinates of shape [-1, nall*3]. + extended_atype: tf.Tensor + extended atom type of shape [-1, nall]. 
+ index_mapping: tf.Tensor + maping extended index to the local index + + """ + # generated by GitHub Copilot, converted from PT codes + nf = tf.shape(atype)[0] + nloc = tf.shape(atype)[1] + aidx = tf.tile(tf.expand_dims(tf.range(nloc), 0), [nf, 1]) + + def extend_coord_with_ghosts_nopbc(coord, atype, cell): + return coord, atype, aidx, nloc + + def extend_coord_with_ghosts_pbc(coord, atype, cell): + coord = tf.reshape(coord, [nf, nloc, 3]) + cell = tf.reshape(cell, [nf, 3, 3]) + # nf x 3 + to_face = to_face_distance(cell) + # nf x 3 + # *2: ghost copies on + and - directions + # +1: central cell + nbuff = tf.cast(tf.math.ceil(rcut / to_face), tf.int32) + # 3 + nbuff = tf.reduce_max(nbuff, axis=0) + xi = tf.range(-nbuff[0], nbuff[0] + 1, 1) + yi = tf.range(-nbuff[1], nbuff[1] + 1, 1) + zi = tf.range(-nbuff[2], nbuff[2] + 1, 1) + xyz = tf.reshape(xi, [-1, 1, 1, 1]) * tf.constant([1, 0, 0], dtype=tf.int32) + xyz = xyz + tf.reshape(yi, [1, -1, 1, 1]) * tf.constant( + [0, 1, 0], dtype=tf.int32 + ) + xyz = xyz + tf.reshape(zi, [1, 1, -1, 1]) * tf.constant( + [0, 0, 1], dtype=tf.int32 + ) + xyz = tf.reshape(xyz, [-1, 3]) + # ns x 3 + shift_idx = tf.gather( + xyz, tf.argsort(tf.norm(tf.cast(xyz, GLOBAL_TF_FLOAT_PRECISION), axis=1)) + ) + ns = tf.shape(shift_idx)[0] + nall = ns * nloc + # nf x ns x 3 + shift_vec = tf.einsum( + "sd,fdk->fsk", tf.cast(shift_idx, GLOBAL_TF_FLOAT_PRECISION), cell + ) + # nf x ns x nloc x 3 + extend_coord = coord[:, None, :, :] + shift_vec[:, :, None, :] + # nf x ns x nloc + extend_atype = tf.tile(tf.expand_dims(atype, -2), [1, ns, 1]) + # nf x ns x nloc + extend_aidx = tf.tile(tf.expand_dims(aidx, -2), [1, ns, 1]) + return extend_coord, extend_atype, extend_aidx, nall + + extend_coord, extend_atype, extend_aidx, nall = tf.cond( + pbc, + lambda: extend_coord_with_ghosts_pbc(coord, atype, cell), + lambda: extend_coord_with_ghosts_nopbc(coord, atype, cell), + ) + + return ( + tf.reshape(extend_coord, [nf, nall * 3]), + tf.reshape(extend_atype, 
[nf, nall]), + tf.reshape(extend_aidx, [nf, nall]), + ) diff --git a/deepmd/tf/utils/pair_tab.py b/deepmd/tf/utils/pair_tab.py new file mode 100644 index 0000000000..a9747c4367 --- /dev/null +++ b/deepmd/tf/utils/pair_tab.py @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Alias for backward compatibility.""" + +from deepmd.utils.pair_tab import ( + PairTab, +) + +__all__ = [ + "PairTab", +] diff --git a/deepmd/utils/parallel_op.py b/deepmd/tf/utils/parallel_op.py similarity index 94% rename from deepmd/utils/parallel_op.py rename to deepmd/tf/utils/parallel_op.py index 9ef68bbd84..5eeb1fab7f 100644 --- a/deepmd/utils/parallel_op.py +++ b/deepmd/tf/utils/parallel_op.py @@ -8,10 +8,10 @@ Tuple, ) -from deepmd.env import ( +from deepmd.tf.env import ( tf, ) -from deepmd.utils.sess import ( +from deepmd.tf.utils.sess import ( run_sess, ) @@ -30,17 +30,15 @@ class ParallelOp: Examples -------- - >>> from deepmd.env import tf - >>> from deepmd.utils.parallel_op import ParallelOp + >>> from deepmd.tf.env import tf + >>> from deepmd.tf.utils.parallel_op import ParallelOp >>> def builder(): ... x = tf.placeholder(tf.int32, [1]) ... return {"x": x}, (x + 1) - ... >>> p = ParallelOp(builder, nthreads=4) >>> def feed(): ... for ii in range(10): ... yield {"x": [ii]} - ... 
>>> print(*p.generate(tf.Session(), feed())) [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] """ diff --git a/deepmd/tf/utils/path.py b/deepmd/tf/utils/path.py new file mode 100644 index 0000000000..67990543ae --- /dev/null +++ b/deepmd/tf/utils/path.py @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Alias for backward compatibility.""" + +from deepmd.utils.path import ( + DPH5Path, + DPOSPath, + DPPath, +) + +__all__ = [ + "DPPath", + "DPOSPath", + "DPH5Path", +] diff --git a/deepmd/tf/utils/plugin.py b/deepmd/tf/utils/plugin.py new file mode 100644 index 0000000000..f2f0336691 --- /dev/null +++ b/deepmd/tf/utils/plugin.py @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Alias for backward compatibility.""" + +from deepmd.utils.plugin import ( + Plugin, + PluginVariant, + VariantABCMeta, + VariantMeta, +) + +__all__ = [ + "Plugin", + "VariantMeta", + "VariantABCMeta", + "PluginVariant", +] diff --git a/deepmd/tf/utils/random.py b/deepmd/tf/utils/random.py new file mode 100644 index 0000000000..55b8eba91e --- /dev/null +++ b/deepmd/tf/utils/random.py @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Alias for backward compatibility.""" + +from deepmd.utils.random import ( + choice, + random, + seed, + shuffle, +) + +__all__ = [ + "choice", + "random", + "seed", + "shuffle", +] diff --git a/deepmd/tf/utils/region.py b/deepmd/tf/utils/region.py new file mode 100644 index 0000000000..82183a0413 --- /dev/null +++ b/deepmd/tf/utils/region.py @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.tf.env import ( + tf, +) + + +def to_face_distance(cell): + """Compute the to-face-distance of the simulation cell. + + Parameters + ---------- + cell : tf.Tensor + simulation cell tensor of shape [*, 3, 3]. 
+ + Returns + ------- + dist: tf.Tensor + the to face distances of shape [*, 3] + """ + # generated by GitHub Copilot, converted from PT codes + cshape = tf.shape(cell) + cell_reshaped = tf.reshape(cell, [-1, 3, 3]) + dist = b_to_face_distance(cell_reshaped) + return tf.reshape(dist, tf.concat([cshape[:-2], [3]], 0)) + + +def b_to_face_distance(cell): + # generated by GitHub Copilot, converted from PT codes + volume = tf.linalg.det(cell) + c_yz = tf.linalg.cross(cell[:, 1], cell[:, 2]) + _h2yz = tf.divide(volume, tf.norm(c_yz, axis=-1)) + c_zx = tf.linalg.cross(cell[:, 2], cell[:, 0]) + _h2zx = tf.divide(volume, tf.norm(c_zx, axis=-1)) + c_xy = tf.linalg.cross(cell[:, 0], cell[:, 1]) + _h2xy = tf.divide(volume, tf.norm(c_xy, axis=-1)) + return tf.stack([_h2yz, _h2zx, _h2xy], axis=1) diff --git a/deepmd/tf/utils/serialization.py b/deepmd/tf/utils/serialization.py new file mode 100644 index 0000000000..7cf596f5bd --- /dev/null +++ b/deepmd/tf/utils/serialization.py @@ -0,0 +1,132 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import tempfile + +from deepmd.tf.entrypoints import ( + freeze, +) +from deepmd.tf.env import ( + GLOBAL_TF_FLOAT_PRECISION, + tf, +) +from deepmd.tf.model.model import ( + Model, +) +from deepmd.tf.utils.errors import ( + GraphWithoutTensorError, +) +from deepmd.tf.utils.graph import ( + get_tensor_by_name_from_graph, + load_graph_def, +) +from deepmd.tf.utils.sess import ( + run_sess, +) + + +def serialize_from_file(model_file: str) -> dict: + """Serialize the model file to a dictionary. + + Parameters + ---------- + model_file : str + The model file to be serialized. + + Returns + ------- + dict + The serialized model data. + """ + graph, graph_def = load_graph_def(model_file) + t_jdata = get_tensor_by_name_from_graph(graph, "train_attr/training_script") + jdata = json.loads(t_jdata) + model = Model(**jdata["model"]) + # important! 
must be called before serialize + model.init_variables(graph=graph, graph_def=graph_def) + model_dict = model.serialize() + data = { + "backend": "TensorFlow", + "tf_version": tf.__version__, + "model": model_dict, + "model_def_script": jdata["model"], + } + # neighbor stat information + try: + t_min_nbor_dist = get_tensor_by_name_from_graph( + graph, "train_attr/min_nbor_dist" + ) + except GraphWithoutTensorError as e: + pass + else: + data.setdefault("@variables", {}) + data["@variables"]["min_nbor_dist"] = t_min_nbor_dist + return data + + +def deserialize_to_file(model_file: str, data: dict) -> None: + """Deserialize the dictionary to a model file. + + Parameters + ---------- + model_file : str + The model file to be saved. + data : dict + The dictionary to be deserialized. + """ + model = Model.deserialize(data["model"]) + with tf.Graph().as_default() as graph, tf.Session(graph=graph) as sess: + place_holders = {} + for ii in ["coord", "box"]: + place_holders[ii] = tf.placeholder( + GLOBAL_TF_FLOAT_PRECISION, [None], name="t_" + ii + ) + place_holders["type"] = tf.placeholder(tf.int32, [None], name="t_type") + place_holders["natoms_vec"] = tf.placeholder( + tf.int32, [model.get_ntypes() + 2], name="t_natoms" + ) + place_holders["default_mesh"] = tf.placeholder(tf.int32, [None], name="t_mesh") + inputs = {} + # fparam, aparam + if model.get_numb_fparam() > 0: + inputs["fparam"] = tf.placeholder( + GLOBAL_TF_FLOAT_PRECISION, + [None, model.get_numb_fparam()], + name="t_fparam", + ) + if model.get_numb_aparam() > 0: + inputs["aparam"] = tf.placeholder( + GLOBAL_TF_FLOAT_PRECISION, + [None, model.get_numb_aparam()], + name="t_aparam", + ) + model.build( + place_holders["coord"], + place_holders["type"], + place_holders["natoms_vec"], + place_holders["box"], + place_holders["default_mesh"], + inputs, + reuse=False, + ) + init = tf.global_variables_initializer() + tf.constant( + json.dumps({"model": data["model_def_script"]}, separators=(",", ":")), + 
name="train_attr/training_script", + dtype=tf.string, + ) + if "min_nbor_dist" in data.get("@variables", {}): + tf.constant( + data["@variables"]["min_nbor_dist"], + name="train_attr/min_nbor_dist", + dtype=GLOBAL_TF_FLOAT_PRECISION, + ) + run_sess(sess, init) + saver = tf.train.Saver() + with tempfile.TemporaryDirectory() as nt: + saver.save( + sess, + os.path.join(nt, "model.ckpt"), + global_step=0, + ) + freeze(checkpoint_folder=nt, output=model_file, node_names=None) diff --git a/deepmd/utils/sess.py b/deepmd/tf/utils/sess.py similarity index 95% rename from deepmd/utils/sess.py rename to deepmd/tf/utils/sess.py index a87adffd91..ca98980f89 100644 --- a/deepmd/utils/sess.py +++ b/deepmd/tf/utils/sess.py @@ -1,10 +1,10 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import os -from deepmd.env import ( +from deepmd.tf.env import ( tf, ) -from deepmd.utils.errors import ( +from deepmd.tf.utils.errors import ( OutOfMemoryError, ) diff --git a/deepmd/tf/utils/spin.py b/deepmd/tf/utils/spin.py new file mode 100644 index 0000000000..c20d4dcc7b --- /dev/null +++ b/deepmd/tf/utils/spin.py @@ -0,0 +1,87 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + List, + Optional, +) + +from deepmd.tf.env import ( + GLOBAL_TF_FLOAT_PRECISION, + tf, +) + + +class Spin: + """Class for spin. 
+ + Parameters + ---------- + use_spin + Whether to use atomic spin model for each atom type + spin_norm + The magnitude of atomic spin for each atom type with spin + virtual_len + The distance between virtual atom representing spin and its corresponding real atom for each atom type with spin + """ + + def __init__( + self, + use_spin: Optional[List[bool]] = None, + spin_norm: Optional[List[float]] = None, + virtual_len: Optional[List[float]] = None, + ) -> None: + """Constructor.""" + self.use_spin = use_spin + self.spin_norm = spin_norm + self.virtual_len = virtual_len + self.ntypes_spin = self.use_spin.count(True) + + def build( + self, + reuse=None, + suffix="", + ): + """Build the computational graph for the spin. + + Parameters + ---------- + reuse + The weights in the networks should be reused when get the variable. + suffix + Name suffix to identify this descriptor + + Returns + ------- + embedded_types + The computational graph for embedded types + """ + name = "spin_attr" + suffix + with tf.variable_scope(name, reuse=reuse): + t_ntypes_spin = tf.constant( + self.ntypes_spin, name="ntypes_spin", dtype=tf.int32 + ) + t_virtual_len = tf.constant( + self.virtual_len, + name="virtual_len", + dtype=GLOBAL_TF_FLOAT_PRECISION, + ) + t_spin_norm = tf.constant( + self.spin_norm, + name="spin_norm", + dtype=GLOBAL_TF_FLOAT_PRECISION, + ) + + def get_ntypes_spin(self) -> int: + """Returns the number of atom types which contain spin.""" + return self.ntypes_spin + + def get_use_spin(self) -> List[bool]: + """Returns the list of whether to use spin for each atom type.""" + return self.use_spin + + def get_spin_norm(self) -> List[float]: + """Returns the list of magnitude of atomic spin for each atom type.""" + return self.spin_norm + + def get_virtual_len(self) -> List[float]: + """Returns the list of distance between real atom and virtual atom for each atom type.""" + return self.virtual_len diff --git a/deepmd/utils/tabulate.py b/deepmd/tf/utils/tabulate.py 
similarity index 91% rename from deepmd/utils/tabulate.py rename to deepmd/tf/utils/tabulate.py index 2b270b1dbc..958e08dd86 100644 --- a/deepmd/utils/tabulate.py +++ b/deepmd/tf/utils/tabulate.py @@ -16,17 +16,17 @@ ) import deepmd -from deepmd.common import ( +from deepmd.tf.common import ( ACTIVATION_FN_DICT, ) -from deepmd.descriptor import ( +from deepmd.tf.descriptor import ( Descriptor, ) -from deepmd.env import ( +from deepmd.tf.env import ( op_module, tf, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( get_embedding_net_nodes_from_graph_def, get_tensor_by_name_from_graph, ) @@ -107,15 +107,15 @@ def __init__( self.sub_graph, self.sub_graph_def = self._load_sub_graph() self.sub_sess = tf.Session(graph=self.sub_graph) - if isinstance(self.descrpt, deepmd.descriptor.DescrptSeR): + if isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeR): self.sel_a = self.descrpt.sel_r self.rcut = self.descrpt.rcut self.rcut_smth = self.descrpt.rcut_smth - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeA): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeA): self.sel_a = self.descrpt.sel_a self.rcut = self.descrpt.rcut_r self.rcut_smth = self.descrpt.rcut_r_smth - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeT): self.sel_a = self.descrpt.sel_a self.rcut = self.descrpt.rcut_r self.rcut_smth = self.descrpt.rcut_r_smth @@ -133,6 +133,10 @@ def __init__( self.embedding_net_nodes = get_embedding_net_nodes_from_graph_def( self.graph_def, suffix=self.suffix ) + for key in self.embedding_net_nodes.keys(): + assert ( + key.find("bias") > 0 or key.find("matrix") > 0 + ), "currently, only support weight matrix and bias matrix at the tabulation op!" 
# move it to the descriptor class # for tt in self.exclude_types: @@ -179,8 +183,8 @@ def build( """ # tabulate range [lower, upper] with stride0 'stride0' lower, upper = self._get_env_mat_range(min_nbor_dist) - if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten) or isinstance( - self.descrpt, deepmd.descriptor.DescrptSeAEbdV2 + if isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeAtten) or isinstance( + self.descrpt, deepmd.tf.descriptor.DescrptSeAEbdV2 ): uu = np.max(upper) ll = np.min(lower) @@ -196,7 +200,7 @@ def build( self._build_lower( "filter_net", xx, 0, uu, ll, stride0, stride1, extrapolate, nspline ) - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeA): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeA): for ii in range(self.table_size): if (self.type_one_side and not self._all_excluded(ii)) or ( not self.type_one_side @@ -233,7 +237,7 @@ def build( self._build_lower( net, xx, ii, uu, ll, stride0, stride1, extrapolate, nspline ) - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeT): xx_all = [] for ii in range(self.ntypes): xx = np.arange( @@ -275,7 +279,7 @@ def build( nspline[ii], ) idx += 1 - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeR): for ii in range(self.table_size): if (self.type_one_side and not self._all_excluded(ii)) or ( not self.type_one_side @@ -327,10 +331,10 @@ def _build_lower( ) # tt.shape: [nspline, self.last_layer_size] - if isinstance(self.descrpt, deepmd.descriptor.DescrptSeA): + if isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeA): tt = np.full((nspline, self.last_layer_size), stride1) tt[: int((upper - lower) / stride0), :] = stride0 - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeT): tt = np.full((nspline, self.last_layer_size), stride1) tt[ int((lower - 
extrapolate * lower) / stride1) + 1 : ( @@ -339,7 +343,7 @@ def _build_lower( ), :, ] = stride0 - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeR): tt = np.full((nspline, self.last_layer_size), stride1) tt[: int((upper - lower) / stride0), :] = stride0 else: @@ -423,14 +427,14 @@ def _get_bias(self): bias = {} for layer in range(1, self.layer_size + 1): bias["layer_" + str(layer)] = [] - if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten) or isinstance( - self.descrpt, deepmd.descriptor.DescrptSeAEbdV2 - ): + if isinstance( + self.descrpt, deepmd.tf.descriptor.DescrptSeAtten + ) or isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeAEbdV2): node = self.embedding_net_nodes[ f"filter_type_all{self.suffix}/bias_{layer}" ] bias["layer_" + str(layer)].append(tf.make_ndarray(node)) - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeA): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeA): if self.type_one_side: for ii in range(0, self.ntypes): if not self._all_excluded(ii): @@ -452,14 +456,14 @@ def _get_bias(self): bias["layer_" + str(layer)].append(tf.make_ndarray(node)) else: bias["layer_" + str(layer)].append(np.array([])) - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeT): for ii in range(self.ntypes): for jj in range(ii, self.ntypes): node = self.embedding_net_nodes[ f"filter_type_all{self.suffix}/bias_{layer}_{ii}_{jj}" ] bias["layer_" + str(layer)].append(tf.make_ndarray(node)) - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeR): if self.type_one_side: for ii in range(0, self.ntypes): if not self._all_excluded(ii): @@ -489,14 +493,14 @@ def _get_matrix(self): matrix = {} for layer in range(1, self.layer_size + 1): matrix["layer_" + str(layer)] = [] - if isinstance(self.descrpt, 
deepmd.descriptor.DescrptSeAtten) or isinstance( - self.descrpt, deepmd.descriptor.DescrptSeAEbdV2 - ): + if isinstance( + self.descrpt, deepmd.tf.descriptor.DescrptSeAtten + ) or isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeAEbdV2): node = self.embedding_net_nodes[ f"filter_type_all{self.suffix}/matrix_{layer}" ] matrix["layer_" + str(layer)].append(tf.make_ndarray(node)) - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeA): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeA): if self.type_one_side: for ii in range(0, self.ntypes): if not self._all_excluded(ii): @@ -518,14 +522,14 @@ def _get_matrix(self): matrix["layer_" + str(layer)].append(tf.make_ndarray(node)) else: matrix["layer_" + str(layer)].append(np.array([])) - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeT): for ii in range(self.ntypes): for jj in range(ii, self.ntypes): node = self.embedding_net_nodes[ f"filter_type_all{self.suffix}/matrix_{layer}_{ii}_{jj}" ] matrix["layer_" + str(layer)].append(tf.make_ndarray(node)) - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeR): if self.type_one_side: for ii in range(0, self.ntypes): if not self._all_excluded(ii): @@ -712,14 +716,14 @@ def _layer_1(self, x, w, b): # Change the embedding net range to sw / min_nbor_dist def _get_env_mat_range(self, min_nbor_dist): sw = self._spline5_switch(min_nbor_dist, self.rcut_smth, self.rcut) - if isinstance(self.descrpt, deepmd.descriptor.DescrptSeA): + if isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeA): lower = -self.davg[:, 0] / self.dstd[:, 0] upper = ((1 / min_nbor_dist) * sw - self.davg[:, 0]) / self.dstd[:, 0] - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeT): var = np.square(sw / (min_nbor_dist * self.dstd[:, 1:4])) lower = np.min(-var, axis=1) 
upper = np.max(var, axis=1) - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeR): lower = -self.davg[:, 0] / self.dstd[:, 0] upper = ((1 / min_nbor_dist) * sw - self.davg[:, 0]) / self.dstd[:, 0] else: @@ -741,11 +745,11 @@ def _spline5_switch(self, xx, rmin, rmax): def _get_layer_size(self): layer_size = 0 - if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten) or isinstance( - self.descrpt, deepmd.descriptor.DescrptSeAEbdV2 + if isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeAtten) or isinstance( + self.descrpt, deepmd.tf.descriptor.DescrptSeAEbdV2 ): layer_size = len(self.embedding_net_nodes) // 2 - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeA): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeA): layer_size = len(self.embedding_net_nodes) // ( (self.ntypes * self.ntypes - len(self.exclude_types)) * 2 ) @@ -753,11 +757,11 @@ def _get_layer_size(self): layer_size = len(self.embedding_net_nodes) // ( (self.ntypes - self._n_all_excluded) * 2 ) - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeT): layer_size = len(self.embedding_net_nodes) // int( comb(self.ntypes + 1, 2) * 2 ) - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeR): layer_size = len(self.embedding_net_nodes) // ( (self.ntypes * self.ntypes - len(self.exclude_types)) * 2 ) @@ -770,12 +774,12 @@ def _get_layer_size(self): return layer_size @property - @lru_cache() + @lru_cache def _n_all_excluded(self) -> int: """Then number of types excluding all types.""" return sum(int(self._all_excluded(ii)) for ii in range(0, self.ntypes)) - @lru_cache() + @lru_cache def _all_excluded(self, ii: int) -> bool: """Check if type ii excluds all types. 
@@ -793,17 +797,17 @@ def _all_excluded(self, ii: int) -> bool: def _get_table_size(self): table_size = 0 - if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten) or isinstance( - self.descrpt, deepmd.descriptor.DescrptSeAEbdV2 + if isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeAtten) or isinstance( + self.descrpt, deepmd.tf.descriptor.DescrptSeAEbdV2 ): table_size = 1 - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeA): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeA): table_size = self.ntypes * self.ntypes if self.type_one_side: table_size = self.ntypes - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeT): table_size = int(comb(self.ntypes + 1, 2)) - elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR): + elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeR): table_size = self.ntypes * self.ntypes if self.type_one_side: table_size = self.ntypes diff --git a/deepmd/utils/type_embed.py b/deepmd/tf/utils/type_embed.py similarity index 60% rename from deepmd/utils/type_embed.py rename to deepmd/tf/utils/type_embed.py index c8ab01f7f5..0f566027c1 100644 --- a/deepmd/utils/type_embed.py +++ b/deepmd/tf/utils/type_embed.py @@ -1,26 +1,34 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import re from typing import ( List, Optional, Union, ) -from deepmd.common import ( +from deepmd.dpmodel.utils.network import ( + EmbeddingNet, +) +from deepmd.tf.common import ( get_activation_func, get_precision, ) -from deepmd.env import ( +from deepmd.tf.env import ( + TYPE_EMBEDDING_PATTERN, tf, ) -from deepmd.nvnmd.utils.config import ( +from deepmd.tf.nvnmd.utils.config import ( nvnmd_cfg, ) -from deepmd.utils.graph import ( +from deepmd.tf.utils.graph import ( get_type_embedding_net_variables_from_graph_def, ) -from deepmd.utils.network import ( +from deepmd.tf.utils.network import ( embedding_net, ) +from deepmd.utils.version import ( + 
check_version_compatibility, +) def embed_atom_type( @@ -68,6 +76,8 @@ class TypeEmbedNet: Parameters ---------- + ntypes : int + Number of atom types neuron : list[int] Number of neurons in each hidden layers of the embedding net resnet_dt @@ -89,7 +99,9 @@ class TypeEmbedNet: def __init__( self, - neuron: List[int] = [], + *, + ntypes: int, + neuron: List[int], resnet_dt: bool = False, activation_function: Union[str, None] = "tanh", precision: str = "default", @@ -100,10 +112,12 @@ def __init__( **kwargs, ) -> None: """Constructor.""" + self.ntypes = ntypes self.neuron = neuron self.seed = seed self.filter_resnet_dt = resnet_dt self.filter_precision = get_precision(precision) + self.filter_activation_fn_name = str(activation_function) self.filter_activation_fn = get_activation_func(activation_function) self.trainable = trainable self.uniform_seed = uniform_seed @@ -133,6 +147,7 @@ def build( embedded_types The computational graph for embedded types """ + assert ntypes == self.ntypes types = tf.convert_to_tensor(list(range(ntypes)), dtype=tf.int32) ebd_type = tf.cast( tf.one_hot(tf.cast(types, dtype=tf.int32), int(ntypes)), @@ -189,3 +204,98 @@ def init_variables( self.type_embedding_net_variables = ( get_type_embedding_net_variables_from_graph_def(graph_def, suffix=suffix) ) + + @classmethod + def deserialize(cls, data: dict, suffix: str = ""): + """Deserialize the model. 
+ + Parameters + ---------- + data : dict + The serialized data + suffix : str, optional + The suffix of the scope + + Returns + ------- + Model + The deserialized model + """ + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 1, 1) + data_cls = data.pop("@class") + assert data_cls == "TypeEmbedNet", f"Invalid class {data_cls}" + + embedding_net = EmbeddingNet.deserialize(data.pop("embedding")) + embedding_net_variables = {} + for layer_idx, layer in enumerate(embedding_net.layers): + embedding_net_variables[ + f"type_embed_net{suffix}/matrix_{layer_idx + 1}" + ] = layer.w + embedding_net_variables[f"type_embed_net{suffix}/bias_{layer_idx + 1}"] = ( + layer.b + ) + if layer.idt is not None: + embedding_net_variables[ + f"type_embed_net{suffix}/idt_{layer_idx + 1}" + ] = layer.idt.reshape(1, -1) + else: + # prevent keyError + embedding_net_variables[ + f"type_embed_net{suffix}/idt_{layer_idx + 1}" + ] = 0.0 + + type_embedding_net = cls(**data) + type_embedding_net.type_embedding_net_variables = embedding_net_variables + return type_embedding_net + + def serialize(self, suffix: str = "") -> dict: + """Serialize the model. 
+ + Parameters + ---------- + suffix : str, optional + The suffix of the scope + + Returns + ------- + dict + The serialized data + """ + if suffix != "": + type_embedding_pattern = ( + TYPE_EMBEDDING_PATTERN.replace("/(idt)", suffix + "/(idt)") + .replace("/(bias)", suffix + "/(bias)") + .replace("/(matrix)", suffix + "/(matrix)") + ) + else: + type_embedding_pattern = TYPE_EMBEDDING_PATTERN + assert self.type_embedding_net_variables is not None + embedding_net = EmbeddingNet( + in_dim=self.ntypes, + neuron=self.neuron, + activation_function=self.filter_activation_fn_name, + resnet_dt=self.filter_resnet_dt, + precision=self.filter_precision.name, + ) + for key, value in self.type_embedding_net_variables.items(): + m = re.search(type_embedding_pattern, key) + m = [mm for mm in m.groups() if mm is not None] + layer_idx = int(m[1]) - 1 + weight_name = m[0] + if weight_name == "idt": + value = value.ravel() + embedding_net[layer_idx][weight_name] = value + + return { + "@class": "TypeEmbedNet", + "@version": 1, + "ntypes": self.ntypes, + "neuron": self.neuron, + "resnet_dt": self.filter_resnet_dt, + "precision": self.filter_precision.name, + "activation_function": self.filter_activation_fn_name, + "trainable": self.trainable, + "padding": self.padding, + "embedding": embedding_net.serialize(), + } diff --git a/deepmd/tf/utils/update_sel.py b/deepmd/tf/utils/update_sel.py new file mode 100644 index 0000000000..db0420dde8 --- /dev/null +++ b/deepmd/tf/utils/update_sel.py @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Type, +) + +from deepmd.env import ( + GLOBAL_ENER_FLOAT_PRECISION, +) +from deepmd.tf.env import ( + tf, +) +from deepmd.tf.utils.neighbor_stat import ( + NeighborStat, +) +from deepmd.utils.update_sel import ( + BaseUpdateSel, +) + + +class UpdateSel(BaseUpdateSel): + @property + def neighbor_stat(self) -> Type[NeighborStat]: + return NeighborStat + + def hook(self, min_nbor_dist, max_nbor_size): + # moved from 
trainer.py as duplicated + tf.constant( + min_nbor_dist, + name="train_attr/min_nbor_dist", + dtype=GLOBAL_ENER_FLOAT_PRECISION, + ) + tf.constant(max_nbor_size, name="train_attr/max_nbor_size", dtype=tf.int32) diff --git a/deepmd/tf/utils/weight_avg.py b/deepmd/tf/utils/weight_avg.py new file mode 100644 index 0000000000..fb3ae27934 --- /dev/null +++ b/deepmd/tf/utils/weight_avg.py @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Alias for backward compatibility.""" + +from deepmd.utils.weight_avg import ( + weighted_average, +) + +__all__ = [ + "weighted_average", +] diff --git a/deepmd/utils/__init__.py b/deepmd/utils/__init__.py index 7d1e7e67d0..bac6924ac1 100644 --- a/deepmd/utils/__init__.py +++ b/deepmd/utils/__init__.py @@ -1,27 +1,3 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -# -from .data import ( - DeepmdData, -) -from .data_system import ( - DeepmdDataSystem, -) -from .learning_rate import ( - LearningRateExp, -) -from .pair_tab import ( - PairTab, -) -from .plugin import ( - Plugin, - PluginVariant, -) - -__all__ = [ - "DeepmdData", - "DeepmdDataSystem", - "LearningRateExp", - "PairTab", - "Plugin", - "PluginVariant", -] +# For performance, do not add things to this file +# import submodules instead diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 05e7c767b8..2a98bee6fe 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -1,19 +1,2543 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -"""Alias for backward compatibility.""" -from deepmd_utils.utils.argcheck import ( - gen_args, - gen_doc, - gen_json, - list_to_doc, - normalize, - type_embedding_args, +import json +import logging +from typing import ( + Callable, + List, + Optional, ) -__all__ = [ - "list_to_doc", - "normalize", - "gen_doc", - "gen_json", - "gen_args", - "type_embedding_args", -] +from dargs import ( + Argument, + ArgumentEncoder, + Variant, + dargs, +) + +from deepmd.common import ( + VALID_ACTIVATION, + VALID_PRECISION, +)
+from deepmd.utils.argcheck_nvnmd import ( + nvnmd_args, +) +from deepmd.utils.plugin import ( + Plugin, +) + +log = logging.getLogger(__name__) + + +ACTIVATION_FN_DICT = dict.fromkeys(VALID_ACTIVATION) +PRECISION_DICT = dict.fromkeys(VALID_PRECISION) + +doc_only_tf_supported = "(Supported Backend: TensorFlow) " +doc_only_pt_supported = "(Supported Backend: PyTorch) " + + +def list_to_doc(xx): + items = [] + for ii in xx: + if len(items) == 0: + items.append(f'"{ii}"') + else: + items.append(f', "{ii}"') + items.append(".") + return "".join(items) + + +def make_link(content, ref_key): + return ( + f"`{content} <{ref_key}_>`_" + if not dargs.RAW_ANCHOR + else f"`{content} <#{ref_key}>`_" + ) + + +def type_embedding_args(): + doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." + doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' + doc_seed = "Random seed for parameter initialization" + doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' + doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." 
+ doc_trainable = "If the parameters in the embedding net are trainable" + + return [ + Argument("neuron", List[int], optional=True, default=[8], doc=doc_neuron), + Argument( + "activation_function", + str, + optional=True, + default="tanh", + doc=doc_activation_function, + ), + Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt), + Argument("precision", str, optional=True, default="default", doc=doc_precision), + Argument("trainable", bool, optional=True, default=True, doc=doc_trainable), + Argument("seed", [int, None], optional=True, default=None, doc=doc_seed), + ] + + +def spin_args(): + doc_use_spin = ( + "Whether to use atomic spin model for each atom type. " + "List of boolean values with the shape of [ntypes] to specify which types use spin, " + f"or a list of integer values {doc_only_pt_supported} " + "to indicate the index of the type that uses spin." + ) + doc_spin_norm = "The magnitude of atomic spin for each atom type with spin" + doc_virtual_len = "The distance between virtual atom representing spin and its corresponding real atom for each atom type with spin" + doc_virtual_scale = ( + "The scaling factor to determine the virtual distance between a virtual atom " + "representing spin and its corresponding real atom for each atom type with spin. " + "This factor is defined as the virtual distance divided by the magnitude of atomic spin " + "for each atom type with spin. The virtual coordinate is defined as the real coordinate " + "plus spin * virtual_scale. List of float values with shape of [ntypes] or [ntypes_spin] " + "or one single float value for all types, only used when use_spin is True for each atom type." 
+ ) + + return [ + Argument("use_spin", [List[bool], List[int]], doc=doc_use_spin), + Argument( + "spin_norm", + List[float], + optional=True, + doc=doc_only_tf_supported + doc_spin_norm, + ), + Argument( + "virtual_len", + List[float], + optional=True, + doc=doc_only_tf_supported + doc_virtual_len, + ), + Argument( + "virtual_scale", + [List[float], float], + optional=True, + doc=doc_only_pt_supported + doc_virtual_scale, + ), + ] + + +# --- Descriptor configurations: --- # + + +class ArgsPlugin: + def __init__(self) -> None: + self.__plugin = Plugin() + + def register( + self, name: str, alias: Optional[List[str]] = None, doc: str = "" + ) -> Callable[[], List[Argument]]: + """Register a descriptor argument plugin. + + Parameters + ---------- + name : str + the name of a descriptor + alias : List[str], optional + the list of aliases of this descriptor + + Returns + ------- + Callable[[], List[Argument]] + the registered descriptor argument method + + Examples + -------- + >>> some_plugin = ArgsPlugin() + >>> @some_plugin.register("some_descrpt") + def descrpt_some_descrpt_args(): + return [] + """ + # convert alias to hashed item + if isinstance(alias, list): + alias = tuple(alias) + return self.__plugin.register((name, alias, doc)) + + def get_all_argument(self, exclude_hybrid: bool = False) -> List[Argument]: + """Get all arguments. + + Parameters + ---------- + exclude_hybrid : bool + exclude hybrid descriptor to prevent circular calls + + Returns + ------- + List[Argument] + all arguments + """ + arguments = [] + for (name, alias, doc), metd in self.__plugin.plugins.items(): + if exclude_hybrid and name == "hybrid": + continue + arguments.append( + Argument(name=name, dtype=dict, sub_fields=metd(), alias=alias, doc=doc) + ) + return arguments + + +descrpt_args_plugin = ArgsPlugin() + + +@descrpt_args_plugin.register("loc_frame", doc=doc_only_tf_supported) +def descrpt_local_frame_args(): + doc_sel_a = "A list of integers. 
The length of the list should be the same as the number of atom types in the system. `sel_a[i]` gives the selected number of type-i neighbors. The full relative coordinates of the neighbors are used by the descriptor." + doc_sel_r = "A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_r[i]` gives the selected number of type-i neighbors. Only relative distance of the neighbors are used by the descriptor. sel_a[i] + sel_r[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius." + doc_rcut = "The cut-off radius. The default value is 6.0" + doc_axis_rule = "A list of integers. The length should be 6 times of the number of types. \n\n\ +- axis_rule[i*6+0]: class of the atom defining the first axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\ +- axis_rule[i*6+1]: type of the atom defining the first axis of type-i atom.\n\n\ +- axis_rule[i*6+2]: index of the axis atom defining the first axis. Note that the neighbors with the same class and type are sorted according to their relative distance.\n\n\ +- axis_rule[i*6+3]: class of the atom defining the second axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\ +- axis_rule[i*6+4]: type of the atom defining the second axis of type-i atom.\n\n\ +- axis_rule[i*6+5]: index of the axis atom defining the second axis. Note that the neighbors with the same class and type are sorted according to their relative distance." 
+ + return [ + Argument("sel_a", List[int], optional=False, doc=doc_sel_a), + Argument("sel_r", List[int], optional=False, doc=doc_sel_r), + Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), + Argument("axis_rule", List[int], optional=False, doc=doc_axis_rule), + ] + + +@descrpt_args_plugin.register("se_e2_a", alias=["se_a"]) +def descrpt_se_a_args(): + doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\ + - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + doc_rcut = "The cut-off radius." + doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" + doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." + doc_axis_neuron = "Size of the submatrix of G (embedding matrix)." + doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' 
+ doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' + doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." + doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." + doc_trainable = "If the parameters in the embedding net is trainable" + doc_seed = "Random seed for parameter initialization" + doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." + doc_env_protection = "Protection parameter to prevent division by zero errors during environment matrix calculations. For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection." + doc_set_davg_zero = "Set the normalization average to zero. 
This option should be set when `atom_ener` in the energy fitting is used" + + return [ + Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel), + Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), + Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth), + Argument( + "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron + ), + Argument( + "axis_neuron", + int, + optional=True, + default=4, + alias=["n_axis_neuron"], + doc=doc_axis_neuron, + ), + Argument( + "activation_function", + str, + optional=True, + default="tanh", + doc=doc_activation_function, + ), + Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt), + Argument( + "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side + ), + Argument("precision", str, optional=True, default="default", doc=doc_precision), + Argument("trainable", bool, optional=True, default=True, doc=doc_trainable), + Argument("seed", [int, None], optional=True, doc=doc_seed), + Argument( + "exclude_types", + List[List[int]], + optional=True, + default=[], + doc=doc_exclude_types, + ), + Argument( + "env_protection", + float, + optional=True, + default=0.0, + doc=doc_only_tf_supported + doc_env_protection, + ), + Argument( + "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero + ), + ] + + +@descrpt_args_plugin.register( + "se_e3", alias=["se_at", "se_a_3be", "se_t"], doc=doc_only_tf_supported +) +def descrpt_se_t_args(): + doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\ + - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ + - `str`. 
Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + doc_rcut = "The cut-off radius." + doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" + doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." + doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' + doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' + doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." + doc_trainable = "If the parameters in the embedding net are trainable" + doc_seed = "Random seed for parameter initialization" + doc_set_davg_zero = "Set the normalization average to zero. 
This option should be set when `atom_ener` in the energy fitting is used" + + return [ + Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel), + Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), + Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth), + Argument( + "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron + ), + Argument( + "activation_function", + str, + optional=True, + default="tanh", + doc=doc_activation_function, + ), + Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt), + Argument("precision", str, optional=True, default="default", doc=doc_precision), + Argument("trainable", bool, optional=True, default=True, doc=doc_trainable), + Argument("seed", [int, None], optional=True, doc=doc_seed), + Argument( + "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero + ), + ] + + +@descrpt_args_plugin.register("se_a_tpe", alias=["se_a_ebd"], doc=doc_only_tf_supported) +def descrpt_se_a_tpe_args(): + doc_type_nchanl = "number of channels for type embedding" + doc_type_nlayer = "number of hidden layers of type embedding net" + doc_numb_aparam = "dimension of atomic parameter. if set to a value > 0, the atomic parameters are embedded." + + return [ + *descrpt_se_a_args(), + Argument("type_nchanl", int, optional=True, default=4, doc=doc_type_nchanl), + Argument("type_nlayer", int, optional=True, default=2, doc=doc_type_nlayer), + Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), + ] + + +@descrpt_args_plugin.register("se_e2_r", alias=["se_r"]) +def descrpt_se_r_args(): + doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\ + - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. 
`sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + doc_rcut = "The cut-off radius." + doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" + doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." + doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' + doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' + doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." + doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." 
+ doc_trainable = "If the parameters in the embedding net are trainable" + doc_seed = "Random seed for parameter initialization" + doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." + doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used" + + return [ + Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel), + Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), + Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth), + Argument( + "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron + ), + Argument( + "activation_function", + str, + optional=True, + default="tanh", + doc=doc_activation_function, + ), + Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt), + Argument( + "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side + ), + Argument("precision", str, optional=True, default="default", doc=doc_precision), + Argument("trainable", bool, optional=True, default=True, doc=doc_trainable), + Argument("seed", [int, None], optional=True, doc=doc_seed), + Argument( + "exclude_types", + List[List[int]], + optional=True, + default=[], + doc=doc_exclude_types, + ), + Argument( + "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero + ), + ] + + +@descrpt_args_plugin.register("hybrid") +def descrpt_hybrid_args(): + doc_list = "A list of descriptor definitions" + + return [ + Argument( + "list", + list, + optional=False, + doc=doc_list, + repeat=True, + sub_fields=[], + sub_variants=[descrpt_variant_type_args(exclude_hybrid=True)], + fold_subdoc=True, + ) + ] + + +def descrpt_se_atten_common_args(): + doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. 
Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\ + - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ + - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + doc_rcut = "The cut-off radius." + doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" + doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." + doc_axis_neuron = "Size of the submatrix of G (embedding matrix)." + doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' 
+ doc_resnet_dt = ( + doc_only_tf_supported + 'Whether to use a "Timestep" in the skip connection' + ) + doc_type_one_side = ( + doc_only_tf_supported + + r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." + ) + doc_precision = ( + doc_only_tf_supported + + f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." + ) + doc_trainable = ( + doc_only_tf_supported + "If the parameters in the embedding net is trainable" + ) + doc_seed = "Random seed for parameter initialization" + doc_exclude_types = ( + doc_only_tf_supported + + "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." + ) + doc_attn = "The length of hidden vectors in attention layers" + doc_attn_layer = "The number of attention layers. 
Note that model compression of `se_atten` is only enabled when attn_layer==0 and stripped_type_embedding is True" + doc_attn_dotr = "Whether to do dot product with the normalized relative coordinates" + doc_attn_mask = "Whether to do mask on the diagonal in the attention matrix" + + return [ + Argument( + "sel", [int, List[int], str], optional=True, default="auto", doc=doc_sel + ), + Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), + Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth), + Argument( + "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron + ), + Argument( + "axis_neuron", + int, + optional=True, + default=4, + alias=["n_axis_neuron"], + doc=doc_axis_neuron, + ), + Argument( + "activation_function", + str, + optional=True, + default="tanh", + doc=doc_activation_function, + ), + Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt), + Argument( + "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side + ), + Argument("precision", str, optional=True, default="default", doc=doc_precision), + Argument("trainable", bool, optional=True, default=True, doc=doc_trainable), + Argument("seed", [int, None], optional=True, doc=doc_seed), + Argument( + "exclude_types", + List[List[int]], + optional=True, + default=[], + doc=doc_exclude_types, + ), + Argument("attn", int, optional=True, default=128, doc=doc_attn), + Argument("attn_layer", int, optional=True, default=2, doc=doc_attn_layer), + Argument("attn_dotr", bool, optional=True, default=True, doc=doc_attn_dotr), + Argument("attn_mask", bool, optional=True, default=False, doc=doc_attn_mask), + ] + + +@descrpt_args_plugin.register("se_atten", alias=["dpa1"]) +def descrpt_se_atten_args(): + doc_stripped_type_embedding = "Whether to strip the type embedding into a separated embedding network. Setting it to `False` will fall back to the previous version of `se_atten` which is non-compressible." 
+ doc_smooth_type_embdding = "When using stripped type embedding, whether to dot smooth factor on the network output of type embedding to keep the network smooth, instead of setting `set_davg_zero` to be True." + doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `se_atten` descriptor or `atom_ener` in the energy fitting is used" + doc_tebd_dim = "The dimension of atom type embedding." + doc_temperature = "The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K)." + doc_scaling_factor = ( + "The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K). " + "If `temperature` is None, the scaling of attention weights is (N_hidden_dim * scaling_factor)**0.5. " + "Else, the scaling of attention weights is setting to `temperature`." + ) + doc_normalize = ( + "Whether to normalize the hidden vectors during attention calculation." + ) + doc_concat_output_tebd = ( + "Whether to concat type embedding at the output of the descriptor." + ) + doc_deprecated = "This feature will be removed in a future release." 
+ + return [ + *descrpt_se_atten_common_args(), + Argument( + "stripped_type_embedding", + bool, + optional=True, + default=False, + doc=doc_only_tf_supported + doc_stripped_type_embedding, + ), + Argument( + "smooth_type_embdding", + bool, + optional=True, + default=False, + doc=doc_only_tf_supported + doc_smooth_type_embdding, + ), + Argument( + "set_davg_zero", bool, optional=True, default=True, doc=doc_set_davg_zero + ), + # pt only + Argument( + "tebd_dim", + int, + optional=True, + default=8, + doc=doc_only_pt_supported + doc_tebd_dim, + ), + Argument( + "tebd_input_mode", + str, + optional=True, + default="concat", + doc=doc_only_pt_supported + doc_deprecated, + ), + Argument( + "post_ln", + bool, + optional=True, + default=True, + doc=doc_only_pt_supported + doc_deprecated, + ), + Argument( + "ffn", + bool, + optional=True, + default=False, + doc=doc_only_pt_supported + doc_deprecated, + ), + Argument( + "ffn_embed_dim", + int, + optional=True, + default=1024, + doc=doc_only_pt_supported + doc_deprecated, + ), + Argument( + "scaling_factor", + float, + optional=True, + default=1.0, + doc=doc_only_pt_supported + doc_scaling_factor, + ), + Argument( + "head_num", + int, + optional=True, + default=1, + doc=doc_only_pt_supported + doc_deprecated, + ), + Argument( + "normalize", + bool, + optional=True, + default=True, + doc=doc_only_pt_supported + doc_normalize, + ), + Argument( + "temperature", + float, + optional=True, + doc=doc_only_pt_supported + doc_temperature, + ), + Argument( + "return_rot", + bool, + optional=True, + default=False, + doc=doc_only_pt_supported + doc_deprecated, + ), + Argument( + "concat_output_tebd", + bool, + optional=True, + default=True, + doc=doc_only_pt_supported + doc_concat_output_tebd, + ), + ] + + +@descrpt_args_plugin.register("se_atten_v2", doc=doc_only_tf_supported) +def descrpt_se_atten_v2_args(): + doc_set_davg_zero = "Set the normalization average to zero. 
This option should be set when `se_atten` descriptor or `atom_ener` in the energy fitting is used" + + return [ + *descrpt_se_atten_common_args(), + Argument( + "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero + ), + ] + + +@descrpt_args_plugin.register("dpa2", doc=doc_only_pt_supported) +def descrpt_dpa2_args(): + # Generated by GitHub Copilot + doc_repinit_rcut = "The cut-off radius of the repinit block" + doc_repinit_rcut_smth = "From this position the inverse distance smoothly decays to 0 at the cut-off. Use in the repinit block." + doc_repinit_nsel = "Maximally possible number of neighbors for repinit block." + doc_repformer_rcut = "The cut-off radius of the repformer block" + doc_repformer_rcut_smth = "From this position the inverse distance smoothly decays to 0 at the cut-off. Use in the repformer block." + doc_repformer_nsel = "Maximally possible number of neighbors for repformer block." + doc_tebd_dim = "The dimension of atom type embedding" + doc_concat_output_tebd = ( + "Whether to concat type embedding at the output of the descriptor." + ) + doc_repinit_neuron = "repinit block: the number of neurons in the embedding net." + doc_repinit_axis_neuron = ( + "repinit block: the number of dimension of split in the symmetrization op." + ) + doc_repinit_activation = ( + "repinit block: the activation function in the embedding net" + ) + doc_repformer_nlayers = "repformers block: the number of repformer layers" + doc_repformer_g1_dim = "repformers block: the dimension of single-atom rep" + doc_repformer_g2_dim = "repformers block: the dimension of invariant pair-atom rep" + doc_repformer_axis_dim = ( + "repformers block: the number of dimension of split in the symmetrization ops."
+ ) + doc_repformer_do_bn_mode = "repformers block: do batch norm in the repformer layers" + doc_repformer_bn_momentum = "repformers block: moment in the batch normalization" + doc_repformer_update_g1_has_conv = ( + "repformers block: update the g1 rep with convolution term" + ) + doc_repformer_update_g1_has_drrd = ( + "repformers block: update the g1 rep with the drrd term" + ) + doc_repformer_update_g1_has_grrg = ( + "repformers block: update the g1 rep with the grrg term" + ) + doc_repformer_update_g1_has_attn = ( + "repformers block: update the g1 rep with the localized self-attention" + ) + doc_repformer_update_g2_has_g1g1 = ( + "repformers block: update the g2 rep with the g1xg1 term" + ) + doc_repformer_update_g2_has_attn = ( + "repformers block: update the g2 rep with the gated self-attention" + ) + doc_repformer_update_h2 = "repformers block: update the h2 rep" + doc_repformer_attn1_hidden = ( + "repformers block: the hidden dimension of localized self-attention" + ) + doc_repformer_attn1_nhead = ( + "repformers block: the number of heads in localized self-attention" + ) + doc_repformer_attn2_hidden = ( + "repformers block: the hidden dimension of gated self-attention" + ) + doc_repformer_attn2_nhead = ( + "repformers block: the number of heads in gated self-attention" + ) + doc_repformer_attn2_has_gate = ( + "repformers block: has gate in the gated self-attention" + ) + doc_repformer_activation = "repformers block: the activation function in the MLPs." + doc_repformer_update_style = "repformers block: style of update a rep. can be res_avg or res_incr. res_avg updates a rep `u` with: u = 1/\\sqrt{n+1} (u + u_1 + u_2 + ... + u_n) res_incr updates a rep `u` with: u = u + 1/\\sqrt{n} (u_1 + u_2 + ... 
+ u_n)" + doc_repformer_set_davg_zero = "repformers block: set the avg to zero in statistics" + doc_repformer_add_type_ebd_to_seq = ( + "repformers block: concatenate the type embedding at the output" + ) + return [ + Argument("repinit_rcut", float, doc=doc_repinit_rcut), + Argument("repinit_rcut_smth", float, doc=doc_repinit_rcut_smth), + Argument("repinit_nsel", int, doc=doc_repinit_nsel), + Argument("repformer_rcut", float, doc=doc_repformer_rcut), + Argument("repformer_rcut_smth", float, doc=doc_repformer_rcut_smth), + Argument("repformer_nsel", int, doc=doc_repformer_nsel), + Argument("tebd_dim", int, optional=True, default=8, doc=doc_tebd_dim), + Argument( + "concat_output_tebd", + bool, + optional=True, + default=True, + doc=doc_concat_output_tebd, + ), + Argument( + "repinit_neuron", + list, + optional=True, + default=[25, 50, 100], + doc=doc_repinit_neuron, + ), + Argument( + "repinit_axis_neuron", + int, + optional=True, + default=16, + doc=doc_repinit_axis_neuron, + ), + Argument("repinit_set_davg_zero", bool, optional=True, default=True), + Argument( + "repinit_activation", + str, + optional=True, + default="tanh", + doc=doc_repinit_activation, + ), + Argument( + "repformer_nlayers", + int, + optional=True, + default=3, + doc=doc_repformer_nlayers, + ), + Argument( + "repformer_g1_dim", + int, + optional=True, + default=128, + doc=doc_repformer_g1_dim, + ), + Argument( + "repformer_g2_dim", int, optional=True, default=16, doc=doc_repformer_g2_dim + ), + Argument( + "repformer_axis_dim", + int, + optional=True, + default=4, + doc=doc_repformer_axis_dim, + ), + Argument( + "repformer_do_bn_mode", + str, + optional=True, + default="no", + doc=doc_repformer_do_bn_mode, + ), + Argument( + "repformer_bn_momentum", + float, + optional=True, + default=0.1, + doc=doc_repformer_bn_momentum, + ), + Argument( + "repformer_update_g1_has_conv", + bool, + optional=True, + default=True, + doc=doc_repformer_update_g1_has_conv, + ), + Argument( + 
"repformer_update_g1_has_drrd", + bool, + optional=True, + default=True, + doc=doc_repformer_update_g1_has_drrd, + ), + Argument( + "repformer_update_g1_has_grrg", + bool, + optional=True, + default=True, + doc=doc_repformer_update_g1_has_grrg, + ), + Argument( + "repformer_update_g1_has_attn", + bool, + optional=True, + default=True, + doc=doc_repformer_update_g1_has_attn, + ), + Argument( + "repformer_update_g2_has_g1g1", + bool, + optional=True, + default=True, + doc=doc_repformer_update_g2_has_g1g1, + ), + Argument( + "repformer_update_g2_has_attn", + bool, + optional=True, + default=True, + doc=doc_repformer_update_g2_has_attn, + ), + Argument( + "repformer_update_h2", + bool, + optional=True, + default=False, + doc=doc_repformer_update_h2, + ), + Argument( + "repformer_attn1_hidden", + int, + optional=True, + default=64, + doc=doc_repformer_attn1_hidden, + ), + Argument( + "repformer_attn1_nhead", + int, + optional=True, + default=4, + doc=doc_repformer_attn1_nhead, + ), + Argument( + "repformer_attn2_hidden", + int, + optional=True, + default=16, + doc=doc_repformer_attn2_hidden, + ), + Argument( + "repformer_attn2_nhead", + int, + optional=True, + default=4, + doc=doc_repformer_attn2_nhead, + ), + Argument( + "repformer_attn2_has_gate", + bool, + optional=True, + default=False, + doc=doc_repformer_attn2_has_gate, + ), + Argument( + "repformer_activation", + str, + optional=True, + default="tanh", + doc=doc_repformer_activation, + ), + Argument( + "repformer_update_style", + str, + optional=True, + default="res_avg", + doc=doc_repformer_update_style, + ), + Argument( + "repformer_set_davg_zero", + bool, + optional=True, + default=True, + doc=doc_repformer_set_davg_zero, + ), + Argument( + "repformer_add_type_ebd_to_seq", + bool, + optional=True, + default=False, + doc=doc_repformer_add_type_ebd_to_seq, + ), + ] + + +@descrpt_args_plugin.register( + "se_a_ebd_v2", alias=["se_a_tpe_v2"], doc=doc_only_tf_supported +) +def descrpt_se_a_ebd_v2_args(): + return 
descrpt_se_a_args() + + +@descrpt_args_plugin.register("se_a_mask", doc=doc_only_tf_supported) +def descrpt_se_a_mask_args(): + doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\ + - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + + doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." + doc_axis_neuron = "Size of the submatrix of G (embedding matrix)." + doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' + doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' + doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." 
+ doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." + doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." + doc_trainable = "If the parameters in the embedding net is trainable" + doc_seed = "Random seed for parameter initialization" + + return [ + Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel), + Argument( + "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron + ), + Argument( + "axis_neuron", + int, + optional=True, + default=4, + alias=["n_axis_neuron"], + doc=doc_axis_neuron, + ), + Argument( + "activation_function", + str, + optional=True, + default="tanh", + doc=doc_activation_function, + ), + Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt), + Argument( + "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side + ), + Argument( + "exclude_types", + List[List[int]], + optional=True, + default=[], + doc=doc_exclude_types, + ), + Argument("precision", str, optional=True, default="default", doc=doc_precision), + Argument("trainable", bool, optional=True, default=True, doc=doc_trainable), + Argument("seed", [int, None], optional=True, doc=doc_seed), + ] + + +def descrpt_variant_type_args(exclude_hybrid: bool = False) -> Variant: + link_lf = make_link("loc_frame", "model/descriptor[loc_frame]") + link_se_e2_a = make_link("se_e2_a", "model/descriptor[se_e2_a]") + link_se_e2_r = make_link("se_e2_r", "model/descriptor[se_e2_r]") + link_se_e3 = make_link("se_e3", "model/descriptor[se_e3]") + link_se_a_tpe = make_link("se_a_tpe", "model/descriptor[se_a_tpe]") + link_hybrid = make_link("hybrid", "model/descriptor[hybrid]") + link_se_atten = make_link("se_atten", "model/descriptor[se_atten]") + link_se_atten_v2 = make_link("se_atten_v2", 
"model/descriptor[se_atten_v2]") + doc_descrpt_type = "The type of the descritpor. See explanation below. \n\n\ +- `loc_frame`: Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame.\n\n\ +- `se_e2_a`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.\n\n\ +- `se_e2_r`: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor.\n\n\ +- `se_e3`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor.\n\n\ +- `se_a_tpe`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Type embedding will be used by this descriptor.\n\n\ +- `se_atten`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism will be used by this descriptor.\n\n\ +- `se_atten_v2`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism with new modifications will be used by this descriptor.\n\n\ +- `se_a_mask`: Used by the smooth edition of Deep Potential. It can accept a variable number of atoms in a frame (Non-PBC system). *aparam* are required as an indicator matrix for the real/virtual sign of input atoms. \n\n\ +- `hybrid`: Concatenate of a list of descriptors as a new descriptor." + + return Variant( + "type", + descrpt_args_plugin.get_all_argument(exclude_hybrid=exclude_hybrid), + doc=doc_descrpt_type, + ) + + +# --- Fitting net configurations: --- # +fitting_args_plugin = ArgsPlugin() + + +@fitting_args_plugin.register("ener") +def fitting_ener(): + doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams." 
+ doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." + doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." + doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' + doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." + doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' + doc_trainable = f"Whether the parameters in the fitting net are trainable. This option can be\n\n\ +- bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\ +- list of bool{doc_only_tf_supported}: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of this list should be equal to len(`neuron`)+1." + doc_rcond = "The condition number used to determine the inital energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details." + doc_seed = "Random seed for parameter initialization of the fitting net" + doc_atom_ener = "Specify the atomic energy in vacuum for each type" + doc_layer_name = ( + "The name of the each layer. The length of this list should be equal to n_neuron + 1. " + "If two layers, either in the same fitting or different fittings, " + "have the same name, they will share the same neural network parameters. " + "The shape of these layers should be the same. " + "If null is given for a layer, parameters will not be shared." 
+ ) + doc_use_aparam_as_mask = ( + "Whether to use the aparam as a mask in input." + "If True, the aparam will not be used in fitting net for embedding." + "When descrpt is se_a_mask, the aparam will be used as a mask to indicate the input atom is real/virtual. And use_aparam_as_mask should be set to True." + ) + + return [ + Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam), + Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), + Argument( + "neuron", + List[int], + optional=True, + default=[120, 120, 120], + alias=["n_neuron"], + doc=doc_neuron, + ), + Argument( + "activation_function", + str, + optional=True, + default="tanh", + doc=doc_activation_function, + ), + Argument("precision", str, optional=True, default="default", doc=doc_precision), + Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt), + Argument( + "trainable", + [List[bool], bool], + optional=True, + default=True, + doc=doc_trainable, + ), + Argument( + "rcond", [float, type(None)], optional=True, default=None, doc=doc_rcond + ), + Argument("seed", [int, None], optional=True, doc=doc_seed), + Argument( + "atom_ener", + List[Optional[float]], + optional=True, + default=[], + doc=doc_atom_ener, + ), + Argument("layer_name", List[str], optional=True, doc=doc_layer_name), + Argument( + "use_aparam_as_mask", + bool, + optional=True, + default=False, + doc=doc_use_aparam_as_mask, + ), + ] + + +@fitting_args_plugin.register("dos", doc=doc_only_tf_supported) +def fitting_dos(): + doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams." + doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." + doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." 
+ doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' + doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." + doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' + doc_trainable = "Whether the parameters in the fitting net are trainable. This option can be\n\n\ +- bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\ +- list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of tihs list should be equal to len(`neuron`)+1." + doc_rcond = "The condition number used to determine the inital energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details." 
+ doc_seed = "Random seed for parameter initialization of the fitting net" + doc_numb_dos = ( + "The number of gridpoints on which the DOS is evaluated (NEDOS in VASP)" + ) + + return [ + Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam), + Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), + Argument( + "neuron", List[int], optional=True, default=[120, 120, 120], doc=doc_neuron + ), + Argument( + "activation_function", + str, + optional=True, + default="tanh", + doc=doc_activation_function, + ), + Argument("precision", str, optional=True, default="float64", doc=doc_precision), + Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt), + Argument( + "trainable", + [List[bool], bool], + optional=True, + default=True, + doc=doc_trainable, + ), + Argument( + "rcond", [float, type(None)], optional=True, default=None, doc=doc_rcond + ), + Argument("seed", [int, None], optional=True, doc=doc_seed), + Argument("numb_dos", int, optional=True, default=300, doc=doc_numb_dos), + ] + + +@fitting_args_plugin.register("polar") +def fitting_polar(): + doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." + doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' + doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' + doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." 
+ doc_scale = "The output of the fitting net (polarizability matrix) will be scaled by ``scale``" + # doc_diag_shift = 'The diagonal part of the polarizability matrix will be shifted by ``diag_shift``. The shift operation is carried out after ``scale``.' + doc_fit_diag = "Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to normal polarizability matrix by contracting with the rotation matrix." + doc_sel_type = "The atom types for which the atomic polarizability will be provided. If not set, all types will be selected." + doc_seed = "Random seed for parameter initialization of the fitting net" + + # YWolfeee: user can decide whether to use shift diag + doc_shift_diag = "Whether to shift the diagonal of polar, which is beneficial to training. Default is true." + + return [ + Argument( + "neuron", + List[int], + optional=True, + default=[120, 120, 120], + alias=["n_neuron"], + doc=doc_neuron, + ), + Argument( + "activation_function", + str, + optional=True, + default="tanh", + doc=doc_activation_function, + ), + Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt), + Argument("precision", str, optional=True, default="default", doc=doc_precision), + Argument("fit_diag", bool, optional=True, default=True, doc=doc_fit_diag), + Argument( + "scale", [List[float], float], optional=True, default=1.0, doc=doc_scale + ), + # Argument("diag_shift", [list,float], optional = True, default = 0.0, doc = doc_diag_shift), + Argument("shift_diag", bool, optional=True, default=True, doc=doc_shift_diag), + Argument( + "sel_type", + [List[int], int, None], + optional=True, + alias=["pol_type"], + doc=doc_sel_type + doc_only_tf_supported, + ), + Argument("seed", [int, None], optional=True, doc=doc_seed), + ] + + +# def fitting_global_polar(): +# return fitting_polar() + + +@fitting_args_plugin.register("dipole") +def fitting_dipole(): + doc_neuron = "The number of neurons in each hidden layers of the fitting net. 
When two hidden layers are of the same size, a skip connection is built." + doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' + doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' + doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." + doc_sel_type = "The atom types for which the atomic dipole will be provided. If not set, all types will be selected." + doc_seed = "Random seed for parameter initialization of the fitting net" + return [ + Argument( + "neuron", + List[int], + optional=True, + default=[120, 120, 120], + alias=["n_neuron"], + doc=doc_neuron, + ), + Argument( + "activation_function", + str, + optional=True, + default="tanh", + doc=doc_activation_function, + ), + Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt), + Argument("precision", str, optional=True, default="default", doc=doc_precision), + Argument( + "sel_type", + [List[int], int, None], + optional=True, + alias=["dipole_type"], + doc=doc_sel_type + doc_only_tf_supported, + ), + Argument("seed", [int, None], optional=True, doc=doc_seed), + ] + + +# YWolfeee: Delete global polar mode, merge it into polar mode and use loss setting to support. +def fitting_variant_type_args(): + doc_descrpt_type = "The type of the fitting. See explanation below. \n\n\ +- `ener`: Fit an energy model (potential energy surface).\n\n\ +- `dos` : Fit a density of states model. The total density of states / site-projected density of states labels should be provided by `dos.npy` or `atom_dos.npy` in each data system. 
The file has number of frames lines and number of energy grid columns (times number of atoms in `atom_dos.npy`). See `loss` parameter. \n\n\ +- `dipole`: Fit an atomic dipole model. Global dipole labels or atomic dipole labels for all the selected atoms (see `sel_type`) should be provided by `dipole.npy` in each data system. The file either has number of frames lines and 3 times of number of selected atoms columns, or has number of frames lines and 3 columns. See `loss` parameter.\n\n\ +- `polar`: Fit an atomic polarizability model. Global polarizazbility labels or atomic polarizability labels for all the selected atoms (see `sel_type`) should be provided by `polarizability.npy` in each data system. The file eith has number of frames lines and 9 times of number of selected atoms columns, or has number of frames lines and 9 columns. See `loss` parameter.\n\n" + + return Variant( + "type", + fitting_args_plugin.get_all_argument(), + optional=True, + default_tag="ener", + doc=doc_descrpt_type, + ) + + +# --- Modifier configurations: --- # +def modifier_dipole_charge(): + doc_model_name = "The name of the frozen dipole model file." + doc_model_charge_map = f"The charge of the WFCC. The list length should be the same as the {make_link('sel_type', 'model/fitting_net[dipole]/sel_type')}. " + doc_sys_charge_map = f"The charge of real atoms. The list length should be the same as the {make_link('type_map', 'model/type_map')}" + doc_ewald_h = "The grid spacing of the FFT grid. Unit is A" + doc_ewald_beta = f"The splitting parameter of Ewald sum. 
Unit is A^{-1}" + + return [ + Argument("model_name", str, optional=False, doc=doc_model_name), + Argument( + "model_charge_map", List[float], optional=False, doc=doc_model_charge_map + ), + Argument("sys_charge_map", List[float], optional=False, doc=doc_sys_charge_map), + Argument("ewald_beta", float, optional=True, default=0.4, doc=doc_ewald_beta), + Argument("ewald_h", float, optional=True, default=1.0, doc=doc_ewald_h), + ] + + +def modifier_variant_type_args(): + doc_modifier_type = "The type of modifier. See explanation below.\n\n\ +-`dipole_charge`: Use WFCC to model the electronic structure of the system. Correct the long-range interaction" + return Variant( + "type", + [ + Argument("dipole_charge", dict, modifier_dipole_charge()), + ], + optional=False, + doc=doc_modifier_type, + ) + + +# --- model compression configurations: --- # +def model_compression(): + doc_model_file = "The input model file, which will be compressed by the DeePMD-kit." + doc_table_config = "The arguments of model compression, including extrapolate(scale of model extrapolation), stride(uniform stride of tabulation's first and second table), and frequency(frequency of tabulation overflow check)." + doc_min_nbor_dist = ( + "The nearest distance between neighbor atoms saved in the frozen model." + ) + + return [ + Argument("model_file", str, optional=False, doc=doc_model_file), + Argument("table_config", List[float], optional=False, doc=doc_table_config), + Argument("min_nbor_dist", float, optional=False, doc=doc_min_nbor_dist), + ] + + +# --- model compression configurations: --- # +def model_compression_type_args(): + doc_compress_type = "The type of model compression, which should be consistent with the descriptor type." + + return Variant( + "type", + [Argument("se_e2_a", dict, model_compression(), alias=["se_a"])], + optional=True, + default_tag="se_e2_a", + doc=doc_compress_type, + ) + + +def model_args(exclude_hybrid=False): + doc_type_map = "A list of strings. 
Give the name to each type of atoms. It is noted that the number of atom type of training system must be less than 128 in a GPU environment. If not given, type.raw in each system should use the same type indexes, and type_map.raw will take no effect." + doc_data_stat_nbatch = "The model determines the normalization from the statistics of the data. This key specifies the number of `frames` in each `system` used for statistics." + doc_data_stat_protect = "Protect parameter for atomic energy regression." + doc_data_bias_nsample = "The number of training samples in a system to compute and change the energy bias." + doc_type_embedding = "The type embedding." + doc_modifier = "The modifier of model output." + doc_use_srtab = "The table for the short-range pairwise interaction added on top of DP. The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. The first colume is the distance between atoms. The second to the last columes are energies for pairs of certain types. For example we have two atom types, 0 and 1. The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly." + doc_smin_alpha = "The short-range tabulated interaction will be swithed according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided." + doc_sw_rmin = "The lower boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided." + doc_sw_rmax = "The upper boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided." + doc_srtab_add_bias = "Whether add energy bias from the statistics of the data to short-range tabulated atomic energy. It only takes effect when `use_srtab` is provided." + doc_compress_config = "Model compression configurations" + doc_spin = "The settings for systems with spin." 
+ doc_atom_exclude_types = "Exclude the atomic contribution of the listed atom types" + doc_pair_exclude_types = "The atom pairs of the listed types are not treated to be neighbors, i.e. they do not see each other." + + hybrid_models = [] + if not exclude_hybrid: + hybrid_models.extend( + [ + pairwise_dprc(), + linear_ener_model_args(), + ] + ) + return Argument( + "model", + dict, + [ + Argument("type_map", List[str], optional=True, doc=doc_type_map), + Argument( + "data_stat_nbatch", + int, + optional=True, + default=10, + doc=doc_data_stat_nbatch, + ), + Argument( + "data_stat_protect", + float, + optional=True, + default=1e-2, + doc=doc_data_stat_protect, + ), + Argument( + "data_bias_nsample", + int, + optional=True, + default=10, + doc=doc_data_bias_nsample, + ), + Argument( + "use_srtab", + str, + optional=True, + doc=doc_only_tf_supported + doc_use_srtab, + ), + Argument( + "smin_alpha", + float, + optional=True, + doc=doc_only_tf_supported + doc_smin_alpha, + ), + Argument( + "sw_rmin", float, optional=True, doc=doc_only_tf_supported + doc_sw_rmin + ), + Argument( + "sw_rmax", float, optional=True, doc=doc_only_tf_supported + doc_sw_rmax + ), + Argument( + "pair_exclude_types", + list, + optional=True, + default=[], + doc=doc_only_pt_supported + doc_pair_exclude_types, + ), + Argument( + "atom_exclude_types", + list, + optional=True, + default=[], + doc=doc_only_pt_supported + doc_atom_exclude_types, + ), + Argument( + "srtab_add_bias", + bool, + optional=True, + default=True, + doc=doc_only_tf_supported + doc_srtab_add_bias, + ), + Argument( + "type_embedding", + dict, + type_embedding_args(), + [], + optional=True, + doc=doc_only_tf_supported + doc_type_embedding, + ), + Argument( + "modifier", + dict, + [], + [modifier_variant_type_args()], + optional=True, + doc=doc_only_tf_supported + doc_modifier, + ), + Argument( + "compress", + dict, + [], + [model_compression_type_args()], + optional=True, + doc=doc_only_tf_supported + doc_compress_config, + 
fold_subdoc=True, + ), + Argument("spin", dict, spin_args(), [], optional=True, doc=doc_spin), + ], + [ + Variant( + "type", + [ + standard_model_args(), + multi_model_args(), + frozen_model_args(), + pairtab_model_args(), + *hybrid_models, + ], + optional=True, + default_tag="standard", + ), + ], + ) + + +def standard_model_args() -> Argument: + doc_descrpt = "The descriptor of atomic environment." + doc_fitting = "The fitting of physical properties." + + ca = Argument( + "standard", + dict, + [ + Argument( + "descriptor", dict, [], [descrpt_variant_type_args()], doc=doc_descrpt + ), + Argument( + "fitting_net", + dict, + [], + [fitting_variant_type_args()], + doc=doc_fitting, + ), + ], + doc="Stardard model, which contains a descriptor and a fitting.", + ) + return ca + + +def multi_model_args() -> Argument: + doc_descrpt = "The descriptor of atomic environment. See model[standard]/descriptor for details." + doc_fitting_net_dict = "The dictionary of multiple fitting nets in multi-task mode. Each fitting_net_dict[fitting_key] is the single definition of fitting of physical properties with user-defined name `fitting_key`." + + ca = Argument( + "multi", + dict, + [ + Argument( + "descriptor", + dict, + [], + [descrpt_variant_type_args()], + doc=doc_descrpt, + fold_subdoc=True, + ), + Argument("fitting_net_dict", dict, doc=doc_fitting_net_dict), + ], + doc=doc_only_tf_supported + "Multiple-task model.", + ) + return ca + + +def pairwise_dprc() -> Argument: + qm_model_args = model_args(exclude_hybrid=True) + qm_model_args.name = "qm_model" + qm_model_args.fold_subdoc = True + qmmm_model_args = model_args(exclude_hybrid=True) + qmmm_model_args.name = "qmmm_model" + qmmm_model_args.fold_subdoc = True + ca = Argument( + "pairwise_dprc", + dict, + [ + qm_model_args, + qmmm_model_args, + ], + doc=doc_only_tf_supported, + ) + return ca + + +def frozen_model_args() -> Argument: + doc_model_file = "Path to the frozen model file." 
+ ca = Argument( + "frozen", + dict, + [ + Argument("model_file", str, optional=False, doc=doc_model_file), + ], + ) + return ca + + +def pairtab_model_args() -> Argument: + doc_tab_file = "Path to the tabulation file." + doc_rcut = "The cut-off radius." + doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\ + - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ + - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + ca = Argument( + "pairtab", + dict, + [ + Argument("tab_file", str, optional=False, doc=doc_tab_file), + Argument("rcut", float, optional=False, doc=doc_rcut), + Argument("sel", [int, List[int], str], optional=False, doc=doc_sel), + ], + doc=doc_only_tf_supported + "Pairwise tabulation energy model.", + ) + return ca + + +def linear_ener_model_args() -> Argument: + doc_weights = ( + "If the type is list of float, a list of weights for each model. " + 'If "mean", the weights are set to be 1 / len(models). ' + 'If "sum", the weights are set to be 1.' 
+ ) + models_args = model_args(exclude_hybrid=True) + models_args.name = "models" + models_args.fold_subdoc = True + models_args.set_dtype(list) + models_args.set_repeat(True) + models_args.doc = "The sub-models." + ca = Argument( + "linear_ener", + dict, + [ + models_args, + Argument( + "weights", + [list, str], + optional=False, + doc=doc_weights, + ), + ], + doc=doc_only_tf_supported, + ) + return ca + + +# --- Learning rate configurations: --- # +def learning_rate_exp(): + doc_start_lr = "The learning rate at the start of the training." + doc_stop_lr = ( + "The desired learning rate at the end of the training. " + f"When decay_rate {doc_only_pt_supported}is explicitly set, " + "this value will serve as the minimum learning rate during training. " + "In other words, if the learning rate decays below stop_lr, stop_lr will be applied instead." + ) + doc_decay_steps = ( + "The learning rate is decaying every this number of training steps." + ) + doc_decay_rate = ( + "The decay rate for the learning rate. " + "If this is provided, it will be used directly as the decay rate for learning rate " + "instead of calculating it through interpolation between start_lr and stop_lr." + ) + + args = [ + Argument("start_lr", float, optional=True, default=1e-3, doc=doc_start_lr), + Argument("stop_lr", float, optional=True, default=1e-8, doc=doc_stop_lr), + Argument("decay_steps", int, optional=True, default=5000, doc=doc_decay_steps), + Argument( + "decay_rate", + float, + optional=True, + default=None, + doc=doc_only_pt_supported + doc_decay_rate, + ), + ] + return args + + +def learning_rate_variant_type_args(): + doc_lr = "The type of the learning rate." + + return Variant( + "type", + [Argument("exp", dict, learning_rate_exp())], + optional=True, + default_tag="exp", + doc=doc_lr, + ) + + +def learning_rate_args(): + doc_scale_by_worker = "When parallel training or batch size scaled, how to alter learning rate. Valid values are `linear`(default), `sqrt` or `none`." 
+ doc_lr = "The definitio of learning rate" + return Argument( + "learning_rate", + dict, + [ + Argument( + "scale_by_worker", + str, + optional=True, + default="linear", + doc=doc_scale_by_worker, + ) + ], + [learning_rate_variant_type_args()], + optional=True, + doc=doc_lr, + ) + + +def learning_rate_dict_args(): + doc_learning_rate_dict = ( + "The dictionary of definitions of learning rates in multi-task mode. " + "Each learning_rate_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, is the single definition of learning rate.\n" + ) + ca = Argument( + "learning_rate_dict", dict, [], [], optional=True, doc=doc_learning_rate_dict + ) + return ca + + +# --- Loss configurations: --- # +def start_pref(item, label=None, abbr=None): + if label is None: + label = item + if abbr is None: + abbr = item + return f"The prefactor of {item} loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the {label} label should be provided by file {label}.npy in each data system. If both start_pref_{abbr} and limit_pref_{abbr} are set to 0, then the {item} will be ignored." + + +def limit_pref(item): + return f"The prefactor of {item} loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity." 
loss_args_plugin = ArgsPlugin()


def _loss_pref_pair(item, abbr, start_default, limit_default, label=None):
    """Build the ``start_pref_<abbr>`` / ``limit_pref_<abbr>`` argument pair.

    The option suffix and the suffix quoted in the generated documentation
    both come from ``abbr``, so the help text always names options that
    actually exist.

    Parameters
    ----------
    item : str
        Name of the loss term, forwarded to ``start_pref`` and ``limit_pref``.
    abbr : str
        Suffix of the two option names.
    start_default : float or int
        Default of ``start_pref_<abbr>``.
    limit_default : float or int
        Default of ``limit_pref_<abbr>``.
    label : str, optional
        Label name forwarded to ``start_pref``; ``start_pref`` falls back to
        ``item`` when it is None.

    Returns
    -------
    list of Argument
        ``[start_pref_<abbr>, limit_pref_<abbr>]``, both optional and both
        accepting float or int.
    """
    return [
        Argument(
            f"start_pref_{abbr}",
            [float, int],
            optional=True,
            default=start_default,
            doc=start_pref(item, label=label, abbr=abbr),
        ),
        Argument(
            f"limit_pref_{abbr}",
            [float, int],
            optional=True,
            default=limit_default,
            doc=limit_pref(item),
        ),
    ]


@loss_args_plugin.register("ener")
def loss_ener():
    """Return the argument list of the ``ener`` loss.

    Returns
    -------
    list of Argument
        Prefactor pairs for energy, force, virial, atomic energy and atomic
        prefactor force, followed by ``relative_f``,
        ``enable_atom_ener_coeff``, the generalized force prefactor pair and
        ``numb_generalized_coord``.
    """
    doc_numb_generalized_coord = "The dimension of generalized coordinates. Required when generalized force loss is used."
    doc_relative_f = "If provided, relative force error will be used in the loss. The difference of force will be normalized by the magnitude of the force in the label with a shift given by `relative_f`, i.e. DF_i / ( || F || + relative_f ) with DF denoting the difference between prediction and label and || F || denoting the L2 norm of the label."
    doc_enable_atom_ener_coeff = "If true, the energy will be computed as \\sum_i c_i E_i. c_i should be provided by file atom_ener_coeff.npy in each data system, otherwise it's 1."
    return [
        *_loss_pref_pair("energy", "e", 0.02, 1.00),
        *_loss_pref_pair("force", "f", 1000, 1.00),
        *_loss_pref_pair("virial", "v", 0.00, 0.00),
        *_loss_pref_pair("atomic energy", "ae", 0.00, 0.00, label="atom_ener"),
        *_loss_pref_pair(
            "atomic prefactor force", "pf", 0.00, 0.00, label="atom_pref"
        ),
        Argument("relative_f", [float, None], optional=True, doc=doc_relative_f),
        Argument(
            "enable_atom_ener_coeff",
            [bool],
            optional=True,
            default=False,
            doc=doc_enable_atom_ener_coeff,
        ),
        # The generalized force prefactors were declared float-only; they now
        # accept ints as well, like every other prefactor in this list.
        *_loss_pref_pair("generalized force", "gf", 0.0, 0.0, label="drdq"),
        Argument(
            "numb_generalized_coord",
            int,
            optional=True,
            default=0,
            doc=doc_numb_generalized_coord,
        ),
    ]


@loss_args_plugin.register("ener_spin")
def loss_ener_spin():
    """Return the argument list of the ``ener_spin`` loss.

    Returns
    -------
    list of Argument
        Prefactor pairs for energy, real-atom force, magnetic force, virial,
        atomic energy and atomic prefactor, followed by ``relative_f`` and
        ``enable_atom_ener_coeff``.
    """
    doc_relative_f = "If provided, relative force error will be used in the loss. The difference of force will be normalized by the magnitude of the force in the label with a shift given by `relative_f`, i.e. DF_i / ( || F || + relative_f ) with DF denoting the difference between prediction and label and || F || denoting the L2 norm of the label."
    doc_enable_atom_ener_coeff = r"If true, the energy will be computed as \sum_i c_i E_i. c_i should be provided by file atom_ener_coeff.npy in each data system, otherwise it's 1."
    return [
        # Passing the abbreviation makes the docs refer to the real option
        # names (start_pref_e, start_pref_fr, ...) instead of
        # start_pref_energy, start_pref_force_real_atom, ...
        *_loss_pref_pair("energy", "e", 0.02, 1.00),
        *_loss_pref_pair("force_real_atom", "fr", 1000, 1.00),
        *_loss_pref_pair("force_magnetic", "fm", 10000, 10.0),
        *_loss_pref_pair("virial", "v", 0.00, 0.00),
        *_loss_pref_pair("atom_ener", "ae", 0.00, 0.00),
        *_loss_pref_pair("atom_pref", "pf", 0.00, 0.00),
        Argument("relative_f", [float, None], optional=True, doc=doc_relative_f),
        Argument(
            "enable_atom_ener_coeff",
            [bool],
            optional=True,
            default=False,
            doc=doc_enable_atom_ener_coeff,
        ),
    ]


@loss_args_plugin.register("dos", doc=doc_only_tf_supported)
def loss_dos():
    """Return the argument list of the ``dos`` loss.

    Returns
    -------
    list of Argument
        Prefactor pairs for the DOS, its cumulative distribution function,
        the atomic DOS and the cumulative integral of the atomic DOS.
    """
    # Label names follow the `dos` fitting documentation in this file, which
    # names the label files `dos.npy` and `atom_dos.npy`. Without explicit
    # label/abbr the docs would quote the long titles below as file and
    # option names.
    return [
        *_loss_pref_pair("Density of State (DOS)", "dos", 0.00, 0.00, label="dos"),
        *_loss_pref_pair(
            "Cumulative Distribution Function (cumulative integral of DOS)",
            "cdf",
            0.00,
            0.00,
            label="dos",
        ),
        *_loss_pref_pair(
            "atomic DOS (site-projected DOS)", "ados", 1.00, 1.00, label="atom_dos"
        ),
        *_loss_pref_pair(
            "Cumulative integral of atomic DOS",
            "acdf",
            0.00,
            0.00,
            label="atom_dos",
        ),
    ]


# YWolfeee: Modified to support tensor type of loss args.
+@loss_args_plugin.register("tensor", doc=doc_only_tf_supported) +def loss_tensor(): + # doc_global_weight = "The prefactor of the weight of global loss. It should be larger than or equal to 0. If only `pref` is provided or both are not provided, training will be global mode, i.e. the shape of 'polarizability.npy` or `dipole.npy` should be #frams x [9 or 3]." + # doc_local_weight = "The prefactor of the weight of atomic loss. It should be larger than or equal to 0. If only `pref_atomic` is provided, training will be atomic mode, i.e. the shape of `polarizability.npy` or `dipole.npy` should be #frames x ([9 or 3] x #selected atoms). If both `pref` and `pref_atomic` are provided, training will be combined mode, and atomic label should be provided as well." + doc_global_weight = "The prefactor of the weight of global loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to global label, i.e. 'polarizability.npy` or `dipole.npy`, whose shape should be #frames x [9 or 3]. If it's larger than 0.0, this npy should be included." + doc_local_weight = "The prefactor of the weight of atomic loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to atomic label, i.e. `atomic_polarizability.npy` or `atomic_dipole.npy`, whose shape should be #frames x ([9 or 3] x #selected atoms). If it's larger than 0.0, this npy should be included. Both `pref` and `pref_atomic` should be provided, and either can be set to 0.0." + return [ + Argument( + "pref", [float, int], optional=False, default=None, doc=doc_global_weight + ), + Argument( + "pref_atomic", + [float, int], + optional=False, + default=None, + doc=doc_local_weight, + ), + ] + + +def loss_variant_type_args(): + doc_loss = "The type of the loss. When the fitting type is `ener`, the loss type should be set to `ener` or left unset. When the fitting type is `dipole` or `polar`, the loss type should be set to `tensor`." 
+ + return Variant( + "type", + loss_args_plugin.get_all_argument(), + optional=True, + default_tag="ener", + doc=doc_loss, + ) + + +def loss_args(): + doc_loss = "The definition of loss function. The loss type should be set to `tensor`, `ener` or left unset." + ca = Argument( + "loss", dict, [], [loss_variant_type_args()], optional=True, doc=doc_loss + ) + return ca + + +def loss_dict_args(): + doc_loss_dict = ( + "The dictionary of definitions of multiple loss functions in multi-task mode. " + "Each loss_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, is the single definition of loss function, whose type should be set to `tensor`, `ener` or left unset.\n" + ) + ca = Argument("loss_dict", dict, [], [], optional=True, doc=doc_loss_dict) + return ca + + +# --- Training configurations: --- # +def training_data_args(): # ! added by Ziyao: new specification style for data systems. + link_sys = make_link("systems", "training/training_data/systems") + doc_systems = ( + "The data systems for training. " + "This key can be provided with a list that specifies the systems, or be provided with a string " + "by which the prefix of all systems are given and the list of the systems is automatically generated." + ) + doc_set_prefix = f"The prefix of the sets in the {link_sys}." + doc_batch_size = f'This key can be \n\n\ +- list: the length of which is the same as the {link_sys}. The batch size of each system is given by the elements of the list.\n\n\ +- int: all {link_sys} use the same batch size.\n\n\ +- string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.\n\n\ +- string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.\n\n\ +- string "mixed:N": the batch data will be sampled from all systems and merged into a mixed system with the batch size N. 
Only support the se_atten descriptor.\n\n\ +If MPI is used, the value should be considered as the batch size per task.' + doc_auto_prob_style = 'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n\ +- "prob_uniform" : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\ +- "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\ +- "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.' + doc_sys_probs = ( + "A list of float if specified. " + "Should be of the same length as `systems`, " + "specifying the probability of each system." + ) + + args = [ + Argument( + "systems", [List[str], str], optional=False, default=".", doc=doc_systems + ), + Argument("set_prefix", str, optional=True, default="set", doc=doc_set_prefix), + Argument( + "batch_size", + [List[int], int, str], + optional=True, + default="auto", + doc=doc_batch_size, + ), + Argument( + "auto_prob", + str, + optional=True, + default="prob_sys_size", + doc=doc_auto_prob_style, + alias=[ + "auto_prob_style", + ], + ), + Argument( + "sys_probs", + List[float], + optional=True, + default=None, + doc=doc_sys_probs, + alias=["sys_weights"], + ), + ] + + doc_training_data = "Configurations of training data." + return Argument( + "training_data", + dict, + optional=True, + sub_fields=args, + sub_variants=[], + doc=doc_training_data, + ) + + +def validation_data_args(): # ! added by Ziyao: new specification style for data systems. 
+ link_sys = make_link("systems", "training/validation_data/systems") + doc_systems = ( + "The data systems for validation. " + "This key can be provided with a list that specifies the systems, or be provided with a string " + "by which the prefix of all systems are given and the list of the systems is automatically generated." + ) + doc_set_prefix = f"The prefix of the sets in the {link_sys}." + doc_batch_size = f'This key can be \n\n\ +- list: the length of which is the same as the {link_sys}. The batch size of each system is given by the elements of the list.\n\n\ +- int: all {link_sys} use the same batch size.\n\n\ +- string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.\n\n\ +- string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.' + doc_auto_prob_style = 'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n\ +- "prob_uniform" : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\ +- "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\ +- "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.' + doc_sys_probs = ( + "A list of float if specified. " + "Should be of the same length as `systems`, " + "specifying the probability of each system." + ) + doc_numb_btch = "An integer that specifies the number of batches to be sampled for each validation period." 
+ + args = [ + Argument( + "systems", [List[str], str], optional=False, default=".", doc=doc_systems + ), + Argument("set_prefix", str, optional=True, default="set", doc=doc_set_prefix), + Argument( + "batch_size", + [List[int], int, str], + optional=True, + default="auto", + doc=doc_batch_size, + ), + Argument( + "auto_prob", + str, + optional=True, + default="prob_sys_size", + doc=doc_auto_prob_style, + alias=[ + "auto_prob_style", + ], + ), + Argument( + "sys_probs", + List[float], + optional=True, + default=None, + doc=doc_sys_probs, + alias=["sys_weights"], + ), + Argument( + "numb_btch", + int, + optional=True, + default=1, + doc=doc_numb_btch, + alias=[ + "numb_batch", + ], + ), + ] + + doc_validation_data = ( + "Configurations of validation data. Similar to that of training data, " + "except that a `numb_btch` argument may be configured" + ) + return Argument( + "validation_data", + dict, + optional=True, + default=None, + sub_fields=args, + sub_variants=[], + doc=doc_validation_data, + ) + + +def mixed_precision_args(): # ! added by Denghui. + doc_output_prec = 'The precision for mixed precision params. " \ + "The trainable variables precision during the mixed precision training process, " \ + "supported options are float32 only currently.' + doc_compute_prec = 'The precision for mixed precision compute. " \ + "The compute precision during the mixed precision training process, "" \ + "supported options are float16 and bfloat16 currently.' + + args = [ + Argument( + "output_prec", str, optional=True, default="float32", doc=doc_output_prec + ), + Argument( + "compute_prec", str, optional=False, default="float16", doc=doc_compute_prec + ), + ] + + doc_mixed_precision = "Configurations of mixed precision." + return Argument( + "mixed_precision", + dict, + optional=True, + sub_fields=args, + sub_variants=[], + doc=doc_mixed_precision, + ) + + +def training_args(): # ! modified by Ziyao: data configuration isolated. + doc_numb_steps = "Number of training batch. 
Each training uses one batch of data." + doc_seed = "The random seed for getting frames from the training data set." + doc_disp_file = "The file for printing learning curve." + doc_disp_freq = "The frequency of printing learning curve." + doc_save_freq = "The frequency of saving check point." + doc_save_ckpt = "The path prefix of saving check point files." + doc_max_ckpt_keep = ( + "The maximum number of checkpoints to keep. " + "The oldest checkpoints will be deleted once the number of checkpoints exceeds max_ckpt_keep. " + "Defaults to 5." + ) + doc_disp_training = "Displaying verbose information during training." + doc_time_training = "Timing durining training." + doc_profiling = "Profiling during training." + doc_profiling_file = "Output file for profiling." + doc_enable_profiler = "Enable TensorFlow Profiler (available in TensorFlow 2.3) or PyTorch Profiler to analyze performance. The log will be saved to `tensorboard_log_dir`." + doc_tensorboard = "Enable tensorboard" + doc_tensorboard_log_dir = "The log directory of tensorboard outputs" + doc_tensorboard_freq = "The frequency of writing tensorboard events." + doc_data_dict = ( + "The dictionary of multi DataSystems in multi-task mode. " + "Each data_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, " + "contains training data and optional validation data definitions." + ) + doc_fitting_weight = ( + "Each fitting_weight[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, " + "is the training weight of fitting net `fitting_key`. " + "Fitting nets with higher weights will be selected with higher probabilities to be trained in one step. " + "Weights will be normalized and minus ones will be ignored. " + "If not set, each fitting net will be equally selected when training." + ) + doc_warmup_steps = ( + "The number of steps for learning rate warmup. 
During warmup, " + "the learning rate begins at zero and progressively increases linearly to `start_lr`, " + "rather than starting directly from `start_lr`" + ) + doc_gradient_max_norm = ( + "Clips the gradient norm to a maximum value. " + "If the gradient norm exceeds this value, it will be clipped to this limit. " + "No gradient clipping will occur if set to 0." + ) + doc_stat_file = ( + "The file path for saving the data statistics results. " + "If set, the results will be saved and directly loaded during the next training session, " + "avoiding the need to recalculate the statistics" + ) + doc_opt_type = "The type of optimizer to use." + doc_kf_blocksize = "The blocksize for the Kalman filter." + + arg_training_data = training_data_args() + arg_validation_data = validation_data_args() + mixed_precision_data = mixed_precision_args() + + args = [ + arg_training_data, + arg_validation_data, + mixed_precision_data, + Argument( + "numb_steps", int, optional=False, doc=doc_numb_steps, alias=["stop_batch"] + ), + Argument("seed", [int, None], optional=True, doc=doc_seed), + Argument( + "disp_file", str, optional=True, default="lcurve.out", doc=doc_disp_file + ), + Argument("disp_freq", int, optional=True, default=1000, doc=doc_disp_freq), + Argument("save_freq", int, optional=True, default=1000, doc=doc_save_freq), + Argument( + "save_ckpt", str, optional=True, default="model.ckpt", doc=doc_save_ckpt + ), + Argument("max_ckpt_keep", int, optional=True, default=5, doc=doc_max_ckpt_keep), + Argument( + "disp_training", bool, optional=True, default=True, doc=doc_disp_training + ), + Argument( + "time_training", bool, optional=True, default=True, doc=doc_time_training + ), + Argument( + "profiling", + bool, + optional=True, + default=False, + doc=doc_only_tf_supported + doc_profiling, + ), + Argument( + "profiling_file", + str, + optional=True, + default="timeline.json", + doc=doc_only_tf_supported + doc_profiling_file, + ), + Argument( + "enable_profiler", + bool, + 
optional=True, + default=False, + doc=doc_enable_profiler, + ), + Argument( + "tensorboard", bool, optional=True, default=False, doc=doc_tensorboard + ), + Argument( + "tensorboard_log_dir", + str, + optional=True, + default="log", + doc=doc_tensorboard_log_dir, + ), + Argument( + "tensorboard_freq", int, optional=True, default=1, doc=doc_tensorboard_freq + ), + Argument("data_dict", dict, optional=True, doc=doc_data_dict), + Argument("fitting_weight", dict, optional=True, doc=doc_fitting_weight), + Argument( + "warmup_steps", + int, + optional=True, + doc=doc_only_pt_supported + doc_warmup_steps, + ), + Argument( + "gradient_max_norm", + float, + optional=True, + doc=doc_only_pt_supported + doc_gradient_max_norm, + ), + Argument( + "stat_file", str, optional=True, doc=doc_only_pt_supported + doc_stat_file + ), + ] + variants = [ + Variant( + "opt_type", + choices=[ + Argument("Adam", dict, [], [], optional=True), + Argument( + "LKF", + dict, + [ + Argument( + "kf_blocksize", + int, + optional=True, + doc=doc_only_pt_supported + doc_kf_blocksize, + ), + ], + [], + optional=True, + ), + ], + optional=True, + default_tag="Adam", + doc=doc_only_pt_supported + doc_opt_type, + ) + ] + + doc_training = "The training options." 
+ return Argument("training", dict, args, variants, doc=doc_training) + + +def make_index(keys): + ret = [] + for ii in keys: + ret.append(make_link(ii, ii)) + return ", ".join(ret) + + +def gen_doc(*, make_anchor=True, make_link=True, **kwargs): + if make_link: + make_anchor = True + ptr = [] + for ii in gen_args(): + ptr.append(ii.gen_doc(make_anchor=make_anchor, make_link=make_link, **kwargs)) + + key_words = [] + for ii in "\n\n".join(ptr).split("\n"): + if "argument path" in ii: + key_words.append(ii.split(":")[1].replace("`", "").strip()) + # ptr.insert(0, make_index(key_words)) + + return "\n\n".join(ptr) + + +def gen_json(**kwargs): + return json.dumps( + tuple(gen_args()), + cls=ArgumentEncoder, + ) + + +def gen_args(**kwargs) -> List[Argument]: + return [ + model_args(), + learning_rate_args(), + learning_rate_dict_args(), + loss_args(), + loss_dict_args(), + training_args(), + nvnmd_args(), + ] + + +def normalize_multi_task(data): + # single-task or multi-task mode + if data["model"].get("type", "standard") not in ("standard", "multi"): + return data + single_fitting_net = "fitting_net" in data["model"].keys() + single_training_data = "training_data" in data["training"].keys() + single_valid_data = "validation_data" in data["training"].keys() + single_loss = "loss" in data.keys() + single_learning_rate = "learning_rate" in data.keys() + multi_fitting_net = "fitting_net_dict" in data["model"].keys() + multi_training_data = "data_dict" in data["training"].keys() + multi_loss = "loss_dict" in data.keys() + multi_fitting_weight = "fitting_weight" in data["training"].keys() + multi_learning_rate = "learning_rate_dict" in data.keys() + assert (single_fitting_net == single_training_data) and ( + multi_fitting_net == multi_training_data + ), ( + "In single-task mode, 'model/fitting_net' and 'training/training_data' must be defined at the same time! 
" + "While in multi-task mode, 'model/fitting_net_dict', 'training/data_dict' " + "must be defined at the same time! Please check your input script. " + ) + assert not (single_fitting_net and multi_fitting_net), ( + "Single-task mode and multi-task mode can not be performed together. " + "Please check your input script and choose just one format! " + ) + assert ( + single_fitting_net or multi_fitting_net + ), "Please define your fitting net and training data! " + if multi_fitting_net: + assert not single_valid_data, ( + "In multi-task mode, 'training/validation_data' should not appear " + "outside 'training/data_dict'! Please check your input script." + ) + assert ( + not single_loss + ), "In multi-task mode, please use 'model/loss_dict' in stead of 'model/loss'! " + assert ( + "type_map" in data["model"] + ), "In multi-task mode, 'model/type_map' must be defined! " + data["model"]["type"] = "multi" + data["model"]["fitting_net_dict"] = normalize_fitting_net_dict( + data["model"]["fitting_net_dict"] + ) + data["training"]["data_dict"] = normalize_data_dict( + data["training"]["data_dict"] + ) + data["loss_dict"] = ( + normalize_loss_dict( + data["model"]["fitting_net_dict"].keys(), data["loss_dict"] + ) + if multi_loss + else {} + ) + if multi_learning_rate: + data["learning_rate_dict"] = normalize_learning_rate_dict( + data["model"]["fitting_net_dict"].keys(), data["learning_rate_dict"] + ) + elif single_learning_rate: + data["learning_rate_dict"] = ( + normalize_learning_rate_dict_with_single_learning_rate( + data["model"]["fitting_net_dict"].keys(), data["learning_rate"] + ) + ) + fitting_weight = ( + data["training"]["fitting_weight"] if multi_fitting_weight else None + ) + data["training"]["fitting_weight"] = normalize_fitting_weight( + data["model"]["fitting_net_dict"].keys(), + data["training"]["data_dict"].keys(), + fitting_weight=fitting_weight, + ) + else: + assert not multi_loss, "In single-task mode, please use 'model/loss' in stead of 
'model/loss_dict'! " + assert not multi_learning_rate, "In single-task mode, please use 'model/learning_rate' in stead of 'model/learning_rate_dict'! " + return data + + +def normalize_fitting_net_dict(fitting_net_dict): + new_dict = {} + base = Argument("base", dict, [], [fitting_variant_type_args()], doc="") + for fitting_key_item in fitting_net_dict: + data = base.normalize_value( + fitting_net_dict[fitting_key_item], trim_pattern="_*" + ) + base.check_value(data, strict=True) + new_dict[fitting_key_item] = data + return new_dict + + +def normalize_data_dict(data_dict): + new_dict = {} + base = Argument( + "base", dict, [training_data_args(), validation_data_args()], [], doc="" + ) + for data_system_key_item in data_dict: + data = base.normalize_value(data_dict[data_system_key_item], trim_pattern="_*") + base.check_value(data, strict=True) + new_dict[data_system_key_item] = data + return new_dict + + +def normalize_loss_dict(fitting_keys, loss_dict): + # check the loss dict + failed_loss_keys = [item for item in loss_dict if item not in fitting_keys] + assert not failed_loss_keys, f"Loss dict key(s) {failed_loss_keys!s} not have corresponding fitting keys in {list(fitting_keys)!s}! " + new_dict = {} + base = Argument("base", dict, [], [loss_variant_type_args()], doc="") + for item in loss_dict: + data = base.normalize_value(loss_dict[item], trim_pattern="_*") + base.check_value(data, strict=True) + new_dict[item] = data + return new_dict + + +def normalize_learning_rate_dict(fitting_keys, learning_rate_dict): + # check the learning_rate dict + failed_learning_rate_keys = [ + item for item in learning_rate_dict if item not in fitting_keys + ] + assert not failed_learning_rate_keys, f"Learning rate dict key(s) {failed_learning_rate_keys!s} not have corresponding fitting keys in {list(fitting_keys)!s}! 
" + new_dict = {} + base = Argument("base", dict, [], [learning_rate_variant_type_args()], doc="") + for item in learning_rate_dict: + data = base.normalize_value(learning_rate_dict[item], trim_pattern="_*") + base.check_value(data, strict=True) + new_dict[item] = data + return new_dict + + +def normalize_learning_rate_dict_with_single_learning_rate(fitting_keys, learning_rate): + new_dict = {} + base = Argument("base", dict, [], [learning_rate_variant_type_args()], doc="") + data = base.normalize_value(learning_rate, trim_pattern="_*") + base.check_value(data, strict=True) + for fitting_key in fitting_keys: + new_dict[fitting_key] = data + return new_dict + + +def normalize_fitting_weight(fitting_keys, data_keys, fitting_weight=None): + # check the mapping + failed_data_keys = [item for item in data_keys if item not in fitting_keys] + assert not failed_data_keys, f"Data dict key(s) {failed_data_keys!s} not have corresponding fitting keys in {list(fitting_keys)!s}! " + empty_fitting_keys = [] + valid_fitting_keys = [] + for item in fitting_keys: + if item not in data_keys: + empty_fitting_keys.append(item) + else: + valid_fitting_keys.append(item) + if empty_fitting_keys: + log.warning( + f"Fitting net(s) {empty_fitting_keys!s} have no data and will not be used in training." + ) + num_pair = len(valid_fitting_keys) + assert num_pair > 0, "No valid training data systems for fitting nets!" + + # check and normalize the fitting weight + new_weight = {} + if fitting_weight is None: + equal_weight = 1.0 / num_pair + for item in fitting_keys: + new_weight[item] = equal_weight if item in valid_fitting_keys else 0.0 + else: + failed_weight_keys = [ + item for item in fitting_weight if item not in fitting_keys + ] + assert not failed_weight_keys, f"Fitting weight key(s) {failed_weight_keys!s} not have corresponding fitting keys in {list(fitting_keys)!s}! 
" + sum_prob = 0.0 + for item in fitting_keys: + if item in valid_fitting_keys: + if ( + item in fitting_weight + and isinstance(fitting_weight[item], (int, float)) + and fitting_weight[item] > 0.0 + ): + sum_prob += fitting_weight[item] + new_weight[item] = fitting_weight[item] + else: + valid_fitting_keys.remove(item) + log.warning( + f"Fitting net '{item}' has zero or invalid weight " + "and will not be used in training." + ) + new_weight[item] = 0.0 + else: + new_weight[item] = 0.0 + assert sum_prob > 0.0, "No valid training weight for fitting nets!" + # normalize + for item in new_weight: + new_weight[item] /= sum_prob + return new_weight + + +def normalize(data): + data = normalize_multi_task(data) + + base = Argument("base", dict, gen_args()) + data = base.normalize_value(data, trim_pattern="_*") + base.check_value(data, strict=True) + + return data + + +if __name__ == "__main__": + gen_doc() diff --git a/deepmd_utils/utils/argcheck_nvnmd.py b/deepmd/utils/argcheck_nvnmd.py similarity index 100% rename from deepmd_utils/utils/argcheck_nvnmd.py rename to deepmd/utils/argcheck_nvnmd.py diff --git a/deepmd/utils/batch_size.py b/deepmd/utils/batch_size.py index 863520b3f4..b35d9833d5 100644 --- a/deepmd/utils/batch_size.py +++ b/deepmd/utils/batch_size.py @@ -1,19 +1,212 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from packaging.version import ( - Version, +import logging +import os +from abc import ( + ABC, + abstractmethod, ) - -from deepmd.env import ( - TF_VERSION, - tf, +from typing import ( + Callable, + Tuple, ) + +import numpy as np + from deepmd.utils.errors import ( OutOfMemoryError, ) -from deepmd_utils.utils.batch_size import AutoBatchSize as AutoBatchSizeBase +log = logging.getLogger(__name__) + + +class AutoBatchSize(ABC): + """This class allows DeePMD-kit to automatically decide the maximum + batch size that will not cause an OOM error. + + Notes + ----- + In some CPU environments, the program may be directly killed when OOM. 
In + this case, by default the batch size will not be increased for CPUs. The + environment variable `DP_INFER_BATCH_SIZE` can be set as the batch size. + + In other cases, we assume all OOM error will raise :class:`OutOfMemoryError`. + + Parameters + ---------- + initial_batch_size : int, default: 1024 + initial batch size (number of total atoms) when DP_INFER_BATCH_SIZE + is not set + factor : float, default: 2. + increased factor + + Attributes + ---------- + current_batch_size : int + current batch size (number of total atoms) + maximum_working_batch_size : int + maximum working batch size + minimal_not_working_batch_size : int + minimal not working batch size + """ -class AutoBatchSize(AutoBatchSizeBase): + def __init__(self, initial_batch_size: int = 1024, factor: float = 2.0) -> None: + # See also PyTorchLightning/pytorch-lightning#1638 + self.current_batch_size = initial_batch_size + DP_INFER_BATCH_SIZE = int(os.environ.get("DP_INFER_BATCH_SIZE", 0)) + if DP_INFER_BATCH_SIZE > 0: + self.current_batch_size = DP_INFER_BATCH_SIZE + self.maximum_working_batch_size = DP_INFER_BATCH_SIZE + self.minimal_not_working_batch_size = self.maximum_working_batch_size + 1 + else: + self.maximum_working_batch_size = initial_batch_size + if self.is_gpu_available(): + self.minimal_not_working_batch_size = 2**31 + else: + self.minimal_not_working_batch_size = ( + self.maximum_working_batch_size + 1 + ) + log.warning( + "You can use the environment variable DP_INFER_BATCH_SIZE to" + "control the inference batch size (nframes * natoms). " + "The default value is %d." % initial_batch_size + ) + + self.factor = factor + + def execute( + self, callable: Callable, start_index: int, natoms: int + ) -> Tuple[int, tuple]: + """Excuate a method with given batch size. + + Parameters + ---------- + callable : Callable + The method should accept the batch size and start_index as parameters, + and returns executed batch size and data. 
+ start_index : int + start index + natoms : int + natoms + + Returns + ------- + int + executed batch size * number of atoms + tuple + result from callable, None if failing to execute + + Raises + ------ + OutOfMemoryError + OOM when batch size is 1 + """ + if natoms > 0: + batch_nframes = self.current_batch_size // natoms + else: + batch_nframes = self.current_batch_size + try: + n_batch, result = callable(max(batch_nframes, 1), start_index) + except Exception as e: + if not self.is_oom_error(e): + raise e + self.minimal_not_working_batch_size = min( + self.minimal_not_working_batch_size, self.current_batch_size + ) + if self.maximum_working_batch_size >= self.minimal_not_working_batch_size: + self.maximum_working_batch_size = int( + self.minimal_not_working_batch_size / self.factor + ) + if self.minimal_not_working_batch_size <= natoms: + raise OutOfMemoryError( + "The callable still throws an out-of-memory (OOM) error even when batch size is 1!" + ) from e + # adjust the next batch size + self._adjust_batch_size(1.0 / self.factor) + return 0, None + else: + n_tot = n_batch * natoms + self.maximum_working_batch_size = max( + self.maximum_working_batch_size, n_tot + ) + # adjust the next batch size + if ( + n_tot + natoms > self.current_batch_size + and self.current_batch_size * self.factor + < self.minimal_not_working_batch_size + ): + self._adjust_batch_size(self.factor) + return n_batch, result + + def _adjust_batch_size(self, factor: float): + old_batch_size = self.current_batch_size + self.current_batch_size = int(self.current_batch_size * factor) + log.info( + "Adjust batch size from %d to %d" + % (old_batch_size, self.current_batch_size) + ) + + def execute_all( + self, callable: Callable, total_size: int, natoms: int, *args, **kwargs + ) -> Tuple[np.ndarray]: + """Excuate a method with all given data. + + Parameters + ---------- + callable : Callable + The method should accept *args and **kwargs as input and return the similiar array. 
+ total_size : int + Total size + natoms : int + The number of atoms + *args + Variable length argument list. + **kwargs + If 2D np.ndarray, assume the first axis is batch; otherwise do nothing. + """ + + def execute_with_batch_size( + batch_size: int, start_index: int + ) -> Tuple[int, Tuple[np.ndarray]]: + end_index = start_index + batch_size + end_index = min(end_index, total_size) + return (end_index - start_index), callable( + *[ + ( + vv[start_index:end_index] + if isinstance(vv, np.ndarray) and vv.ndim > 1 + else vv + ) + for vv in args + ], + **{ + kk: ( + vv[start_index:end_index] + if isinstance(vv, np.ndarray) and vv.ndim > 1 + else vv + ) + for kk, vv in kwargs.items() + }, + ) + + index = 0 + results = [] + while index < total_size: + n_batch, result = self.execute(execute_with_batch_size, index, natoms) + if not isinstance(result, tuple): + result = (result,) + index += n_batch + if n_batch: + for rr in result: + rr.reshape((n_batch, -1)) + results.append(result) + + r = tuple([np.concatenate(r, axis=0) for r in zip(*results)]) + if len(r) == 1: + # avoid returning tuple if callable doesn't return tuple + r = r[0] + return r + + @abstractmethod def is_gpu_available(self) -> bool: """Check if GPU is available. @@ -22,11 +215,8 @@ def is_gpu_available(self) -> bool: bool True if GPU is available """ - return ( - Version(TF_VERSION) >= Version("1.14") - and tf.config.experimental.get_visible_devices("GPU") - ) or tf.test.is_gpu_available() + @abstractmethod def is_oom_error(self, e: Exception) -> bool: """Check if the exception is an OOM error. 
@@ -34,7 +224,9 @@ def is_oom_error(self, e: Exception) -> bool: ---------- e : Exception Exception + + Returns + ------- + bool + True if the exception is an OOM error """ - # TODO: it's very slow to catch OOM error; I don't know what TF is doing here - # but luckily we only need to catch once - return isinstance(e, (tf.errors.ResourceExhaustedError, OutOfMemoryError)) diff --git a/deepmd/utils/compat.py b/deepmd/utils/compat.py index 91bf4021ee..5f9c14e6d8 100644 --- a/deepmd/utils/compat.py +++ b/deepmd/utils/compat.py @@ -1,15 +1,392 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -"""Alias for backward compatibility.""" -from deepmd_utils.utils.compat import ( - convert_input_v0_v1, - convert_input_v1_v2, - deprecate_numb_test, - update_deepmd_input, +"""Module providing compatibility between `0.x.x` and `1.x.x` input versions.""" + +import json +import warnings +from pathlib import ( + Path, +) +from typing import ( + Any, + Dict, + Optional, + Sequence, + Union, +) + +import numpy as np + +from deepmd.common import ( + j_must_have, ) -__all__ = [ - "convert_input_v0_v1", - "convert_input_v1_v2", - "deprecate_numb_test", - "update_deepmd_input", -] + +def convert_input_v0_v1( + jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None +) -> Dict[str, Any]: + """Convert input from v0 format to v1. 
+ + Parameters + ---------- + jdata : Dict[str, Any] + loaded json/yaml file + warning : bool, optional + whether to show deprecation warning, by default True + dump : Optional[Union[str, Path]], optional + whether to dump converted file, by default None + + Returns + ------- + Dict[str, Any] + converted output + """ + output = {} + output["model"] = _model(jdata, jdata["use_smooth"]) + output["learning_rate"] = _learning_rate(jdata) + output["loss"] = _loss(jdata) + output["training"] = _training(jdata) + if warning: + _warning_input_v0_v1(dump) + if dump is not None: + with open(dump, "w") as fp: + json.dump(output, fp, indent=4) + return output + + +def _warning_input_v0_v1(fname: Optional[Union[str, Path]]): + msg = ( + "It seems that you are using a deepmd-kit input of version 0.x.x, " + "which is deprecated. we have converted the input to >2.0.0 compatible" + ) + if fname is not None: + msg += f", and output it to file {fname}" + warnings.warn(msg) + + +def _model(jdata: Dict[str, Any], smooth: bool) -> Dict[str, Dict[str, Any]]: + """Convert data to v1 input for non-smooth model. + + Parameters + ---------- + jdata : Dict[str, Any] + parsed input json/yaml data + smooth : bool + whether to use smooth or non-smooth descriptor version + + Returns + ------- + Dict[str, Dict[str, Any]] + dictionary with model input parameters and sub-dictionaries for descriptor and + fitting net + """ + model = {} + model["descriptor"] = ( + _smth_descriptor(jdata) if smooth else _nonsmth_descriptor(jdata) + ) + model["fitting_net"] = _fitting_net(jdata) + return model + + +def _nonsmth_descriptor(jdata: Dict[str, Any]) -> Dict[str, Any]: + """Convert data to v1 input for non-smooth descriptor. 
+ + Parameters + ---------- + jdata : Dict[str, Any] + parsed input json/yaml data + + Returns + ------- + Dict[str, Any] + dict with descriptor parameters + """ + descriptor = {} + descriptor["type"] = "loc_frame" + _jcopy(jdata, descriptor, ("sel_a", "sel_r", "rcut", "axis_rule")) + return descriptor + + +def _smth_descriptor(jdata: Dict[str, Any]) -> Dict[str, Any]: + """Convert data to v1 input for smooth descriptor. + + Parameters + ---------- + jdata : Dict[str, Any] + parsed input json/yaml data + + Returns + ------- + Dict[str, Any] + dict with descriptor parameters + """ + descriptor = {} + seed = jdata.get("seed", None) + if seed is not None: + descriptor["seed"] = seed + descriptor["type"] = "se_a" + descriptor["sel"] = jdata["sel_a"] + _jcopy(jdata, descriptor, ("rcut",)) + descriptor["rcut_smth"] = jdata.get("rcut_smth", descriptor["rcut"]) + descriptor["neuron"] = j_must_have(jdata, "filter_neuron") + descriptor["axis_neuron"] = j_must_have(jdata, "axis_neuron", ["n_axis_neuron"]) + descriptor["resnet_dt"] = False + if "resnet_dt" in jdata: + descriptor["resnet_dt"] = jdata["filter_resnet_dt"] + + return descriptor + + +def _fitting_net(jdata: Dict[str, Any]) -> Dict[str, Any]: + """Convert data to v1 input for fitting net. + + Parameters + ---------- + jdata : Dict[str, Any] + parsed input json/yaml data + + Returns + ------- + Dict[str, Any] + dict with fitting net parameters + """ + fitting_net = {} + + seed = jdata.get("seed", None) + if seed is not None: + fitting_net["seed"] = seed + fitting_net["neuron"] = j_must_have(jdata, "fitting_neuron", ["n_neuron"]) + fitting_net["resnet_dt"] = True + if "resnet_dt" in jdata: + fitting_net["resnet_dt"] = jdata["resnet_dt"] + if "fitting_resnet_dt" in jdata: + fitting_net["resnet_dt"] = jdata["fitting_resnet_dt"] + return fitting_net + + +def _learning_rate(jdata: Dict[str, Any]) -> Dict[str, Any]: + """Convert data to v1 input for learning rate section. 
+ + Parameters + ---------- + jdata : Dict[str, Any] + parsed input json/yaml data + + Returns + ------- + Dict[str, Any] + dict with learning rate parameters + """ + learning_rate = {} + learning_rate["type"] = "exp" + _jcopy(jdata, learning_rate, ("decay_steps", "decay_rate", "start_lr")) + return learning_rate + + +def _loss(jdata: Dict[str, Any]) -> Dict[str, Any]: + """Convert data to v1 input for loss function. + + Parameters + ---------- + jdata : Dict[str, Any] + parsed input json/yaml data + + Returns + ------- + Dict[str, Any] + dict with loss function parameters + """ + loss: Dict[str, Any] = {} + _jcopy( + jdata, + loss, + ( + "start_pref_e", + "limit_pref_e", + "start_pref_f", + "limit_pref_f", + "start_pref_v", + "limit_pref_v", + ), + ) + if "start_pref_ae" in jdata: + loss["start_pref_ae"] = jdata["start_pref_ae"] + if "limit_pref_ae" in jdata: + loss["limit_pref_ae"] = jdata["limit_pref_ae"] + return loss + + +def _training(jdata: Dict[str, Any]) -> Dict[str, Any]: + """Convert data to v1 input for training. 
+ + Parameters + ---------- + jdata : Dict[str, Any] + parsed input json/yaml data + + Returns + ------- + Dict[str, Any] + dict with training parameters + """ + training = {} + seed = jdata.get("seed", None) + if seed is not None: + training["seed"] = seed + + _jcopy(jdata, training, ("systems", "set_prefix", "stop_batch", "batch_size")) + training["disp_file"] = "lcurve.out" + if "disp_file" in jdata: + training["disp_file"] = jdata["disp_file"] + training["disp_freq"] = j_must_have(jdata, "disp_freq") + training["numb_test"] = j_must_have(jdata, "numb_test") + training["save_freq"] = j_must_have(jdata, "save_freq") + training["save_ckpt"] = j_must_have(jdata, "save_ckpt") + training["disp_training"] = j_must_have(jdata, "disp_training") + training["time_training"] = j_must_have(jdata, "time_training") + if "profiling" in jdata: + training["profiling"] = jdata["profiling"] + if training["profiling"]: + training["profiling_file"] = j_must_have(jdata, "profiling_file") + return training + + +def _jcopy(src: Dict[str, Any], dst: Dict[str, Any], keys: Sequence[str]): + """Copy specified keys from one dict to another. + + Parameters + ---------- + src : Dict[str, Any] + source dictionary + dst : Dict[str, Any] + destination dictionary, will be modified in place + keys : Sequence[str] + list of keys to copy + """ + for k in keys: + dst[k] = src[k] + + +def remove_decay_rate(jdata: Dict[str, Any]): + """Convert decay_rate to stop_lr. 
+ + Parameters + ---------- + jdata : Dict[str, Any] + input data + """ + lr = jdata["learning_rate"] + if "decay_rate" in lr: + decay_rate = lr["decay_rate"] + start_lr = lr["start_lr"] + stop_step = jdata["training"]["stop_batch"] + decay_steps = lr["decay_steps"] + stop_lr = np.exp(np.log(decay_rate) * (stop_step / decay_steps)) * start_lr + lr["stop_lr"] = stop_lr + lr.pop("decay_rate") + + +def convert_input_v1_v2( + jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None +) -> Dict[str, Any]: + tr_cfg = jdata["training"] + tr_data_keys = { + "systems", + "set_prefix", + "batch_size", + "sys_prob", + "auto_prob", + # alias included + "sys_weights", + "auto_prob_style", + } + + tr_data_cfg = {k: v for k, v in tr_cfg.items() if k in tr_data_keys} + new_tr_cfg = {k: v for k, v in tr_cfg.items() if k not in tr_data_keys} + new_tr_cfg["training_data"] = tr_data_cfg + if "training_data" in tr_cfg: + raise RuntimeError( + "Both v1 (training/systems) and v2 (training/training_data) parameters are given." + ) + + jdata["training"] = new_tr_cfg + + # remove deprecated arguments + remove_decay_rate(jdata) + + if warning: + _warning_input_v1_v2(dump) + if dump is not None: + with open(dump, "w") as fp: + json.dump(jdata, fp, indent=4) + + return jdata + + +def _warning_input_v1_v2(fname: Optional[Union[str, Path]]): + msg = ( + "It seems that you are using a deepmd-kit input of version 1.x.x, " + "which is deprecated. we have converted the input to >2.0.0 compatible" + ) + if fname is not None: + msg += f", and output it to file {fname}" + warnings.warn(msg) + + +def deprecate_numb_test( + jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None +) -> Dict[str, Any]: + """Deprecate `numb_test` since v2.1. It has taken no effect since v2.0. + + See `#1243 `_. 
+ + Parameters + ---------- + jdata : Dict[str, Any] + loaded json/yaml file + warning : bool, optional + whether to show deprecation warning, by default True + dump : Optional[Union[str, Path]], optional + whether to dump converted file, by default None + + Returns + ------- + Dict[str, Any] + converted output + """ + try: + jdata.get("training", {}).pop("numb_test") + except KeyError: + pass + else: + if warning: + warnings.warn( + "The argument training->numb_test has been deprecated since v2.0.0. " + "Use training->validation_data->batch_size instead." + ) + + if dump is not None: + with open(dump, "w") as fp: + json.dump(jdata, fp, indent=4) + return jdata + + +def update_deepmd_input( + jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None +) -> Dict[str, Any]: + def is_deepmd_v0_input(jdata): + return "model" not in jdata.keys() + + def is_deepmd_v1_input(jdata): + return "systems" in j_must_have(jdata, "training").keys() + + if is_deepmd_v0_input(jdata): + jdata = convert_input_v0_v1(jdata, warning, None) + jdata = convert_input_v1_v2(jdata, False, None) + jdata = deprecate_numb_test(jdata, False, dump) + elif is_deepmd_v1_input(jdata): + jdata = convert_input_v1_v2(jdata, warning, None) + jdata = deprecate_numb_test(jdata, False, dump) + else: + jdata = deprecate_numb_test(jdata, warning, dump) + + return jdata diff --git a/deepmd/utils/data.py b/deepmd/utils/data.py index a6f888beac..3cf73dc093 100644 --- a/deepmd/utils/data.py +++ b/deepmd/utils/data.py @@ -1,9 +1,787 @@ +#!/usr/bin/env python3 + # SPDX-License-Identifier: LGPL-3.0-or-later -"""Alias for backward compatibility.""" -from deepmd_utils.utils.data import ( - DeepmdData, +import bisect +import logging +from typing import ( + List, + Optional, +) + +import numpy as np + +from deepmd.env import ( + GLOBAL_ENER_FLOAT_PRECISION, + GLOBAL_NP_FLOAT_PRECISION, ) +from deepmd.utils import random as dp_random +from deepmd.utils.path import ( + DPPath, +) + +log = 
logging.getLogger(__name__) + + +class DeepmdData: + """Class for a data system. + + It loads data from hard disk, and mantains the data as a `data_dict` + + Parameters + ---------- + sys_path + Path to the data system + set_prefix + Prefix for the directories of different sets + shuffle_test + If the test data are shuffled + type_map + Gives the name of different atom types + optional_type_map + If the type_map.raw in each system is optional + modifier + Data modifier that has the method `modify_data` + trn_all_set + Use all sets as training dataset. Otherwise, if the number of sets is more than 1, the last set is left for test. + sort_atoms : bool + Sort atoms by atom types. Required to enable when the data is directly feeded to + descriptors except mixed types. + """ + + def __init__( + self, + sys_path: str, + set_prefix: str = "set", + shuffle_test: bool = True, + type_map: Optional[List[str]] = None, + optional_type_map: bool = True, + modifier=None, + trn_all_set: bool = False, + sort_atoms: bool = True, + ): + """Constructor.""" + root = DPPath(sys_path) + self.dirs = root.glob(set_prefix + ".*") + if not len(self.dirs): + raise FileNotFoundError(f"No {set_prefix}.* is found in {sys_path}") + self.dirs.sort() + # check mix_type format + error_format_msg = ( + "if one of the set is of mixed_type format, " + "then all of the sets in this system should be of mixed_type format!" + ) + self.mixed_type = self._check_mode(self.dirs[0]) + for set_item in self.dirs[1:]: + assert self._check_mode(set_item) == self.mixed_type, error_format_msg + # load atom type + self.atom_type = self._load_type(root) + self.natoms = len(self.atom_type) + # load atom type map + self.type_map = self._load_type_map(root) + assert ( + optional_type_map or self.type_map is not None + ), f"System {sys_path} must have type_map.raw in this mode! 
" + if self.type_map is not None: + assert len(self.type_map) >= max(self.atom_type) + 1 + # check pbc + self.pbc = self._check_pbc(root) + # enforce type_map if necessary + self.enforce_type_map = False + if type_map is not None and self.type_map is not None and len(type_map): + if not self.mixed_type: + atom_type_ = [ + type_map.index(self.type_map[ii]) for ii in self.atom_type + ] + self.atom_type = np.array(atom_type_, dtype=np.int32) + else: + self.enforce_type_map = True + sorter = np.argsort(type_map) + self.type_idx_map = np.array( + sorter[np.searchsorted(type_map, self.type_map, sorter=sorter)] + ) + # padding for virtual atom + self.type_idx_map = np.append( + self.type_idx_map, np.array([-1], dtype=np.int32) + ) + self.type_map = type_map + if type_map is None and self.type_map is None and self.mixed_type: + raise RuntimeError("mixed_type format must have type_map!") + # make idx map + self.sort_atoms = sort_atoms + self.idx_map = self._make_idx_map(self.atom_type) + # train dirs + self.test_dir = self.dirs[-1] + if trn_all_set: + self.train_dirs = self.dirs + else: + if len(self.dirs) == 1: + self.train_dirs = self.dirs + else: + self.train_dirs = self.dirs[:-1] + self.data_dict = {} + # add box and coord + self.add("box", 9, must=self.pbc) + self.add("coord", 3, atomic=True, must=True) + # the training times of each frame + self.add("numb_copy", 1, must=False, default=1, dtype=int) + # set counters + self.set_count = 0 + self.iterator = 0 + self.shuffle_test = shuffle_test + # set modifier + self.modifier = modifier + # calculate prefix sum for get_item method + frames_list = [self._get_nframes(item) for item in self.dirs] + self.nframes = np.sum(frames_list) + # The prefix sum stores the range of indices contained in each directory, which is needed by get_item method + self.prefix_sum = np.cumsum(frames_list).tolist() + + def add( + self, + key: str, + ndof: int, + atomic: bool = False, + must: bool = False, + high_prec: bool = False, + type_sel: 
Optional[List[int]] = None, + repeat: int = 1, + default: float = 0.0, + dtype: Optional[np.dtype] = None, + output_natoms_for_type_sel: bool = False, + ): + """Add a data item that to be loaded. + + Parameters + ---------- + key + The key of the item. The corresponding data is stored in `sys_path/set.*/key.npy` + ndof + The number of dof + atomic + The item is an atomic property. + If False, the size of the data should be nframes x ndof + If True, the size of data should be nframes x natoms x ndof + must + The data file `sys_path/set.*/key.npy` must exist. + If must is False and the data file does not exist, the `data_dict[find_key]` is set to 0.0 + high_prec + Load the data and store in float64, otherwise in float32 + type_sel + Select certain type of atoms + repeat + The data will be repeated `repeat` times. + default : float, default=0. + default value of data + dtype : np.dtype, optional + the dtype of data, overwrites `high_prec` if provided + output_natoms_for_type_sel : bool, optional + if True and type_sel is True, the atomic dimension will be natoms instead of nsel + """ + self.data_dict[key] = { + "ndof": ndof, + "atomic": atomic, + "must": must, + "high_prec": high_prec, + "type_sel": type_sel, + "repeat": repeat, + "reduce": None, + "default": default, + "dtype": dtype, + "output_natoms_for_type_sel": output_natoms_for_type_sel, + } + return self + + def reduce(self, key_out: str, key_in: str): + """Generate a new item from the reduction of another atom. 
+ + Parameters + ---------- + key_out + The name of the reduced item + key_in + The name of the data item to be reduced + """ + assert key_in in self.data_dict, "cannot find input key" + assert self.data_dict[key_in]["atomic"], "reduced property should be atomic" + assert key_out not in self.data_dict, "output key should not have been added" + assert ( + self.data_dict[key_in]["repeat"] == 1 + ), "reduced proerties should not have been repeated" + + self.data_dict[key_out] = { + "ndof": self.data_dict[key_in]["ndof"], + "atomic": False, + "must": True, + "high_prec": True, + "type_sel": None, + "repeat": 1, + "reduce": key_in, + } + return self + + def get_data_dict(self) -> dict: + """Get the `data_dict`.""" + return self.data_dict + + def check_batch_size(self, batch_size): + """Check if the system can get a batch of data with `batch_size` frames.""" + for ii in self.train_dirs: + if self.data_dict["coord"]["high_prec"]: + tmpe = ( + (ii / "coord.npy").load_numpy().astype(GLOBAL_ENER_FLOAT_PRECISION) + ) + else: + tmpe = (ii / "coord.npy").load_numpy().astype(GLOBAL_NP_FLOAT_PRECISION) + if tmpe.ndim == 1: + tmpe = tmpe.reshape([1, -1]) + if tmpe.shape[0] < batch_size: + return ii, tmpe.shape[0] + return None + + def check_test_size(self, test_size): + """Check if the system can get a test dataset with `test_size` frames.""" + if self.data_dict["coord"]["high_prec"]: + tmpe = ( + (self.test_dir / "coord.npy") + .load_numpy() + .astype(GLOBAL_ENER_FLOAT_PRECISION) + ) + else: + tmpe = ( + (self.test_dir / "coord.npy") + .load_numpy() + .astype(GLOBAL_NP_FLOAT_PRECISION) + ) + if tmpe.ndim == 1: + tmpe = tmpe.reshape([1, -1]) + if tmpe.shape[0] < test_size: + return self.test_dir, tmpe.shape[0] + else: + return None + + def get_item_torch(self, index: int) -> dict: + """Get a single frame data . The frame is picked from the data system by index. The index is coded across all the sets. 
+ + Parameters + ---------- + index + index of the frame + """ + i = bisect.bisect_right(self.prefix_sum, index) + frames = self._load_set(self.dirs[i]) + frame = self._get_subdata(frames, index - self.prefix_sum[i]) + frame = self.reformat_data_torch(frame) + frame["fid"] = index + return frame + + def get_batch(self, batch_size: int) -> dict: + """Get a batch of data with `batch_size` frames. The frames are randomly picked from the data system. + + Parameters + ---------- + batch_size + size of the batch + """ + if hasattr(self, "batch_set"): + set_size = self.batch_set["coord"].shape[0] + else: + set_size = 0 + if self.iterator + batch_size > set_size: + self._load_batch_set(self.train_dirs[self.set_count % self.get_numb_set()]) + self.set_count += 1 + set_size = self.batch_set["coord"].shape[0] + iterator_1 = self.iterator + batch_size + if iterator_1 >= set_size: + iterator_1 = set_size + idx = np.arange(self.iterator, iterator_1) + self.iterator += batch_size + ret = self._get_subdata(self.batch_set, idx) + return ret + + def get_test(self, ntests: int = -1) -> dict: + """Get the test data with `ntests` frames. + + Parameters + ---------- + ntests + Size of the test data set. If `ntests` is -1, all test data will be get. 
+ """ + if not hasattr(self, "test_set"): + self._load_test_set(self.test_dir, self.shuffle_test) + if ntests == -1: + idx = None + else: + ntests_ = ( + ntests + if ntests < self.test_set["type"].shape[0] + else self.test_set["type"].shape[0] + ) + # print('ntest', self.test_set['type'].shape[0], ntests, ntests_) + idx = np.arange(ntests_) + ret = self._get_subdata(self.test_set, idx=idx) + if self.modifier is not None: + self.modifier.modify_data(ret, self) + return ret + + def get_ntypes(self) -> int: + """Number of atom types in the system.""" + if self.type_map is not None: + return len(self.type_map) + else: + return max(self.get_atom_type()) + 1 + + def get_type_map(self) -> List[str]: + """Get the type map.""" + return self.type_map + + def get_atom_type(self) -> List[int]: + """Get atom types.""" + return self.atom_type + + def get_numb_set(self) -> int: + """Get number of training sets.""" + return len(self.train_dirs) + + def get_numb_batch(self, batch_size: int, set_idx: int) -> int: + """Get the number of batches in a set.""" + data = self._load_set(self.train_dirs[set_idx]) + ret = data["coord"].shape[0] // batch_size + if ret == 0: + ret = 1 + return ret + + def get_sys_numb_batch(self, batch_size: int) -> int: + """Get the number of batches in the data system.""" + ret = 0 + for ii in range(len(self.train_dirs)): + ret += self.get_numb_batch(batch_size, ii) + return ret + + def get_natoms(self): + """Get number of atoms.""" + return len(self.atom_type) + + def get_natoms_vec(self, ntypes: int): + """Get number of atoms and number of atoms in different types. + + Parameters + ---------- + ntypes + Number of types (may be larger than the actual number of types in the system). 
+ + Returns + ------- + natoms + natoms[0]: number of local atoms + natoms[1]: total number of atoms held by this processor + natoms[i]: 2 <= i < Ntypes+2, number of type i atoms + """ + natoms, natoms_vec = self._get_natoms_2(ntypes) + tmp = [natoms, natoms] + tmp = np.append(tmp, natoms_vec) + return tmp.astype(np.int32) + + def avg(self, key): + """Return the average value of an item.""" + if key not in self.data_dict.keys(): + raise RuntimeError("key %s has not been added" % key) + info = self.data_dict[key] + ndof = info["ndof"] + eners = [] + for ii in self.train_dirs: + data = self._load_set(ii) + ei = data[key].reshape([-1, ndof]) + eners.append(ei) + eners = np.concatenate(eners, axis=0) + if eners.size == 0: + return 0 + else: + return np.average(eners, axis=0) + + def _idx_map_sel(self, atom_type, type_sel): + new_types = [] + for ii in atom_type: + if ii in type_sel: + new_types.append(ii) + new_types = np.array(new_types, dtype=int) + natoms = new_types.shape[0] + idx = np.arange(natoms) + idx_map = np.lexsort((idx, new_types)) + return idx_map + + def _get_natoms_2(self, ntypes): + sample_type = self.atom_type + natoms = len(sample_type) + natoms_vec = np.zeros(ntypes).astype(int) + for ii in range(ntypes): + natoms_vec[ii] = np.count_nonzero(sample_type == ii) + return natoms, natoms_vec + + def _get_subdata(self, data, idx=None): + new_data = {} + for ii in data: + dd = data[ii] + if "find_" in ii: + new_data[ii] = dd + else: + if idx is not None: + new_data[ii] = dd[idx] + else: + new_data[ii] = dd + return new_data + + def _load_batch_set(self, set_name: DPPath): + if not hasattr(self, "batch_set") or self.get_numb_set() > 1: + self.batch_set = self._load_set(set_name) + if self.modifier is not None: + self.modifier.modify_data(self.batch_set, self) + self.batch_set, _ = self._shuffle_data(self.batch_set) + self.reset_get_batch() + + def reset_get_batch(self): + self.iterator = 0 + + def _load_test_set(self, set_name: DPPath, shuffle_test): + 
self.test_set = self._load_set(set_name) + if shuffle_test: + self.test_set, _ = self._shuffle_data(self.test_set) + + def _shuffle_data(self, data): + ret = {} + nframes = data["coord"].shape[0] + idx = np.arange(nframes) + # the training times of each frame + idx = np.repeat(idx, np.reshape(data["numb_copy"], (nframes,))) + dp_random.shuffle(idx) + for kk in data: + if ( + type(data[kk]) == np.ndarray + and len(data[kk].shape) == 2 + and data[kk].shape[0] == nframes + and "find_" not in kk + ): + ret[kk] = data[kk][idx] + else: + ret[kk] = data[kk] + return ret, idx + + def _get_nframes(self, set_name: DPPath): + # get nframes + if not isinstance(set_name, DPPath): + set_name = DPPath(set_name) + path = set_name / "coord.npy" + if self.data_dict["coord"]["high_prec"]: + coord = path.load_numpy().astype(GLOBAL_ENER_FLOAT_PRECISION) + else: + coord = path.load_numpy().astype(GLOBAL_NP_FLOAT_PRECISION) + if coord.ndim == 1: + coord = coord.reshape([1, -1]) + nframes = coord.shape[0] + return nframes + + def reformat_data_torch(self, data): + """Modify the data format for the requirements of Torch backend. 
+ + Parameters + ---------- + data + original data + """ + for kk in self.data_dict.keys(): + if "find_" in kk: + pass + else: + if kk in data and self.data_dict[kk]["atomic"]: + data[kk] = data[kk].reshape(-1, self.data_dict[kk]["ndof"]) + data["atype"] = data["type"] + if not self.pbc: + data["box"] = None + return data + + def _load_set(self, set_name: DPPath): + # get nframes + if not isinstance(set_name, DPPath): + set_name = DPPath(set_name) + path = set_name / "coord.npy" + if self.data_dict["coord"]["high_prec"]: + coord = path.load_numpy().astype(GLOBAL_ENER_FLOAT_PRECISION) + else: + coord = path.load_numpy().astype(GLOBAL_NP_FLOAT_PRECISION) + if coord.ndim == 1: + coord = coord.reshape([1, -1]) + nframes = coord.shape[0] + assert coord.shape[1] == self.data_dict["coord"]["ndof"] * self.natoms + # load keys + data = {} + for kk in self.data_dict.keys(): + if self.data_dict[kk]["reduce"] is None: + data["find_" + kk], data[kk] = self._load_data( + set_name, + kk, + nframes, + self.data_dict[kk]["ndof"], + atomic=self.data_dict[kk]["atomic"], + high_prec=self.data_dict[kk]["high_prec"], + must=self.data_dict[kk]["must"], + type_sel=self.data_dict[kk]["type_sel"], + repeat=self.data_dict[kk]["repeat"], + default=self.data_dict[kk]["default"], + dtype=self.data_dict[kk]["dtype"], + output_natoms_for_type_sel=self.data_dict[kk][ + "output_natoms_for_type_sel" + ], + ) + for kk in self.data_dict.keys(): + if self.data_dict[kk]["reduce"] is not None: + k_in = self.data_dict[kk]["reduce"] + ndof = self.data_dict[kk]["ndof"] + data["find_" + kk] = data["find_" + k_in] + tmp_in = data[k_in].astype(GLOBAL_ENER_FLOAT_PRECISION) + data[kk] = np.sum( + np.reshape(tmp_in, [nframes, self.natoms, ndof]), axis=1 + ) + + if self.mixed_type: + # nframes x natoms + atom_type_mix = self._load_type_mix(set_name) + if self.enforce_type_map: + try: + atom_type_mix_ = self.type_idx_map[atom_type_mix].astype(np.int32) + except IndexError as e: + raise IndexError( + f"some types in 
'real_atom_types.npy' of set {set_name} are not contained in {self.get_ntypes()} types!" + ) from e + atom_type_mix = atom_type_mix_ + real_type = atom_type_mix.reshape([nframes, self.natoms]) + data["type"] = real_type + natoms = data["type"].shape[1] + # nframes x ntypes + atom_type_nums = np.array( + [(real_type == i).sum(axis=-1) for i in range(self.get_ntypes())], + dtype=np.int32, + ).T + ghost_nums = np.array( + [(real_type == -1).sum(axis=-1)], + dtype=np.int32, + ).T + assert ( + atom_type_nums.sum(axis=-1) + ghost_nums.sum(axis=-1) == natoms + ).all(), f"some types in 'real_atom_types.npy' of set {set_name} are not contained in {self.get_ntypes()} types!" + data["real_natoms_vec"] = np.concatenate( + ( + np.tile(np.array([natoms, natoms], dtype=np.int32), (nframes, 1)), + atom_type_nums, + ), + axis=-1, + ) + else: + data["type"] = np.tile(self.atom_type[self.idx_map], (nframes, 1)) + + return data + + def _load_data( + self, + set_name, + key, + nframes, + ndof_, + atomic=False, + must=True, + repeat=1, + high_prec=False, + type_sel=None, + default: float = 0.0, + dtype: Optional[np.dtype] = None, + output_natoms_for_type_sel: bool = False, + ): + if atomic: + natoms = self.natoms + idx_map = self.idx_map + # if type_sel, then revise natoms and idx_map + if type_sel is not None: + natoms_sel = 0 + for jj in type_sel: + natoms_sel += np.sum(self.atom_type == jj) + idx_map_sel = self._idx_map_sel(self.atom_type, type_sel) + else: + natoms_sel = natoms + idx_map_sel = idx_map + ndof = ndof_ * natoms + else: + ndof = ndof_ + natoms_sel = 0 + idx_map_sel = None + if dtype is not None: + pass + elif high_prec: + dtype = GLOBAL_ENER_FLOAT_PRECISION + else: + dtype = GLOBAL_NP_FLOAT_PRECISION + path = set_name / (key + ".npy") + if path.is_file(): + data = path.load_numpy().astype(dtype) + try: # YWolfeee: deal with data shape error + if atomic: + if type_sel is not None: + # check the data shape is nsel or natoms + if data.size == nframes * natoms_sel * ndof_: 
+ if output_natoms_for_type_sel: + tmp = np.zeros( + [nframes, natoms, ndof_], dtype=data.dtype + ) + sel_mask = np.isin(self.atom_type, type_sel) + tmp[:, sel_mask] = data.reshape( + [nframes, natoms_sel, ndof_] + ) + data = tmp + else: + natoms = natoms_sel + idx_map = idx_map_sel + ndof = ndof_ * natoms + elif data.size == nframes * natoms * ndof_: + if output_natoms_for_type_sel: + pass + else: + sel_mask = np.isin(self.atom_type, type_sel) + data = data.reshape([nframes, natoms, ndof_]) + data = data[:, sel_mask] + natoms = natoms_sel + idx_map = idx_map_sel + ndof = ndof_ * natoms + else: + raise ValueError( + f"The shape of the data {key} in {set_name}" + f"is {data.shape}, which doesn't match either" + f"({nframes}, {natoms_sel}, {ndof_}) or" + f"({nframes}, {natoms}, {ndof_})" + ) + data = data.reshape([nframes, natoms, -1]) + data = data[:, idx_map, :] + data = data.reshape([nframes, -1]) + data = np.reshape(data, [nframes, ndof]) + except ValueError as err_message: + explanation = "This error may occur when your label mismatch it's name, i.e. you might store global tensor in `atomic_tensor.npy` or atomic tensor in `tensor.npy`." + log.error(str(err_message)) + log.error(explanation) + raise ValueError(str(err_message) + ". " + explanation) from err_message + if repeat != 1: + data = np.repeat(data, repeat).reshape([nframes, -1]) + return np.float32(1.0), data + elif must: + raise RuntimeError("%s not found!" 
% path) + else: + if atomic and type_sel is not None and not output_natoms_for_type_sel: + ndof = ndof_ * natoms_sel + data = np.full([nframes, ndof], default, dtype=dtype) + if repeat != 1: + data = np.repeat(data, repeat).reshape([nframes, -1]) + return np.float32(0.0), data + + def _load_type(self, sys_path: DPPath): + atom_type = (sys_path / "type.raw").load_txt(ndmin=1).astype(np.int32) + return atom_type + + def _load_type_mix(self, set_name: DPPath): + type_path = set_name / "real_atom_types.npy" + real_type = type_path.load_numpy().astype(np.int32).reshape([-1, self.natoms]) + return real_type + + def _make_idx_map(self, atom_type): + natoms = atom_type.shape[0] + idx = np.arange(natoms) + if self.sort_atoms: + idx_map = np.lexsort((idx, atom_type)) + else: + idx_map = idx + return idx_map + + def _load_type_map(self, sys_path: DPPath): + fname = sys_path / "type_map.raw" + if fname.is_file(): + return fname.load_txt(dtype=str, ndmin=1).tolist() + else: + return None + + def _check_pbc(self, sys_path: DPPath): + pbc = True + if (sys_path / "nopbc").is_file(): + pbc = False + return pbc + + def _check_mode(self, set_path: DPPath): + return (set_path / "real_atom_types.npy").is_file() + + +class DataRequirementItem: + """A class to store the data requirement for data systems. + + Parameters + ---------- + key + The key of the item. The corresponding data is stored in `sys_path/set.*/key.npy` + ndof + The number of dof + atomic + The item is an atomic property. + If False, the size of the data should be nframes x ndof + If True, the size of data should be nframes x natoms x ndof + must + The data file `sys_path/set.*/key.npy` must exist. + If must is False and the data file does not exist, the `data_dict[find_key]` is set to 0.0 + high_prec + Load the data and store in float64, otherwise in float32 + type_sel + Select certain type of atoms + repeat + The data will be repeated `repeat` times. + default : float, default=0. 
+ default value of data + dtype : np.dtype, optional + the dtype of data, overwrites `high_prec` if provided + output_natoms_for_type_sel : bool, optional + if True and type_sel is True, the atomic dimension will be natoms instead of nsel + """ + + def __init__( + self, + key: str, + ndof: int, + atomic: bool = False, + must: bool = False, + high_prec: bool = False, + type_sel: Optional[List[int]] = None, + repeat: int = 1, + default: float = 0.0, + dtype: Optional[np.dtype] = None, + output_natoms_for_type_sel: bool = False, + ) -> None: + self.key = key + self.ndof = ndof + self.atomic = atomic + self.must = must + self.high_prec = high_prec + self.type_sel = type_sel + self.repeat = repeat + self.default = default + self.dtype = dtype + self.output_natoms_for_type_sel = output_natoms_for_type_sel + self.dict = self.to_dict() + + def to_dict(self) -> dict: + return { + "key": self.key, + "ndof": self.ndof, + "atomic": self.atomic, + "must": self.must, + "high_prec": self.high_prec, + "type_sel": self.type_sel, + "repeat": self.repeat, + "default": self.default, + "dtype": self.dtype, + "output_natoms_for_type_sel": self.output_natoms_for_type_sel, + } -__all__ = [ - "DeepmdData", -] + def __getitem__(self, key: str): + if key not in self.dict: + raise KeyError(key) + return self.dict[key] diff --git a/deepmd/utils/data_system.py b/deepmd/utils/data_system.py index 65e87d8ebc..640083bc33 100644 --- a/deepmd/utils/data_system.py +++ b/deepmd/utils/data_system.py @@ -1,13 +1,812 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -"""Alias for backward compatibility.""" -from deepmd_utils.utils.data_system import ( - DeepmdDataSystem, - prob_sys_size_ext, - process_sys_probs, -) - -__all__ = [ - "DeepmdDataSystem", - "process_sys_probs", - "prob_sys_size_ext", -] +import collections +import logging +import warnings +from functools import ( + lru_cache, +) +from typing import ( + Any, + Dict, + List, + Optional, + Union, +) + +import numpy as np + +import 
deepmd.utils.random as dp_random +from deepmd.common import ( + data_requirement, + expand_sys_str, + j_must_have, + make_default_mesh, +) +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, +) +from deepmd.utils.data import ( + DeepmdData, +) +from deepmd.utils.out_stat import ( + compute_stats_from_redu, +) +from deepmd.utils.path import ( + DPPath, +) + +log = logging.getLogger(__name__) + + +class DeepmdDataSystem: + """Class for manipulating many data systems. + + It is implemented with the help of DeepmdData + """ + + def __init__( + self, + systems: List[str], + batch_size: int, + test_size: int, + rcut: Optional[float] = None, + set_prefix: str = "set", + shuffle_test: bool = True, + type_map: Optional[List[str]] = None, + optional_type_map: bool = True, + modifier=None, + trn_all_set=False, + sys_probs=None, + auto_prob_style="prob_sys_size", + sort_atoms: bool = True, + ): + """Constructor. + + Parameters + ---------- + systems + Specifying the paths to systems + batch_size + The batch size + test_size + The size of test data + rcut + The cut-off radius. Not used. + set_prefix + Prefix for the directories of different sets + shuffle_test + If the test data are shuffled + type_map + Gives the name of different atom types + optional_type_map + If the type_map.raw in each system is optional + modifier + Data modifier that has the method `modify_data` + trn_all_set + Use all sets as training dataset. Otherwise, if the number of sets is more than 1, the last set is left for test. + sys_probs : list of float + The probabilitis of systems to get the batch. + Summation of positive elements of this list should be no greater than 1. + Element of this list can be negative, the probability of the corresponding system is determined + automatically by the number of batches in the system. + auto_prob_style : str + Determine the probability of systems automatically. 
The method is assigned by this key and can be + - "prob_uniform" : the probability all the systems are equal, namely 1.0/self.get_nsystems() + - "prob_sys_size" : the probability of a system is proportional to the number of batches in the system + - "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : + the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, + where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, + the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional + to the number of batches in the system. + sort_atoms : bool + Sort atoms by atom types. Required to enable when the data is directly feeded to + descriptors except mixed types. + """ + # init data + del rcut + self.system_dirs = systems + self.nsystems = len(self.system_dirs) + self.data_systems = [] + for ii in self.system_dirs: + self.data_systems.append( + DeepmdData( + ii, + set_prefix=set_prefix, + shuffle_test=shuffle_test, + type_map=type_map, + optional_type_map=optional_type_map, + modifier=modifier, + trn_all_set=trn_all_set, + sort_atoms=sort_atoms, + ) + ) + # check mix_type format + error_format_msg = ( + "if one of the system is of mixed_type format, " + "then all of the systems should be of mixed_type format!" 
+ ) + if self.data_systems[0].mixed_type: + for data_sys in self.data_systems[1:]: + assert data_sys.mixed_type, error_format_msg + self.mixed_type = True + else: + for data_sys in self.data_systems[1:]: + assert not data_sys.mixed_type, error_format_msg + self.mixed_type = False + # batch size + self.batch_size = batch_size + is_auto_bs = False + self.mixed_systems = False + if isinstance(self.batch_size, int): + self.batch_size = self.batch_size * np.ones(self.nsystems, dtype=int) + elif isinstance(self.batch_size, str): + words = self.batch_size.split(":") + if "auto" == words[0]: + is_auto_bs = True + rule = 32 + if len(words) == 2: + rule = int(words[1]) + self.batch_size = self._make_auto_bs(rule) + elif "mixed" == words[0]: + self.mixed_type = True + self.mixed_systems = True + if len(words) == 2: + rule = int(words[1]) + else: + raise RuntimeError("batch size must be specified for mixed systems") + self.batch_size = rule * np.ones(self.nsystems, dtype=int) + else: + raise RuntimeError("unknown batch_size rule " + words[0]) + elif isinstance(self.batch_size, list): + pass + else: + raise RuntimeError("invalid batch_size") + assert isinstance(self.batch_size, (list, np.ndarray)) + assert len(self.batch_size) == self.nsystems + + # natoms, nbatches + ntypes = [] + for ii in self.data_systems: + ntypes.append(ii.get_ntypes()) + self.sys_ntypes = max(ntypes) + self.natoms = [] + self.natoms_vec = [] + self.nbatches = [] + type_map_list = [] + for ii in range(self.nsystems): + self.natoms.append(self.data_systems[ii].get_natoms()) + self.natoms_vec.append( + self.data_systems[ii].get_natoms_vec(self.sys_ntypes).astype(int) + ) + self.nbatches.append( + self.data_systems[ii].get_sys_numb_batch(self.batch_size[ii]) + ) + type_map_list.append(self.data_systems[ii].get_type_map()) + self.type_map = self._check_type_map_consistency(type_map_list) + + # ! 
altered by Marián Rynik + # test size + # now test size can be set as a percentage of systems data or test size + # can be set for each system individualy in the same manner as batch + # size. This enables one to use systems with diverse number of + # structures and different number of atoms. + self.test_size = test_size + if isinstance(self.test_size, int): + self.test_size = self.test_size * np.ones(self.nsystems, dtype=int) + elif isinstance(self.test_size, str): + words = self.test_size.split("%") + try: + percent = int(words[0]) + except ValueError: + raise RuntimeError("unknown test_size rule " + words[0]) + self.test_size = self._make_auto_ts(percent) + elif isinstance(self.test_size, list): + pass + else: + raise RuntimeError("invalid test_size") + assert isinstance(self.test_size, (list, np.ndarray)) + assert len(self.test_size) == self.nsystems + + # init pick idx + self.pick_idx = 0 + + # derive system probabilities + self.sys_probs = None + self.set_sys_probs(sys_probs, auto_prob_style) + + # check batch and test size + for ii in range(self.nsystems): + chk_ret = self.data_systems[ii].check_batch_size(self.batch_size[ii]) + if chk_ret is not None and not is_auto_bs and not self.mixed_systems: + warnings.warn( + "system %s required batch size is larger than the size of the dataset %s (%d > %d)" + % ( + self.system_dirs[ii], + chk_ret[0], + self.batch_size[ii], + chk_ret[1], + ) + ) + chk_ret = self.data_systems[ii].check_test_size(self.test_size[ii]) + if chk_ret is not None and not is_auto_bs and not self.mixed_systems: + warnings.warn( + "system %s required test size is larger than the size of the dataset %s (%d > %d)" + % (self.system_dirs[ii], chk_ret[0], self.test_size[ii], chk_ret[1]) + ) + + def _load_test(self, ntests=-1): + self.test_data = collections.defaultdict(list) + for ii in range(self.nsystems): + test_system_data = self.data_systems[ii].get_test(ntests=ntests) + for nn in test_system_data: + 
self.test_data[nn].append(test_system_data[nn]) + + @property + @lru_cache(maxsize=None) + def default_mesh(self) -> List[np.ndarray]: + """Mesh for each system.""" + return [ + make_default_mesh( + self.data_systems[ii].pbc, self.data_systems[ii].mixed_type + ) + for ii in range(self.nsystems) + ] + + def compute_energy_shift(self, rcond=None, key="energy"): + sys_ener = [] + for ss in self.data_systems: + sys_ener.append(ss.avg(key)) + sys_ener = np.concatenate(sys_ener) + sys_tynatom = np.array(self.natoms_vec, dtype=GLOBAL_NP_FLOAT_PRECISION) + sys_tynatom = np.reshape(sys_tynatom, [self.nsystems, -1]) + sys_tynatom = sys_tynatom[:, 2:] + energy_shift, _ = compute_stats_from_redu( + sys_ener.reshape(-1, 1), + sys_tynatom, + rcond=rcond, + ) + return energy_shift.ravel() + + def add_dict(self, adict: dict) -> None: + """Add items to the data system by a `dict`. + `adict` should have items like + .. code-block:: python. + + adict[key] = { + "ndof": ndof, + "atomic": atomic, + "must": must, + "high_prec": high_prec, + "type_sel": type_sel, + "repeat": repeat, + } + + For the explaination of the keys see `add` + """ + for kk in adict: + self.add( + kk, + adict[kk]["ndof"], + atomic=adict[kk]["atomic"], + must=adict[kk]["must"], + high_prec=adict[kk]["high_prec"], + type_sel=adict[kk]["type_sel"], + repeat=adict[kk]["repeat"], + default=adict[kk]["default"], + dtype=adict[kk].get("dtype"), + output_natoms_for_type_sel=adict[kk].get( + "output_natoms_for_type_sel", False + ), + ) + + def add( + self, + key: str, + ndof: int, + atomic: bool = False, + must: bool = False, + high_prec: bool = False, + type_sel: Optional[List[int]] = None, + repeat: int = 1, + default: float = 0.0, + dtype: Optional[np.dtype] = None, + output_natoms_for_type_sel: bool = False, + ): + """Add a data item that to be loaded. + + Parameters + ---------- + key + The key of the item. 
The corresponding data is stored in `sys_path/set.*/key.npy` + ndof + The number of dof + atomic + The item is an atomic property. + If False, the size of the data should be nframes x ndof + If True, the size of data should be nframes x natoms x ndof + must + The data file `sys_path/set.*/key.npy` must exist. + If must is False and the data file does not exist, the `data_dict[find_key]` is set to 0.0 + high_prec + Load the data and store in float64, otherwise in float32 + type_sel + Select certain type of atoms + repeat + The data will be repeated `repeat` times. + default, default=0. + Default value of data + dtype + The dtype of data, overwrites `high_prec` if provided + output_natoms_for_type_sel : bool + If True and type_sel is True, the atomic dimension will be natoms instead of nsel + """ + for ii in self.data_systems: + ii.add( + key, + ndof, + atomic=atomic, + must=must, + high_prec=high_prec, + repeat=repeat, + type_sel=type_sel, + default=default, + dtype=dtype, + output_natoms_for_type_sel=output_natoms_for_type_sel, + ) + + def reduce(self, key_out, key_in): + """Generate a new item from the reduction of another atom. 
+ + Parameters + ---------- + key_out + The name of the reduced item + key_in + The name of the data item to be reduced + """ + for ii in self.data_systems: + ii.reduce(key_out, key_in) + + def get_data_dict(self, ii: int = 0) -> dict: + return self.data_systems[ii].get_data_dict() + + def set_sys_probs(self, sys_probs=None, auto_prob_style: str = "prob_sys_size"): + if sys_probs is None: + if auto_prob_style == "prob_uniform": + prob_v = 1.0 / float(self.nsystems) + probs = [prob_v for ii in range(self.nsystems)] + elif auto_prob_style[:13] == "prob_sys_size": + if auto_prob_style == "prob_sys_size": + prob_style = f"prob_sys_size;0:{self.get_nsystems()}:1.0" + else: + prob_style = auto_prob_style + probs = prob_sys_size_ext( + prob_style, self.get_nsystems(), self.nbatches + ) + else: + raise RuntimeError("Unknown auto prob style: " + auto_prob_style) + else: + probs = process_sys_probs(sys_probs, self.nbatches) + self.sys_probs = probs + + def get_batch(self, sys_idx: Optional[int] = None) -> dict: + # batch generation style altered by Ziyao Li: + # one should specify the "sys_prob" and "auto_prob_style" params + # via set_sys_prob() function. The sys_probs this function uses is + # defined as a private variable, self.sys_probs, initialized in __init__(). + # This is to optimize the (vain) efforts in evaluating sys_probs every batch. + """Get a batch of data from the data systems. + + Parameters + ---------- + sys_idx : int + The index of system from which the batch is get. + If sys_idx is not None, `sys_probs` and `auto_prob_style` are ignored + If sys_idx is None, automatically determine the system according to `sys_probs` or `auto_prob_style`, see the following. + This option does not work for mixed systems. 
+ + Returns + ------- + dict + The batch data + """ + if not self.mixed_systems: + b_data = self.get_batch_standard(sys_idx) + else: + b_data = self.get_batch_mixed() + return b_data + + def get_batch_standard(self, sys_idx: Optional[int] = None) -> dict: + """Get a batch of data from the data systems in the standard way. + + Parameters + ---------- + sys_idx : int + The index of system from which the batch is get. + If sys_idx is not None, `sys_probs` and `auto_prob_style` are ignored + If sys_idx is None, automatically determine the system according to `sys_probs` or `auto_prob_style`, see the following. + + Returns + ------- + dict + The batch data + """ + if sys_idx is not None: + self.pick_idx = sys_idx + else: + # prob = self._get_sys_probs(sys_probs, auto_prob_style) + self.pick_idx = dp_random.choice(np.arange(self.nsystems), p=self.sys_probs) + b_data = self.data_systems[self.pick_idx].get_batch( + self.batch_size[self.pick_idx] + ) + b_data["natoms_vec"] = self.natoms_vec[self.pick_idx] + b_data["default_mesh"] = self.default_mesh[self.pick_idx] + return b_data + + def get_batch_mixed(self) -> dict: + """Get a batch of data from the data systems in the mixed way. + + Returns + ------- + dict + The batch data + """ + # mixed systems have a global batch size + batch_size = self.batch_size[0] + batch_data = [] + for _ in range(batch_size): + self.pick_idx = dp_random.choice(np.arange(self.nsystems), p=self.sys_probs) + bb_data = self.data_systems[self.pick_idx].get_batch(1) + bb_data["natoms_vec"] = self.natoms_vec[self.pick_idx] + bb_data["default_mesh"] = self.default_mesh[self.pick_idx] + batch_data.append(bb_data) + b_data = self._merge_batch_data(batch_data) + return b_data + + def _merge_batch_data(self, batch_data: List[dict]) -> dict: + """Merge batch data from different systems. + + Parameters + ---------- + batch_data : list of dict + A list of batch data from different systems. + + Returns + ------- + dict + The merged batch data. 
+ """ + b_data = {} + max_natoms = max(bb["natoms_vec"][0] for bb in batch_data) + # natoms_vec + natoms_vec = np.zeros(2 + self.get_ntypes(), dtype=int) + natoms_vec[0:3] = max_natoms + b_data["natoms_vec"] = natoms_vec + # real_natoms_vec + real_natoms_vec = np.vstack([bb["natoms_vec"] for bb in batch_data]) + b_data["real_natoms_vec"] = real_natoms_vec + # type + type_vec = np.full((len(batch_data), max_natoms), -1, dtype=int) + for ii, bb in enumerate(batch_data): + type_vec[ii, : bb["type"].shape[1]] = bb["type"][0] + b_data["type"] = type_vec + # default_mesh + default_mesh = np.mean([bb["default_mesh"] for bb in batch_data], axis=0) + b_data["default_mesh"] = default_mesh + # other data + data_dict = self.get_data_dict(0) + for kk, vv in data_dict.items(): + if kk not in batch_data[0]: + continue + b_data["find_" + kk] = batch_data[0]["find_" + kk] + if not vv["atomic"]: + b_data[kk] = np.concatenate([bb[kk] for bb in batch_data], axis=0) + else: + b_data[kk] = np.zeros( + (len(batch_data), max_natoms * vv["ndof"] * vv["repeat"]), + dtype=batch_data[0][kk].dtype, + ) + for ii, bb in enumerate(batch_data): + b_data[kk][ii, : bb[kk].shape[1]] = bb[kk][0] + return b_data + + # ! altered by Marián Rynik + def get_test(self, sys_idx: Optional[int] = None, n_test: int = -1): # depreciated + """Get test data from the the data systems. + + Parameters + ---------- + sys_idx + The test dat of system with index `sys_idx` will be returned. + If is None, the currently selected system will be returned. + n_test + Number of test data. If set to -1 all test data will be get. 
+ """ + if not hasattr(self, "test_data"): + self._load_test(ntests=n_test) + if sys_idx is not None: + idx = sys_idx + else: + idx = self.pick_idx + + test_system_data = {} + for nn in self.test_data: + test_system_data[nn] = self.test_data[nn][idx] + test_system_data["natoms_vec"] = self.natoms_vec[idx] + test_system_data["default_mesh"] = self.default_mesh[idx] + return test_system_data + + def get_sys_ntest(self, sys_idx=None): + """Get number of tests for the currently selected system, + or one defined by sys_idx. + """ + if sys_idx is not None: + return self.test_size[sys_idx] + else: + return self.test_size[self.pick_idx] + + def get_type_map(self) -> List[str]: + """Get the type map.""" + return self.type_map + + def get_nbatches(self) -> int: + """Get the total number of batches.""" + return self.nbatches + + def get_ntypes(self) -> int: + """Get the number of types.""" + return self.sys_ntypes + + def get_nsystems(self) -> int: + """Get the number of data systems.""" + return self.nsystems + + def get_sys(self, idx: int) -> DeepmdData: + """Get a certain data system.""" + return self.data_systems[idx] + + def get_batch_size(self) -> int: + """Get the batch size.""" + return self.batch_size + + def print_summary(self, name: str): + print_summary( + name, + self.nsystems, + self.system_dirs, + self.natoms, + self.batch_size, + self.nbatches, + self.sys_probs, + [ii.pbc for ii in self.data_systems], + ) + + def _make_auto_bs(self, rule): + bs = [] + for ii in self.data_systems: + ni = ii.get_natoms() + bsi = rule // ni + if bsi * ni < rule: + bsi += 1 + bs.append(bsi) + return bs + + # ! 
added by Marián Rynik + def _make_auto_ts(self, percent): + ts = [] + for ii in range(self.nsystems): + ni = self.batch_size[ii] * self.nbatches[ii] + tsi = int(ni * percent / 100) + ts.append(tsi) + + return ts + + def _check_type_map_consistency(self, type_map_list): + ret = [] + for ii in type_map_list: + if ii is not None: + min_len = min([len(ii), len(ret)]) + for idx in range(min_len): + if ii[idx] != ret[idx]: + raise RuntimeError(f"inconsistent type map: {ret!s} {ii!s}") + if len(ii) > len(ret): + ret = ii + return ret + + +def _format_name_length(name, width): + if len(name) <= width: + return "{: >{}}".format(name, width) + else: + name = name[-(width - 3) :] + name = "-- " + name + return name + + +def print_summary( + name: str, + nsystems: int, + system_dirs: List[str], + natoms: List[int], + batch_size: List[int], + nbatches: List[int], + sys_probs: List[float], + pbc: List[bool], +): + """Print summary of systems. + + Parameters + ---------- + name : str + The name of the system + nsystems : int + The number of systems + system_dirs : list of str + The directories of the systems + natoms : list of int + The number of atoms + batch_size : list of int + The batch size + nbatches : list of int + The number of batches + sys_probs : list of float + The probabilities + pbc : list of bool + The periodic boundary conditions + """ + # width 65 + sys_width = 42 + log.info( + f"---Summary of DataSystem: {name:13s}-----------------------------------------------" + ) + log.info("found %d system(s):" % nsystems) + log.info( + ("%s " % _format_name_length("system", sys_width)) + + ("%6s %6s %6s %9s %3s" % ("natoms", "bch_sz", "n_bch", "prob", "pbc")) + ) + for ii in range(nsystems): + log.info( + "%s %6d %6d %6d %9.3e %3s" + % ( + _format_name_length(system_dirs[ii], sys_width), + natoms[ii], + batch_size[ii], + nbatches[ii], + sys_probs[ii], + "T" if pbc[ii] else "F", + ) + ) + log.info( + 
"--------------------------------------------------------------------------------------" + ) + + +def process_sys_probs(sys_probs, nbatch): + sys_probs = np.array(sys_probs) + type_filter = sys_probs >= 0 + assigned_sum_prob = np.sum(type_filter * sys_probs) + # 1e-8 is to handle floating point error; See #1917 + assert ( + assigned_sum_prob <= 1.0 + 1e-8 + ), "the sum of assigned probability should be less than 1" + rest_sum_prob = 1.0 - assigned_sum_prob + if not np.isclose(rest_sum_prob, 0): + rest_nbatch = (1 - type_filter) * nbatch + rest_prob = rest_sum_prob * rest_nbatch / np.sum(rest_nbatch) + ret_prob = rest_prob + type_filter * sys_probs + else: + ret_prob = sys_probs + assert np.isclose(np.sum(ret_prob), 1), "sum of probs should be 1" + return ret_prob + + +def prob_sys_size_ext(keywords, nsystems, nbatch): + block_str = keywords.split(";")[1:] + block_stt = [] + block_end = [] + block_weights = [] + for ii in block_str: + stt = int(ii.split(":")[0]) + end = int(ii.split(":")[1]) + weight = float(ii.split(":")[2]) + assert weight >= 0, "the weight of a block should be no less than 0" + block_stt.append(stt) + block_end.append(end) + block_weights.append(weight) + nblocks = len(block_str) + block_probs = np.array(block_weights) / np.sum(block_weights) + sys_probs = np.zeros([nsystems]) + for ii in range(nblocks): + nbatch_block = nbatch[block_stt[ii] : block_end[ii]] + tmp_prob = [float(i) for i in nbatch_block] / np.sum(nbatch_block) + sys_probs[block_stt[ii] : block_end[ii]] = tmp_prob * block_probs[ii] + return sys_probs + + +def process_systems(systems: Union[str, List[str]]) -> List[str]: + """Process the user-input systems. + + If it is a single directory, search for all the systems in the directory. + Check if the systems are valid. 
+ + Parameters + ---------- + systems : str or list of str + The user-input systems + + Returns + ------- + list of str + The valid systems + """ + if isinstance(systems, str): + systems = expand_sys_str(systems) + elif isinstance(systems, list): + systems = systems.copy() + help_msg = "Please check your setting for data systems" + # check length of systems + if len(systems) == 0: + msg = "cannot find valid a data system" + log.fatal(msg) + raise OSError(msg, help_msg) + # rougly check all items in systems are valid + for ii in systems: + ii = DPPath(ii) + if not ii.is_dir(): + msg = f"dir {ii} is not a valid dir" + log.fatal(msg) + raise OSError(msg, help_msg) + if not (ii / "type.raw").is_file(): + msg = f"dir {ii} is not a valid data system dir" + log.fatal(msg) + raise OSError(msg, help_msg) + return systems + + +def get_data( + jdata: Dict[str, Any], rcut, type_map, modifier, multi_task_mode=False +) -> DeepmdDataSystem: + """Get the data system. + + Parameters + ---------- + jdata + The json data + rcut + The cut-off radius, not used + type_map + The type map + modifier + The data modifier + multi_task_mode + If in multi task mode + + Returns + ------- + DeepmdDataSystem + The data system + """ + systems = j_must_have(jdata, "systems") + systems = process_systems(systems) + + batch_size = j_must_have(jdata, "batch_size") + sys_probs = jdata.get("sys_probs", None) + auto_prob = jdata.get("auto_prob", "prob_sys_size") + optional_type_map = not multi_task_mode + + data = DeepmdDataSystem( + systems=systems, + batch_size=batch_size, + test_size=1, # to satisfy the old api + shuffle_test=True, # to satisfy the old api + rcut=rcut, + type_map=type_map, + optional_type_map=optional_type_map, + modifier=modifier, + trn_all_set=True, # sample from all sets + sys_probs=sys_probs, + auto_prob_style=auto_prob, + ) + data.add_dict(data_requirement) + + return data diff --git a/deepmd/utils/econf_embd.py b/deepmd/utils/econf_embd.py new file mode 100644 index 
0000000000..3940db65ba --- /dev/null +++ b/deepmd/utils/econf_embd.py @@ -0,0 +1,209 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import numpy as np + +try: + import dpdata + from mendeleev import ( + element, + ) +except ImportError: + pass + +### +# made by command +# ret = make_econf_embedding(type_map, flatten=True) +# print_econf_embedding(ret) +### +# fmt: off +electronic_configuration_embedding = \ +{ kk: np.array(vv, dtype=np.int32) for kk,vv in { + "H" : [1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "He" : [2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Li" : [2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Be" : [2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "B" : [2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "C" : [2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "N" : [2,2,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "O" : [2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "F" : [2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Ne" : [2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Na" : [2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Mg" : [2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Al" : 
[2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Si" : [2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "P" : [2,2,2,2,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "S" : [2,2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Cl" : [2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Ar" : [2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "K" : [2,2,2,2,2,2,2,2,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Ca" : [2,2,2,2,2,2,2,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Sc" : [2,2,2,2,2,2,2,2,2,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Ti" : [2,2,2,2,2,2,2,2,2,1,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "V" : [2,2,2,2,2,2,2,2,2,1,1,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Cr" : [2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Mn" : [2,2,2,2,2,2,2,2,2,1,1,1,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Fe" : [2,2,2,2,2,2,2,2,2,2,1,1,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Co" : [2,2,2,2,2,2,2,2,2,2,2,1,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Ni" : [2,2,2,2,2,2,2,2,2,2,2,2,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Cu" : 
[2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Zn" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Ga" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Ge" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "As" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Se" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Br" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Kr" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Rb" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Sr" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Y" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Zr" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Nb" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Mo" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Tc" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Ru" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Rh" : 
[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Pd" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Ag" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Cd" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "In" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Sn" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Sb" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Te" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "I" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Xe" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], + "Cs" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0], + "Ba" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "La" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Ce" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Pr" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,0,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Nd" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,0,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Pm" : 
[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Sm" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Eu" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Gd" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Tb" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Dy" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Ho" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Er" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Tm" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Yb" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Lu" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Hf" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Ta" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "W" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Re" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Os" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Ir" : 
[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Pt" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0], + "Au" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0], + "Hg" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0], + "Tl" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0], + "Pb" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,1,1,0,0,0,0,0,0,0], + "Bi" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,1,1,1,0,0,0,0,0,0], + "Po" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0], + "At" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,1,0,0,0,0,0,0], + "Rn" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,0,0,0,0,0,0], + "Fr" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,0,0,0,0,0,1], + "Ra" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,0,0,0,0,0,2], + "Ac" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,1,0,0,0,0,2], + "Th" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,1,1,0,0,0,2], + "Pa" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0,0,0,2,2,2,2,1,0,0,0,0,2], + "U" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,0,0,0,2,2,2,2,1,0,0,0,0,2], + "Np" : 
[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,0,0,0,2,2,2,2,1,0,0,0,0,2], + "Pu" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,0,2,2,2,2,0,0,0,0,0,2], + "Am" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,2,2,2,2,0,0,0,0,0,2], + "Cm" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,2,2,2,2,1,0,0,0,0,2], + "Bk" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,2,2,2,2,0,0,0,0,0,2], + "Cf" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,2,2,2,2,0,0,0,0,0,2], + "Es" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,2,2,2,2,0,0,0,0,0,2], + "Fm" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,2,2,2,2,0,0,0,0,0,2], + "Md" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,0,0,0,0,0,2], + "No" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,2], + "Lr" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,2], +}.items()} +# fmt: on + +ln_to_lett = { + 0: "s", + 1: "p", + 2: "d", + 3: "f", + 4: "g", +} +lett_to_ln = {vv: kk for kk, vv in ln_to_lett.items()} + +conf_keys = [ + (1, "s"), + (2, "s"), + (2, "p"), + (3, "s"), + (3, "p"), + (3, "d"), + (4, "s"), + (4, "p"), + (4, "d"), + (4, "f"), + (5, "s"), + (5, "p"), + (5, "d"), + (5, "f"), + (6, "s"), + (6, "p"), + (6, "d"), + (7, "s"), +] + +maxn = 7 +maxl = maxn +maxm = 2 * maxl + 1 + +type_map = dpdata.periodic_table.ELEMENTS + + +def make_empty_list_vec(): + ret = {} + for kk in conf_keys: + ll = lett_to_ln[kk[1]] + ret[kk] = np.zeros([2 * ll + 1], dtype=np.int32) + return ret + + +def 
flatten_list_vec(lv): + ret = np.array([], dtype=np.int32) + for kk in conf_keys: + ret = np.append(ret, lv[kk]) + return ret + + +def make_element_embedding_list_vec( + ename: str, +) -> np.ndarray: + """Compute the embedding of one element.""" + ret = make_empty_list_vec() + ele = element(ename) + ec = ele.ec + occ = ec.spin_occupations() + for kk, vv in occ.items(): + assert kk in conf_keys + for ip in range(vv["pairs"]): + ret[kk][ip] = 2 + for iu in range(vv["pairs"], vv["pairs"] + vv["unpaired"]): + ret[kk][iu] = 1 + return ret + + +def make_econf_embedding(types, flatten=True): + all_ret = {} + for ii in types: + ir = make_element_embedding_list_vec(ii) + if flatten: + ir = flatten_list_vec(ir) + all_ret[ii] = ir + return all_ret + + +def print_econf_embedding(res): + for kk, vv in res.items(): + vvstr = ",".join([str(ii) for ii in vv]) + space = " " * (2 - len(kk)) + print(f'"{kk}"{space} : [{vvstr}],') # noqa: T201 diff --git a/deepmd/utils/env_mat_stat.py b/deepmd/utils/env_mat_stat.py new file mode 100644 index 0000000000..217c46844b --- /dev/null +++ b/deepmd/utils/env_mat_stat.py @@ -0,0 +1,218 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging +from abc import ( + ABC, + abstractmethod, +) +from collections import ( + defaultdict, +) +from typing import ( + Dict, + Iterator, + List, + Optional, +) + +import numpy as np + +from deepmd.utils.path import ( + DPPath, +) + +log = logging.getLogger(__name__) + + +class StatItem: + """A class to store the statistics of the environment matrix. + + Parameters + ---------- + number : int + The total size of given array. + sum : float + The sum value of the matrix. + squared_sum : float + The sum squared value of the matrix. 
+ """ + + def __init__(self, number: int = 0, sum: float = 0, squared_sum: float = 0) -> None: + self.number = number + self.sum = sum + self.squared_sum = squared_sum + + def __add__(self, other: "StatItem") -> "StatItem": + return StatItem( + number=self.number + other.number, + sum=self.sum + other.sum, + squared_sum=self.squared_sum + other.squared_sum, + ) + + def compute_avg(self, default: float = 0) -> float: + """Compute the average of the environment matrix. + + Parameters + ---------- + default : float, optional + The default value of the average, by default 0. + + Returns + ------- + float + The average of the environment matrix. + """ + if self.number == 0: + return default + return self.sum / self.number + + def compute_std(self, default: float = 1e-1, protection: float = 1e-2) -> float: + """Compute the standard deviation of the environment matrix. + + Parameters + ---------- + default : float, optional + The default value of the standard deviation, by default 1e-1. + protection : float, optional + The protection value for the standard deviation, by default 1e-2. + + Returns + ------- + float + The standard deviation of the environment matrix. + """ + if self.number == 0: + return default + val = np.sqrt( + self.squared_sum / self.number + - np.multiply(self.sum / self.number, self.sum / self.number) + ) + if np.abs(val) < protection: + val = protection + return val + + +class EnvMatStat(ABC): + """A base class to store and calculate the statistics of the environment matrix.""" + + def __init__(self) -> None: + super().__init__() + self.stats = defaultdict(StatItem) + + def compute_stats(self, data: List[Dict[str, np.ndarray]]) -> None: + """Compute the statistics of the environment matrix. + + Parameters + ---------- + data : List[Dict[str, np.ndarray]] + The environment matrix. 
+ """ + if len(self.stats) > 0: + raise ValueError("The statistics has already been computed.") + for iter_stats in self.iter(data): + for kk in iter_stats: + self.stats[kk] += iter_stats[kk] + + @abstractmethod + def iter(self, data: List[Dict[str, np.ndarray]]) -> Iterator[Dict[str, StatItem]]: + """Get the iterator of the environment matrix. + + Parameters + ---------- + data : List[Dict[str, np.ndarray]] + The environment matrix. + + Yields + ------ + Dict[str, StatItem] + The statistics of the environment matrix. + """ + + def save_stats(self, path: DPPath) -> None: + """Save the statistics of the environment matrix. + + Parameters + ---------- + path : DPH5Path + The path to save the statistics of the environment matrix. + """ + if len(self.stats) == 0: + raise ValueError("The statistics hasn't been computed.") + for kk, vv in self.stats.items(): + path.mkdir(parents=True, exist_ok=True) + (path / kk).save_numpy(np.array([vv.number, vv.sum, vv.squared_sum])) + + def load_stats(self, path: DPPath) -> None: + """Load the statistics of the environment matrix. + + Parameters + ---------- + path : DPH5Path + The path to load the statistics of the environment matrix. + """ + if len(self.stats) > 0: + raise ValueError("The statistics has already been computed.") + for kk in path.glob("*"): + arr = kk.load_numpy() + self.stats[kk.name] = StatItem( + number=arr[0], + sum=arr[1], + squared_sum=arr[2], + ) + + def load_or_compute_stats( + self, data: List[Dict[str, np.ndarray]], path: Optional[DPPath] = None + ) -> None: + """Load the statistics of the environment matrix if it exists, otherwise compute and save it. + + Parameters + ---------- + path : DPH5Path + The path to load the statistics of the environment matrix. + data : List[Dict[str, np.ndarray]] + The environment matrix. 
+ """ + if path is not None and path.is_dir(): + self.load_stats(path) + log.info(f"Load stats from {path}.") + else: + self.compute_stats(data) + if path is not None: + self.save_stats(path) + log.info(f"Save stats to {path}.") + + def get_avg(self, default: float = 0) -> Dict[str, float]: + """Get the average of the environment matrix. + + Parameters + ---------- + default : float, optional + The default value of the average, by default 0. + + Returns + ------- + Dict[str, float] + The average of the environment matrix. + """ + return {kk: vv.compute_avg(default=default) for kk, vv in self.stats.items()} + + def get_std( + self, default: float = 1e-1, protection: float = 1e-2 + ) -> Dict[str, float]: + """Get the standard deviation of the environment matrix. + + Parameters + ---------- + default : float, optional + The default value of the standard deviation, by default 1e-1. + protection : float, optional + The protection value for the standard deviation, by default 1e-2. + + Returns + ------- + Dict[str, float] + The standard deviation of the environment matrix. 
+ """ + return { + kk: vv.compute_std(default=default, protection=protection) + for kk, vv in self.stats.items() + } diff --git a/deepmd/utils/errors.py b/deepmd/utils/errors.py index 683131e48a..11f42ede96 100644 --- a/deepmd/utils/errors.py +++ b/deepmd/utils/errors.py @@ -1,19 +1,3 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from deepmd_utils.utils.errors import ( - OutOfMemoryError, -) - - -class GraphTooLargeError(Exception): - """The graph is too large, exceeding protobuf's hard limit of 2GB.""" - - -class GraphWithoutTensorError(Exception): - pass - - -__all__ = [ - "OutOfMemoryError", - "GraphTooLargeError", - "GraphWithoutTensorError", -] +class OutOfMemoryError(Exception): + """This error is caused by out-of-memory (OOM).""" diff --git a/deepmd/utils/finetune.py b/deepmd/utils/finetune.py index cc6c0224de..1150fe2701 100644 --- a/deepmd/utils/finetune.py +++ b/deepmd/utils/finetune.py @@ -1,111 +1,140 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -import json import logging from typing import ( - Any, - Dict, + TYPE_CHECKING, + List, ) -from deepmd.utils.errors import ( - GraphWithoutTensorError, +import numpy as np + +from deepmd.infer.deep_eval import ( + DeepEval, ) -from deepmd.utils.graph import ( - get_tensor_by_name, +from deepmd.utils.data_system import ( + DeepmdDataSystem, ) +if TYPE_CHECKING: + pass + log = logging.getLogger(__name__) -def replace_model_params_with_pretrained_model( - jdata: Dict[str, Any], pretrained_model: str +def change_energy_bias_lower( + data: DeepmdDataSystem, + dp: DeepEval, + origin_type_map: List[str], + full_type_map: List[str], + bias_atom_e: np.ndarray, + bias_adjust_mode="change-by-statistic", + ntest=10, ): - """Replace the model params in input script according to pretrained model. + """Change the energy bias according to the input data and the pretrained model. 
Parameters ---------- - jdata : Dict[str, Any] - input script - pretrained_model : str - filename of the pretrained model + data : DeepmdDataSystem + The training data. + dp : DeepEval + The DeepEval object. + origin_type_map : list + The original type_map in dataset, they are targets to change the energy bias. + full_type_map : list + The full type_map in pretrained model + bias_atom_e : np.ndarray + The old energy bias in the pretrained model. + bias_adjust_mode : str + The mode for changing energy bias : ['change-by-statistic', 'set-by-statistic'] + 'change-by-statistic' : perform predictions on energies of target dataset, + and do least squares on the errors to obtain the target shift as bias. + 'set-by-statistic' : directly use the statistic energy bias in the target dataset. + ntest : int + The number of test samples in a system to change the energy bias. """ - # Get the input script from the pretrained model - try: - t_jdata = get_tensor_by_name(pretrained_model, "train_attr/training_script") - except GraphWithoutTensorError as e: - raise RuntimeError( - "The input frozen pretrained model: %s has no training script, " - "which is not supported to perform finetuning. " - "Please use the model pretrained with v2.1.5 or higher version of DeePMD-kit." - % input - ) from e - pretrained_jdata = json.loads(t_jdata) - - # Check the model type - assert ( - pretrained_jdata["model"]["descriptor"]["type"] - in [ - "se_atten", - "se_atten_v2", - ] - and pretrained_jdata["model"]["fitting_net"]["type"] in ["ener"] - ), "The finetune process only supports models pretrained with 'se_atten' or 'se_atten_v2' descriptor and 'ener' fitting_net!" - - # Check the type map - pretrained_type_map = pretrained_jdata["model"]["type_map"] - cur_type_map = jdata["model"].get("type_map", []) - out_line_type = [] - for i in cur_type_map: - if i not in pretrained_type_map: - out_line_type.append(i) - assert not out_line_type, ( - f"{out_line_type!s} type(s) not contained in the pretrained model! 
" - "Please choose another suitable one." - ) - if cur_type_map != pretrained_type_map: - log.info( - "Change the type_map from {} to {}.".format( - str(cur_type_map), str(pretrained_type_map) - ) + type_numbs = [] + energy_ground_truth = [] + energy_predict = [] + sorter = np.argsort(full_type_map) + idx_type_map = sorter[ + np.searchsorted(full_type_map, origin_type_map, sorter=sorter) + ] + mixed_type = data.mixed_type + numb_type = len(full_type_map) + for sys in data.data_systems: + test_data = sys.get_test() + nframes = test_data["box"].shape[0] + numb_test = min(nframes, ntest) + if mixed_type: + atype = test_data["type"][:numb_test].reshape([numb_test, -1]) + else: + atype = test_data["type"][0] + assert np.array( + [i in idx_type_map for i in list(set(atype.reshape(-1)))] + ).all(), "Some types are not in 'type_map'!" + energy_ground_truth.append( + test_data["energy"][:numb_test].reshape([numb_test, 1]) ) - jdata["model"]["type_map"] = pretrained_type_map - - # Change model configurations - log.info("Change the model configurations according to the pretrained one...") - for config_key in ["type_embedding", "descriptor", "fitting_net"]: - if ( - config_key not in jdata["model"].keys() - and config_key in pretrained_jdata["model"].keys() - ): - log.info( - "Add the '{}' from pretrained model: {}.".format( - config_key, str(pretrained_jdata["model"][config_key]) + if mixed_type: + type_numbs.append( + np.array( + [(atype == i).sum(axis=-1) for i in idx_type_map], + dtype=np.int32, + ).T + ) + else: + type_numbs.append( + np.tile( + np.bincount(atype, minlength=numb_type)[idx_type_map], + (numb_test, 1), ) ) - jdata["model"][config_key] = pretrained_jdata["model"][config_key] - elif ( - config_key == "type_embedding" - and config_key in jdata["model"].keys() - and config_key not in pretrained_jdata["model"].keys() - ): - # 'type_embedding' can be omitted using 'se_atten' descriptor, and the activation_function will be None. 
- cur_para = jdata["model"].pop(config_key) - if "trainable" in cur_para and not cur_para["trainable"]: - jdata["model"][config_key] = { - "trainable": False, - "activation_function": "None", - } - log.info("The type_embeddings from pretrained model will be frozen.") - elif ( - config_key in jdata["model"].keys() - and config_key in pretrained_jdata["model"].keys() - and jdata["model"][config_key] != pretrained_jdata["model"][config_key] - ): - target_para = pretrained_jdata["model"][config_key] - cur_para = jdata["model"][config_key] - # keep some params that are irrelevant to model structures (need to discuss) TODO - if "trainable" in cur_para.keys(): - target_para["trainable"] = cur_para["trainable"] - log.info(f"Change the '{config_key}' from {cur_para!s} to {target_para!s}.") - jdata["model"][config_key] = target_para - - return jdata, cur_type_map + if bias_adjust_mode == "change-by-statistic": + coord = test_data["coord"][:numb_test].reshape([numb_test, -1]) + if sys.pbc: + box = test_data["box"][:numb_test] + else: + box = None + if dp.get_dim_fparam() > 0: + fparam = test_data["fparam"][:numb_test] + else: + fparam = None + if dp.get_dim_aparam() > 0: + aparam = test_data["aparam"][:numb_test] + else: + aparam = None + ret = dp.eval( + coord, + box, + atype, + mixed_type=mixed_type, + fparam=fparam, + aparam=aparam, + ) + energy_predict.append(ret[0].reshape([numb_test, 1])) + type_numbs = np.concatenate(type_numbs) + energy_ground_truth = np.concatenate(energy_ground_truth) + old_bias = bias_atom_e[idx_type_map] + if bias_adjust_mode == "change-by-statistic": + energy_predict = np.concatenate(energy_predict) + bias_diff = energy_ground_truth - energy_predict + delta_bias = np.linalg.lstsq(type_numbs, bias_diff, rcond=None)[0] + unbias_e = energy_predict + type_numbs @ delta_bias + atom_numbs = type_numbs.sum(-1) + rmse_ae = np.sqrt( + np.mean( + np.square((unbias_e.ravel() - energy_ground_truth.ravel()) / atom_numbs) + ) + ) + bias_atom_e[idx_type_map] += 
delta_bias.reshape(-1) + log.info( + f"RMSE of atomic energy after linear regression is: {rmse_ae} eV/atom." + ) + elif bias_adjust_mode == "set-by-statistic": + statistic_bias = np.linalg.lstsq(type_numbs, energy_ground_truth, rcond=None)[0] + bias_atom_e[idx_type_map] = statistic_bias.reshape(-1) + else: + raise RuntimeError("Unknown bias_adjust_mode mode: " + bias_adjust_mode) + log.info( + f"Change energy bias of {origin_type_map!s} from {old_bias!s} to {bias_atom_e[idx_type_map]!s}." + ) + return bias_atom_e diff --git a/deepmd/utils/hostlist.py b/deepmd/utils/hostlist.py new file mode 100644 index 0000000000..c184b04031 --- /dev/null +++ b/deepmd/utils/hostlist.py @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import socket +from typing import ( + List, + Tuple, +) + + +def get_host_names() -> Tuple[str, List[str]]: + """Get host names of all nodes in the cluster. + + If mpi4py is not installed or MPI is not used, then the + host name of the current node is returned as those of all nodes. 
+ + Returns + ------- + str + Host name of the current node + List[str] + List of host names of all nodes in the cluster + """ + host_name = socket.gethostname() + try: + from mpi4py import ( + MPI, + ) + except ImportError: + return host_name, [host_name] + + comm = MPI.COMM_WORLD + if comm.Get_size() == 1: + return host_name, [host_name] + host_names = comm.allgather(host_name) + return host_name, host_names diff --git a/deepmd_utils/utils/model_stat.py b/deepmd/utils/model_stat.py similarity index 100% rename from deepmd_utils/utils/model_stat.py rename to deepmd/utils/model_stat.py diff --git a/deepmd/utils/neighbor_stat.py b/deepmd/utils/neighbor_stat.py index fa9325937e..34200df007 100644 --- a/deepmd/utils/neighbor_stat.py +++ b/deepmd/utils/neighbor_stat.py @@ -1,41 +1,38 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging import math +from abc import ( + ABC, + abstractmethod, +) from typing import ( - List, + Iterator, Tuple, ) import numpy as np -from deepmd.env import ( - GLOBAL_NP_FLOAT_PRECISION, - default_tf_session_config, - op_module, - tf, -) from deepmd.utils.data_system import ( DeepmdDataSystem, ) -from deepmd.utils.parallel_op import ( - ParallelOp, -) log = logging.getLogger(__name__) -class NeighborStat: - """Class for getting training data information. +class NeighborStat(ABC): + """Abstract base class for getting training data information. - It loads data from DeepmdData object, and measures the data info, including neareest nbor distance between atoms, max nbor size of atoms and the output data range of the environment matrix. + It loads data from DeepmdData object, and measures the data info, including + nearest nbor distance between atoms, max nbor size of atoms and the output + data range of the environment matrix. 
Parameters ---------- - ntypes - The num of atom types - rcut - The cut-off radius - one_type : bool, optional, default=False + ntypes : int + The num of atom types + rcut : float + The cut-off radius + mixed_type : bool, optional, default=False Treat all types as a single type. """ @@ -43,55 +40,13 @@ def __init__( self, ntypes: int, rcut: float, - one_type: bool = False, + mixed_type: bool = False, ) -> None: - """Constructor.""" self.rcut = rcut self.ntypes = ntypes - self.one_type = one_type - sub_graph = tf.Graph() - - def builder(): - place_holders = {} - for ii in ["coord", "box"]: - place_holders[ii] = tf.placeholder( - GLOBAL_NP_FLOAT_PRECISION, [None, None], name="t_" + ii - ) - place_holders["type"] = tf.placeholder( - tf.int32, [None, None], name="t_type" - ) - place_holders["natoms_vec"] = tf.placeholder( - tf.int32, [self.ntypes + 2], name="t_natoms" - ) - place_holders["default_mesh"] = tf.placeholder( - tf.int32, [None], name="t_mesh" - ) - t_type = place_holders["type"] - t_natoms = place_holders["natoms_vec"] - if self.one_type: - # all types = 0, natoms_vec = [natoms, natoms, natoms] - t_type = tf.clip_by_value(t_type, -1, 0) - t_natoms = tf.tile(t_natoms[0:1], [3]) + self.mixed_type = mixed_type - _max_nbor_size, _min_nbor_dist = op_module.neighbor_stat( - place_holders["coord"], - t_type, - t_natoms, - place_holders["box"], - place_holders["default_mesh"], - rcut=self.rcut, - ) - place_holders["dir"] = tf.placeholder(tf.string) - _min_nbor_dist = tf.reduce_min(_min_nbor_dist) - _max_nbor_size = tf.reduce_max(_max_nbor_size, axis=0) - return place_holders, (_max_nbor_size, _min_nbor_dist, place_holders["dir"]) - - with sub_graph.as_default(): - self.p = ParallelOp(builder, config=default_tf_session_config) - - self.sub_sess = tf.Session(graph=sub_graph, config=default_tf_session_config) - - def get_stat(self, data: DeepmdDataSystem) -> Tuple[float, List[int]]: + def get_stat(self, data: DeepmdDataSystem) -> Tuple[float, np.ndarray]: """Get the 
data statistics of the training data, including nearest nbor distance between atoms, max nbor size of atoms. Parameters @@ -104,38 +59,18 @@ def get_stat(self, data: DeepmdDataSystem) -> Tuple[float, List[int]]: min_nbor_dist The nearest distance between neighbor atoms max_nbor_size - A list with ntypes integers, denotes the actual achieved max sel + An array with ntypes integers, denotes the actual achieved max sel """ - self.min_nbor_dist = 100.0 - self.max_nbor_size = [0] - if not self.one_type: - self.max_nbor_size *= self.ntypes + min_nbor_dist = 100.0 + max_nbor_size = np.zeros(1 if self.mixed_type else self.ntypes, dtype=int) - def feed(): - for ii in range(len(data.system_dirs)): - for jj in data.data_systems[ii].dirs: - data_set = data.data_systems[ii]._load_set(jj) - for kk in range(np.array(data_set["type"]).shape[0]): - yield { - "coord": np.array(data_set["coord"])[kk].reshape( - [-1, data.natoms[ii] * 3] - ), - "type": np.array(data_set["type"])[kk].reshape( - [-1, data.natoms[ii]] - ), - "natoms_vec": np.array(data.natoms_vec[ii]), - "box": np.array(data_set["box"])[kk].reshape([-1, 9]), - "default_mesh": np.array(data.default_mesh[ii]), - "dir": str(jj), - } - - for mn, dt, jj in self.p.generate(self.sub_sess, feed()): + for mn, dt, jj in self.iterator(data): if np.isinf(dt): log.warning( "Atoms with no neighbors found in %s. Please make sure it's what you expected." % jj ) - if dt < self.min_nbor_dist: + if dt < min_nbor_dist: if math.isclose(dt, 0.0, rel_tol=1e-6): # it's unexpected that the distance between two atoms is zero # zero distance will cause nan (#874) @@ -143,11 +78,27 @@ def feed(): "Some atoms are overlapping in %s. Please check your" " training data to remove duplicated atoms." 
% jj ) - self.min_nbor_dist = dt - self.max_nbor_size = np.maximum(mn, self.max_nbor_size) + min_nbor_dist = dt + max_nbor_size = np.maximum(mn, max_nbor_size) # do sqrt in the final - self.min_nbor_dist = math.sqrt(self.min_nbor_dist) - log.info("training data with min nbor dist: " + str(self.min_nbor_dist)) - log.info("training data with max nbor size: " + str(self.max_nbor_size)) - return self.min_nbor_dist, self.max_nbor_size + min_nbor_dist = math.sqrt(min_nbor_dist) + log.info("training data with min nbor dist: " + str(min_nbor_dist)) + log.info("training data with max nbor size: " + str(max_nbor_size)) + return min_nbor_dist, max_nbor_size + + @abstractmethod + def iterator( + self, data: DeepmdDataSystem + ) -> Iterator[Tuple[np.ndarray, float, str]]: + """Abstract method for producing data. + + Yields + ------ + mn : np.ndarray + The maximal number of neighbors + dt : float + The squared minimal distance between two atoms + jj : str + The directory of the data system + """ diff --git a/deepmd/utils/out_stat.py b/deepmd/utils/out_stat.py new file mode 100644 index 0000000000..3956dac654 --- /dev/null +++ b/deepmd/utils/out_stat.py @@ -0,0 +1,122 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Output statistics.""" + +from typing import ( + Optional, + Tuple, +) + +import numpy as np + + +def compute_stats_from_redu( + output_redu: np.ndarray, + natoms: np.ndarray, + assigned_bias: Optional[np.ndarray] = None, + rcond: Optional[float] = None, +) -> Tuple[np.ndarray, np.ndarray]: + """Compute the output statistics. + + Given the reduced output value and the number of atoms of each type, + compute the least-squares solution as the atomic output bias and std. + + Parameters + ---------- + output_redu + The reduced output value, shape is [nframes, ndim]. + natoms + The number of atoms of each type, shape is [nframes, ntypes]. + assigned_bias + The assigned output bias, shape is [ntypes, ndim]. Set to nan + if not assigned. 
+ rcond + Cut-off ratio for small singular values of a. + + Returns + ------- + np.ndarray + The computed output bias, shape is [ntypes, ndim]. + np.ndarray + The computed output std, shape is [ndim]. + """ + output_redu = np.array(output_redu) + natoms = np.array(natoms) + # check shape + assert output_redu.ndim == 2 + assert natoms.ndim == 2 + assert output_redu.shape[0] == natoms.shape[0] # nframes + if assigned_bias is not None: + assigned_bias = np.array(assigned_bias).reshape( + natoms.shape[1], output_redu.shape[1] + ) + # compute output bias + if assigned_bias is not None: + # Atomic energies stats are incorrect if atomic energies are assigned. + # In this situation, we directly use these assigned energies instead of computing stats. + # This will make the loss decrease quickly + assigned_bias_atom_mask = ~np.isnan(assigned_bias).any(axis=1) + # assigned_bias_masked: nmask, ndim + assigned_bias_masked = assigned_bias[assigned_bias_atom_mask] + # assigned_bias_natoms: nframes, nmask + assigned_bias_natoms = natoms[:, assigned_bias_atom_mask] + # output_redu: nframes, ndim + output_redu -= np.einsum( + "ij,jk->ik", assigned_bias_natoms, assigned_bias_masked + ) + # remove assigned atom + natoms[:, assigned_bias_atom_mask] = 0 + + # computed_output_bias: ntypes, ndim + computed_output_bias, _, _, _ = np.linalg.lstsq(natoms, output_redu, rcond=rcond) + if assigned_bias is not None: + # add back assigned atom; this might not be required + computed_output_bias[assigned_bias_atom_mask] = assigned_bias_masked + # rest_redu: nframes, ndim + rest_redu = output_redu - np.einsum("ij,jk->ik", natoms, computed_output_bias) + output_std = rest_redu.std(axis=0) + return computed_output_bias, output_std + + +def compute_stats_from_atomic( + output: np.ndarray, + atype: np.ndarray, +) -> Tuple[np.ndarray, np.ndarray]: + """Compute the output statistics. + + Given the output value and the type of atoms, + compute the atomic output bias and std. 
+ + Parameters + ---------- + output + The output value, shape is [nframes, nloc, ndim]. + atype + The type of atoms, shape is [nframes, nloc]. + + Returns + ------- + np.ndarray + The computed output bias, shape is [ntypes, ndim]. + np.ndarray + The computed output std, shape is [ntypes, ndim]. + """ + output = np.array(output) + atype = np.array(atype) + # check shape + assert output.ndim == 3 + assert atype.ndim == 2 + assert output.shape[:2] == atype.shape + # compute output bias + nframes, nloc, ndim = output.shape + ntypes = atype.max() + 1 + output_bias = np.zeros((ntypes, ndim)) + output_std = np.zeros((ntypes, ndim)) + for type_i in range(ntypes): + mask = atype == type_i + output_bias[type_i] = ( + output[mask].mean(axis=0) if output[mask].size > 0 else np.nan + ) + output_std[type_i] = ( + output[mask].std(axis=0) if output[mask].size > 0 else np.nan + ) + return output_bias, output_std diff --git a/deepmd/utils/pair_tab.py b/deepmd/utils/pair_tab.py index 1a526ac5fc..1b397a3cfa 100644 --- a/deepmd/utils/pair_tab.py +++ b/deepmd/utils/pair_tab.py @@ -1,9 +1,279 @@ +#!/usr/bin/env python3 + # SPDX-License-Identifier: LGPL-3.0-or-later -"""Alias for backward compatibility.""" -from deepmd_utils.utils.pair_tab import ( - PairTab, +import logging +from typing import ( + Optional, + Tuple, +) + +import numpy as np +from scipy.interpolate import ( + CubicSpline, +) + +from deepmd.utils.version import ( + check_version_compatibility, ) -__all__ = [ - "PairTab", -] +log = logging.getLogger(__name__) + + +class PairTab: + """Pairwise tabulated potential. + + Parameters + ---------- + filename + File name for the short-range tabulated potential. + The table is a text data file with (N_t + 1) * N_t / 2 + 1 columns. + The first column is the distance between atoms. + The second to the last columns are energies for pairs of certain types. + For example we have two atom types, 0 and 1. + The columns from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly. 
+ """ + + def __init__(self, filename: str, rcut: Optional[float] = None) -> None: + """Constructor.""" + self.reinit(filename, rcut) + + def reinit(self, filename: str, rcut: Optional[float] = None) -> None: + """Initialize the tabulated interaction. + + Parameters + ---------- + filename + File name for the short-range tabulated potential. + The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. + The first colume is the distance between atoms. + The second to the last columes are energies for pairs of certain types. + For example we have two atom types, 0 and 1. + The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly. + """ + if filename is None: + self.tab_info, self.tab_data = None, None + return + self.vdata = np.loadtxt(filename) + self.rmin = self.vdata[0][0] + self.rmax = self.vdata[-1][0] + self.hh = self.vdata[1][0] - self.vdata[0][0] + ncol = self.vdata.shape[1] - 1 + n0 = (-1 + np.sqrt(1 + 8 * ncol)) * 0.5 + self.ntypes = int(n0 + 0.1) + assert self.ntypes * (self.ntypes + 1) // 2 == ncol, ( + "number of volumes provided in %s does not match guessed number of types %d" + % (filename, self.ntypes) + ) + + # check table data against rcut and update tab_file if needed, table upper boundary is used as rcut if not provided. + self.rcut = rcut if rcut is not None else self.rmax + self._check_table_upper_boundary() + self.nspline = ( + self.vdata.shape[0] - 1 + ) # this nspline is updated based on the expanded table. 
+ self.tab_info = np.array([self.rmin, self.hh, self.nspline, self.ntypes]) + self.tab_data = self._make_data() + + def serialize(self) -> dict: + return { + "@class": "PairTab", + "@version": 1, + "rmin": self.rmin, + "rmax": self.rmax, + "hh": self.hh, + "ntypes": self.ntypes, + "rcut": self.rcut, + "nspline": self.nspline, + "@variables": { + "vdata": self.vdata, + "tab_info": self.tab_info, + "tab_data": self.tab_data, + }, + } + + @classmethod + def deserialize(cls, data) -> "PairTab": + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("@class") + variables = data.pop("@variables") + tab = PairTab(None, None) + tab.vdata = variables["vdata"] + tab.rmin = data["rmin"] + tab.rmax = data["rmax"] + tab.hh = data["hh"] + tab.ntypes = data["ntypes"] + tab.rcut = data["rcut"] + tab.nspline = data["nspline"] + tab.tab_info = variables["tab_info"] + tab.tab_data = variables["tab_data"] + return tab + + def _check_table_upper_boundary(self) -> None: + """Update User Provided Table Based on `rcut`. + + This function checks the upper boundary provided in the table against rcut. + If the table upper boundary values decay to zero before rcut, padding zeros will + be added to the table to cover rcut; if the table upper boundary values do not decay to zero + before ruct, extrapolation will be performed till rcut. + + Examples + -------- + table = [[0.005 1. 2. 3. ] + [0.01 0.8 1.6 2.4 ] + [0.015 0. 1. 1.5 ]] + + rcut = 0.022 + + new_table = [[0.005 1. 2. 3. ] + [0.01 0.8 1.6 2.4 ] + [0.015 0. 1. 1.5 ] + [0.02 0. 0. 0. ] + + ---------------------------------------------- + + table = [[0.005 1. 2. 3. ] + [0.01 0.8 1.6 2.4 ] + [0.015 0.5 1. 1.5 ] + [0.02 0.25 0.4 0.75 ] + [0.025 0. 0.1 0. ] + [0.03 0. 0. 0. ]] + + rcut = 0.031 + + new_table = [[0.005 1. 2. 3. ] + [0.01 0.8 1.6 2.4 ] + [0.015 0.5 1. 1.5 ] + [0.02 0.25 0.4 0.75 ] + [0.025 0. 0.1 0. ] + [0.03 0. 0. 0. ] + [0.035 0. 0. 0. 
]] + """ + upper_val = self.vdata[-1][1:] + upper_idx = self.vdata.shape[0] - 1 + self.ncol = self.vdata.shape[1] + + # the index in table for the grid point of rcut, always give the point after rcut. + rcut_idx = int(np.ceil(self.rcut / self.hh - self.rmin / self.hh)) + if np.all(upper_val == 0): + # if table values decay to `0` after rcut + if self.rcut < self.rmax and np.any(self.vdata[rcut_idx - 1][1:] != 0): + log.warning( + "The energy provided in the table does not decay to 0 at rcut." + ) + # if table values decay to `0` at rcut, do nothing + + # if table values decay to `0` before rcut, pad table with `0`s. + elif self.rcut > self.rmax: + pad_zero = np.zeros((rcut_idx - upper_idx, self.ncol)) + pad_zero[:, 0] = np.linspace( + self.rmax + self.hh, + self.rmax + self.hh * (rcut_idx - upper_idx), + rcut_idx - upper_idx, + ) + self.vdata = np.concatenate((self.vdata, pad_zero), axis=0) + else: + # if table values do not decay to `0` at rcut + if self.rcut <= self.rmax: + log.warning( + "The energy provided in the table does not decay to 0 at rcut." + ) + # if rcut goes beyond table upper bond, need extrapolation, ensure values decay to `0` before rcut. + else: + log.warning( + "The rcut goes beyond table upper boundary, performing extrapolation." + ) + pad_extrapolation = np.zeros((rcut_idx - upper_idx, self.ncol)) + + pad_extrapolation[:, 0] = np.linspace( + self.rmax + self.hh, + self.rmax + self.hh * (rcut_idx - upper_idx), + rcut_idx - upper_idx, + ) + # need to calculate table values to fill in with cubic spline + pad_extrapolation = self._extrapolate_table(pad_extrapolation) + + self.vdata = np.concatenate((self.vdata, pad_extrapolation), axis=0) + + def get(self) -> Tuple[np.array, np.array]: + """Get the serialized table.""" + return self.tab_info, self.tab_data + + def _extrapolate_table(self, pad_extrapolation: np.array) -> np.array: + """Soomth extrapolation between table upper boundary and rcut. 
+ + This method should only be used when the table upper boundary `rmax` is smaller than `rcut`, and + the table upper boundary values are not zeros. To simplify the problem, we use a single + cubic spline between `rmax` and `rcut` for each pair of atom types. One can substitute this extrapolation + to higher order polynomials if needed. + + There are two scenarios: + 1. `ruct` - `rmax` >= hh: + Set values at the grid point right before `rcut` to 0, and perform exterapolation between + the grid point and `rmax`, this allows smooth decay to 0 at `rcut`. + 2. `rcut` - `rmax` < hh: + Set values at `rmax + hh` to 0, and perform extrapolation between `rmax` and `rmax + hh`. + + Parameters + ---------- + pad_extrapolation : np.array + The emepty grid that holds the extrapolation values. + + Returns + ------- + np.array + The cubic spline extrapolation. + """ + # in theory we should check if the table has at least two rows. + slope = self.vdata[-1, 1:] - self.vdata[-2, 1:] # shape of (ncol-1, ) + + # for extrapolation, we want values decay to `0` prior to `ruct` if possible + # here we try to find the grid point prior to `rcut` + grid_point = ( + -2 if pad_extrapolation[-1, 0] / self.hh - self.rmax / self.hh >= 2 else -1 + ) + temp_grid = np.stack((self.vdata[-1, :], pad_extrapolation[grid_point, :])) + vv = temp_grid[:, 1:] + xx = temp_grid[:, 0] + cs = CubicSpline(xx, vv, bc_type=((1, slope), (1, np.zeros_like(slope)))) + xx_grid = pad_extrapolation[:, 0] + res = cs(xx_grid) + + pad_extrapolation[:, 1:] = res + + # Note: when doing cubic spline, if we want to ensure values decay to zero prior to `rcut` + # this may cause values be positive post `rcut`, we need to overwrite those values to zero + pad_extrapolation = ( + pad_extrapolation if grid_point == -1 else pad_extrapolation[:-1, :] + ) + return pad_extrapolation + + def _make_data(self): + data = np.zeros([self.ntypes * self.ntypes * 4 * self.nspline]) + stride = 4 * self.nspline + idx_iter = 0 + xx = self.vdata[:, 
0] + for t0 in range(self.ntypes): + for t1 in range(t0, self.ntypes): + vv = self.vdata[:, 1 + idx_iter] + cs = CubicSpline(xx, vv, bc_type="clamped") + dd = cs(xx, 1) + dd *= self.hh + dtmp = np.zeros(stride) + for ii in range(self.nspline): + dtmp[ii * 4 + 0] = 2 * vv[ii] - 2 * vv[ii + 1] + dd[ii] + dd[ii + 1] + dtmp[ii * 4 + 1] = ( + -3 * vv[ii] + 3 * vv[ii + 1] - 2 * dd[ii] - dd[ii + 1] + ) + dtmp[ii * 4 + 2] = dd[ii] + dtmp[ii * 4 + 3] = vv[ii] + data[ + (t0 * self.ntypes + t1) * stride : (t0 * self.ntypes + t1) * stride + + stride + ] = dtmp + data[ + (t1 * self.ntypes + t0) * stride : (t1 * self.ntypes + t0) * stride + + stride + ] = dtmp + idx_iter += 1 + return data diff --git a/deepmd/utils/path.py b/deepmd/utils/path.py index 780bc8cabf..858e31a39d 100644 --- a/deepmd/utils/path.py +++ b/deepmd/utils/path.py @@ -1,13 +1,476 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -"""Alias for backward compatibility.""" -from deepmd_utils.utils.path import ( - DPH5Path, - DPOSPath, - DPPath, -) - -__all__ = [ - "DPPath", - "DPOSPath", - "DPH5Path", -] +import os +from abc import ( + ABC, + abstractmethod, +) +from functools import ( + lru_cache, +) +from pathlib import ( + Path, +) +from typing import ( + ClassVar, + Dict, + List, + Optional, +) + +import h5py +import numpy as np +from wcmatch.glob import ( + globfilter, +) + + +class DPPath(ABC): + """The path class to data system (DeepmdData). + + Parameters + ---------- + path : str + path + mode : str, optional + mode, by default "r" + """ + + def __new__(cls, path: str, mode: str = "r"): + if cls is DPPath: + if os.path.isdir(path): + return super().__new__(DPOSPath) + elif os.path.isfile(path.split("#")[0]): + # assume h5 if it is not dir + return super().__new__(DPH5Path) + raise FileNotFoundError("%s not found" % path) + return super().__new__(cls) + + @abstractmethod + def load_numpy(self) -> np.ndarray: + """Load NumPy array. 
+ + Returns + ------- + np.ndarray + loaded NumPy array + """ + + @abstractmethod + def load_txt(self, **kwargs) -> np.ndarray: + """Load NumPy array from text. + + Returns + ------- + np.ndarray + loaded NumPy array + """ + + @abstractmethod + def save_numpy(self, arr: np.ndarray) -> None: + """Save NumPy array. + + Parameters + ---------- + arr : np.ndarray + NumPy array + """ + + @abstractmethod + def glob(self, pattern: str) -> List["DPPath"]: + """Search path using the glob pattern. + + Parameters + ---------- + pattern : str + glob pattern + + Returns + ------- + List[DPPath] + list of paths + """ + + @abstractmethod + def rglob(self, pattern: str) -> List["DPPath"]: + """This is like calling :meth:`DPPath.glob()` with `**/` added in front + of the given relative pattern. + + Parameters + ---------- + pattern : str + glob pattern + + Returns + ------- + List[DPPath] + list of paths + """ + + @abstractmethod + def is_file(self) -> bool: + """Check if self is file.""" + + @abstractmethod + def is_dir(self) -> bool: + """Check if self is directory.""" + + @abstractmethod + def __truediv__(self, key: str) -> "DPPath": + """Used for / operator.""" + + @abstractmethod + def __lt__(self, other: "DPPath") -> bool: + """Whether this DPPath is less than other for sorting.""" + + @abstractmethod + def __str__(self) -> str: + """Represent string.""" + + def __repr__(self) -> str: + return f"{type(self)} ({self!s})" + + def __eq__(self, other) -> bool: + return str(self) == str(other) + + def __hash__(self): + return hash(str(self)) + + @property + @abstractmethod + def name(self) -> str: + """Name of the path.""" + + @abstractmethod + def mkdir(self, parents: bool = False, exist_ok: bool = False) -> None: + """Make directory. + + Parameters + ---------- + parents : bool, optional + If true, any missing parents of this directory are created as well. + exist_ok : bool, optional + If true, no error will be raised if the target directory already exists. 
+ """ + + +class DPOSPath(DPPath): + """The OS path class to data system (DeepmdData) for real directories. + + Parameters + ---------- + path : str + path + mode : str, optional + mode, by default "r" + """ + + def __init__(self, path: str, mode: str = "r") -> None: + super().__init__() + self.mode = mode + if isinstance(path, Path): + self.path = path + else: + self.path = Path(path) + + def load_numpy(self) -> np.ndarray: + """Load NumPy array. + + Returns + ------- + np.ndarray + loaded NumPy array + """ + return np.load(str(self.path)) + + def load_txt(self, **kwargs) -> np.ndarray: + """Load NumPy array from text. + + Returns + ------- + np.ndarray + loaded NumPy array + """ + return np.loadtxt(str(self.path), **kwargs) + + def save_numpy(self, arr: np.ndarray) -> None: + """Save NumPy array. + + Parameters + ---------- + arr : np.ndarray + NumPy array + """ + if self.mode == "r": + raise ValueError("Cannot save to read-only path") + with self.path.open("wb") as f: + np.save(f, arr) + + def glob(self, pattern: str) -> List["DPPath"]: + """Search path using the glob pattern. + + Parameters + ---------- + pattern : str + glob pattern + + Returns + ------- + List[DPPath] + list of paths + """ + # currently DPOSPath will only derivative DPOSPath + return [type(self)(p, mode=self.mode) for p in self.path.glob(pattern)] + + def rglob(self, pattern: str) -> List["DPPath"]: + """This is like calling :meth:`DPPath.glob()` with `**/` added in front + of the given relative pattern. 
+ + Parameters + ---------- + pattern : str + glob pattern + + Returns + ------- + List[DPPath] + list of paths + """ + return [type(self)(p, mode=self.mode) for p in self.path.rglob(pattern)] + + def is_file(self) -> bool: + """Check if self is file.""" + return self.path.is_file() + + def is_dir(self) -> bool: + """Check if self is directory.""" + return self.path.is_dir() + + def __truediv__(self, key: str) -> "DPPath": + """Used for / operator.""" + return type(self)(self.path / key, mode=self.mode) + + def __lt__(self, other: "DPOSPath") -> bool: + """Whether this DPPath is less than other for sorting.""" + return self.path < other.path + + def __str__(self) -> str: + """Represent string.""" + return str(self.path) + + @property + def name(self) -> str: + """Name of the path.""" + return self.path.name + + def mkdir(self, parents: bool = False, exist_ok: bool = False) -> None: + """Make directory. + + Parameters + ---------- + parents : bool, optional + If true, any missing parents of this directory are created as well. + exist_ok : bool, optional + If true, no error will be raised if the target directory already exists. + """ + if self.mode == "r": + raise ValueError("Cannot mkdir to read-only path") + self.path.mkdir(parents=parents, exist_ok=exist_ok) + + +class DPH5Path(DPPath): + """The path class to data system (DeepmdData) for HDF5 files. + + Notes + ----- + OS - HDF5 relationship: + directory - Group + file - Dataset + + Parameters + ---------- + path : str + path + mode : str, optional + mode, by default "r" + """ + + def __init__(self, path: str, mode: str = "r") -> None: + super().__init__() + self.mode = mode + # we use "#" to split path + # so we do not support file names containing #... 
+ s = path.split("#") + self.root_path = s[0] + self.root = self._load_h5py(s[0], mode) + # h5 path: default is the root path + self._name = s[1] if len(s) > 1 else "/" + + @classmethod + @lru_cache(None) + def _load_h5py(cls, path: str, mode: str = "r") -> h5py.File: + """Load hdf5 file. + + Parameters + ---------- + path : str + path to hdf5 file + mode : str, optional + mode, by default 'r' + """ + # this method has cache to avoid duplicated + # loading from different DPH5Path + # However the file will be never closed? + return h5py.File(path, mode) + + def load_numpy(self) -> np.ndarray: + """Load NumPy array. + + Returns + ------- + np.ndarray + loaded NumPy array + """ + return self.root[self._name][:] + + def load_txt(self, dtype: Optional[np.dtype] = None, **kwargs) -> np.ndarray: + """Load NumPy array from text. + + Returns + ------- + np.ndarray + loaded NumPy array + """ + arr = self.load_numpy() + if dtype: + arr = arr.astype(dtype) + return arr + + def save_numpy(self, arr: np.ndarray) -> None: + """Save NumPy array. + + Parameters + ---------- + arr : np.ndarray + NumPy array + """ + if self._name in self._keys: + del self.root[self._name] + self.root.create_dataset(self._name, data=arr) + self.root.flush() + self._new_keys.append(self._name) + + def glob(self, pattern: str) -> List["DPPath"]: + """Search path using the glob pattern. + + Parameters + ---------- + pattern : str + glob pattern + + Returns + ------- + List[DPPath] + list of paths + """ + # got paths starts with current path first, which is faster + subpaths = [ii for ii in self._keys if ii.startswith(self._name)] + return [ + type(self)(f"{self.root_path}#{pp}", mode=self.mode) + for pp in globfilter(subpaths, self._connect_path(pattern)) + ] + + def rglob(self, pattern: str) -> List["DPPath"]: + """This is like calling :meth:`DPPath.glob()` with `**/` added in front + of the given relative pattern. 
+ + Parameters + ---------- + pattern : str + glob pattern + + Returns + ------- + List[DPPath] + list of paths + """ + return self.glob("**" + pattern) + + @property + def _keys(self) -> List[str]: + """Walk all groups and dataset.""" + return self._file_keys(self.root) + + __file_new_keys: ClassVar[Dict[h5py.File, List[str]]] = {} + + @property + def _new_keys(self): + """New keys that haven't been cached.""" + self.__file_new_keys.setdefault(self.root, []) + return self.__file_new_keys[self.root] + + @classmethod + @lru_cache(None) + def _file_keys(cls, file: h5py.File) -> List[str]: + """Walk all groups and dataset.""" + l = [] + file.visit(lambda x: l.append("/" + x)) + return l + + def is_file(self) -> bool: + """Check if self is file.""" + if self._name not in self._keys and self._name not in self._new_keys: + return False + return isinstance(self.root[self._name], h5py.Dataset) + + def is_dir(self) -> bool: + """Check if self is directory.""" + if self._name == "/": + return True + if self._name not in self._keys and self._name not in self._new_keys: + return False + return isinstance(self.root[self._name], h5py.Group) + + def __truediv__(self, key: str) -> "DPPath": + """Used for / operator.""" + return type(self)(f"{self.root_path}#{self._connect_path(key)}", mode=self.mode) + + def _connect_path(self, path: str) -> str: + """Connect self with path.""" + if self._name.endswith("/"): + return f"{self._name}{path}" + return f"{self._name}/{path}" + + def __lt__(self, other: "DPH5Path") -> bool: + """Whether this DPPath is less than other for sorting.""" + if self.root_path == other.root_path: + return self._name < other._name + return self.root_path < other.root_path + + def __str__(self) -> str: + """Returns path of self.""" + return f"{self.root_path}#{self._name}" + + @property + def name(self) -> str: + """Name of the path.""" + return self._name.split("/")[-1] + + def mkdir(self, parents: bool = False, exist_ok: bool = False) -> None: + """Make 
directory. + + Parameters + ---------- + parents : bool, optional + If true, any missing parents of this directory are created as well. + exist_ok : bool, optional + If true, no error will be raised if the target directory already exists. + """ + if self._name in self._keys: + if not exist_ok: + raise FileExistsError(f"{self} already exists") + return + if parents: + self.root.require_group(self._name) + else: + self.root.create_group(self._name) + self._new_keys.append(self._name) diff --git a/deepmd/utils/plugin.py b/deepmd/utils/plugin.py index 3b5b297304..22f315f63d 100644 --- a/deepmd/utils/plugin.py +++ b/deepmd/utils/plugin.py @@ -1,15 +1,161 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -"""Alias for backward compatibility.""" -from deepmd_utils.utils.plugin import ( - Plugin, - PluginVariant, - VariantABCMeta, - VariantMeta, +"""Base of plugin systems.""" +# copied from https://github.com/deepmodeling/dpdata/blob/a3e76d75de53f6076254de82d18605a010dc3b00/dpdata/plugin.py + +import difflib +from abc import ( + ABCMeta, +) +from typing import ( + Callable, + Dict, + Optional, + Type, ) -__all__ = [ - "Plugin", - "VariantMeta", - "VariantABCMeta", - "PluginVariant", -] + +class Plugin: + """A class to register and restore plugins. + + Attributes + ---------- + plugins : Dict[str, object] + plugins + + Examples + -------- + >>> plugin = Plugin() + >>> @plugin.register("xx") + def xxx(): + pass + >>> print(plugin.plugins["xx"]) + """ + + def __init__(self): + self.plugins = {} + + def __add__(self, other) -> "Plugin": + self.plugins.update(other.plugins) + return self + + def register(self, key: str) -> Callable[[object], object]: + """Register a plugin. + + Parameters + ---------- + key : str + key of the plugin + + Returns + ------- + Callable[[object], object] + decorator + """ + + def decorator(object: object) -> object: + self.plugins[key] = object + return object + + return decorator + + def get_plugin(self, key) -> object: + """Visit a plugin by key. 
+ + Parameters + ---------- + key : str + key of the plugin + + Returns + ------- + object + the plugin + """ + return self.plugins[key] + + +class VariantMeta: + def __call__(cls, *args, **kwargs): + """Remove `type` and keys that starts with underline.""" + obj = cls.__new__(cls, *args, **kwargs) + kwargs.pop("type", None) + to_pop = [] + for kk in kwargs: + if kk[0] == "_": + to_pop.append(kk) + for kk in to_pop: + kwargs.pop(kk, None) + obj.__init__(*args, **kwargs) + return obj + + +class VariantABCMeta(VariantMeta, ABCMeta): + pass + + +class PluginVariant(metaclass=VariantABCMeta): + """A class to remove `type` from input arguments.""" + + pass + + +def make_plugin_registry(name: Optional[str] = None) -> Type[object]: + """Make a plugin registry. + + Parameters + ---------- + name : Optional[str] + the name of the registry for the error message, e.g. descriptor, backend, etc. + + Examples + -------- + >>> class BaseClass(make_plugin_registry()): + pass + """ + if name is None: + name = "class" + + class PR: + __plugins = Plugin() + + @staticmethod + def register(key: str) -> Callable[[object], object]: + """Register a descriptor plugin. + + Parameters + ---------- + key : str + the key of a descriptor + + Returns + ------- + callable[[object], object] + the registered descriptor + + Examples + -------- + >>> @BaseClass.register("some_class") + class SomeClass(BaseClass): + pass + """ + return PR.__plugins.register(key) + + @classmethod + def get_class_by_type(cls, class_type: str) -> Type[object]: + """Get the class by the plugin type.""" + if class_type in PR.__plugins.plugins: + return PR.__plugins.plugins[class_type] + else: + # did you mean + matches = difflib.get_close_matches( + class_type, PR.__plugins.plugins.keys() + ) + dym_message = f"Did you mean: {matches[0]}?" if matches else "" + raise RuntimeError(f"Unknown {name} type: {class_type}. 
{dym_message}") + + @classmethod + def get_plugins(cls) -> Dict[str, Type[object]]: + """Get all the registered plugins.""" + return PR.__plugins.plugins + + return PR diff --git a/deepmd/utils/random.py b/deepmd/utils/random.py index 09547eeac9..44ea6a1dac 100644 --- a/deepmd/utils/random.py +++ b/deepmd/utils/random.py @@ -1,15 +1,82 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -"""Alias for backward compatibility.""" -from deepmd_utils.utils.random import ( - choice, - random, - seed, - shuffle, +from typing import ( + Optional, + Tuple, + Union, ) -__all__ = [ - "choice", - "random", - "seed", - "shuffle", -] +import numpy as np + +_RANDOM_GENERATOR = np.random.RandomState() + + +def choice( + a: Union[np.ndarray, int], + size: Optional[Union[int, Tuple[int, ...]]] = None, + replace: bool = True, + p: Optional[np.ndarray] = None, +): + """Generates a random sample from a given 1-D array. + + Parameters + ---------- + a : 1-D array-like or int + If an ndarray, a random sample is generated from its elements. If an int, + the random sample is generated as if it were np.arange(a) + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., (m, n, k), then m * n * k samples + are drawn. Default is None, in which case a single value is returned. + replace : boolean, optional + Whether the sample is with or without replacement. Default is True, meaning + that a value of a can be selected multiple times. + p : 1-D array-like, optional + The probabilities associated with each entry in a. If not given, the sample + assumes a uniform distribution over all entries in a. + + Returns + ------- + np.ndarray + arrays with results and their shapes + """ + return _RANDOM_GENERATOR.choice(a, size=size, replace=replace, p=p) + + +def random(size=None): + """Return random floats in the half-open interval [0.0, 1.0). + + Parameters + ---------- + size + Output shape. + + Returns + ------- + np.ndarray + Arrays with results and their shapes. 
+ """ + return _RANDOM_GENERATOR.random_sample(size) + + +def seed(val: Optional[int] = None): + """Seed the generator. + + Parameters + ---------- + val : int + Seed. + """ + _RANDOM_GENERATOR.seed(val) + + +def shuffle(x: np.ndarray): + """Modify a sequence in-place by shuffling its contents. + + Parameters + ---------- + x : np.ndarray + The array or list to be shuffled. + """ + _RANDOM_GENERATOR.shuffle(x) + + +__all__ = ["choice", "random", "seed", "shuffle"] diff --git a/deepmd/utils/spin.py b/deepmd/utils/spin.py index 7820627649..38e8da48da 100644 --- a/deepmd/utils/spin.py +++ b/deepmd/utils/spin.py @@ -1,87 +1,199 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import copy from typing import ( List, - Optional, + Tuple, + Union, ) -from deepmd.env import ( - GLOBAL_TF_FLOAT_PRECISION, - tf, -) +import numpy as np class Spin: - """Class for spin. + """Class for spin, mainly processes the spin type-related information. + Atom types can be split into three kinds: + 1. Real types: real atom species, "Fe", "H", "O", etc. + 2. Spin types: atom species with spin, as virtual atoms in input, "Fe_spin", etc. + 3. Placeholder types: atom species without spin, as placeholders in input without contribution, + also name "H_spin", "O_spin", etc. + For any types in 2. or 3., the type index is `ntypes` plus index of its corresponding real type. Parameters ---------- - use_spin - Whether to use atomic spin model for each atom type - spin_norm - The magnitude of atomic spin for each atom type with spin - virtual_len - The distance between virtual atom representing spin and its corresponding real atom for each atom type with spin + use_spin: List[bool] + A list of boolean values indicating whether to use atomic spin for each atom type. + True for spin and False for not. List of bool values with shape of [ntypes]. 
+ virtual_scale: List[float], float + The scaling factor to determine the virtual distance + between a virtual atom representing spin and its corresponding real atom + for each atom type with spin. This factor is defined as the virtual distance + divided by the magnitude of atomic spin for each atom type with spin. + The virtual coordinate is defined as the real coordinate plus spin * virtual_scale. + List of float values with shape of [ntypes] or [ntypes_spin] or one single float value for all types, + only used when use_spin is True for each atom type. """ def __init__( self, - use_spin: Optional[List[bool]] = None, - spin_norm: Optional[List[float]] = None, - virtual_len: Optional[List[float]] = None, + use_spin: List[bool], + virtual_scale: Union[List[float], float], ) -> None: - """Constructor.""" - self.use_spin = use_spin - self.spin_norm = spin_norm - self.virtual_len = virtual_len - self.ntypes_spin = self.use_spin.count(True) + self.ntypes_real = len(use_spin) + self.ntypes_spin = use_spin.count(True) + self.use_spin = np.array(use_spin) + self.spin_mask = self.use_spin.astype(np.int64) + self.ntypes_real_and_spin = self.ntypes_real + self.ntypes_spin + self.ntypes_placeholder = self.ntypes_real - self.ntypes_spin + self.ntypes_input = 2 * self.ntypes_real # with placeholder for input types + self.real_type = np.arange(self.ntypes_real) + self.spin_type = np.arange(self.ntypes_real)[self.use_spin] + self.ntypes_real + self.real_and_spin_type = np.concatenate([self.real_type, self.spin_type]) + self.placeholder_type = ( + np.arange(self.ntypes_real)[~self.use_spin] + self.ntypes_real + ) + self.spin_placeholder_type = np.arange(self.ntypes_real) + self.ntypes_real + self.input_type = np.arange(self.ntypes_real * 2) + if isinstance(virtual_scale, list): + if len(virtual_scale) == self.ntypes_real: + self.virtual_scale = virtual_scale + elif len(virtual_scale) == self.ntypes_spin: + self.virtual_scale = np.zeros(self.ntypes_real) + 
self.virtual_scale[self.use_spin] = virtual_scale + else: + raise ValueError( + f"Invalid length of virtual_scale for spin atoms" + f": Expected {self.ntypes_real} or { self.ntypes_spin} but got {len(virtual_scale)}!" + ) + elif isinstance(virtual_scale, float): + self.virtual_scale = [virtual_scale for _ in range(self.ntypes_real)] + else: + raise ValueError(f"Invalid virtual scale type: {type(virtual_scale)}") + self.virtual_scale = np.array(self.virtual_scale) + self.virtual_scale_mask = (self.virtual_scale * self.use_spin).reshape([-1]) + self.pair_exclude_types = [] + self.init_pair_exclude_types_placeholder() + self.atom_exclude_types_ps = [] + self.init_atom_exclude_types_placeholder_spin() + self.atom_exclude_types_p = [] + self.init_atom_exclude_types_placeholder() - def build( - self, - reuse=None, - suffix="", - ): - """Build the computational graph for the spin. - - Parameters - ---------- - reuse - The weights in the networks should be reused when get the variable. - suffix - Name suffix to identify this descriptor - - Returns - ------- - embedded_types - The computational graph for embedded types - """ - name = "spin_attr" + suffix - with tf.variable_scope(name, reuse=reuse): - t_ntypes_spin = tf.constant( - self.ntypes_spin, name="ntypes_spin", dtype=tf.int32 - ) - t_virtual_len = tf.constant( - self.virtual_len, - name="virtual_len", - dtype=GLOBAL_TF_FLOAT_PRECISION, - ) - t_spin_norm = tf.constant( - self.spin_norm, - name="spin_norm", - dtype=GLOBAL_TF_FLOAT_PRECISION, - ) + def get_ntypes_real(self) -> int: + """Returns the number of real atom types.""" + return self.ntypes_real def get_ntypes_spin(self) -> int: """Returns the number of atom types which contain spin.""" return self.ntypes_spin + def get_ntypes_real_and_spin(self) -> int: + """Returns the number of real atom types and types which contain spin.""" + return self.ntypes_real_and_spin + + def get_ntypes_input(self) -> int: + """Returns the number of double real atom types for input 
placeholder.""" + return self.ntypes_input + def get_use_spin(self) -> List[bool]: """Returns the list of whether to use spin for each atom type.""" return self.use_spin - def get_spin_norm(self) -> List[float]: + def get_virtual_scale(self) -> np.ndarray: """Returns the list of magnitude of atomic spin for each atom type.""" - return self.spin_norm + return self.virtual_scale + + def init_pair_exclude_types_placeholder(self) -> None: + """ + Initialize the pair-wise exclusion types for descriptor. + The placeholder types for those without spin are excluded. + """ + ti_grid, tj_grid = np.meshgrid( + self.placeholder_type, self.input_type, indexing="ij" + ) + self.pair_exclude_types = ( + np.stack((ti_grid, tj_grid), axis=-1).reshape(-1, 2).tolist() + ) + + def init_atom_exclude_types_placeholder_spin(self) -> None: + """ + Initialize the atom-wise exclusion types for fitting. + Both the placeholder types and spin types are excluded. + """ + self.atom_exclude_types_ps = self.spin_placeholder_type.tolist() + + def init_atom_exclude_types_placeholder(self) -> None: + """ + Initialize the atom-wise exclusion types for fitting. + The placeholder types for those without spin are excluded. + """ + self.atom_exclude_types_p = self.placeholder_type.tolist() + + def get_pair_exclude_types(self, exclude_types=None) -> List[Tuple[int, int]]: + """ + Return the pair-wise exclusion types for descriptor. + The placeholder types for those without spin are excluded. + """ + if exclude_types is None: + return self.pair_exclude_types + else: + _exclude_types: List[Tuple[int, int]] = copy.deepcopy( + self.pair_exclude_types + ) + for tt in exclude_types: + assert len(tt) == 2 + _exclude_types.append((tt[0], tt[1])) + return _exclude_types + + def get_atom_exclude_types(self, exclude_types=None) -> List[int]: + """ + Return the atom-wise exclusion types for fitting before out_def. + Both the placeholder types and spin types are excluded. 
+ """ + if exclude_types is None: + return self.atom_exclude_types_ps + else: + _exclude_types: List[int] = copy.deepcopy(self.atom_exclude_types_ps) + _exclude_types += exclude_types + _exclude_types = list(set(_exclude_types)) + return _exclude_types + + def get_atom_exclude_types_placeholder(self, exclude_types=None) -> List[int]: + """ + Return the atom-wise exclusion types for fitting after out_def. + The placeholder types for those without spin are excluded. + """ + if exclude_types is None: + return self.atom_exclude_types_p + else: + _exclude_types: List[int] = copy.deepcopy(self.atom_exclude_types_p) + _exclude_types += exclude_types + _exclude_types = list(set(_exclude_types)) + return _exclude_types + + def get_spin_mask(self): + """ + Return the spin mask of shape [ntypes], + with spin types being 1, and non-spin types being 0. + """ + return self.spin_mask + + def get_virtual_scale_mask(self): + """ + Return the virtual scale mask of shape [ntypes], + with spin types being its virtual scale, and non-spin types being 0. 
+ """ + return self.virtual_scale_mask + + def serialize( + self, + ) -> dict: + return { + "use_spin": self.use_spin.tolist(), + "virtual_scale": self.virtual_scale.tolist(), + } - def get_virtual_len(self) -> List[float]: - """Returns the list of distance between real atom and virtual atom for each atom type.""" - return self.virtual_len + @classmethod + def deserialize( + cls, + data: dict, + ) -> "Spin": + return cls(**data) diff --git a/deepmd/utils/summary.py b/deepmd/utils/summary.py new file mode 100644 index 0000000000..e2118bf7e0 --- /dev/null +++ b/deepmd/utils/summary.py @@ -0,0 +1,127 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging +import os +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + ClassVar, +) + +import deepmd +from deepmd.env import ( + GLOBAL_CONFIG, + get_default_nthreads, + global_float_prec, +) +from deepmd.utils.hostlist import ( + get_host_names, +) + +log = logging.getLogger(__name__) + + +class SummaryPrinter(ABC): + """Base summary printer. + + Backends should inherit from this class and implement the abstract methods. + """ + + # http://patorjk.com/software/taag. Font:Big" + WELCOME = ( + r" _____ _____ __ __ _____ _ _ _ ", + r"| __ \ | __ \ | \/ || __ \ | | (_)| | ", + r"| | | | ___ ___ | |__) || \ / || | | | ______ | | __ _ | |_ ", + r"| | | | / _ \ / _ \| ___/ | |\/| || | | ||______|| |/ /| || __|", + r"| |__| || __/| __/| | | | | || |__| | | < | || |_ ", + r"|_____/ \___| \___||_| |_| |_||_____/ |_|\_\|_| \__|", + ) + + CITATION = ( + "Please read and cite:", + "Wang, Zhang, Han and E, Comput.Phys.Comm. 228, 178-184 (2018)", + "Zeng et al, J. Chem. 
Phys., 159, 054801 (2023)", + "See https://deepmd.rtfd.io/credits/ for details.", + ) + + BUILD: ClassVar = { + "installed to": "\n".join(deepmd.__path__), + "source": GLOBAL_CONFIG["git_summ"], + "source brach": GLOBAL_CONFIG["git_branch"], + "source commit": GLOBAL_CONFIG["git_hash"], + "source commit at": GLOBAL_CONFIG["git_date"], + "use float prec": global_float_prec, + "build variant": GLOBAL_CONFIG["dp_variant"], + } + + def __call__(self): + """Print build and current running cluster configuration summary.""" + nodename, nodelist = get_host_names() + build_info = self.BUILD.copy() + build_info.update(self.get_backend_info()) + if len(nodelist) > 1: + build_info.update( + { + "world size": str(len(nodelist)), + "node list": ", ".join(set(nodelist)), + } + ) + build_info.update( + { + "running on": nodename, + "computing device": self.get_compute_device(), + } + ) + if self.is_built_with_cuda(): + env_value = os.environ.get("CUDA_VISIBLE_DEVICES", "unset") + build_info["CUDA_VISIBLE_DEVICES"] = env_value + if self.is_built_with_rocm(): + env_value = os.environ.get("HIP_VISIBLE_DEVICES", "unset") + build_info["HIP_VISIBLE_DEVICES"] = env_value + if self.is_built_with_cuda() or self.is_built_with_rocm(): + build_info["Count of visible GPUs"] = str(self.get_ngpus()) + + intra, inter = get_default_nthreads() + build_info.update( + { + "num_intra_threads": str(intra), + "num_inter_threads": str(inter), + } + ) + # count the maximum characters in the keys and values + max_key_len = max(len(k) for k in build_info) + 2 + max_val_len = max( + len(x) for v in build_info.values() for x in str(v).split("\n") + ) + # print the summary + for line in self.WELCOME + self.CITATION: + log.info(line) + log.info("-" * (max_key_len + max_val_len)) + for kk, vv in build_info.items(): + for iline, vline in enumerate(str(vv).split("\n")): + if iline == 0: + log.info(f"{kk + ': ':<{max_key_len}}{vline}") + else: + log.info(f"{'':<{max_key_len}}{vline}") + log.info("-" * (max_key_len 
+ max_val_len)) + + @abstractmethod + def is_built_with_cuda(self) -> bool: + """Check if the backend is built with CUDA.""" + + @abstractmethod + def is_built_with_rocm(self) -> bool: + """Check if the backend is built with ROCm.""" + + @abstractmethod + def get_compute_device(self) -> str: + """Get Compute device.""" + + @abstractmethod + def get_ngpus(self) -> int: + """Get the number of GPUs.""" + + def get_backend_info(self) -> dict: + """Get backend information.""" + return {} diff --git a/deepmd/utils/update_sel.py b/deepmd/utils/update_sel.py new file mode 100644 index 0000000000..d1be8e8138 --- /dev/null +++ b/deepmd/utils/update_sel.py @@ -0,0 +1,170 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging +from abc import ( + abstractmethod, +) +from typing import ( + Type, +) + +from deepmd.utils.data_system import ( + get_data, +) +from deepmd.utils.neighbor_stat import ( + NeighborStat, +) + +log = logging.getLogger(__name__) + + +class BaseUpdateSel: + """Update the sel field in the descriptor.""" + + def update_one_sel( + self, + jdata, + descriptor, + mixed_type: bool = False, + rcut_key="rcut", + sel_key="sel", + ): + rcut = descriptor[rcut_key] + tmp_sel = self.get_sel( + jdata, + rcut, + mixed_type=mixed_type, + ) + sel = descriptor[sel_key] + if isinstance(sel, int): + # convert to list and finnally convert back to int + sel = [sel] + if self.parse_auto_sel(descriptor[sel_key]): + ratio = self.parse_auto_sel_ratio(descriptor[sel_key]) + descriptor[sel_key] = sel = [ + int(self.wrap_up_4(ii * ratio)) for ii in tmp_sel + ] + else: + # sel is set by user + for ii, (tt, dd) in enumerate(zip(tmp_sel, sel)): + if dd and tt > dd: + # we may skip warning for sel=0, where the user is likely + # to exclude such type in the descriptor + log.warning( + "sel of type %d is not enough! The expected value is " + "not less than %d, but you set it to %d. The accuracy" + " of your model may get worse." 
% (ii, tt, dd) + ) + if mixed_type: + descriptor[sel_key] = sum(sel) + return descriptor + + def parse_auto_sel(self, sel): + if not isinstance(sel, str): + return False + words = sel.split(":") + if words[0] == "auto": + return True + else: + return False + + def parse_auto_sel_ratio(self, sel): + if not self.parse_auto_sel(sel): + raise RuntimeError(f"invalid auto sel format {sel}") + else: + words = sel.split(":") + if len(words) == 1: + ratio = 1.1 + elif len(words) == 2: + ratio = float(words[1]) + else: + raise RuntimeError(f"invalid auto sel format {sel}") + return ratio + + def wrap_up_4(self, xx): + return 4 * ((int(xx) + 3) // 4) + + def get_sel(self, jdata, rcut, mixed_type: bool = False): + _, max_nbor_size = self.get_nbor_stat(jdata, rcut, mixed_type=mixed_type) + return max_nbor_size + + def get_rcut(self, jdata): + if jdata["model"].get("type") == "pairwise_dprc": + return max( + jdata["model"]["qm_model"]["descriptor"]["rcut"], + jdata["model"]["qmmm_model"]["descriptor"]["rcut"], + ) + descrpt_data = jdata["model"]["descriptor"] + rcut_list = [] + if descrpt_data["type"] == "hybrid": + for ii in descrpt_data["list"]: + rcut_list.append(ii["rcut"]) + else: + rcut_list.append(descrpt_data["rcut"]) + return max(rcut_list) + + def get_type_map(self, jdata): + return jdata["model"].get("type_map", None) + + def get_nbor_stat(self, jdata, rcut, mixed_type: bool = False): + # it seems that DeepmdDataSystem does not need rcut + # it's not clear why there is an argument... + # max_rcut = get_rcut(jdata) + max_rcut = rcut + type_map = self.get_type_map(jdata) + + if type_map and len(type_map) == 0: + type_map = None + multi_task_mode = "data_dict" in jdata["training"] + if not multi_task_mode: + train_data = get_data( + jdata["training"]["training_data"], max_rcut, type_map, None + ) + train_data.get_batch() + else: + assert ( + type_map is not None + ), "Data stat in multi-task mode must have available type_map! 
" + train_data = None + for systems in jdata["training"]["data_dict"]: + tmp_data = get_data( + jdata["training"]["data_dict"][systems]["training_data"], + max_rcut, + type_map, + None, + ) + tmp_data.get_batch() + assert tmp_data.get_type_map(), f"In multi-task mode, 'type_map.raw' must be defined in data systems {systems}! " + if train_data is None: + train_data = tmp_data + else: + train_data.system_dirs += tmp_data.system_dirs + train_data.data_systems += tmp_data.data_systems + train_data.natoms += tmp_data.natoms + train_data.natoms_vec += tmp_data.natoms_vec + train_data.default_mesh += tmp_data.default_mesh + data_ntypes = train_data.get_ntypes() + if type_map is not None: + map_ntypes = len(type_map) + else: + map_ntypes = data_ntypes + ntypes = max([map_ntypes, data_ntypes]) + + neistat = self.neighbor_stat(ntypes, rcut, mixed_type=mixed_type) + + min_nbor_dist, max_nbor_size = neistat.get_stat(train_data) + self.hook(min_nbor_dist, max_nbor_size) + + return min_nbor_dist, max_nbor_size + + @property + @abstractmethod + def neighbor_stat(self) -> Type[NeighborStat]: + pass + + @abstractmethod + def hook(self, min_nbor_dist, max_nbor_size): + pass + + def get_min_nbor_dist(self, jdata, rcut): + min_nbor_dist, _ = self.get_nbor_stat(jdata, rcut) + return min_nbor_dist diff --git a/deepmd/utils/version.py b/deepmd/utils/version.py new file mode 100644 index 0000000000..a0b479778d --- /dev/null +++ b/deepmd/utils/version.py @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +def check_version_compatibility( + current_version: int, + maximum_supported_version: int, + minimal_supported_version: int = 1, +): + """Check if the current version is compatible with the supported versions. + + Parameters + ---------- + current_version : int + The current version. + maximum_supported_version : int + The maximum supported version. + minimal_supported_version : int, optional + The minimal supported version. Default is 1. 
+ + Raises + ------ + ValueError + If the current version is not compatible with the supported versions. + """ + if not minimal_supported_version <= current_version <= maximum_supported_version: + raise ValueError( + f"Current version {current_version} is not compatible with supported versions " + f"[{minimal_supported_version}, {maximum_supported_version}]." + ) diff --git a/deepmd/utils/weight_avg.py b/deepmd/utils/weight_avg.py index 267f89ed28..b344d3bb75 100644 --- a/deepmd/utils/weight_avg.py +++ b/deepmd/utils/weight_avg.py @@ -1,9 +1,48 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -"""Alias for backward compatibility.""" -from deepmd_utils.utils.weight_avg import ( - weighted_average, +from collections import ( + defaultdict, ) +from typing import ( + Dict, + List, + Tuple, +) + +import numpy as np + + +def weighted_average(errors: List[Dict[str, Tuple[float, float]]]) -> Dict: + """Compute wighted average of prediction errors (MAE or RMSE) for model. + + Parameters + ---------- + errors : List[Dict[str, Tuple[float, float]]] + List: the error of systems + Dict: the error of quantities, name given by the key + str: the name of the quantity, must starts with 'mae' or 'rmse' + Tuple: (error, weight) -__all__ = [ - "weighted_average", -] + Returns + ------- + Dict + weighted averages + """ + sum_err = defaultdict(float) + sum_siz = defaultdict(int) + for err in errors: + for kk, (ee, ss) in err.items(): + if kk.startswith("mae"): + sum_err[kk] += ee * ss + elif kk.startswith("rmse"): + sum_err[kk] += ee * ee * ss + else: + raise RuntimeError("unknown error type") + sum_siz[kk] += ss + for kk in sum_err.keys(): + if kk.startswith("mae"): + sum_err[kk] = sum_err[kk] / sum_siz[kk] + elif kk.startswith("rmse"): + sum_err[kk] = np.sqrt(sum_err[kk] / sum_siz[kk]) + else: + raise RuntimeError("unknown error type") + return sum_err diff --git a/deepmd_utils/__init__.py b/deepmd_utils/__init__.py deleted file mode 100644 index 1c5314bb7e..0000000000 --- 
a/deepmd_utils/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -"""Untilization methods for DeePMD-kit. - -The __init__ module should not import any modules -for performance. -""" diff --git a/deepmd_utils/common.py b/deepmd_utils/common.py deleted file mode 100644 index b594c54030..0000000000 --- a/deepmd_utils/common.py +++ /dev/null @@ -1,270 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -import json -import warnings -from pathlib import ( - Path, -) -from typing import ( - TYPE_CHECKING, - Any, - Dict, - List, - Optional, - TypeVar, - Union, -) - -try: - from typing import Literal # python >=3.8 -except ImportError: - from typing_extensions import Literal # type: ignore - -import numpy as np -import yaml - -from deepmd_utils.env import ( - GLOBAL_NP_FLOAT_PRECISION, -) -from deepmd_utils.utils.path import ( - DPPath, -) - -__all__ = [ - "data_requirement", - "add_data_requirement", - "select_idx_map", - "make_default_mesh", - "j_must_have", - "j_loader", - "expand_sys_str", - "get_np_precision", -] - - -if TYPE_CHECKING: - _DICT_VAL = TypeVar("_DICT_VAL") - _PRECISION = Literal["default", "float16", "float32", "float64"] - _ACTIVATION = Literal[ - "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu", "gelu_tf" - ] - __all__.extend( - [ - "_DICT_VAL", - "_PRECISION", - "_ACTIVATION", - ] - ) - - -# TODO this is not a good way to do things. This is some global variable to which -# TODO anyone can write and there is no good way to keep track of the changes -data_requirement = {} - - -def add_data_requirement( - key: str, - ndof: int, - atomic: bool = False, - must: bool = False, - high_prec: bool = False, - type_sel: Optional[bool] = None, - repeat: int = 1, - default: float = 0.0, - dtype: Optional[np.dtype] = None, -): - """Specify data requirements for training. - - Parameters - ---------- - key : str - type of data stored in corresponding `*.npy` file e.g. 
`forces` or `energy` - ndof : int - number of the degrees of freedom, this is tied to `atomic` parameter e.g. forces - have `atomic=True` and `ndof=3` - atomic : bool, optional - specifies whwther the `ndof` keyworrd applies to per atom quantity or not, - by default False - must : bool, optional - specifi if the `*.npy` data file must exist, by default False - high_prec : bool, optional - if true load data to `np.float64` else `np.float32`, by default False - type_sel : bool, optional - select only certain type of atoms, by default None - repeat : int, optional - if specify repaeat data `repeat` times, by default 1 - default : float, optional, default=0. - default value of data - dtype : np.dtype, optional - the dtype of data, overwrites `high_prec` if provided - """ - data_requirement[key] = { - "ndof": ndof, - "atomic": atomic, - "must": must, - "high_prec": high_prec, - "type_sel": type_sel, - "repeat": repeat, - "default": default, - "dtype": dtype, - } - - -def select_idx_map(atom_types: np.ndarray, select_types: np.ndarray) -> np.ndarray: - """Build map of indices for element supplied element types from all atoms list. - - Parameters - ---------- - atom_types : np.ndarray - array specifing type for each atoms as integer - select_types : np.ndarray - types of atoms you want to find indices for - - Returns - ------- - np.ndarray - indices of types of atoms defined by `select_types` in `atom_types` array - - Warnings - -------- - `select_types` array will be sorted before finding indices in `atom_types` - """ - sort_select_types = np.sort(select_types) - idx_map = [] - for ii in sort_select_types: - idx_map.append(np.where(atom_types == ii)[0]) - return np.concatenate(idx_map) - - -def make_default_mesh(pbc: bool, mixed_type: bool) -> np.ndarray: - """Make mesh. 
- - Only the size of mesh matters, not the values: - * 6 for PBC, no mixed types - * 0 for no PBC, no mixed types - * 7 for PBC, mixed types - * 1 for no PBC, mixed types - - Parameters - ---------- - pbc : bool - if True, the mesh will be made for periodic boundary conditions - mixed_type : bool - if True, the mesh will be made for mixed types - - Returns - ------- - np.ndarray - mesh - """ - mesh_size = int(pbc) * 6 + int(mixed_type) - default_mesh = np.zeros(mesh_size, dtype=np.int32) - return default_mesh - - -# TODO maybe rename this to j_deprecated and only warn about deprecated keys, -# TODO if the deprecated_key argument is left empty function puppose is only custom -# TODO error since dict[key] already raises KeyError when the key is missing -def j_must_have( - jdata: Dict[str, "_DICT_VAL"], key: str, deprecated_key: List[str] = [] -) -> "_DICT_VAL": - """Assert that supplied dictionary conaines specified key. - - Returns - ------- - _DICT_VAL - value that was store unde supplied key - - Raises - ------ - RuntimeError - if the key is not present - """ - if key not in jdata.keys(): - for ii in deprecated_key: - if ii in jdata.keys(): - warnings.warn(f"the key {ii} is deprecated, please use {key} instead") - return jdata[ii] - else: - raise RuntimeError(f"json database must provide key {key}") - else: - return jdata[key] - - -def j_loader(filename: Union[str, Path]) -> Dict[str, Any]: - """Load yaml or json settings file. 
- - Parameters - ---------- - filename : Union[str, Path] - path to file - - Returns - ------- - Dict[str, Any] - loaded dictionary - - Raises - ------ - TypeError - if the supplied file is of unsupported type - """ - filepath = Path(filename) - if filepath.suffix.endswith("json"): - with filepath.open() as fp: - return json.load(fp) - elif filepath.suffix.endswith(("yml", "yaml")): - with filepath.open() as fp: - return yaml.safe_load(fp) - else: - raise TypeError("config file must be json, or yaml/yml") - - -# TODO port completely to pathlib when all callers are ported -def expand_sys_str(root_dir: Union[str, Path]) -> List[str]: - """Recursively iterate over directories taking those that contain `type.raw` file. - - Parameters - ---------- - root_dir : Union[str, Path] - starting directory - - Returns - ------- - List[str] - list of string pointing to system directories - """ - root_dir = DPPath(root_dir) - matches = [str(d) for d in root_dir.rglob("*") if (d / "type.raw").is_file()] - if (root_dir / "type.raw").is_file(): - matches.append(str(root_dir)) - return matches - - -def get_np_precision(precision: "_PRECISION") -> np.dtype: - """Get numpy precision constant from string. 
- - Parameters - ---------- - precision : _PRECISION - string name of numpy constant or default - - Returns - ------- - np.dtype - numpy presicion constant - - Raises - ------ - RuntimeError - if string is invalid - """ - if precision == "default": - return GLOBAL_NP_FLOAT_PRECISION - elif precision == "float16": - return np.float16 - elif precision == "float32": - return np.float32 - elif precision == "float64": - return np.float64 - else: - raise RuntimeError(f"{precision} is not a valid precision") diff --git a/deepmd_utils/entrypoints/doc.py b/deepmd_utils/entrypoints/doc.py deleted file mode 100644 index 9f1fd39095..0000000000 --- a/deepmd_utils/entrypoints/doc.py +++ /dev/null @@ -1,20 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -"""Module that prints train input arguments docstrings.""" - -from deepmd_utils.utils.argcheck import ( - gen_doc, - gen_json, -) - -__all__ = ["doc_train_input"] - - -def doc_train_input(*, out_type: str = "rst", **kwargs): - """Print out trining input arguments to console.""" - if out_type == "rst": - doc_str = gen_doc(make_anchor=True) - elif out_type == "json": - doc_str = gen_json() - else: - raise RuntimeError("Unsupported out type %s" % out_type) - print(doc_str) diff --git a/deepmd_utils/entrypoints/gui.py b/deepmd_utils/entrypoints/gui.py deleted file mode 100644 index 8b6b9e0a09..0000000000 --- a/deepmd_utils/entrypoints/gui.py +++ /dev/null @@ -1,31 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -"""DP-GUI entrypoint.""" - - -def start_dpgui(*, port: int, bind_all: bool, **kwargs): - """Host DP-GUI server. - - Parameters - ---------- - port : int - The port to serve DP-GUI on. - bind_all : bool - Serve on all public interfaces. This will expose your DP-GUI instance - to the network on both IPv4 and IPv6 (where available). 
- **kwargs - additional arguments - - Raises - ------ - ModuleNotFoundError - The dpgui package is not installed - """ - try: - from dpgui import ( - start_dpgui, - ) - except ModuleNotFoundError as e: - raise ModuleNotFoundError( - "To use DP-GUI, please install the dpgui package:\npip install dpgui" - ) from e - start_dpgui(port=port, bind_all=bind_all) diff --git a/deepmd_utils/env.py b/deepmd_utils/env.py deleted file mode 100644 index b1d4958ed8..0000000000 --- a/deepmd_utils/env.py +++ /dev/null @@ -1,28 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -import os - -import numpy as np - -__all__ = [ - "GLOBAL_NP_FLOAT_PRECISION", - "GLOBAL_ENER_FLOAT_PRECISION", - "global_float_prec", -] - -# FLOAT_PREC -dp_float_prec = os.environ.get("DP_INTERFACE_PREC", "high").lower() -if dp_float_prec in ("high", ""): - # default is high - GLOBAL_NP_FLOAT_PRECISION = np.float64 - GLOBAL_ENER_FLOAT_PRECISION = np.float64 - global_float_prec = "double" -elif dp_float_prec == "low": - GLOBAL_NP_FLOAT_PRECISION = np.float32 - GLOBAL_ENER_FLOAT_PRECISION = np.float64 - global_float_prec = "float" -else: - raise RuntimeError( - "Unsupported float precision option: %s. Supported: high," - "low. Please set precision with environmental variable " - "DP_INTERFACE_PREC." 
% dp_float_prec - ) diff --git a/deepmd_utils/loggers/loggers.py b/deepmd_utils/loggers/loggers.py deleted file mode 100644 index 015581f6bd..0000000000 --- a/deepmd_utils/loggers/loggers.py +++ /dev/null @@ -1,277 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -"""Logger initialization for package.""" - -import logging -import os -from typing import ( - TYPE_CHECKING, - Optional, -) - -if TYPE_CHECKING: - from pathlib import ( - Path, - ) - - from mpi4py import ( - MPI, - ) - - _MPI_APPEND_MODE = MPI.MODE_CREATE | MPI.MODE_APPEND - -logging.getLogger(__name__) - -__all__ = ["set_log_handles"] - -# logger formater -FFORMATTER = logging.Formatter( - "[%(asctime)s] %(app_name)s %(levelname)-7s %(name)-45s %(message)s" -) -CFORMATTER = logging.Formatter( - # "%(app_name)s %(levelname)-7s |-> %(name)-45s %(message)s" - "%(app_name)s %(levelname)-7s %(message)s" -) -FFORMATTER_MPI = logging.Formatter( - "[%(asctime)s] %(app_name)s rank:%(rank)-2s %(levelname)-7s %(name)-45s %(message)s" -) -CFORMATTER_MPI = logging.Formatter( - # "%(app_name)s rank:%(rank)-2s %(levelname)-7s |-> %(name)-45s %(message)s" - "%(app_name)s rank:%(rank)-2s %(levelname)-7s %(message)s" -) - - -class _AppFilter(logging.Filter): - """Add field `app_name` to log messages.""" - - def filter(self, record): - record.app_name = "DEEPMD" - return True - - -class _MPIRankFilter(logging.Filter): - """Add MPI rank number to log messages, adds field `rank`.""" - - def __init__(self, rank: int) -> None: - super().__init__(name="MPI_rank_id") - self.mpi_rank = str(rank) - - def filter(self, record): - record.rank = self.mpi_rank - return True - - -class _MPIMasterFilter(logging.Filter): - """Filter that lets through only messages emited from rank==0.""" - - def __init__(self, rank: int) -> None: - super().__init__(name="MPI_master_log") - self.mpi_rank = rank - - def filter(self, record): - if self.mpi_rank == 0: - return True - else: - return False - - -class _MPIFileStream: - """Wrap MPI.File` so 
it has the same API as python file streams. - - Parameters - ---------- - filename : Path - disk location of the file stream - MPI : MPI - MPI communicator object - mode : str, optional - file write mode, by default _MPI_APPEND_MODE - """ - - def __init__( - self, filename: "Path", MPI: "MPI", mode: str = "_MPI_APPEND_MODE" - ) -> None: - self.stream = MPI.File.Open(MPI.COMM_WORLD, filename, mode) - self.stream.Set_atomicity(True) - self.name = "MPIfilestream" - - def write(self, msg: str): - """Write to MPI shared file stream. - - Parameters - ---------- - msg : str - message to write - """ - b = bytearray() - b.extend(map(ord, msg)) - self.stream.Write_shared(b) - - def close(self): - """Synchronize and close MPI file stream.""" - self.stream.Sync() - self.stream.Close() - - -class _MPIHandler(logging.FileHandler): - """Emulate `logging.FileHandler` with MPI shared File that all ranks can write to. - - Parameters - ---------- - filename : Path - file path - MPI : MPI - MPI communicator object - mode : str, optional - file access mode, by default "_MPI_APPEND_MODE" - """ - - def __init__( - self, - filename: "Path", - MPI: "MPI", - mode: str = "_MPI_APPEND_MODE", - ) -> None: - self.MPI = MPI - super().__init__(filename, mode=mode, encoding=None, delay=False) - - def _open(self): - return _MPIFileStream(self.baseFilename, self.MPI, self.mode) - - def setStream(self, stream): - """Stream canot be reasigned in MPI mode.""" - raise NotImplementedError("Unable to do for MPI file handler!") - - -def set_log_handles( - level: int, log_path: Optional["Path"] = None, mpi_log: Optional[str] = None -): - """Set desired level for package loggers and add file handlers. - - Parameters - ---------- - level : int - logging level - log_path : Optional[str] - path to log file, if None logs will be send only to console. If the parent - directory does not exist it will be automatically created, by default None - mpi_log : Optional[str], optional - mpi log type. Has three options. 
`master` will output logs to file and console - only from rank==0. `collect` will write messages from all ranks to one file - opened under rank==0 and to console. `workers` will open one log file for each - worker designated by its rank, console behaviour is the same as for `collect`. - If this argument is specified, package 'mpi4py' must be already installed. - by default None - - Raises - ------ - RuntimeError - If the argument `mpi_log` is specified, package `mpi4py` is not installed. - - References - ---------- - https://groups.google.com/g/mpi4py/c/SaNzc8bdj6U - https://stackoverflow.com/questions/35869137/avoid-tensorflow-print-on-standard-error - https://stackoverflow.com/questions/56085015/suppress-openmp-debug-messages-when-running-tensorflow-on-cpu - - Notes - ----- - Logging levels: - - +---------+--------------+----------------+----------------+----------------+ - | | our notation | python logging | tensorflow cpp | OpenMP | - +=========+==============+================+================+================+ - | debug | 10 | 10 | 0 | 1/on/true/yes | - +---------+--------------+----------------+----------------+----------------+ - | info | 20 | 20 | 1 | 0/off/false/no | - +---------+--------------+----------------+----------------+----------------+ - | warning | 30 | 30 | 2 | 0/off/false/no | - +---------+--------------+----------------+----------------+----------------+ - | error | 40 | 40 | 3 | 0/off/false/no | - +---------+--------------+----------------+----------------+----------------+ - - """ - # silence logging for OpenMP when running on CPU if level is any other than debug - if level <= 10: - os.environ["KMP_WARNINGS"] = "FALSE" - - # set TF cpp internal logging level - os.environ["TF_CPP_MIN_LOG_LEVEL"] = str(int((level / 10) - 1)) - - # get root logger - root_log = logging.getLogger("deepmd") - root_log.propagate = False - - root_log.setLevel(level) - - # check if arguments are present - MPI = None - if mpi_log: - try: - from mpi4py import ( - MPI, 
- ) - except ImportError as e: - raise RuntimeError( - "You cannot specify 'mpi_log' when mpi4py not installed" - ) from e - - # * add console handler ************************************************************ - ch = logging.StreamHandler() - if MPI: - rank = MPI.COMM_WORLD.Get_rank() - if mpi_log == "master": - ch.setFormatter(CFORMATTER) - ch.addFilter(_MPIMasterFilter(rank)) - else: - ch.setFormatter(CFORMATTER_MPI) - ch.addFilter(_MPIRankFilter(rank)) - else: - ch.setFormatter(CFORMATTER) - - ch.setLevel(level) - ch.addFilter(_AppFilter()) - # clean old handlers before adding new one - root_log.handlers.clear() - root_log.addHandler(ch) - - # * add file handler *************************************************************** - if log_path: - # create directory - log_path.parent.mkdir(exist_ok=True, parents=True) - - fh = None - - if mpi_log == "master": - rank = MPI.COMM_WORLD.Get_rank() - if rank == 0: - fh = logging.FileHandler(log_path, mode="w") - fh.addFilter(_MPIMasterFilter(rank)) - fh.setFormatter(FFORMATTER) - elif mpi_log == "collect": - rank = MPI.COMM_WORLD.Get_rank() - fh = _MPIHandler(log_path, MPI, mode=MPI.MODE_WRONLY | MPI.MODE_CREATE) - fh.addFilter(_MPIRankFilter(rank)) - fh.setFormatter(FFORMATTER_MPI) - elif mpi_log == "workers": - rank = MPI.COMM_WORLD.Get_rank() - # if file has suffix than inser rank number before suffix - # e.g deepmd.log -> deepmd_.log - # if no suffix is present, insert rank as suffix - # e.g. deepmdlog -> deepmdlog. 
- if log_path.suffix: - worker_log = (log_path.parent / f"{log_path.stem}_{rank}").with_suffix( - log_path.suffix - ) - else: - worker_log = log_path.with_suffix(f".{rank}") - - fh = logging.FileHandler(worker_log, mode="w") - fh.setFormatter(FFORMATTER) - else: - fh = logging.FileHandler(log_path, mode="w") - fh.setFormatter(FFORMATTER) - - if fh: - fh.setLevel(level) - fh.addFilter(_AppFilter()) - root_log.addHandler(fh) diff --git a/deepmd_utils/model_format/common.py b/deepmd_utils/model_format/common.py deleted file mode 100644 index d032e5d5df..0000000000 --- a/deepmd_utils/model_format/common.py +++ /dev/null @@ -1,28 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -from abc import ( - ABC, -) - -import numpy as np - -PRECISION_DICT = { - "float16": np.float16, - "float32": np.float32, - "float64": np.float64, - "half": np.float16, - "single": np.float32, - "double": np.float64, -} -DEFAULT_PRECISION = "float64" - - -class NativeOP(ABC): - """The unit operation of a native model.""" - - def call(self, *args, **kwargs): - """Forward pass in NumPy implementation.""" - raise NotImplementedError - - def __call__(self, *args, **kwargs): - """Forward pass in NumPy implementation.""" - return self.call(*args, **kwargs) diff --git a/deepmd_utils/model_format/output_def.py b/deepmd_utils/model_format/output_def.py deleted file mode 100644 index 268dc21ea6..0000000000 --- a/deepmd_utils/model_format/output_def.py +++ /dev/null @@ -1,281 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - Dict, - List, - Tuple, -) - - -def check_shape( - shape: List[int], - def_shape: List[int], -): - """Check if the shape satisfies the defined shape.""" - assert len(shape) == len(def_shape) - if def_shape[-1] == -1: - if list(shape[:-1]) != def_shape[:-1]: - raise ValueError(f"{shape[:-1]} shape not matching def {def_shape[:-1]}") - else: - if list(shape) != def_shape: - raise ValueError(f"{shape} shape not matching def {def_shape}") - - -def 
check_var(var, var_def): - if var_def.atomic: - # var.shape == [nf, nloc, *var_def.shape] - if len(var.shape) != len(var_def.shape) + 2: - raise ValueError(f"{var.shape[2:]} length not matching def {var_def.shape}") - check_shape(list(var.shape[2:]), var_def.shape) - else: - # var.shape == [nf, *var_def.shape] - if len(var.shape) != len(var_def.shape) + 1: - raise ValueError(f"{var.shape[1:]} length not matching def {var_def.shape}") - check_shape(list(var.shape[1:]), var_def.shape) - - -def model_check_output(cls): - """Check if the output of the Model is consistent with the definition. - - Two methods are assumed to be provided by the Model: - 1. Model.output_def that gives the output definition. - 2. Model.__call__ that defines the forward path of the model. - - """ - - class wrapper(cls): - def __init__( - self, - *args, - **kwargs, - ): - super().__init__(*args, **kwargs) - self.md = self.output_def() - - def __call__( - self, - *args, - **kwargs, - ): - ret = cls.__call__(self, *args, **kwargs) - for kk in self.md.keys_outp(): - dd = self.md[kk] - check_var(ret[kk], dd) - if dd.reduciable: - rk = get_reduce_name(kk) - check_var(ret[rk], self.md[rk]) - if dd.differentiable: - dnr, dnc = get_deriv_name(kk) - check_var(ret[dnr], self.md[dnr]) - check_var(ret[dnc], self.md[dnc]) - return ret - - return wrapper - - -def fitting_check_output(cls): - """Check if the output of the Fitting is consistent with the definition. - - Two methods are assumed to be provided by the Fitting: - 1. Fitting.output_def that gives the output definition. - 2. Fitting.__call__ defines the forward path of the fitting. 
- - """ - - class wrapper(cls): - def __init__( - self, - *args, - **kwargs, - ): - super().__init__(*args, **kwargs) - self.md = self.output_def() - - def __call__( - self, - *args, - **kwargs, - ): - ret = cls.__call__(self, *args, **kwargs) - for kk in self.md.keys(): - dd = self.md[kk] - check_var(ret[kk], dd) - return ret - - return wrapper - - -class OutputVariableDef: - """Defines the shape and other properties of the one output variable. - - It is assume that the fitting network output variables for each - local atom. This class defines one output variable, including its - name, shape, reducibility and differentiability. - - Parameters - ---------- - name - Name of the output variable. Notice that the xxxx_redu, - xxxx_derv_c, xxxx_derv_r are reserved names that should - not be used to define variables. - shape - The shape of the variable. e.g. energy should be [1], - dipole should be [3], polarizabilty should be [3,3]. - reduciable - If the variable is reduced. - differentiable - If the variable is differentiated with respect to coordinates - of atoms and cell tensor (pbc case). Only reduciable variable - are differentiable. - - """ - - def __init__( - self, - name: str, - shape: List[int], - reduciable: bool = False, - differentiable: bool = False, - atomic: bool = True, - ): - self.name = name - self.shape = list(shape) - self.atomic = atomic - self.reduciable = reduciable - self.differentiable = differentiable - if not self.reduciable and self.differentiable: - raise ValueError("only reduciable variable are differentiable") - - -class FittingOutputDef: - """Defines the shapes and other properties of the fitting network outputs. - - It is assume that the fitting network output variables for each - local atom. This class defines all the outputs. - - Parameters - ---------- - var_defs - List of output variable definitions. 
- - """ - - def __init__( - self, - var_defs: List[OutputVariableDef], - ): - self.var_defs = {vv.name: vv for vv in var_defs} - - def __getitem__( - self, - key: str, - ) -> OutputVariableDef: - return self.var_defs[key] - - def get_data(self) -> Dict[str, OutputVariableDef]: - return self.var_defs - - def keys(self): - return self.var_defs.keys() - - -class ModelOutputDef: - """Defines the shapes and other properties of the model outputs. - - The model reduce and differentiate fitting outputs if applicable. - If a variable is named by foo, then the reduced variable is called - foo_redu, the derivative w.r.t. coordinates is called foo_derv_r - and the derivative w.r.t. cell is called foo_derv_c. - - Parameters - ---------- - fit_defs - Definition for the fitting net output - - """ - - def __init__( - self, - fit_defs: FittingOutputDef, - ): - self.def_outp = fit_defs - self.def_redu = do_reduce(self.def_outp) - self.def_derv_r, self.def_derv_c = do_derivative(self.def_outp) - self.var_defs: Dict[str, OutputVariableDef] = {} - for ii in [ - self.def_outp.get_data(), - self.def_redu, - self.def_derv_c, - self.def_derv_r, - ]: - self.var_defs.update(ii) - - def __getitem__( - self, - key: str, - ) -> OutputVariableDef: - return self.var_defs[key] - - def get_data( - self, - key: str, - ) -> Dict[str, OutputVariableDef]: - return self.var_defs - - def keys(self): - return self.var_defs.keys() - - def keys_outp(self): - return self.def_outp.keys() - - def keys_redu(self): - return self.def_redu.keys() - - def keys_derv_r(self): - return self.def_derv_r.keys() - - def keys_derv_c(self): - return self.def_derv_c.keys() - - -def get_reduce_name(name: str) -> str: - return name + "_redu" - - -def get_deriv_name(name: str) -> Tuple[str, str]: - return name + "_derv_r", name + "_derv_c" - - -def do_reduce( - def_outp: FittingOutputDef, -) -> Dict[str, OutputVariableDef]: - def_redu: Dict[str, OutputVariableDef] = {} - for kk, vv in def_outp.get_data().items(): - if 
vv.reduciable: - rk = get_reduce_name(kk) - def_redu[rk] = OutputVariableDef( - rk, vv.shape, reduciable=False, differentiable=False, atomic=False - ) - return def_redu - - -def do_derivative( - def_outp: FittingOutputDef, -) -> Tuple[Dict[str, OutputVariableDef], Dict[str, OutputVariableDef]]: - def_derv_r: Dict[str, OutputVariableDef] = {} - def_derv_c: Dict[str, OutputVariableDef] = {} - for kk, vv in def_outp.get_data().items(): - if vv.differentiable: - rkr, rkc = get_deriv_name(kk) - def_derv_r[rkr] = OutputVariableDef( - rkr, - vv.shape + [3], # noqa: RUF005 - reduciable=False, - differentiable=False, - ) - def_derv_c[rkc] = OutputVariableDef( - rkc, - vv.shape + [3, 3], # noqa: RUF005 - reduciable=True, - differentiable=False, - ) - return def_derv_r, def_derv_c diff --git a/deepmd_utils/utils/__init__.py b/deepmd_utils/utils/__init__.py deleted file mode 100644 index bac6924ac1..0000000000 --- a/deepmd_utils/utils/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -# For performance, do not add things to this file -# import submodules instead diff --git a/deepmd_utils/utils/argcheck.py b/deepmd_utils/utils/argcheck.py deleted file mode 100644 index 6c51a7b859..0000000000 --- a/deepmd_utils/utils/argcheck.py +++ /dev/null @@ -1,2028 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -import json -import logging -from typing import ( - Callable, - List, - Optional, -) - -from dargs import ( - Argument, - ArgumentEncoder, - Variant, - dargs, -) - -from deepmd.common import ( - ACTIVATION_FN_DICT, - PRECISION_DICT, -) -from deepmd_utils.utils.argcheck_nvnmd import ( - nvnmd_args, -) -from deepmd_utils.utils.plugin import ( - Plugin, -) - -log = logging.getLogger(__name__) - - -def list_to_doc(xx): - items = [] - for ii in xx: - if len(items) == 0: - items.append(f'"{ii}"') - else: - items.append(f', "{ii}"') - items.append(".") - return "".join(items) - - -def make_link(content, ref_key): - return ( - f"`{content} 
<{ref_key}_>`_" - if not dargs.RAW_ANCHOR - else f"`{content} <#{ref_key}>`_" - ) - - -def type_embedding_args(): - doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." - doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' - doc_seed = "Random seed for parameter initialization" - doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' - doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." - doc_trainable = "If the parameters in the embedding net are trainable" - - return [ - Argument("neuron", List[int], optional=True, default=[8], doc=doc_neuron), - Argument( - "activation_function", - str, - optional=True, - default="tanh", - doc=doc_activation_function, - ), - Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt), - Argument("precision", str, optional=True, default="default", doc=doc_precision), - Argument("trainable", bool, optional=True, default=True, doc=doc_trainable), - Argument("seed", [int, None], optional=True, default=None, doc=doc_seed), - ] - - -def spin_args(): - doc_use_spin = "Whether to use atomic spin model for each atom type" - doc_spin_norm = "The magnitude of atomic spin for each atom type with spin" - doc_virtual_len = "The distance between virtual atom representing spin and its corresponding real atom for each atom type with spin" - - return [ - Argument("use_spin", List[bool], doc=doc_use_spin), - Argument("spin_norm", List[float], doc=doc_spin_norm), - Argument("virtual_len", 
List[float], doc=doc_virtual_len), - ] - - -# --- Descriptor configurations: --- # - - -class ArgsPlugin: - def __init__(self) -> None: - self.__plugin = Plugin() - - def register( - self, name: str, alias: Optional[List[str]] = None - ) -> Callable[[], List[Argument]]: - """Register a descriptor argument plugin. - - Parameters - ---------- - name : str - the name of a descriptor - alias : List[str], optional - the list of aliases of this descriptor - - Returns - ------- - Callable[[], List[Argument]] - the registered descriptor argument method - - Examples - -------- - >>> some_plugin = ArgsPlugin() - >>> @some_plugin.register("some_descrpt") - def descrpt_some_descrpt_args(): - return [] - """ - # convert alias to hashed item - if isinstance(alias, list): - alias = tuple(alias) - return self.__plugin.register((name, alias)) - - def get_all_argument(self, exclude_hybrid: bool = False) -> List[Argument]: - """Get all arguments. - - Parameters - ---------- - exclude_hybrid : bool - exclude hybrid descriptor to prevent circular calls - - Returns - ------- - List[Argument] - all arguments - """ - arguments = [] - for (name, alias), metd in self.__plugin.plugins.items(): - if exclude_hybrid and name == "hybrid": - continue - arguments.append( - Argument(name=name, dtype=dict, sub_fields=metd(), alias=alias) - ) - return arguments - - -descrpt_args_plugin = ArgsPlugin() - - -@descrpt_args_plugin.register("loc_frame") -def descrpt_local_frame_args(): - doc_sel_a = "A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_a[i]` gives the selected number of type-i neighbors. The full relative coordinates of the neighbors are used by the descriptor." - doc_sel_r = "A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_r[i]` gives the selected number of type-i neighbors. Only relative distance of the neighbors are used by the descriptor. 
sel_a[i] + sel_r[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius." - doc_rcut = "The cut-off radius. The default value is 6.0" - doc_axis_rule = "A list of integers. The length should be 6 times of the number of types. \n\n\ -- axis_rule[i*6+0]: class of the atom defining the first axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\ -- axis_rule[i*6+1]: type of the atom defining the first axis of type-i atom.\n\n\ -- axis_rule[i*6+2]: index of the axis atom defining the first axis. Note that the neighbors with the same class and type are sorted according to their relative distance.\n\n\ -- axis_rule[i*6+3]: class of the atom defining the second axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\ -- axis_rule[i*6+4]: type of the atom defining the second axis of type-i atom.\n\n\ -- axis_rule[i*6+5]: index of the axis atom defining the second axis. Note that the neighbors with the same class and type are sorted according to their relative distance." - - return [ - Argument("sel_a", List[int], optional=False, doc=doc_sel_a), - Argument("sel_r", List[int], optional=False, doc=doc_sel_r), - Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), - Argument("axis_rule", List[int], optional=False, doc=doc_axis_rule), - ] - - -@descrpt_args_plugin.register("se_e2_a", alias=["se_a"]) -def descrpt_se_a_args(): - doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\ - - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - - `str`. 
Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' - doc_rcut = "The cut-off radius." - doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" - doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." - doc_axis_neuron = "Size of the submatrix of G (embedding matrix)." - doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' - doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' - doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." - doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." - doc_trainable = "If the parameters in the embedding net is trainable" - doc_seed = "Random seed for parameter initialization" - doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." 
- doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used" - - return [ - Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel), - Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), - Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth), - Argument( - "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron - ), - Argument( - "axis_neuron", - int, - optional=True, - default=4, - alias=["n_axis_neuron"], - doc=doc_axis_neuron, - ), - Argument( - "activation_function", - str, - optional=True, - default="tanh", - doc=doc_activation_function, - ), - Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt), - Argument( - "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side - ), - Argument("precision", str, optional=True, default="default", doc=doc_precision), - Argument("trainable", bool, optional=True, default=True, doc=doc_trainable), - Argument("seed", [int, None], optional=True, doc=doc_seed), - Argument( - "exclude_types", - List[List[int]], - optional=True, - default=[], - doc=doc_exclude_types, - ), - Argument( - "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero - ), - ] - - -@descrpt_args_plugin.register("se_e3", alias=["se_at", "se_a_3be", "se_t"]) -def descrpt_se_t_args(): - doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\ - - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. 
This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' - doc_rcut = "The cut-off radius." - doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" - doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." - doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' - doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' - doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." - doc_trainable = "If the parameters in the embedding net are trainable" - doc_seed = "Random seed for parameter initialization" - doc_set_davg_zero = "Set the normalization average to zero. 
This option should be set when `atom_ener` in the energy fitting is used" - - return [ - Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel), - Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), - Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth), - Argument( - "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron - ), - Argument( - "activation_function", - str, - optional=True, - default="tanh", - doc=doc_activation_function, - ), - Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt), - Argument("precision", str, optional=True, default="default", doc=doc_precision), - Argument("trainable", bool, optional=True, default=True, doc=doc_trainable), - Argument("seed", [int, None], optional=True, doc=doc_seed), - Argument( - "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero - ), - ] - - -@descrpt_args_plugin.register("se_a_tpe", alias=["se_a_ebd"]) -def descrpt_se_a_tpe_args(): - doc_type_nchanl = "number of channels for type embedding" - doc_type_nlayer = "number of hidden layers of type embedding net" - doc_numb_aparam = "dimension of atomic parameter. if set to a value > 0, the atomic parameters are embedded." - - return [ - *descrpt_se_a_args(), - Argument("type_nchanl", int, optional=True, default=4, doc=doc_type_nchanl), - Argument("type_nlayer", int, optional=True, default=2, doc=doc_type_nlayer), - Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), - ] - - -@descrpt_args_plugin.register("se_e2_r", alias=["se_r"]) -def descrpt_se_r_args(): - doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\ - - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. 
`sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' - doc_rcut = "The cut-off radius." - doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" - doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." - doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' - doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' - doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." - doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." 
- doc_trainable = "If the parameters in the embedding net are trainable" - doc_seed = "Random seed for parameter initialization" - doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." - doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used" - - return [ - Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel), - Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), - Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth), - Argument( - "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron - ), - Argument( - "activation_function", - str, - optional=True, - default="tanh", - doc=doc_activation_function, - ), - Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt), - Argument( - "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side - ), - Argument("precision", str, optional=True, default="default", doc=doc_precision), - Argument("trainable", bool, optional=True, default=True, doc=doc_trainable), - Argument("seed", [int, None], optional=True, doc=doc_seed), - Argument( - "exclude_types", - List[List[int]], - optional=True, - default=[], - doc=doc_exclude_types, - ), - Argument( - "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero - ), - ] - - -@descrpt_args_plugin.register("hybrid") -def descrpt_hybrid_args(): - doc_list = "A list of descriptor definitions" - - return [ - Argument( - "list", - list, - optional=False, - doc=doc_list, - repeat=True, - sub_fields=[], - sub_variants=[descrpt_variant_type_args(exclude_hybrid=True)], - fold_subdoc=True, - ) - ] - - -def descrpt_se_atten_common_args(): - doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. 
Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\ - - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' - doc_rcut = "The cut-off radius." - doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" - doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." - doc_axis_neuron = "Size of the submatrix of G (embedding matrix)." - doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' - doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' - doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. 
Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." - doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." - doc_trainable = "If the parameters in the embedding net is trainable" - doc_seed = "Random seed for parameter initialization" - doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." - doc_attn = "The length of hidden vectors in attention layers" - doc_attn_layer = "The number of attention layers. Note that model compression of `se_atten` is only enabled when attn_layer==0 and stripped_type_embedding is True" - doc_attn_dotr = "Whether to do dot product with the normalized relative coordinates" - doc_attn_mask = "Whether to do mask on the diagonal in the attention matrix" - - return [ - Argument( - "sel", [int, List[int], str], optional=True, default="auto", doc=doc_sel - ), - Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), - Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth), - Argument( - "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron - ), - Argument( - "axis_neuron", - int, - optional=True, - default=4, - alias=["n_axis_neuron"], - doc=doc_axis_neuron, - ), - Argument( - "activation_function", - str, - optional=True, - default="tanh", - doc=doc_activation_function, - ), - Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt), - Argument( - "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side - ), - Argument("precision", str, optional=True, default="default", doc=doc_precision), - Argument("trainable", bool, optional=True, default=True, doc=doc_trainable), - Argument("seed", [int, None], 
optional=True, doc=doc_seed), - Argument( - "exclude_types", - List[List[int]], - optional=True, - default=[], - doc=doc_exclude_types, - ), - Argument("attn", int, optional=True, default=128, doc=doc_attn), - Argument("attn_layer", int, optional=True, default=2, doc=doc_attn_layer), - Argument("attn_dotr", bool, optional=True, default=True, doc=doc_attn_dotr), - Argument("attn_mask", bool, optional=True, default=False, doc=doc_attn_mask), - ] - - -@descrpt_args_plugin.register("se_atten") -def descrpt_se_atten_args(): - doc_stripped_type_embedding = "Whether to strip the type embedding into a separated embedding network. Setting it to `False` will fall back to the previous version of `se_atten` which is non-compressible." - doc_smooth_type_embdding = "When using stripped type embedding, whether to dot smooth factor on the network output of type embedding to keep the network smooth, instead of setting `set_davg_zero` to be True." - doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `se_atten` descriptor or `atom_ener` in the energy fitting is used" - - return [ - *descrpt_se_atten_common_args(), - Argument( - "stripped_type_embedding", - bool, - optional=True, - default=False, - doc=doc_stripped_type_embedding, - ), - Argument( - "smooth_type_embdding", - bool, - optional=True, - default=False, - doc=doc_smooth_type_embdding, - ), - Argument( - "set_davg_zero", bool, optional=True, default=True, doc=doc_set_davg_zero - ), - ] - - -@descrpt_args_plugin.register("se_atten_v2") -def descrpt_se_atten_v2_args(): - doc_set_davg_zero = "Set the normalization average to zero. 
This option should be set when `se_atten` descriptor or `atom_ener` in the energy fitting is used" - - return [ - *descrpt_se_atten_common_args(), - Argument( - "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero - ), - ] - - -@descrpt_args_plugin.register("se_a_ebd_v2", alias=["se_a_tpe_v2"]) -def descrpt_se_a_ebd_v2_args(): - return descrpt_se_a_args() - - -@descrpt_args_plugin.register("se_a_mask") -def descrpt_se_a_mask_args(): - doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\ - - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' - - doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." - doc_axis_neuron = "Size of the submatrix of G (embedding matrix)." - doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' 
- doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' - doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." - doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." - doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." - doc_trainable = "If the parameters in the embedding net is trainable" - doc_seed = "Random seed for parameter initialization" - - return [ - Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel), - Argument( - "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron - ), - Argument( - "axis_neuron", - int, - optional=True, - default=4, - alias=["n_axis_neuron"], - doc=doc_axis_neuron, - ), - Argument( - "activation_function", - str, - optional=True, - default="tanh", - doc=doc_activation_function, - ), - Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt), - Argument( - "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side - ), - Argument( - "exclude_types", - List[List[int]], - optional=True, - default=[], - doc=doc_exclude_types, - ), - Argument("precision", str, optional=True, default="default", doc=doc_precision), - Argument("trainable", bool, optional=True, default=True, doc=doc_trainable), - Argument("seed", [int, None], optional=True, doc=doc_seed), - ] - - -def descrpt_variant_type_args(exclude_hybrid: bool = False) -> Variant: - link_lf = make_link("loc_frame", "model/descriptor[loc_frame]") - link_se_e2_a = 
make_link("se_e2_a", "model/descriptor[se_e2_a]") - link_se_e2_r = make_link("se_e2_r", "model/descriptor[se_e2_r]") - link_se_e3 = make_link("se_e3", "model/descriptor[se_e3]") - link_se_a_tpe = make_link("se_a_tpe", "model/descriptor[se_a_tpe]") - link_hybrid = make_link("hybrid", "model/descriptor[hybrid]") - link_se_atten = make_link("se_atten", "model/descriptor[se_atten]") - link_se_atten_v2 = make_link("se_atten_v2", "model/descriptor[se_atten_v2]") - doc_descrpt_type = "The type of the descritpor. See explanation below. \n\n\ -- `loc_frame`: Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame.\n\n\ -- `se_e2_a`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.\n\n\ -- `se_e2_r`: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor.\n\n\ -- `se_e3`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor.\n\n\ -- `se_a_tpe`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Type embedding will be used by this descriptor.\n\n\ -- `se_atten`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism will be used by this descriptor.\n\n\ -- `se_atten_v2`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism with new modifications will be used by this descriptor.\n\n\ -- `se_a_mask`: Used by the smooth edition of Deep Potential. It can accept a variable number of atoms in a frame (Non-PBC system). *aparam* are required as an indicator matrix for the real/virtual sign of input atoms. \n\n\ -- `hybrid`: Concatenate of a list of descriptors as a new descriptor." 
- - return Variant( - "type", - descrpt_args_plugin.get_all_argument(exclude_hybrid=exclude_hybrid), - doc=doc_descrpt_type, - ) - - -# --- Fitting net configurations: --- # -fitting_args_plugin = ArgsPlugin() - - -@fitting_args_plugin.register("ener") -def fitting_ener(): - doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams." - doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." - doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." - doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' - doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." - doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' - doc_trainable = "Whether the parameters in the fitting net are trainable. This option can be\n\n\ -- bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\ -- list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of this list should be equal to len(`neuron`)+1." - doc_rcond = "The condition number used to determine the inital energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details." 
- doc_seed = "Random seed for parameter initialization of the fitting net" - doc_atom_ener = "Specify the atomic energy in vacuum for each type" - doc_layer_name = ( - "The name of the each layer. The length of this list should be equal to n_neuron + 1. " - "If two layers, either in the same fitting or different fittings, " - "have the same name, they will share the same neural network parameters. " - "The shape of these layers should be the same. " - "If null is given for a layer, parameters will not be shared." - ) - doc_use_aparam_as_mask = ( - "Whether to use the aparam as a mask in input." - "If True, the aparam will not be used in fitting net for embedding." - "When descrpt is se_a_mask, the aparam will be used as a mask to indicate the input atom is real/virtual. And use_aparam_as_mask should be set to True." - ) - - return [ - Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam), - Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), - Argument( - "neuron", - List[int], - optional=True, - default=[120, 120, 120], - alias=["n_neuron"], - doc=doc_neuron, - ), - Argument( - "activation_function", - str, - optional=True, - default="tanh", - doc=doc_activation_function, - ), - Argument("precision", str, optional=True, default="default", doc=doc_precision), - Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt), - Argument( - "trainable", - [List[bool], bool], - optional=True, - default=True, - doc=doc_trainable, - ), - Argument( - "rcond", [float, type(None)], optional=True, default=None, doc=doc_rcond - ), - Argument("seed", [int, None], optional=True, doc=doc_seed), - Argument( - "atom_ener", - List[Optional[float]], - optional=True, - default=[], - doc=doc_atom_ener, - ), - Argument("layer_name", List[str], optional=True, doc=doc_layer_name), - Argument( - "use_aparam_as_mask", - bool, - optional=True, - default=False, - doc=doc_use_aparam_as_mask, - ), - ] - - 
-@fitting_args_plugin.register("dos") -def fitting_dos(): - doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams." - doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." - doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." - doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' - doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." - doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' - doc_trainable = "Whether the parameters in the fitting net are trainable. This option can be\n\n\ -- bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\ -- list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of tihs list should be equal to len(`neuron`)+1." - doc_rcond = "The condition number used to determine the inital energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details." 
- doc_seed = "Random seed for parameter initialization of the fitting net" - doc_numb_dos = ( - "The number of gridpoints on which the DOS is evaluated (NEDOS in VASP)" - ) - - return [ - Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam), - Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), - Argument( - "neuron", List[int], optional=True, default=[120, 120, 120], doc=doc_neuron - ), - Argument( - "activation_function", - str, - optional=True, - default="tanh", - doc=doc_activation_function, - ), - Argument("precision", str, optional=True, default="float64", doc=doc_precision), - Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt), - Argument( - "trainable", - [List[bool], bool], - optional=True, - default=True, - doc=doc_trainable, - ), - Argument( - "rcond", [float, type(None)], optional=True, default=None, doc=doc_rcond - ), - Argument("seed", [int, None], optional=True, doc=doc_seed), - Argument("numb_dos", int, optional=True, default=300, doc=doc_numb_dos), - ] - - -@fitting_args_plugin.register("polar") -def fitting_polar(): - doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." - doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' - doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' - doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." 
- doc_scale = "The output of the fitting net (polarizability matrix) will be scaled by ``scale``" - # doc_diag_shift = 'The diagonal part of the polarizability matrix will be shifted by ``diag_shift``. The shift operation is carried out after ``scale``.' - doc_fit_diag = "Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to normal polarizability matrix by contracting with the rotation matrix." - doc_sel_type = "The atom types for which the atomic polarizability will be provided. If not set, all types will be selected." - doc_seed = "Random seed for parameter initialization of the fitting net" - - # YWolfeee: user can decide whether to use shift diag - doc_shift_diag = "Whether to shift the diagonal of polar, which is beneficial to training. Default is true." - - return [ - Argument( - "neuron", - List[int], - optional=True, - default=[120, 120, 120], - alias=["n_neuron"], - doc=doc_neuron, - ), - Argument( - "activation_function", - str, - optional=True, - default="tanh", - doc=doc_activation_function, - ), - Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt), - Argument("precision", str, optional=True, default="default", doc=doc_precision), - Argument("fit_diag", bool, optional=True, default=True, doc=doc_fit_diag), - Argument( - "scale", [List[float], float], optional=True, default=1.0, doc=doc_scale - ), - # Argument("diag_shift", [list,float], optional = True, default = 0.0, doc = doc_diag_shift), - Argument("shift_diag", bool, optional=True, default=True, doc=doc_shift_diag), - Argument( - "sel_type", - [List[int], int, None], - optional=True, - alias=["pol_type"], - doc=doc_sel_type, - ), - Argument("seed", [int, None], optional=True, doc=doc_seed), - ] - - -# def fitting_global_polar(): -# return fitting_polar() - - -@fitting_args_plugin.register("dipole") -def fitting_dipole(): - doc_neuron = "The number of neurons in each hidden layers of the fitting net. 
When two hidden layers are of the same size, a skip connection is built." - doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' - doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' - doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." - doc_sel_type = "The atom types for which the atomic dipole will be provided. If not set, all types will be selected." - doc_seed = "Random seed for parameter initialization of the fitting net" - return [ - Argument( - "neuron", - List[int], - optional=True, - default=[120, 120, 120], - alias=["n_neuron"], - doc=doc_neuron, - ), - Argument( - "activation_function", - str, - optional=True, - default="tanh", - doc=doc_activation_function, - ), - Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt), - Argument("precision", str, optional=True, default="default", doc=doc_precision), - Argument( - "sel_type", - [List[int], int, None], - optional=True, - alias=["dipole_type"], - doc=doc_sel_type, - ), - Argument("seed", [int, None], optional=True, doc=doc_seed), - ] - - -# YWolfeee: Delete global polar mode, merge it into polar mode and use loss setting to support. -def fitting_variant_type_args(): - doc_descrpt_type = "The type of the fitting. See explanation below. \n\n\ -- `ener`: Fit an energy model (potential energy surface).\n\n\ -- `dos` : Fit a density of states model. The total density of states / site-projected density of states labels should be provided by `dos.npy` or `atom_dos.npy` in each data system. The file has number of frames lines and number of energy grid columns (times number of atoms in `atom_dos.npy`). 
See `loss` parameter. \n\n\ -- `dipole`: Fit an atomic dipole model. Global dipole labels or atomic dipole labels for all the selected atoms (see `sel_type`) should be provided by `dipole.npy` in each data system. The file either has number of frames lines and 3 times of number of selected atoms columns, or has number of frames lines and 3 columns. See `loss` parameter.\n\n\ -- `polar`: Fit an atomic polarizability model. Global polarizazbility labels or atomic polarizability labels for all the selected atoms (see `sel_type`) should be provided by `polarizability.npy` in each data system. The file eith has number of frames lines and 9 times of number of selected atoms columns, or has number of frames lines and 9 columns. See `loss` parameter.\n\n" - - return Variant( - "type", - fitting_args_plugin.get_all_argument(), - optional=True, - default_tag="ener", - doc=doc_descrpt_type, - ) - - -# --- Modifier configurations: --- # -def modifier_dipole_charge(): - doc_model_name = "The name of the frozen dipole model file." - doc_model_charge_map = f"The charge of the WFCC. The list length should be the same as the {make_link('sel_type', 'model/fitting_net[dipole]/sel_type')}. " - doc_sys_charge_map = f"The charge of real atoms. The list length should be the same as the {make_link('type_map', 'model/type_map')}" - doc_ewald_h = "The grid spacing of the FFT grid. Unit is A" - doc_ewald_beta = f"The splitting parameter of Ewald sum. Unit is A^{-1}" - - return [ - Argument("model_name", str, optional=False, doc=doc_model_name), - Argument( - "model_charge_map", List[float], optional=False, doc=doc_model_charge_map - ), - Argument("sys_charge_map", List[float], optional=False, doc=doc_sys_charge_map), - Argument("ewald_beta", float, optional=True, default=0.4, doc=doc_ewald_beta), - Argument("ewald_h", float, optional=True, default=1.0, doc=doc_ewald_h), - ] - - -def modifier_variant_type_args(): - doc_modifier_type = "The type of modifier. 
See explanation below.\n\n\ --`dipole_charge`: Use WFCC to model the electronic structure of the system. Correct the long-range interaction" - return Variant( - "type", - [ - Argument("dipole_charge", dict, modifier_dipole_charge()), - ], - optional=False, - doc=doc_modifier_type, - ) - - -# --- model compression configurations: --- # -def model_compression(): - doc_model_file = "The input model file, which will be compressed by the DeePMD-kit." - doc_table_config = "The arguments of model compression, including extrapolate(scale of model extrapolation), stride(uniform stride of tabulation's first and second table), and frequency(frequency of tabulation overflow check)." - doc_min_nbor_dist = ( - "The nearest distance between neighbor atoms saved in the frozen model." - ) - - return [ - Argument("model_file", str, optional=False, doc=doc_model_file), - Argument("table_config", List[float], optional=False, doc=doc_table_config), - Argument("min_nbor_dist", float, optional=False, doc=doc_min_nbor_dist), - ] - - -# --- model compression configurations: --- # -def model_compression_type_args(): - doc_compress_type = "The type of model compression, which should be consistent with the descriptor type." - - return Variant( - "type", - [Argument("se_e2_a", dict, model_compression(), alias=["se_a"])], - optional=True, - default_tag="se_e2_a", - doc=doc_compress_type, - ) - - -def model_args(exclude_hybrid=False): - doc_type_map = "A list of strings. Give the name to each type of atoms. It is noted that the number of atom type of training system must be less than 128 in a GPU environment. If not given, type.raw in each system should use the same type indexes, and type_map.raw will take no effect." - doc_data_stat_nbatch = "The model determines the normalization from the statistics of the data. This key specifies the number of `frames` in each `system` used for statistics." - doc_data_stat_protect = "Protect parameter for atomic energy regression." 
- doc_data_bias_nsample = "The number of training samples in a system to compute and change the energy bias." - doc_type_embedding = "The type embedding." - doc_modifier = "The modifier of model output." - doc_use_srtab = "The table for the short-range pairwise interaction added on top of DP. The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. The first colume is the distance between atoms. The second to the last columes are energies for pairs of certain types. For example we have two atom types, 0 and 1. The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly." - doc_smin_alpha = "The short-range tabulated interaction will be swithed according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided." - doc_sw_rmin = "The lower boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided." - doc_sw_rmax = "The upper boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided." - doc_srtab_add_bias = "Whether add energy bias from the statistics of the data to short-range tabulated atomic energy. It only takes effect when `use_srtab` is provided." - doc_compress_config = "Model compression configurations" - doc_spin = "The settings for systems with spin." 
- hybrid_models = [] - if not exclude_hybrid: - hybrid_models.extend( - [ - pairwise_dprc(), - linear_ener_model_args(), - ] - ) - return Argument( - "model", - dict, - [ - Argument("type_map", List[str], optional=True, doc=doc_type_map), - Argument( - "data_stat_nbatch", - int, - optional=True, - default=10, - doc=doc_data_stat_nbatch, - ), - Argument( - "data_stat_protect", - float, - optional=True, - default=1e-2, - doc=doc_data_stat_protect, - ), - Argument( - "data_bias_nsample", - int, - optional=True, - default=10, - doc=doc_data_bias_nsample, - ), - Argument("use_srtab", str, optional=True, doc=doc_use_srtab), - Argument("smin_alpha", float, optional=True, doc=doc_smin_alpha), - Argument("sw_rmin", float, optional=True, doc=doc_sw_rmin), - Argument("sw_rmax", float, optional=True, doc=doc_sw_rmax), - Argument( - "srtab_add_bias", - bool, - optional=True, - default=True, - doc=doc_srtab_add_bias, - ), - Argument( - "type_embedding", - dict, - type_embedding_args(), - [], - optional=True, - doc=doc_type_embedding, - ), - Argument( - "modifier", - dict, - [], - [modifier_variant_type_args()], - optional=True, - doc=doc_modifier, - ), - Argument( - "compress", - dict, - [], - [model_compression_type_args()], - optional=True, - doc=doc_compress_config, - fold_subdoc=True, - ), - Argument("spin", dict, spin_args(), [], optional=True, doc=doc_spin), - ], - [ - Variant( - "type", - [ - standard_model_args(), - multi_model_args(), - frozen_model_args(), - pairtab_model_args(), - *hybrid_models, - ], - optional=True, - default_tag="standard", - ), - ], - ) - - -def standard_model_args() -> Argument: - doc_descrpt = "The descriptor of atomic environment." - doc_fitting = "The fitting of physical properties." 
- - ca = Argument( - "standard", - dict, - [ - Argument( - "descriptor", dict, [], [descrpt_variant_type_args()], doc=doc_descrpt - ), - Argument( - "fitting_net", - dict, - [], - [fitting_variant_type_args()], - doc=doc_fitting, - ), - ], - doc="Stardard model, which contains a descriptor and a fitting.", - ) - return ca - - -def multi_model_args() -> Argument: - doc_descrpt = "The descriptor of atomic environment. See model[standard]/descriptor for details." - doc_fitting_net_dict = "The dictionary of multiple fitting nets in multi-task mode. Each fitting_net_dict[fitting_key] is the single definition of fitting of physical properties with user-defined name `fitting_key`." - - ca = Argument( - "multi", - dict, - [ - Argument( - "descriptor", - dict, - [], - [descrpt_variant_type_args()], - doc=doc_descrpt, - fold_subdoc=True, - ), - Argument("fitting_net_dict", dict, doc=doc_fitting_net_dict), - ], - doc="Multiple-task model.", - ) - return ca - - -def pairwise_dprc() -> Argument: - qm_model_args = model_args(exclude_hybrid=True) - qm_model_args.name = "qm_model" - qm_model_args.fold_subdoc = True - qmmm_model_args = model_args(exclude_hybrid=True) - qmmm_model_args.name = "qmmm_model" - qmmm_model_args.fold_subdoc = True - ca = Argument( - "pairwise_dprc", - dict, - [ - qm_model_args, - qmmm_model_args, - ], - ) - return ca - - -def frozen_model_args() -> Argument: - doc_model_file = "Path to the frozen model file." - ca = Argument( - "frozen", - dict, - [ - Argument("model_file", str, optional=False, doc=doc_model_file), - ], - ) - return ca - - -def pairtab_model_args() -> Argument: - doc_tab_file = "Path to the tabulation file." - doc_rcut = "The cut-off radius." - doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. 
And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\ - - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' - ca = Argument( - "pairtab", - dict, - [ - Argument("tab_file", str, optional=False, doc=doc_tab_file), - Argument("rcut", float, optional=False, doc=doc_rcut), - Argument("sel", [int, List[int], str], optional=False, doc=doc_sel), - ], - doc="Pairwise tabulation energy model.", - ) - return ca - - -def linear_ener_model_args() -> Argument: - doc_weights = ( - "If the type is list of float, a list of weights for each model. " - 'If "mean", the weights are set to be 1 / len(models). ' - 'If "sum", the weights are set to be 1.' - ) - models_args = model_args(exclude_hybrid=True) - models_args.name = "models" - models_args.fold_subdoc = True - models_args.set_dtype(list) - models_args.set_repeat(True) - models_args.doc = "The sub-models." - ca = Argument( - "linear_ener", - dict, - [ - models_args, - Argument( - "weights", - [list, str], - optional=False, - doc=doc_weights, - ), - ], - ) - return ca - - -# --- Learning rate configurations: --- # -def learning_rate_exp(): - doc_start_lr = "The learning rate at the start of the training." 
- doc_stop_lr = "The desired learning rate at the end of the training." - doc_decay_steps = ( - "The learning rate is decaying every this number of training steps." - ) - - args = [ - Argument("start_lr", float, optional=True, default=1e-3, doc=doc_start_lr), - Argument("stop_lr", float, optional=True, default=1e-8, doc=doc_stop_lr), - Argument("decay_steps", int, optional=True, default=5000, doc=doc_decay_steps), - ] - return args - - -def learning_rate_variant_type_args(): - doc_lr = "The type of the learning rate." - - return Variant( - "type", - [Argument("exp", dict, learning_rate_exp())], - optional=True, - default_tag="exp", - doc=doc_lr, - ) - - -def learning_rate_args(): - doc_scale_by_worker = "When parallel training or batch size scaled, how to alter learning rate. Valid values are `linear`(default), `sqrt` or `none`." - doc_lr = "The definitio of learning rate" - return Argument( - "learning_rate", - dict, - [ - Argument( - "scale_by_worker", - str, - optional=True, - default="linear", - doc=doc_scale_by_worker, - ) - ], - [learning_rate_variant_type_args()], - optional=True, - doc=doc_lr, - ) - - -def learning_rate_dict_args(): - doc_learning_rate_dict = ( - "The dictionary of definitions of learning rates in multi-task mode. " - "Each learning_rate_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, is the single definition of learning rate.\n" - ) - ca = Argument( - "learning_rate_dict", dict, [], [], optional=True, doc=doc_learning_rate_dict - ) - return ca - - -# --- Loss configurations: --- # -def start_pref(item, label=None, abbr=None): - if label is None: - label = item - if abbr is None: - abbr = item - return f"The prefactor of {item} loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the {label} label should be provided by file {label}.npy in each data system. If both start_pref_{abbr} and limit_pref_{abbr} are set to 0, then the {item} will be ignored." 
- - -def limit_pref(item): - return f"The prefactor of {item} loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity." - - -loss_args_plugin = ArgsPlugin() - - -@loss_args_plugin.register("ener") -def loss_ener(): - doc_start_pref_e = start_pref("energy", abbr="e") - doc_limit_pref_e = limit_pref("energy") - doc_start_pref_f = start_pref("force", abbr="f") - doc_limit_pref_f = limit_pref("force") - doc_start_pref_v = start_pref("virial", abbr="v") - doc_limit_pref_v = limit_pref("virial") - doc_start_pref_ae = start_pref("atomic energy", label="atom_ener", abbr="ae") - doc_limit_pref_ae = limit_pref("atomic energy") - doc_start_pref_pf = start_pref( - "atomic prefactor force", label="atom_pref", abbr="pf" - ) - doc_limit_pref_pf = limit_pref("atomic prefactor force") - doc_start_pref_gf = start_pref("generalized force", label="drdq", abbr="gf") - doc_limit_pref_gf = limit_pref("generalized force") - doc_numb_generalized_coord = "The dimension of generalized coordinates. Required when generalized force loss is used." - doc_relative_f = "If provided, relative force error will be used in the loss. The difference of force will be normalized by the magnitude of the force in the label with a shift given by `relative_f`, i.e. DF_i / ( || F || + relative_f ) with DF denoting the difference between prediction and label and || F || denoting the L2 norm of the label." - doc_enable_atom_ener_coeff = "If true, the energy will be computed as \\sum_i c_i E_i. c_i should be provided by file atom_ener_coeff.npy in each data system, otherwise it's 1." 
- return [ - Argument( - "start_pref_e", - [float, int], - optional=True, - default=0.02, - doc=doc_start_pref_e, - ), - Argument( - "limit_pref_e", - [float, int], - optional=True, - default=1.00, - doc=doc_limit_pref_e, - ), - Argument( - "start_pref_f", - [float, int], - optional=True, - default=1000, - doc=doc_start_pref_f, - ), - Argument( - "limit_pref_f", - [float, int], - optional=True, - default=1.00, - doc=doc_limit_pref_f, - ), - Argument( - "start_pref_v", - [float, int], - optional=True, - default=0.00, - doc=doc_start_pref_v, - ), - Argument( - "limit_pref_v", - [float, int], - optional=True, - default=0.00, - doc=doc_limit_pref_v, - ), - Argument( - "start_pref_ae", - [float, int], - optional=True, - default=0.00, - doc=doc_start_pref_ae, - ), - Argument( - "limit_pref_ae", - [float, int], - optional=True, - default=0.00, - doc=doc_limit_pref_ae, - ), - Argument( - "start_pref_pf", - [float, int], - optional=True, - default=0.00, - doc=doc_start_pref_pf, - ), - Argument( - "limit_pref_pf", - [float, int], - optional=True, - default=0.00, - doc=doc_limit_pref_pf, - ), - Argument("relative_f", [float, None], optional=True, doc=doc_relative_f), - Argument( - "enable_atom_ener_coeff", - [bool], - optional=True, - default=False, - doc=doc_enable_atom_ener_coeff, - ), - Argument( - "start_pref_gf", - float, - optional=True, - default=0.0, - doc=doc_start_pref_gf, - ), - Argument( - "limit_pref_gf", - float, - optional=True, - default=0.0, - doc=doc_limit_pref_gf, - ), - Argument( - "numb_generalized_coord", - int, - optional=True, - default=0, - doc=doc_numb_generalized_coord, - ), - ] - - -@loss_args_plugin.register("ener_spin") -def loss_ener_spin(): - doc_start_pref_e = start_pref("energy") - doc_limit_pref_e = limit_pref("energy") - doc_start_pref_fr = start_pref("force_real_atom") - doc_limit_pref_fr = limit_pref("force_real_atom") - doc_start_pref_fm = start_pref("force_magnetic") - doc_limit_pref_fm = limit_pref("force_magnetic") - doc_start_pref_v 
= start_pref("virial") - doc_limit_pref_v = limit_pref("virial") - doc_start_pref_ae = start_pref("atom_ener") - doc_limit_pref_ae = limit_pref("atom_ener") - doc_start_pref_pf = start_pref("atom_pref") - doc_limit_pref_pf = limit_pref("atom_pref") - doc_relative_f = "If provided, relative force error will be used in the loss. The difference of force will be normalized by the magnitude of the force in the label with a shift given by `relative_f`, i.e. DF_i / ( || F || + relative_f ) with DF denoting the difference between prediction and label and || F || denoting the L2 norm of the label." - doc_enable_atom_ener_coeff = r"If true, the energy will be computed as \sum_i c_i E_i. c_i should be provided by file atom_ener_coeff.npy in each data system, otherwise it's 1." - return [ - Argument( - "start_pref_e", - [float, int], - optional=True, - default=0.02, - doc=doc_start_pref_e, - ), - Argument( - "limit_pref_e", - [float, int], - optional=True, - default=1.00, - doc=doc_limit_pref_e, - ), - Argument( - "start_pref_fr", - [float, int], - optional=True, - default=1000, - doc=doc_start_pref_fr, - ), - Argument( - "limit_pref_fr", - [float, int], - optional=True, - default=1.00, - doc=doc_limit_pref_fr, - ), - Argument( - "start_pref_fm", - [float, int], - optional=True, - default=10000, - doc=doc_start_pref_fm, - ), - Argument( - "limit_pref_fm", - [float, int], - optional=True, - default=10.0, - doc=doc_limit_pref_fm, - ), - Argument( - "start_pref_v", - [float, int], - optional=True, - default=0.00, - doc=doc_start_pref_v, - ), - Argument( - "limit_pref_v", - [float, int], - optional=True, - default=0.00, - doc=doc_limit_pref_v, - ), - Argument( - "start_pref_ae", - [float, int], - optional=True, - default=0.00, - doc=doc_start_pref_ae, - ), - Argument( - "limit_pref_ae", - [float, int], - optional=True, - default=0.00, - doc=doc_limit_pref_ae, - ), - Argument( - "start_pref_pf", - [float, int], - optional=True, - default=0.00, - doc=doc_start_pref_pf, - ), - 
Argument( - "limit_pref_pf", - [float, int], - optional=True, - default=0.00, - doc=doc_limit_pref_pf, - ), - Argument("relative_f", [float, None], optional=True, doc=doc_relative_f), - Argument( - "enable_atom_ener_coeff", - [bool], - optional=True, - default=False, - doc=doc_enable_atom_ener_coeff, - ), - ] - - -@loss_args_plugin.register("dos") -def loss_dos(): - doc_start_pref_dos = start_pref("Density of State (DOS)") - doc_limit_pref_dos = limit_pref("Density of State (DOS)") - doc_start_pref_cdf = start_pref( - "Cumulative Distribution Function (cumulative intergral of DOS)" - ) - doc_limit_pref_cdf = limit_pref( - "Cumulative Distribution Function (cumulative intergral of DOS)" - ) - doc_start_pref_ados = start_pref("atomic DOS (site-projected DOS)") - doc_limit_pref_ados = limit_pref("atomic DOS (site-projected DOS)") - doc_start_pref_acdf = start_pref("Cumulative integral of atomic DOS") - doc_limit_pref_acdf = limit_pref("Cumulative integral of atomic DOS") - return [ - Argument( - "start_pref_dos", - [float, int], - optional=True, - default=0.00, - doc=doc_start_pref_dos, - ), - Argument( - "limit_pref_dos", - [float, int], - optional=True, - default=0.00, - doc=doc_limit_pref_dos, - ), - Argument( - "start_pref_cdf", - [float, int], - optional=True, - default=0.00, - doc=doc_start_pref_cdf, - ), - Argument( - "limit_pref_cdf", - [float, int], - optional=True, - default=0.00, - doc=doc_limit_pref_cdf, - ), - Argument( - "start_pref_ados", - [float, int], - optional=True, - default=1.00, - doc=doc_start_pref_ados, - ), - Argument( - "limit_pref_ados", - [float, int], - optional=True, - default=1.00, - doc=doc_limit_pref_ados, - ), - Argument( - "start_pref_acdf", - [float, int], - optional=True, - default=0.00, - doc=doc_start_pref_acdf, - ), - Argument( - "limit_pref_acdf", - [float, int], - optional=True, - default=0.00, - doc=doc_limit_pref_acdf, - ), - ] - - -# YWolfeee: Modified to support tensor type of loss args. 
-@loss_args_plugin.register("tensor") -def loss_tensor(): - # doc_global_weight = "The prefactor of the weight of global loss. It should be larger than or equal to 0. If only `pref` is provided or both are not provided, training will be global mode, i.e. the shape of 'polarizability.npy` or `dipole.npy` should be #frams x [9 or 3]." - # doc_local_weight = "The prefactor of the weight of atomic loss. It should be larger than or equal to 0. If only `pref_atomic` is provided, training will be atomic mode, i.e. the shape of `polarizability.npy` or `dipole.npy` should be #frames x ([9 or 3] x #selected atoms). If both `pref` and `pref_atomic` are provided, training will be combined mode, and atomic label should be provided as well." - doc_global_weight = "The prefactor of the weight of global loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to global label, i.e. 'polarizability.npy` or `dipole.npy`, whose shape should be #frames x [9 or 3]. If it's larger than 0.0, this npy should be included." - doc_local_weight = "The prefactor of the weight of atomic loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to atomic label, i.e. `atomic_polarizability.npy` or `atomic_dipole.npy`, whose shape should be #frames x ([9 or 3] x #selected atoms). If it's larger than 0.0, this npy should be included. Both `pref` and `pref_atomic` should be provided, and either can be set to 0.0." - return [ - Argument( - "pref", [float, int], optional=False, default=None, doc=doc_global_weight - ), - Argument( - "pref_atomic", - [float, int], - optional=False, - default=None, - doc=doc_local_weight, - ), - ] - - -def loss_variant_type_args(): - doc_loss = "The type of the loss. When the fitting type is `ener`, the loss type should be set to `ener` or left unset. When the fitting type is `dipole` or `polar`, the loss type should be set to `tensor`." 
- - return Variant( - "type", - loss_args_plugin.get_all_argument(), - optional=True, - default_tag="ener", - doc=doc_loss, - ) - - -def loss_args(): - doc_loss = "The definition of loss function. The loss type should be set to `tensor`, `ener` or left unset." - ca = Argument( - "loss", dict, [], [loss_variant_type_args()], optional=True, doc=doc_loss - ) - return ca - - -def loss_dict_args(): - doc_loss_dict = ( - "The dictionary of definitions of multiple loss functions in multi-task mode. " - "Each loss_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, is the single definition of loss function, whose type should be set to `tensor`, `ener` or left unset.\n" - ) - ca = Argument("loss_dict", dict, [], [], optional=True, doc=doc_loss_dict) - return ca - - -# --- Training configurations: --- # -def training_data_args(): # ! added by Ziyao: new specification style for data systems. - link_sys = make_link("systems", "training/training_data/systems") - doc_systems = ( - "The data systems for training. " - "This key can be provided with a list that specifies the systems, or be provided with a string " - "by which the prefix of all systems are given and the list of the systems is automatically generated." - ) - doc_set_prefix = f"The prefix of the sets in the {link_sys}." - doc_batch_size = f'This key can be \n\n\ -- list: the length of which is the same as the {link_sys}. The batch size of each system is given by the elements of the list.\n\n\ -- int: all {link_sys} use the same batch size.\n\n\ -- string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.\n\n\ -- string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.\n\n\ -- string "mixed:N": the batch data will be sampled from all systems and merged into a mixed system with the batch size N. 
Only support the se_atten descriptor.\n\n\ -If MPI is used, the value should be considered as the batch size per task.' - doc_auto_prob_style = 'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n\ -- "prob_uniform" : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\ -- "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\ -- "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.' - doc_sys_probs = ( - "A list of float if specified. " - "Should be of the same length as `systems`, " - "specifying the probability of each system." - ) - - args = [ - Argument( - "systems", [List[str], str], optional=False, default=".", doc=doc_systems - ), - Argument("set_prefix", str, optional=True, default="set", doc=doc_set_prefix), - Argument( - "batch_size", - [List[int], int, str], - optional=True, - default="auto", - doc=doc_batch_size, - ), - Argument( - "auto_prob", - str, - optional=True, - default="prob_sys_size", - doc=doc_auto_prob_style, - alias=[ - "auto_prob_style", - ], - ), - Argument( - "sys_probs", - List[float], - optional=True, - default=None, - doc=doc_sys_probs, - alias=["sys_weights"], - ), - ] - - doc_training_data = "Configurations of training data." - return Argument( - "training_data", - dict, - optional=True, - sub_fields=args, - sub_variants=[], - doc=doc_training_data, - ) - - -def validation_data_args(): # ! added by Ziyao: new specification style for data systems. 
- link_sys = make_link("systems", "training/validation_data/systems") - doc_systems = ( - "The data systems for validation. " - "This key can be provided with a list that specifies the systems, or be provided with a string " - "by which the prefix of all systems are given and the list of the systems is automatically generated." - ) - doc_set_prefix = f"The prefix of the sets in the {link_sys}." - doc_batch_size = f'This key can be \n\n\ -- list: the length of which is the same as the {link_sys}. The batch size of each system is given by the elements of the list.\n\n\ -- int: all {link_sys} use the same batch size.\n\n\ -- string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.\n\n\ -- string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.' - doc_auto_prob_style = 'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n\ -- "prob_uniform" : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\ -- "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\ -- "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.' - doc_sys_probs = ( - "A list of float if specified. " - "Should be of the same length as `systems`, " - "specifying the probability of each system." - ) - doc_numb_btch = "An integer that specifies the number of batches to be sampled for each validation period." 
- - args = [ - Argument( - "systems", [List[str], str], optional=False, default=".", doc=doc_systems - ), - Argument("set_prefix", str, optional=True, default="set", doc=doc_set_prefix), - Argument( - "batch_size", - [List[int], int, str], - optional=True, - default="auto", - doc=doc_batch_size, - ), - Argument( - "auto_prob", - str, - optional=True, - default="prob_sys_size", - doc=doc_auto_prob_style, - alias=[ - "auto_prob_style", - ], - ), - Argument( - "sys_probs", - List[float], - optional=True, - default=None, - doc=doc_sys_probs, - alias=["sys_weights"], - ), - Argument( - "numb_btch", - int, - optional=True, - default=1, - doc=doc_numb_btch, - alias=[ - "numb_batch", - ], - ), - ] - - doc_validation_data = ( - "Configurations of validation data. Similar to that of training data, " - "except that a `numb_btch` argument may be configured" - ) - return Argument( - "validation_data", - dict, - optional=True, - default=None, - sub_fields=args, - sub_variants=[], - doc=doc_validation_data, - ) - - -def mixed_precision_args(): # ! added by Denghui. - doc_output_prec = 'The precision for mixed precision params. " \ - "The trainable variables precision during the mixed precision training process, " \ - "supported options are float32 only currently.' - doc_compute_prec = 'The precision for mixed precision compute. " \ - "The compute precision during the mixed precision training process, "" \ - "supported options are float16 and bfloat16 currently.' - - args = [ - Argument( - "output_prec", str, optional=True, default="float32", doc=doc_output_prec - ), - Argument( - "compute_prec", str, optional=False, default="float16", doc=doc_compute_prec - ), - ] - - doc_mixed_precision = "Configurations of mixed precision." - return Argument( - "mixed_precision", - dict, - optional=True, - sub_fields=args, - sub_variants=[], - doc=doc_mixed_precision, - ) - - -def training_args(): # ! modified by Ziyao: data configuration isolated. - doc_numb_steps = "Number of training batch. 
Each training uses one batch of data." - doc_seed = "The random seed for getting frames from the training data set." - doc_disp_file = "The file for printing learning curve." - doc_disp_freq = "The frequency of printing learning curve." - doc_save_freq = "The frequency of saving check point." - doc_save_ckpt = "The path prefix of saving check point files." - doc_disp_training = "Displaying verbose information during training." - doc_time_training = "Timing durining training." - doc_profiling = "Profiling during training." - doc_profiling_file = "Output file for profiling." - doc_enable_profiler = "Enable TensorFlow Profiler (available in TensorFlow 2.3) to analyze performance. The log will be saved to `tensorboard_log_dir`." - doc_tensorboard = "Enable tensorboard" - doc_tensorboard_log_dir = "The log directory of tensorboard outputs" - doc_tensorboard_freq = "The frequency of writing tensorboard events." - doc_data_dict = ( - "The dictionary of multi DataSystems in multi-task mode. " - "Each data_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, " - "contains training data and optional validation data definitions." - ) - doc_fitting_weight = ( - "Each fitting_weight[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, " - "is the training weight of fitting net `fitting_key`. " - "Fitting nets with higher weights will be selected with higher probabilities to be trained in one step. " - "Weights will be normalized and minus ones will be ignored. " - "If not set, each fitting net will be equally selected when training." 
- ) - - arg_training_data = training_data_args() - arg_validation_data = validation_data_args() - mixed_precision_data = mixed_precision_args() - - args = [ - arg_training_data, - arg_validation_data, - mixed_precision_data, - Argument( - "numb_steps", int, optional=False, doc=doc_numb_steps, alias=["stop_batch"] - ), - Argument("seed", [int, None], optional=True, doc=doc_seed), - Argument( - "disp_file", str, optional=True, default="lcurve.out", doc=doc_disp_file - ), - Argument("disp_freq", int, optional=True, default=1000, doc=doc_disp_freq), - Argument("save_freq", int, optional=True, default=1000, doc=doc_save_freq), - Argument( - "save_ckpt", str, optional=True, default="model.ckpt", doc=doc_save_ckpt - ), - Argument( - "disp_training", bool, optional=True, default=True, doc=doc_disp_training - ), - Argument( - "time_training", bool, optional=True, default=True, doc=doc_time_training - ), - Argument("profiling", bool, optional=True, default=False, doc=doc_profiling), - Argument( - "profiling_file", - str, - optional=True, - default="timeline.json", - doc=doc_profiling_file, - ), - Argument( - "enable_profiler", - bool, - optional=True, - default=False, - doc=doc_enable_profiler, - ), - Argument( - "tensorboard", bool, optional=True, default=False, doc=doc_tensorboard - ), - Argument( - "tensorboard_log_dir", - str, - optional=True, - default="log", - doc=doc_tensorboard_log_dir, - ), - Argument( - "tensorboard_freq", int, optional=True, default=1, doc=doc_tensorboard_freq - ), - Argument("data_dict", dict, optional=True, doc=doc_data_dict), - Argument("fitting_weight", dict, optional=True, doc=doc_fitting_weight), - ] - - doc_training = "The training options." 
- return Argument("training", dict, args, [], doc=doc_training) - - -def make_index(keys): - ret = [] - for ii in keys: - ret.append(make_link(ii, ii)) - return ", ".join(ret) - - -def gen_doc(*, make_anchor=True, make_link=True, **kwargs): - if make_link: - make_anchor = True - ptr = [] - for ii in gen_args(): - ptr.append(ii.gen_doc(make_anchor=make_anchor, make_link=make_link, **kwargs)) - - key_words = [] - for ii in "\n\n".join(ptr).split("\n"): - if "argument path" in ii: - key_words.append(ii.split(":")[1].replace("`", "").strip()) - # ptr.insert(0, make_index(key_words)) - - return "\n\n".join(ptr) - - -def gen_json(**kwargs): - return json.dumps( - tuple(gen_args()), - cls=ArgumentEncoder, - ) - - -def gen_args(**kwargs) -> List[Argument]: - return [ - model_args(), - learning_rate_args(), - learning_rate_dict_args(), - loss_args(), - loss_dict_args(), - training_args(), - nvnmd_args(), - ] - - -def normalize_multi_task(data): - # single-task or multi-task mode - if data["model"].get("type", "standard") not in ("standard", "multi"): - return data - single_fitting_net = "fitting_net" in data["model"].keys() - single_training_data = "training_data" in data["training"].keys() - single_valid_data = "validation_data" in data["training"].keys() - single_loss = "loss" in data.keys() - single_learning_rate = "learning_rate" in data.keys() - multi_fitting_net = "fitting_net_dict" in data["model"].keys() - multi_training_data = "data_dict" in data["training"].keys() - multi_loss = "loss_dict" in data.keys() - multi_fitting_weight = "fitting_weight" in data["training"].keys() - multi_learning_rate = "learning_rate_dict" in data.keys() - assert (single_fitting_net == single_training_data) and ( - multi_fitting_net == multi_training_data - ), ( - "In single-task mode, 'model/fitting_net' and 'training/training_data' must be defined at the same time! " - "While in multi-task mode, 'model/fitting_net_dict', 'training/data_dict' " - "must be defined at the same time! 
Please check your input script. " - ) - assert not (single_fitting_net and multi_fitting_net), ( - "Single-task mode and multi-task mode can not be performed together. " - "Please check your input script and choose just one format! " - ) - assert ( - single_fitting_net or multi_fitting_net - ), "Please define your fitting net and training data! " - if multi_fitting_net: - assert not single_valid_data, ( - "In multi-task mode, 'training/validation_data' should not appear " - "outside 'training/data_dict'! Please check your input script." - ) - assert ( - not single_loss - ), "In multi-task mode, please use 'model/loss_dict' in stead of 'model/loss'! " - assert ( - "type_map" in data["model"] - ), "In multi-task mode, 'model/type_map' must be defined! " - data["model"]["type"] = "multi" - data["model"]["fitting_net_dict"] = normalize_fitting_net_dict( - data["model"]["fitting_net_dict"] - ) - data["training"]["data_dict"] = normalize_data_dict( - data["training"]["data_dict"] - ) - data["loss_dict"] = ( - normalize_loss_dict( - data["model"]["fitting_net_dict"].keys(), data["loss_dict"] - ) - if multi_loss - else {} - ) - if multi_learning_rate: - data["learning_rate_dict"] = normalize_learning_rate_dict( - data["model"]["fitting_net_dict"].keys(), data["learning_rate_dict"] - ) - elif single_learning_rate: - data[ - "learning_rate_dict" - ] = normalize_learning_rate_dict_with_single_learning_rate( - data["model"]["fitting_net_dict"].keys(), data["learning_rate"] - ) - fitting_weight = ( - data["training"]["fitting_weight"] if multi_fitting_weight else None - ) - data["training"]["fitting_weight"] = normalize_fitting_weight( - data["model"]["fitting_net_dict"].keys(), - data["training"]["data_dict"].keys(), - fitting_weight=fitting_weight, - ) - else: - assert not multi_loss, "In single-task mode, please use 'model/loss' in stead of 'model/loss_dict'! 
" - assert not multi_learning_rate, "In single-task mode, please use 'model/learning_rate' in stead of 'model/learning_rate_dict'! " - return data - - -def normalize_fitting_net_dict(fitting_net_dict): - new_dict = {} - base = Argument("base", dict, [], [fitting_variant_type_args()], doc="") - for fitting_key_item in fitting_net_dict: - data = base.normalize_value( - fitting_net_dict[fitting_key_item], trim_pattern="_*" - ) - base.check_value(data, strict=True) - new_dict[fitting_key_item] = data - return new_dict - - -def normalize_data_dict(data_dict): - new_dict = {} - base = Argument( - "base", dict, [training_data_args(), validation_data_args()], [], doc="" - ) - for data_system_key_item in data_dict: - data = base.normalize_value(data_dict[data_system_key_item], trim_pattern="_*") - base.check_value(data, strict=True) - new_dict[data_system_key_item] = data - return new_dict - - -def normalize_loss_dict(fitting_keys, loss_dict): - # check the loss dict - failed_loss_keys = [item for item in loss_dict if item not in fitting_keys] - assert ( - not failed_loss_keys - ), "Loss dict key(s) {} not have corresponding fitting keys in {}! ".format( - str(failed_loss_keys), str(list(fitting_keys)) - ) - new_dict = {} - base = Argument("base", dict, [], [loss_variant_type_args()], doc="") - for item in loss_dict: - data = base.normalize_value(loss_dict[item], trim_pattern="_*") - base.check_value(data, strict=True) - new_dict[item] = data - return new_dict - - -def normalize_learning_rate_dict(fitting_keys, learning_rate_dict): - # check the learning_rate dict - failed_learning_rate_keys = [ - item for item in learning_rate_dict if item not in fitting_keys - ] - assert not failed_learning_rate_keys, "Learning rate dict key(s) {} not have corresponding fitting keys in {}! 
".format( - str(failed_learning_rate_keys), str(list(fitting_keys)) - ) - new_dict = {} - base = Argument("base", dict, [], [learning_rate_variant_type_args()], doc="") - for item in learning_rate_dict: - data = base.normalize_value(learning_rate_dict[item], trim_pattern="_*") - base.check_value(data, strict=True) - new_dict[item] = data - return new_dict - - -def normalize_learning_rate_dict_with_single_learning_rate(fitting_keys, learning_rate): - new_dict = {} - base = Argument("base", dict, [], [learning_rate_variant_type_args()], doc="") - data = base.normalize_value(learning_rate, trim_pattern="_*") - base.check_value(data, strict=True) - for fitting_key in fitting_keys: - new_dict[fitting_key] = data - return new_dict - - -def normalize_fitting_weight(fitting_keys, data_keys, fitting_weight=None): - # check the mapping - failed_data_keys = [item for item in data_keys if item not in fitting_keys] - assert ( - not failed_data_keys - ), "Data dict key(s) {} not have corresponding fitting keys in {}! ".format( - str(failed_data_keys), str(list(fitting_keys)) - ) - empty_fitting_keys = [] - valid_fitting_keys = [] - for item in fitting_keys: - if item not in data_keys: - empty_fitting_keys.append(item) - else: - valid_fitting_keys.append(item) - if empty_fitting_keys: - log.warning( - "Fitting net(s) {} have no data and will not be used in training.".format( - str(empty_fitting_keys) - ) - ) - num_pair = len(valid_fitting_keys) - assert num_pair > 0, "No valid training data systems for fitting nets!" - - # check and normalize the fitting weight - new_weight = {} - if fitting_weight is None: - equal_weight = 1.0 / num_pair - for item in fitting_keys: - new_weight[item] = equal_weight if item in valid_fitting_keys else 0.0 - else: - failed_weight_keys = [ - item for item in fitting_weight if item not in fitting_keys - ] - assert not failed_weight_keys, "Fitting weight key(s) {} not have corresponding fitting keys in {}! 
".format( - str(failed_weight_keys), str(list(fitting_keys)) - ) - sum_prob = 0.0 - for item in fitting_keys: - if item in valid_fitting_keys: - if ( - item in fitting_weight - and isinstance(fitting_weight[item], (int, float)) - and fitting_weight[item] > 0.0 - ): - sum_prob += fitting_weight[item] - new_weight[item] = fitting_weight[item] - else: - valid_fitting_keys.remove(item) - log.warning( - f"Fitting net '{item}' has zero or invalid weight " - "and will not be used in training." - ) - new_weight[item] = 0.0 - else: - new_weight[item] = 0.0 - assert sum_prob > 0.0, "No valid training weight for fitting nets!" - # normalize - for item in new_weight: - new_weight[item] /= sum_prob - return new_weight - - -def normalize(data): - data = normalize_multi_task(data) - - base = Argument("base", dict, gen_args()) - data = base.normalize_value(data, trim_pattern="_*") - base.check_value(data, strict=True) - - return data - - -if __name__ == "__main__": - gen_doc() diff --git a/deepmd_utils/utils/batch_size.py b/deepmd_utils/utils/batch_size.py deleted file mode 100644 index 1b93a51242..0000000000 --- a/deepmd_utils/utils/batch_size.py +++ /dev/null @@ -1,233 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -import logging -import os -from abc import ( - ABC, - abstractmethod, -) -from typing import ( - Callable, - Tuple, -) - -import numpy as np - -from deepmd_utils.utils.errors import ( - OutOfMemoryError, -) - -log = logging.getLogger(__name__) - - -class AutoBatchSize(ABC): - """This class allows DeePMD-kit to automatically decide the maximum - batch size that will not cause an OOM error. - - Notes - ----- - In some CPU environments, the program may be directly killed when OOM. In - this case, by default the batch size will not be increased for CPUs. The - environment variable `DP_INFER_BATCH_SIZE` can be set as the batch size. - - In other cases, we assume all OOM error will raise :class:`OutOfMemoryError`. 
- - Parameters - ---------- - initial_batch_size : int, default: 1024 - initial batch size (number of total atoms) when DP_INFER_BATCH_SIZE - is not set - factor : float, default: 2. - increased factor - - Attributes - ---------- - current_batch_size : int - current batch size (number of total atoms) - maximum_working_batch_size : int - maximum working batch size - minimal_not_working_batch_size : int - minimal not working batch size - """ - - def __init__(self, initial_batch_size: int = 1024, factor: float = 2.0) -> None: - # See also PyTorchLightning/pytorch-lightning#1638 - # TODO: discuss a proper initial batch size - self.current_batch_size = initial_batch_size - DP_INFER_BATCH_SIZE = int(os.environ.get("DP_INFER_BATCH_SIZE", 0)) - if DP_INFER_BATCH_SIZE > 0: - self.current_batch_size = DP_INFER_BATCH_SIZE - self.maximum_working_batch_size = DP_INFER_BATCH_SIZE - self.minimal_not_working_batch_size = self.maximum_working_batch_size + 1 - else: - self.maximum_working_batch_size = initial_batch_size - if self.is_gpu_available(): - self.minimal_not_working_batch_size = 2**31 - else: - self.minimal_not_working_batch_size = ( - self.maximum_working_batch_size + 1 - ) - log.warning( - "You can use the environment variable DP_INFER_BATCH_SIZE to" - "control the inference batch size (nframes * natoms). " - "The default value is %d." % initial_batch_size - ) - - self.factor = factor - - def execute( - self, callable: Callable, start_index: int, natoms: int - ) -> Tuple[int, tuple]: - """Excuate a method with given batch size. - - Parameters - ---------- - callable : Callable - The method should accept the batch size and start_index as parameters, - and returns executed batch size and data. 
- start_index : int - start index - natoms : int - natoms - - Returns - ------- - int - executed batch size * number of atoms - tuple - result from callable, None if failing to execute - - Raises - ------ - OutOfMemoryError - OOM when batch size is 1 - """ - if natoms > 0: - batch_nframes = self.current_batch_size // natoms - else: - batch_nframes = self.current_batch_size - try: - n_batch, result = callable(max(batch_nframes, 1), start_index) - except Exception as e: - if not self.is_oom_error(e): - raise e - self.minimal_not_working_batch_size = min( - self.minimal_not_working_batch_size, self.current_batch_size - ) - if self.maximum_working_batch_size >= self.minimal_not_working_batch_size: - self.maximum_working_batch_size = int( - self.minimal_not_working_batch_size / self.factor - ) - if self.minimal_not_working_batch_size <= natoms: - raise OutOfMemoryError( - "The callable still throws an out-of-memory (OOM) error even when batch size is 1!" - ) from e - # adjust the next batch size - self._adjust_batch_size(1.0 / self.factor) - return 0, None - else: - n_tot = n_batch * natoms - self.maximum_working_batch_size = max( - self.maximum_working_batch_size, n_tot - ) - # adjust the next batch size - if ( - n_tot + natoms > self.current_batch_size - and self.current_batch_size * self.factor - < self.minimal_not_working_batch_size - ): - self._adjust_batch_size(self.factor) - return n_batch, result - - def _adjust_batch_size(self, factor: float): - old_batch_size = self.current_batch_size - self.current_batch_size = int(self.current_batch_size * factor) - log.info( - "Adjust batch size from %d to %d" - % (old_batch_size, self.current_batch_size) - ) - - def execute_all( - self, callable: Callable, total_size: int, natoms: int, *args, **kwargs - ) -> Tuple[np.ndarray]: - """Excuate a method with all given data. - - Parameters - ---------- - callable : Callable - The method should accept *args and **kwargs as input and return the similiar array. 
- total_size : int - Total size - natoms : int - The number of atoms - *args - Variable length argument list. - **kwargs - If 2D np.ndarray, assume the first axis is batch; otherwise do nothing. - """ - - def execute_with_batch_size( - batch_size: int, start_index: int - ) -> Tuple[int, Tuple[np.ndarray]]: - end_index = start_index + batch_size - end_index = min(end_index, total_size) - return (end_index - start_index), callable( - *[ - ( - vv[start_index:end_index] - if isinstance(vv, np.ndarray) and vv.ndim > 1 - else vv - ) - for vv in args - ], - **{ - kk: ( - vv[start_index:end_index] - if isinstance(vv, np.ndarray) and vv.ndim > 1 - else vv - ) - for kk, vv in kwargs.items() - }, - ) - - index = 0 - results = [] - while index < total_size: - n_batch, result = self.execute(execute_with_batch_size, index, natoms) - if not isinstance(result, tuple): - result = (result,) - index += n_batch - if n_batch: - for rr in result: - rr.reshape((n_batch, -1)) - results.append(result) - - r = tuple([np.concatenate(r, axis=0) for r in zip(*results)]) - if len(r) == 1: - # avoid returning tuple if callable doesn't return tuple - r = r[0] - return r - - @abstractmethod - def is_gpu_available(self) -> bool: - """Check if GPU is available. - - Returns - ------- - bool - True if GPU is available - """ - - @abstractmethod - def is_oom_error(self, e: Exception) -> bool: - """Check if the exception is an OOM error. 
- - Parameters - ---------- - e : Exception - Exception - - Returns - ------- - bool - True if the exception is an OOM error - """ diff --git a/deepmd_utils/utils/compat.py b/deepmd_utils/utils/compat.py deleted file mode 100644 index 5f9c14e6d8..0000000000 --- a/deepmd_utils/utils/compat.py +++ /dev/null @@ -1,392 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -"""Module providing compatibility between `0.x.x` and `1.x.x` input versions.""" - -import json -import warnings -from pathlib import ( - Path, -) -from typing import ( - Any, - Dict, - Optional, - Sequence, - Union, -) - -import numpy as np - -from deepmd.common import ( - j_must_have, -) - - -def convert_input_v0_v1( - jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None -) -> Dict[str, Any]: - """Convert input from v0 format to v1. - - Parameters - ---------- - jdata : Dict[str, Any] - loaded json/yaml file - warning : bool, optional - whether to show deprecation warning, by default True - dump : Optional[Union[str, Path]], optional - whether to dump converted file, by default None - - Returns - ------- - Dict[str, Any] - converted output - """ - output = {} - output["model"] = _model(jdata, jdata["use_smooth"]) - output["learning_rate"] = _learning_rate(jdata) - output["loss"] = _loss(jdata) - output["training"] = _training(jdata) - if warning: - _warning_input_v0_v1(dump) - if dump is not None: - with open(dump, "w") as fp: - json.dump(output, fp, indent=4) - return output - - -def _warning_input_v0_v1(fname: Optional[Union[str, Path]]): - msg = ( - "It seems that you are using a deepmd-kit input of version 0.x.x, " - "which is deprecated. we have converted the input to >2.0.0 compatible" - ) - if fname is not None: - msg += f", and output it to file {fname}" - warnings.warn(msg) - - -def _model(jdata: Dict[str, Any], smooth: bool) -> Dict[str, Dict[str, Any]]: - """Convert data to v1 input for non-smooth model. 
- - Parameters - ---------- - jdata : Dict[str, Any] - parsed input json/yaml data - smooth : bool - whether to use smooth or non-smooth descriptor version - - Returns - ------- - Dict[str, Dict[str, Any]] - dictionary with model input parameters and sub-dictionaries for descriptor and - fitting net - """ - model = {} - model["descriptor"] = ( - _smth_descriptor(jdata) if smooth else _nonsmth_descriptor(jdata) - ) - model["fitting_net"] = _fitting_net(jdata) - return model - - -def _nonsmth_descriptor(jdata: Dict[str, Any]) -> Dict[str, Any]: - """Convert data to v1 input for non-smooth descriptor. - - Parameters - ---------- - jdata : Dict[str, Any] - parsed input json/yaml data - - Returns - ------- - Dict[str, Any] - dict with descriptor parameters - """ - descriptor = {} - descriptor["type"] = "loc_frame" - _jcopy(jdata, descriptor, ("sel_a", "sel_r", "rcut", "axis_rule")) - return descriptor - - -def _smth_descriptor(jdata: Dict[str, Any]) -> Dict[str, Any]: - """Convert data to v1 input for smooth descriptor. - - Parameters - ---------- - jdata : Dict[str, Any] - parsed input json/yaml data - - Returns - ------- - Dict[str, Any] - dict with descriptor parameters - """ - descriptor = {} - seed = jdata.get("seed", None) - if seed is not None: - descriptor["seed"] = seed - descriptor["type"] = "se_a" - descriptor["sel"] = jdata["sel_a"] - _jcopy(jdata, descriptor, ("rcut",)) - descriptor["rcut_smth"] = jdata.get("rcut_smth", descriptor["rcut"]) - descriptor["neuron"] = j_must_have(jdata, "filter_neuron") - descriptor["axis_neuron"] = j_must_have(jdata, "axis_neuron", ["n_axis_neuron"]) - descriptor["resnet_dt"] = False - if "resnet_dt" in jdata: - descriptor["resnet_dt"] = jdata["filter_resnet_dt"] - - return descriptor - - -def _fitting_net(jdata: Dict[str, Any]) -> Dict[str, Any]: - """Convert data to v1 input for fitting net. 
- - Parameters - ---------- - jdata : Dict[str, Any] - parsed input json/yaml data - - Returns - ------- - Dict[str, Any] - dict with fitting net parameters - """ - fitting_net = {} - - seed = jdata.get("seed", None) - if seed is not None: - fitting_net["seed"] = seed - fitting_net["neuron"] = j_must_have(jdata, "fitting_neuron", ["n_neuron"]) - fitting_net["resnet_dt"] = True - if "resnet_dt" in jdata: - fitting_net["resnet_dt"] = jdata["resnet_dt"] - if "fitting_resnet_dt" in jdata: - fitting_net["resnet_dt"] = jdata["fitting_resnet_dt"] - return fitting_net - - -def _learning_rate(jdata: Dict[str, Any]) -> Dict[str, Any]: - """Convert data to v1 input for learning rate section. - - Parameters - ---------- - jdata : Dict[str, Any] - parsed input json/yaml data - - Returns - ------- - Dict[str, Any] - dict with learning rate parameters - """ - learning_rate = {} - learning_rate["type"] = "exp" - _jcopy(jdata, learning_rate, ("decay_steps", "decay_rate", "start_lr")) - return learning_rate - - -def _loss(jdata: Dict[str, Any]) -> Dict[str, Any]: - """Convert data to v1 input for loss function. - - Parameters - ---------- - jdata : Dict[str, Any] - parsed input json/yaml data - - Returns - ------- - Dict[str, Any] - dict with loss function parameters - """ - loss: Dict[str, Any] = {} - _jcopy( - jdata, - loss, - ( - "start_pref_e", - "limit_pref_e", - "start_pref_f", - "limit_pref_f", - "start_pref_v", - "limit_pref_v", - ), - ) - if "start_pref_ae" in jdata: - loss["start_pref_ae"] = jdata["start_pref_ae"] - if "limit_pref_ae" in jdata: - loss["limit_pref_ae"] = jdata["limit_pref_ae"] - return loss - - -def _training(jdata: Dict[str, Any]) -> Dict[str, Any]: - """Convert data to v1 input for training. 
- - Parameters - ---------- - jdata : Dict[str, Any] - parsed input json/yaml data - - Returns - ------- - Dict[str, Any] - dict with training parameters - """ - training = {} - seed = jdata.get("seed", None) - if seed is not None: - training["seed"] = seed - - _jcopy(jdata, training, ("systems", "set_prefix", "stop_batch", "batch_size")) - training["disp_file"] = "lcurve.out" - if "disp_file" in jdata: - training["disp_file"] = jdata["disp_file"] - training["disp_freq"] = j_must_have(jdata, "disp_freq") - training["numb_test"] = j_must_have(jdata, "numb_test") - training["save_freq"] = j_must_have(jdata, "save_freq") - training["save_ckpt"] = j_must_have(jdata, "save_ckpt") - training["disp_training"] = j_must_have(jdata, "disp_training") - training["time_training"] = j_must_have(jdata, "time_training") - if "profiling" in jdata: - training["profiling"] = jdata["profiling"] - if training["profiling"]: - training["profiling_file"] = j_must_have(jdata, "profiling_file") - return training - - -def _jcopy(src: Dict[str, Any], dst: Dict[str, Any], keys: Sequence[str]): - """Copy specified keys from one dict to another. - - Parameters - ---------- - src : Dict[str, Any] - source dictionary - dst : Dict[str, Any] - destination dictionary, will be modified in place - keys : Sequence[str] - list of keys to copy - """ - for k in keys: - dst[k] = src[k] - - -def remove_decay_rate(jdata: Dict[str, Any]): - """Convert decay_rate to stop_lr. 
- - Parameters - ---------- - jdata : Dict[str, Any] - input data - """ - lr = jdata["learning_rate"] - if "decay_rate" in lr: - decay_rate = lr["decay_rate"] - start_lr = lr["start_lr"] - stop_step = jdata["training"]["stop_batch"] - decay_steps = lr["decay_steps"] - stop_lr = np.exp(np.log(decay_rate) * (stop_step / decay_steps)) * start_lr - lr["stop_lr"] = stop_lr - lr.pop("decay_rate") - - -def convert_input_v1_v2( - jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None -) -> Dict[str, Any]: - tr_cfg = jdata["training"] - tr_data_keys = { - "systems", - "set_prefix", - "batch_size", - "sys_prob", - "auto_prob", - # alias included - "sys_weights", - "auto_prob_style", - } - - tr_data_cfg = {k: v for k, v in tr_cfg.items() if k in tr_data_keys} - new_tr_cfg = {k: v for k, v in tr_cfg.items() if k not in tr_data_keys} - new_tr_cfg["training_data"] = tr_data_cfg - if "training_data" in tr_cfg: - raise RuntimeError( - "Both v1 (training/systems) and v2 (training/training_data) parameters are given." - ) - - jdata["training"] = new_tr_cfg - - # remove deprecated arguments - remove_decay_rate(jdata) - - if warning: - _warning_input_v1_v2(dump) - if dump is not None: - with open(dump, "w") as fp: - json.dump(jdata, fp, indent=4) - - return jdata - - -def _warning_input_v1_v2(fname: Optional[Union[str, Path]]): - msg = ( - "It seems that you are using a deepmd-kit input of version 1.x.x, " - "which is deprecated. we have converted the input to >2.0.0 compatible" - ) - if fname is not None: - msg += f", and output it to file {fname}" - warnings.warn(msg) - - -def deprecate_numb_test( - jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None -) -> Dict[str, Any]: - """Deprecate `numb_test` since v2.1. It has taken no effect since v2.0. - - See `#1243 `_. 
- - Parameters - ---------- - jdata : Dict[str, Any] - loaded json/yaml file - warning : bool, optional - whether to show deprecation warning, by default True - dump : Optional[Union[str, Path]], optional - whether to dump converted file, by default None - - Returns - ------- - Dict[str, Any] - converted output - """ - try: - jdata.get("training", {}).pop("numb_test") - except KeyError: - pass - else: - if warning: - warnings.warn( - "The argument training->numb_test has been deprecated since v2.0.0. " - "Use training->validation_data->batch_size instead." - ) - - if dump is not None: - with open(dump, "w") as fp: - json.dump(jdata, fp, indent=4) - return jdata - - -def update_deepmd_input( - jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None -) -> Dict[str, Any]: - def is_deepmd_v0_input(jdata): - return "model" not in jdata.keys() - - def is_deepmd_v1_input(jdata): - return "systems" in j_must_have(jdata, "training").keys() - - if is_deepmd_v0_input(jdata): - jdata = convert_input_v0_v1(jdata, warning, None) - jdata = convert_input_v1_v2(jdata, False, None) - jdata = deprecate_numb_test(jdata, False, dump) - elif is_deepmd_v1_input(jdata): - jdata = convert_input_v1_v2(jdata, warning, None) - jdata = deprecate_numb_test(jdata, False, dump) - else: - jdata = deprecate_numb_test(jdata, warning, dump) - - return jdata diff --git a/deepmd_utils/utils/data.py b/deepmd_utils/utils/data.py deleted file mode 100644 index 2689257e16..0000000000 --- a/deepmd_utils/utils/data.py +++ /dev/null @@ -1,614 +0,0 @@ -#!/usr/bin/env python3 - -# SPDX-License-Identifier: LGPL-3.0-or-later -import logging -from typing import ( - List, - Optional, -) - -import numpy as np - -from deepmd_utils.env import ( - GLOBAL_ENER_FLOAT_PRECISION, - GLOBAL_NP_FLOAT_PRECISION, -) -from deepmd_utils.utils import random as dp_random -from deepmd_utils.utils.path import ( - DPPath, -) - -log = logging.getLogger(__name__) - - -class DeepmdData: - """Class for a data 
system. - - It loads data from hard disk, and mantains the data as a `data_dict` - - Parameters - ---------- - sys_path - Path to the data system - set_prefix - Prefix for the directories of different sets - shuffle_test - If the test data are shuffled - type_map - Gives the name of different atom types - optional_type_map - If the type_map.raw in each system is optional - modifier - Data modifier that has the method `modify_data` - trn_all_set - Use all sets as training dataset. Otherwise, if the number of sets is more than 1, the last set is left for test. - sort_atoms : bool - Sort atoms by atom types. Required to enable when the data is directly feeded to - descriptors except mixed types. - """ - - def __init__( - self, - sys_path: str, - set_prefix: str = "set", - shuffle_test: bool = True, - type_map: Optional[List[str]] = None, - optional_type_map: bool = True, - modifier=None, - trn_all_set: bool = False, - sort_atoms: bool = True, - ): - """Constructor.""" - root = DPPath(sys_path) - self.dirs = root.glob(set_prefix + ".*") - if not len(self.dirs): - raise FileNotFoundError(f"No {set_prefix}.* is found in {sys_path}") - self.dirs.sort() - # check mix_type format - error_format_msg = ( - "if one of the set is of mixed_type format, " - "then all of the sets in this system should be of mixed_type format!" - ) - self.mixed_type = self._check_mode(self.dirs[0]) - for set_item in self.dirs[1:]: - assert self._check_mode(set_item) == self.mixed_type, error_format_msg - # load atom type - self.atom_type = self._load_type(root) - self.natoms = len(self.atom_type) - # load atom type map - self.type_map = self._load_type_map(root) - assert ( - optional_type_map or self.type_map is not None - ), f"System {sys_path} must have type_map.raw in this mode! 
" - if self.type_map is not None: - assert len(self.type_map) >= max(self.atom_type) + 1 - # check pbc - self.pbc = self._check_pbc(root) - # enforce type_map if necessary - self.enforce_type_map = False - if type_map is not None and self.type_map is not None and len(type_map): - if not self.mixed_type: - atom_type_ = [ - type_map.index(self.type_map[ii]) for ii in self.atom_type - ] - self.atom_type = np.array(atom_type_, dtype=np.int32) - else: - self.enforce_type_map = True - sorter = np.argsort(type_map) - self.type_idx_map = np.array( - sorter[np.searchsorted(type_map, self.type_map, sorter=sorter)] - ) - # padding for virtual atom - self.type_idx_map = np.append( - self.type_idx_map, np.array([-1], dtype=np.int32) - ) - self.type_map = type_map - if type_map is None and self.type_map is None and self.mixed_type: - raise RuntimeError("mixed_type format must have type_map!") - # make idx map - self.sort_atoms = sort_atoms - self.idx_map = self._make_idx_map(self.atom_type) - # train dirs - self.test_dir = self.dirs[-1] - if trn_all_set: - self.train_dirs = self.dirs - else: - if len(self.dirs) == 1: - self.train_dirs = self.dirs - else: - self.train_dirs = self.dirs[:-1] - self.data_dict = {} - # add box and coord - self.add("box", 9, must=self.pbc) - self.add("coord", 3, atomic=True, must=True) - # the training times of each frame - self.add("numb_copy", 1, must=False, default=1, dtype=int) - # set counters - self.set_count = 0 - self.iterator = 0 - self.shuffle_test = shuffle_test - # set modifier - self.modifier = modifier - - def add( - self, - key: str, - ndof: int, - atomic: bool = False, - must: bool = False, - high_prec: bool = False, - type_sel: Optional[List[int]] = None, - repeat: int = 1, - default: float = 0.0, - dtype: Optional[np.dtype] = None, - ): - """Add a data item that to be loaded. - - Parameters - ---------- - key - The key of the item. 
The corresponding data is stored in `sys_path/set.*/key.npy` - ndof - The number of dof - atomic - The item is an atomic property. - If False, the size of the data should be nframes x ndof - If True, the size of data should be nframes x natoms x ndof - must - The data file `sys_path/set.*/key.npy` must exist. - If must is False and the data file does not exist, the `data_dict[find_key]` is set to 0.0 - high_prec - Load the data and store in float64, otherwise in float32 - type_sel - Select certain type of atoms - repeat - The data will be repeated `repeat` times. - default : float, default=0. - default value of data - dtype : np.dtype, optional - the dtype of data, overwrites `high_prec` if provided - """ - self.data_dict[key] = { - "ndof": ndof, - "atomic": atomic, - "must": must, - "high_prec": high_prec, - "type_sel": type_sel, - "repeat": repeat, - "reduce": None, - "default": default, - "dtype": dtype, - } - return self - - def reduce(self, key_out: str, key_in: str): - """Generate a new item from the reduction of another atom. 
- - Parameters - ---------- - key_out - The name of the reduced item - key_in - The name of the data item to be reduced - """ - assert key_in in self.data_dict, "cannot find input key" - assert self.data_dict[key_in]["atomic"], "reduced property should be atomic" - assert key_out not in self.data_dict, "output key should not have been added" - assert ( - self.data_dict[key_in]["repeat"] == 1 - ), "reduced proerties should not have been repeated" - - self.data_dict[key_out] = { - "ndof": self.data_dict[key_in]["ndof"], - "atomic": False, - "must": True, - "high_prec": True, - "type_sel": None, - "repeat": 1, - "reduce": key_in, - } - return self - - def get_data_dict(self) -> dict: - """Get the `data_dict`.""" - return self.data_dict - - def check_batch_size(self, batch_size): - """Check if the system can get a batch of data with `batch_size` frames.""" - for ii in self.train_dirs: - if self.data_dict["coord"]["high_prec"]: - tmpe = ( - (ii / "coord.npy").load_numpy().astype(GLOBAL_ENER_FLOAT_PRECISION) - ) - else: - tmpe = (ii / "coord.npy").load_numpy().astype(GLOBAL_NP_FLOAT_PRECISION) - if tmpe.ndim == 1: - tmpe = tmpe.reshape([1, -1]) - if tmpe.shape[0] < batch_size: - return ii, tmpe.shape[0] - return None - - def check_test_size(self, test_size): - """Check if the system can get a test dataset with `test_size` frames.""" - if self.data_dict["coord"]["high_prec"]: - tmpe = ( - (self.test_dir / "coord.npy") - .load_numpy() - .astype(GLOBAL_ENER_FLOAT_PRECISION) - ) - else: - tmpe = ( - (self.test_dir / "coord.npy") - .load_numpy() - .astype(GLOBAL_NP_FLOAT_PRECISION) - ) - if tmpe.ndim == 1: - tmpe = tmpe.reshape([1, -1]) - if tmpe.shape[0] < test_size: - return self.test_dir, tmpe.shape[0] - else: - return None - - def get_batch(self, batch_size: int) -> dict: - """Get a batch of data with `batch_size` frames. The frames are randomly picked from the data system. 
- - Parameters - ---------- - batch_size - size of the batch - """ - if hasattr(self, "batch_set"): - set_size = self.batch_set["coord"].shape[0] - else: - set_size = 0 - if self.iterator + batch_size > set_size: - self._load_batch_set(self.train_dirs[self.set_count % self.get_numb_set()]) - self.set_count += 1 - set_size = self.batch_set["coord"].shape[0] - iterator_1 = self.iterator + batch_size - if iterator_1 >= set_size: - iterator_1 = set_size - idx = np.arange(self.iterator, iterator_1) - self.iterator += batch_size - ret = self._get_subdata(self.batch_set, idx) - return ret - - def get_test(self, ntests: int = -1) -> dict: - """Get the test data with `ntests` frames. - - Parameters - ---------- - ntests - Size of the test data set. If `ntests` is -1, all test data will be get. - """ - if not hasattr(self, "test_set"): - self._load_test_set(self.test_dir, self.shuffle_test) - if ntests == -1: - idx = None - else: - ntests_ = ( - ntests - if ntests < self.test_set["type"].shape[0] - else self.test_set["type"].shape[0] - ) - # print('ntest', self.test_set['type'].shape[0], ntests, ntests_) - idx = np.arange(ntests_) - ret = self._get_subdata(self.test_set, idx=idx) - if self.modifier is not None: - self.modifier.modify_data(ret, self) - return ret - - def get_ntypes(self) -> int: - """Number of atom types in the system.""" - if self.type_map is not None: - return len(self.type_map) - else: - return max(self.get_atom_type()) + 1 - - def get_type_map(self) -> List[str]: - """Get the type map.""" - return self.type_map - - def get_atom_type(self) -> List[int]: - """Get atom types.""" - return self.atom_type - - def get_numb_set(self) -> int: - """Get number of training sets.""" - return len(self.train_dirs) - - def get_numb_batch(self, batch_size: int, set_idx: int) -> int: - """Get the number of batches in a set.""" - data = self._load_set(self.train_dirs[set_idx]) - ret = data["coord"].shape[0] // batch_size - if ret == 0: - ret = 1 - return ret - - def 
get_sys_numb_batch(self, batch_size: int) -> int: - """Get the number of batches in the data system.""" - ret = 0 - for ii in range(len(self.train_dirs)): - ret += self.get_numb_batch(batch_size, ii) - return ret - - def get_natoms(self): - """Get number of atoms.""" - return len(self.atom_type) - - def get_natoms_vec(self, ntypes: int): - """Get number of atoms and number of atoms in different types. - - Parameters - ---------- - ntypes - Number of types (may be larger than the actual number of types in the system). - - Returns - ------- - natoms - natoms[0]: number of local atoms - natoms[1]: total number of atoms held by this processor - natoms[i]: 2 <= i < Ntypes+2, number of type i atoms - """ - natoms, natoms_vec = self._get_natoms_2(ntypes) - tmp = [natoms, natoms] - tmp = np.append(tmp, natoms_vec) - return tmp.astype(np.int32) - - def avg(self, key): - """Return the average value of an item.""" - if key not in self.data_dict.keys(): - raise RuntimeError("key %s has not been added" % key) - info = self.data_dict[key] - ndof = info["ndof"] - eners = [] - for ii in self.train_dirs: - data = self._load_set(ii) - ei = data[key].reshape([-1, ndof]) - eners.append(ei) - eners = np.concatenate(eners, axis=0) - if eners.size == 0: - return 0 - else: - return np.average(eners, axis=0) - - def _idx_map_sel(self, atom_type, type_sel): - new_types = [] - for ii in atom_type: - if ii in type_sel: - new_types.append(ii) - new_types = np.array(new_types, dtype=int) - natoms = new_types.shape[0] - idx = np.arange(natoms) - idx_map = np.lexsort((idx, new_types)) - return idx_map - - def _get_natoms_2(self, ntypes): - sample_type = self.atom_type - natoms = len(sample_type) - natoms_vec = np.zeros(ntypes).astype(int) - for ii in range(ntypes): - natoms_vec[ii] = np.count_nonzero(sample_type == ii) - return natoms, natoms_vec - - def _get_subdata(self, data, idx=None): - new_data = {} - for ii in data: - dd = data[ii] - if "find_" in ii: - new_data[ii] = dd - else: - if idx 
is not None: - new_data[ii] = dd[idx] - else: - new_data[ii] = dd - return new_data - - def _load_batch_set(self, set_name: DPPath): - if not hasattr(self, "batch_set") or self.get_numb_set() > 1: - self.batch_set = self._load_set(set_name) - if self.modifier is not None: - self.modifier.modify_data(self.batch_set, self) - self.batch_set, _ = self._shuffle_data(self.batch_set) - self.reset_get_batch() - - def reset_get_batch(self): - self.iterator = 0 - - def _load_test_set(self, set_name: DPPath, shuffle_test): - self.test_set = self._load_set(set_name) - if shuffle_test: - self.test_set, _ = self._shuffle_data(self.test_set) - - def _shuffle_data(self, data): - ret = {} - nframes = data["coord"].shape[0] - idx = np.arange(nframes) - # the training times of each frame - idx = np.repeat(idx, np.reshape(data["numb_copy"], (nframes,))) - dp_random.shuffle(idx) - for kk in data: - if ( - type(data[kk]) == np.ndarray - and len(data[kk].shape) == 2 - and data[kk].shape[0] == nframes - and "find_" not in kk - ): - ret[kk] = data[kk][idx] - else: - ret[kk] = data[kk] - return ret, idx - - def _load_set(self, set_name: DPPath): - # get nframes - if not isinstance(set_name, DPPath): - set_name = DPPath(set_name) - path = set_name / "coord.npy" - if self.data_dict["coord"]["high_prec"]: - coord = path.load_numpy().astype(GLOBAL_ENER_FLOAT_PRECISION) - else: - coord = path.load_numpy().astype(GLOBAL_NP_FLOAT_PRECISION) - if coord.ndim == 1: - coord = coord.reshape([1, -1]) - nframes = coord.shape[0] - assert coord.shape[1] == self.data_dict["coord"]["ndof"] * self.natoms - # load keys - data = {} - for kk in self.data_dict.keys(): - if self.data_dict[kk]["reduce"] is None: - data["find_" + kk], data[kk] = self._load_data( - set_name, - kk, - nframes, - self.data_dict[kk]["ndof"], - atomic=self.data_dict[kk]["atomic"], - high_prec=self.data_dict[kk]["high_prec"], - must=self.data_dict[kk]["must"], - type_sel=self.data_dict[kk]["type_sel"], - 
repeat=self.data_dict[kk]["repeat"], - default=self.data_dict[kk]["default"], - dtype=self.data_dict[kk]["dtype"], - ) - for kk in self.data_dict.keys(): - if self.data_dict[kk]["reduce"] is not None: - k_in = self.data_dict[kk]["reduce"] - ndof = self.data_dict[kk]["ndof"] - data["find_" + kk] = data["find_" + k_in] - tmp_in = data[k_in].astype(GLOBAL_ENER_FLOAT_PRECISION) - data[kk] = np.sum( - np.reshape(tmp_in, [nframes, self.natoms, ndof]), axis=1 - ) - - if self.mixed_type: - # nframes x natoms - atom_type_mix = self._load_type_mix(set_name) - if self.enforce_type_map: - try: - atom_type_mix_ = self.type_idx_map[atom_type_mix].astype(np.int32) - except IndexError as e: - raise IndexError( - "some types in 'real_atom_types.npy' of set {} are not contained in {} types!".format( - set_name, self.get_ntypes() - ) - ) from e - atom_type_mix = atom_type_mix_ - real_type = atom_type_mix.reshape([nframes, self.natoms]) - data["type"] = real_type - natoms = data["type"].shape[1] - # nframes x ntypes - atom_type_nums = np.array( - [(real_type == i).sum(axis=-1) for i in range(self.get_ntypes())], - dtype=np.int32, - ).T - ghost_nums = np.array( - [(real_type == -1).sum(axis=-1)], - dtype=np.int32, - ).T - assert ( - atom_type_nums.sum(axis=-1) + ghost_nums.sum(axis=-1) == natoms - ).all(), "some types in 'real_atom_types.npy' of set {} are not contained in {} types!".format( - set_name, self.get_ntypes() - ) - data["real_natoms_vec"] = np.concatenate( - ( - np.tile(np.array([natoms, natoms], dtype=np.int32), (nframes, 1)), - atom_type_nums, - ), - axis=-1, - ) - else: - data["type"] = np.tile(self.atom_type[self.idx_map], (nframes, 1)) - - return data - - def _load_data( - self, - set_name, - key, - nframes, - ndof_, - atomic=False, - must=True, - repeat=1, - high_prec=False, - type_sel=None, - default: float = 0.0, - dtype: Optional[np.dtype] = None, - ): - if atomic: - natoms = self.natoms - idx_map = self.idx_map - # if type_sel, then revise natoms and idx_map - if 
type_sel is not None: - natoms = 0 - for jj in type_sel: - natoms += np.sum(self.atom_type == jj) - idx_map = self._idx_map_sel(self.atom_type, type_sel) - ndof = ndof_ * natoms - else: - ndof = ndof_ - if dtype is not None: - pass - elif high_prec: - dtype = GLOBAL_ENER_FLOAT_PRECISION - else: - dtype = GLOBAL_NP_FLOAT_PRECISION - path = set_name / (key + ".npy") - if path.is_file(): - data = path.load_numpy().astype(dtype) - try: # YWolfeee: deal with data shape error - if atomic: - data = data.reshape([nframes, natoms, -1]) - data = data[:, idx_map, :] - data = data.reshape([nframes, -1]) - data = np.reshape(data, [nframes, ndof]) - except ValueError as err_message: - explanation = "This error may occur when your label mismatch it's name, i.e. you might store global tensor in `atomic_tensor.npy` or atomic tensor in `tensor.npy`." - log.error(str(err_message)) - log.error(explanation) - raise ValueError(str(err_message) + ". " + explanation) - if repeat != 1: - data = np.repeat(data, repeat).reshape([nframes, -1]) - return np.float32(1.0), data - elif must: - raise RuntimeError("%s not found!" 
% path) - else: - data = np.full([nframes, ndof], default, dtype=dtype) - if repeat != 1: - data = np.repeat(data, repeat).reshape([nframes, -1]) - return np.float32(0.0), data - - def _load_type(self, sys_path: DPPath): - atom_type = (sys_path / "type.raw").load_txt(ndmin=1).astype(np.int32) - return atom_type - - def _load_type_mix(self, set_name: DPPath): - type_path = set_name / "real_atom_types.npy" - real_type = type_path.load_numpy().astype(np.int32).reshape([-1, self.natoms]) - return real_type - - def _make_idx_map(self, atom_type): - natoms = atom_type.shape[0] - idx = np.arange(natoms) - if self.sort_atoms: - idx_map = np.lexsort((idx, atom_type)) - else: - idx_map = idx - return idx_map - - def _load_type_map(self, sys_path: DPPath): - fname = sys_path / "type_map.raw" - if fname.is_file(): - return fname.load_txt(dtype=str, ndmin=1).tolist() - else: - return None - - def _check_pbc(self, sys_path: DPPath): - pbc = True - if (sys_path / "nopbc").is_file(): - pbc = False - return pbc - - def _check_mode(self, set_path: DPPath): - return (set_path / "real_atom_types.npy").is_file() diff --git a/deepmd_utils/utils/data_system.py b/deepmd_utils/utils/data_system.py deleted file mode 100644 index f83f587590..0000000000 --- a/deepmd_utils/utils/data_system.py +++ /dev/null @@ -1,654 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -import collections -import logging -import warnings -from functools import ( - lru_cache, -) -from typing import ( - List, - Optional, -) - -import numpy as np - -import deepmd_utils.utils.random as dp_random -from deepmd_utils.common import ( - make_default_mesh, -) -from deepmd_utils.env import ( - GLOBAL_NP_FLOAT_PRECISION, -) -from deepmd_utils.utils.data import ( - DeepmdData, -) - -log = logging.getLogger(__name__) - - -class DeepmdDataSystem: - """Class for manipulating many data systems. 
- - It is implemented with the help of DeepmdData - """ - - def __init__( - self, - systems: List[str], - batch_size: int, - test_size: int, - rcut: Optional[float] = None, - set_prefix: str = "set", - shuffle_test: bool = True, - type_map: Optional[List[str]] = None, - optional_type_map: bool = True, - modifier=None, - trn_all_set=False, - sys_probs=None, - auto_prob_style="prob_sys_size", - sort_atoms: bool = True, - ): - """Constructor. - - Parameters - ---------- - systems - Specifying the paths to systems - batch_size - The batch size - test_size - The size of test data - rcut - The cut-off radius. Not used. - set_prefix - Prefix for the directories of different sets - shuffle_test - If the test data are shuffled - type_map - Gives the name of different atom types - optional_type_map - If the type_map.raw in each system is optional - modifier - Data modifier that has the method `modify_data` - trn_all_set - Use all sets as training dataset. Otherwise, if the number of sets is more than 1, the last set is left for test. - sys_probs : list of float - The probabilitis of systems to get the batch. - Summation of positive elements of this list should be no greater than 1. - Element of this list can be negative, the probability of the corresponding system is determined - automatically by the number of batches in the system. - auto_prob_style : str - Determine the probability of systems automatically. The method is assigned by this key and can be - - "prob_uniform" : the probability all the systems are equal, namely 1.0/self.get_nsystems() - - "prob_sys_size" : the probability of a system is proportional to the number of batches in the system - - "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : - the list of systems is devided into blocks. 
A block is specified by `stt_idx:end_idx:weight`, - where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, - the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional - to the number of batches in the system. - sort_atoms : bool - Sort atoms by atom types. Required to enable when the data is directly feeded to - descriptors except mixed types. - """ - # init data - del rcut - self.system_dirs = systems - self.nsystems = len(self.system_dirs) - self.data_systems = [] - for ii in self.system_dirs: - self.data_systems.append( - DeepmdData( - ii, - set_prefix=set_prefix, - shuffle_test=shuffle_test, - type_map=type_map, - optional_type_map=optional_type_map, - modifier=modifier, - trn_all_set=trn_all_set, - sort_atoms=sort_atoms, - ) - ) - # check mix_type format - error_format_msg = ( - "if one of the system is of mixed_type format, " - "then all of the systems should be of mixed_type format!" 
- ) - if self.data_systems[0].mixed_type: - for data_sys in self.data_systems[1:]: - assert data_sys.mixed_type, error_format_msg - self.mixed_type = True - else: - for data_sys in self.data_systems[1:]: - assert not data_sys.mixed_type, error_format_msg - self.mixed_type = False - # batch size - self.batch_size = batch_size - is_auto_bs = False - self.mixed_systems = False - if isinstance(self.batch_size, int): - self.batch_size = self.batch_size * np.ones(self.nsystems, dtype=int) - elif isinstance(self.batch_size, str): - words = self.batch_size.split(":") - if "auto" == words[0]: - is_auto_bs = True - rule = 32 - if len(words) == 2: - rule = int(words[1]) - self.batch_size = self._make_auto_bs(rule) - elif "mixed" == words[0]: - self.mixed_type = True - self.mixed_systems = True - if len(words) == 2: - rule = int(words[1]) - else: - raise RuntimeError("batch size must be specified for mixed systems") - self.batch_size = rule * np.ones(self.nsystems, dtype=int) - else: - raise RuntimeError("unknown batch_size rule " + words[0]) - elif isinstance(self.batch_size, list): - pass - else: - raise RuntimeError("invalid batch_size") - assert isinstance(self.batch_size, (list, np.ndarray)) - assert len(self.batch_size) == self.nsystems - - # natoms, nbatches - ntypes = [] - for ii in self.data_systems: - ntypes.append(ii.get_ntypes()) - self.sys_ntypes = max(ntypes) - self.natoms = [] - self.natoms_vec = [] - self.nbatches = [] - type_map_list = [] - for ii in range(self.nsystems): - self.natoms.append(self.data_systems[ii].get_natoms()) - self.natoms_vec.append( - self.data_systems[ii].get_natoms_vec(self.sys_ntypes).astype(int) - ) - self.nbatches.append( - self.data_systems[ii].get_sys_numb_batch(self.batch_size[ii]) - ) - type_map_list.append(self.data_systems[ii].get_type_map()) - self.type_map = self._check_type_map_consistency(type_map_list) - - # ! 
altered by Marián Rynik - # test size - # now test size can be set as a percentage of systems data or test size - # can be set for each system individualy in the same manner as batch - # size. This enables one to use systems with diverse number of - # structures and different number of atoms. - self.test_size = test_size - if isinstance(self.test_size, int): - self.test_size = self.test_size * np.ones(self.nsystems, dtype=int) - elif isinstance(self.test_size, str): - words = self.test_size.split("%") - try: - percent = int(words[0]) - except ValueError: - raise RuntimeError("unknown test_size rule " + words[0]) - self.test_size = self._make_auto_ts(percent) - elif isinstance(self.test_size, list): - pass - else: - raise RuntimeError("invalid test_size") - assert isinstance(self.test_size, (list, np.ndarray)) - assert len(self.test_size) == self.nsystems - - # init pick idx - self.pick_idx = 0 - - # derive system probabilities - self.sys_probs = None - self.set_sys_probs(sys_probs, auto_prob_style) - - # check batch and test size - for ii in range(self.nsystems): - chk_ret = self.data_systems[ii].check_batch_size(self.batch_size[ii]) - if chk_ret is not None and not is_auto_bs and not self.mixed_systems: - warnings.warn( - "system %s required batch size is larger than the size of the dataset %s (%d > %d)" - % ( - self.system_dirs[ii], - chk_ret[0], - self.batch_size[ii], - chk_ret[1], - ) - ) - chk_ret = self.data_systems[ii].check_test_size(self.test_size[ii]) - if chk_ret is not None and not is_auto_bs and not self.mixed_systems: - warnings.warn( - "system %s required test size is larger than the size of the dataset %s (%d > %d)" - % (self.system_dirs[ii], chk_ret[0], self.test_size[ii], chk_ret[1]) - ) - - def _load_test(self, ntests=-1): - self.test_data = collections.defaultdict(list) - for ii in range(self.nsystems): - test_system_data = self.data_systems[ii].get_test(ntests=ntests) - for nn in test_system_data: - 
self.test_data[nn].append(test_system_data[nn]) - - @property - @lru_cache(maxsize=None) - def default_mesh(self) -> List[np.ndarray]: - """Mesh for each system.""" - return [ - make_default_mesh( - self.data_systems[ii].pbc, self.data_systems[ii].mixed_type - ) - for ii in range(self.nsystems) - ] - - def compute_energy_shift(self, rcond=None, key="energy"): - sys_ener = [] - for ss in self.data_systems: - sys_ener.append(ss.avg(key)) - sys_ener = np.concatenate(sys_ener) - sys_tynatom = np.array(self.natoms_vec, dtype=GLOBAL_NP_FLOAT_PRECISION) - sys_tynatom = np.reshape(sys_tynatom, [self.nsystems, -1]) - sys_tynatom = sys_tynatom[:, 2:] - energy_shift, resd, rank, s_value = np.linalg.lstsq( - sys_tynatom, sys_ener, rcond=rcond - ) - return energy_shift - - def add_dict(self, adict: dict) -> None: - """Add items to the data system by a `dict`. - `adict` should have items like - .. code-block:: python. - - adict[key] = { - "ndof": ndof, - "atomic": atomic, - "must": must, - "high_prec": high_prec, - "type_sel": type_sel, - "repeat": repeat, - } - - For the explaination of the keys see `add` - """ - for kk in adict: - self.add( - kk, - adict[kk]["ndof"], - atomic=adict[kk]["atomic"], - must=adict[kk]["must"], - high_prec=adict[kk]["high_prec"], - type_sel=adict[kk]["type_sel"], - repeat=adict[kk]["repeat"], - default=adict[kk]["default"], - ) - - def add( - self, - key: str, - ndof: int, - atomic: bool = False, - must: bool = False, - high_prec: bool = False, - type_sel: Optional[List[int]] = None, - repeat: int = 1, - default: float = 0.0, - ): - """Add a data item that to be loaded. - - Parameters - ---------- - key - The key of the item. The corresponding data is stored in `sys_path/set.*/key.npy` - ndof - The number of dof - atomic - The item is an atomic property. - If False, the size of the data should be nframes x ndof - If True, the size of data should be nframes x natoms x ndof - must - The data file `sys_path/set.*/key.npy` must exist. 
- If must is False and the data file does not exist, the `data_dict[find_key]` is set to 0.0 - high_prec - Load the data and store in float64, otherwise in float32 - type_sel - Select certain type of atoms - repeat - The data will be repeated `repeat` times. - default, default=0. - Default value of data - """ - for ii in self.data_systems: - ii.add( - key, - ndof, - atomic=atomic, - must=must, - high_prec=high_prec, - repeat=repeat, - type_sel=type_sel, - default=default, - ) - - def reduce(self, key_out, key_in): - """Generate a new item from the reduction of another atom. - - Parameters - ---------- - key_out - The name of the reduced item - key_in - The name of the data item to be reduced - """ - for ii in self.data_systems: - ii.reduce(key_out, key_in) - - def get_data_dict(self, ii: int = 0) -> dict: - return self.data_systems[ii].get_data_dict() - - def set_sys_probs(self, sys_probs=None, auto_prob_style: str = "prob_sys_size"): - if sys_probs is None: - if auto_prob_style == "prob_uniform": - prob_v = 1.0 / float(self.nsystems) - probs = [prob_v for ii in range(self.nsystems)] - elif auto_prob_style[:13] == "prob_sys_size": - if auto_prob_style == "prob_sys_size": - prob_style = f"prob_sys_size;0:{self.get_nsystems()}:1.0" - else: - prob_style = auto_prob_style - probs = prob_sys_size_ext( - prob_style, self.get_nsystems(), self.nbatches - ) - else: - raise RuntimeError("Unknown auto prob style: " + auto_prob_style) - else: - probs = process_sys_probs(sys_probs, self.nbatches) - self.sys_probs = probs - - def get_batch(self, sys_idx: Optional[int] = None) -> dict: - # batch generation style altered by Ziyao Li: - # one should specify the "sys_prob" and "auto_prob_style" params - # via set_sys_prob() function. The sys_probs this function uses is - # defined as a private variable, self.sys_probs, initialized in __init__(). - # This is to optimize the (vain) efforts in evaluating sys_probs every batch. - """Get a batch of data from the data systems. 
- - Parameters - ---------- - sys_idx : int - The index of system from which the batch is get. - If sys_idx is not None, `sys_probs` and `auto_prob_style` are ignored - If sys_idx is None, automatically determine the system according to `sys_probs` or `auto_prob_style`, see the following. - This option does not work for mixed systems. - - Returns - ------- - dict - The batch data - """ - if not self.mixed_systems: - b_data = self.get_batch_standard(sys_idx) - else: - b_data = self.get_batch_mixed() - return b_data - - def get_batch_standard(self, sys_idx: Optional[int] = None) -> dict: - """Get a batch of data from the data systems in the standard way. - - Parameters - ---------- - sys_idx : int - The index of system from which the batch is get. - If sys_idx is not None, `sys_probs` and `auto_prob_style` are ignored - If sys_idx is None, automatically determine the system according to `sys_probs` or `auto_prob_style`, see the following. - - Returns - ------- - dict - The batch data - """ - if sys_idx is not None: - self.pick_idx = sys_idx - else: - # prob = self._get_sys_probs(sys_probs, auto_prob_style) - self.pick_idx = dp_random.choice(np.arange(self.nsystems), p=self.sys_probs) - b_data = self.data_systems[self.pick_idx].get_batch( - self.batch_size[self.pick_idx] - ) - b_data["natoms_vec"] = self.natoms_vec[self.pick_idx] - b_data["default_mesh"] = self.default_mesh[self.pick_idx] - return b_data - - def get_batch_mixed(self) -> dict: - """Get a batch of data from the data systems in the mixed way. 
- - Returns - ------- - dict - The batch data - """ - # mixed systems have a global batch size - batch_size = self.batch_size[0] - batch_data = [] - for _ in range(batch_size): - self.pick_idx = dp_random.choice(np.arange(self.nsystems), p=self.sys_probs) - bb_data = self.data_systems[self.pick_idx].get_batch(1) - bb_data["natoms_vec"] = self.natoms_vec[self.pick_idx] - bb_data["default_mesh"] = self.default_mesh[self.pick_idx] - batch_data.append(bb_data) - b_data = self._merge_batch_data(batch_data) - return b_data - - def _merge_batch_data(self, batch_data: List[dict]) -> dict: - """Merge batch data from different systems. - - Parameters - ---------- - batch_data : list of dict - A list of batch data from different systems. - - Returns - ------- - dict - The merged batch data. - """ - b_data = {} - max_natoms = max(bb["natoms_vec"][0] for bb in batch_data) - # natoms_vec - natoms_vec = np.zeros(2 + self.get_ntypes(), dtype=int) - natoms_vec[0:3] = max_natoms - b_data["natoms_vec"] = natoms_vec - # real_natoms_vec - real_natoms_vec = np.vstack([bb["natoms_vec"] for bb in batch_data]) - b_data["real_natoms_vec"] = real_natoms_vec - # type - type_vec = np.full((len(batch_data), max_natoms), -1, dtype=int) - for ii, bb in enumerate(batch_data): - type_vec[ii, : bb["type"].shape[1]] = bb["type"][0] - b_data["type"] = type_vec - # default_mesh - default_mesh = np.mean([bb["default_mesh"] for bb in batch_data], axis=0) - b_data["default_mesh"] = default_mesh - # other data - data_dict = self.get_data_dict(0) - for kk, vv in data_dict.items(): - if kk not in batch_data[0]: - continue - b_data["find_" + kk] = batch_data[0]["find_" + kk] - if not vv["atomic"]: - b_data[kk] = np.concatenate([bb[kk] for bb in batch_data], axis=0) - else: - b_data[kk] = np.zeros( - (len(batch_data), max_natoms * vv["ndof"] * vv["repeat"]), - dtype=batch_data[0][kk].dtype, - ) - for ii, bb in enumerate(batch_data): - b_data[kk][ii, : bb[kk].shape[1]] = bb[kk][0] - return b_data - - # ! 
altered by Marián Rynik - def get_test(self, sys_idx: Optional[int] = None, n_test: int = -1): # depreciated - """Get test data from the the data systems. - - Parameters - ---------- - sys_idx - The test dat of system with index `sys_idx` will be returned. - If is None, the currently selected system will be returned. - n_test - Number of test data. If set to -1 all test data will be get. - """ - if not hasattr(self, "test_data"): - self._load_test(ntests=n_test) - if sys_idx is not None: - idx = sys_idx - else: - idx = self.pick_idx - - test_system_data = {} - for nn in self.test_data: - test_system_data[nn] = self.test_data[nn][idx] - test_system_data["natoms_vec"] = self.natoms_vec[idx] - test_system_data["default_mesh"] = self.default_mesh[idx] - return test_system_data - - def get_sys_ntest(self, sys_idx=None): - """Get number of tests for the currently selected system, - or one defined by sys_idx. - """ - if sys_idx is not None: - return self.test_size[sys_idx] - else: - return self.test_size[self.pick_idx] - - def get_type_map(self) -> List[str]: - """Get the type map.""" - return self.type_map - - def get_nbatches(self) -> int: - """Get the total number of batches.""" - return self.nbatches - - def get_ntypes(self) -> int: - """Get the number of types.""" - return self.sys_ntypes - - def get_nsystems(self) -> int: - """Get the number of data systems.""" - return self.nsystems - - def get_sys(self, idx: int) -> DeepmdData: - """Get a certain data system.""" - return self.data_systems[idx] - - def get_batch_size(self) -> int: - """Get the batch size.""" - return self.batch_size - - def _format_name_length(self, name, width): - if len(name) <= width: - return "{: >{}}".format(name, width) - else: - name = name[-(width - 3) :] - name = "-- " + name - return name - - def print_summary(self, name): - # width 65 - sys_width = 42 - log.info( - f"---Summary of DataSystem: {name:13s}-----------------------------------------------" - ) - log.info("found %d system(s):" 
% self.nsystems) - log.info( - ("%s " % self._format_name_length("system", sys_width)) - + ("%6s %6s %6s %9s %3s" % ("natoms", "bch_sz", "n_bch", "prob", "pbc")) - ) - for ii in range(self.nsystems): - log.info( - "%s %6d %6d %6d %9.3e %3s" - % ( - self._format_name_length(self.system_dirs[ii], sys_width), - self.natoms[ii], - # TODO batch size * nbatches = number of structures - self.batch_size[ii], - self.nbatches[ii], - self.sys_probs[ii], - "T" if self.data_systems[ii].pbc else "F", - ) - ) - log.info( - "--------------------------------------------------------------------------------------" - ) - - def _make_auto_bs(self, rule): - bs = [] - for ii in self.data_systems: - ni = ii.get_natoms() - bsi = rule // ni - if bsi * ni < rule: - bsi += 1 - bs.append(bsi) - return bs - - # ! added by Marián Rynik - def _make_auto_ts(self, percent): - ts = [] - for ii in range(self.nsystems): - ni = self.batch_size[ii] * self.nbatches[ii] - tsi = int(ni * percent / 100) - ts.append(tsi) - - return ts - - def _check_type_map_consistency(self, type_map_list): - ret = [] - for ii in type_map_list: - if ii is not None: - min_len = min([len(ii), len(ret)]) - for idx in range(min_len): - if ii[idx] != ret[idx]: - raise RuntimeError(f"inconsistent type map: {ret!s} {ii!s}") - if len(ii) > len(ret): - ret = ii - return ret - - -def process_sys_probs(sys_probs, nbatch): - sys_probs = np.array(sys_probs) - type_filter = sys_probs >= 0 - assigned_sum_prob = np.sum(type_filter * sys_probs) - # 1e-8 is to handle floating point error; See #1917 - assert ( - assigned_sum_prob <= 1.0 + 1e-8 - ), "the sum of assigned probability should be less than 1" - rest_sum_prob = 1.0 - assigned_sum_prob - if not np.isclose(rest_sum_prob, 0): - rest_nbatch = (1 - type_filter) * nbatch - rest_prob = rest_sum_prob * rest_nbatch / np.sum(rest_nbatch) - ret_prob = rest_prob + type_filter * sys_probs - else: - ret_prob = sys_probs - assert np.isclose(np.sum(ret_prob), 1), "sum of probs should be 1" - return 
ret_prob - - -def prob_sys_size_ext(keywords, nsystems, nbatch): - block_str = keywords.split(";")[1:] - block_stt = [] - block_end = [] - block_weights = [] - for ii in block_str: - stt = int(ii.split(":")[0]) - end = int(ii.split(":")[1]) - weight = float(ii.split(":")[2]) - assert weight >= 0, "the weight of a block should be no less than 0" - block_stt.append(stt) - block_end.append(end) - block_weights.append(weight) - nblocks = len(block_str) - block_probs = np.array(block_weights) / np.sum(block_weights) - sys_probs = np.zeros([nsystems]) - for ii in range(nblocks): - nbatch_block = nbatch[block_stt[ii] : block_end[ii]] - tmp_prob = [float(i) for i in nbatch_block] / np.sum(nbatch_block) - sys_probs[block_stt[ii] : block_end[ii]] = tmp_prob * block_probs[ii] - return sys_probs diff --git a/deepmd_utils/utils/errors.py b/deepmd_utils/utils/errors.py deleted file mode 100644 index 11f42ede96..0000000000 --- a/deepmd_utils/utils/errors.py +++ /dev/null @@ -1,3 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -class OutOfMemoryError(Exception): - """This error is caused by out-of-memory (OOM).""" diff --git a/deepmd_utils/utils/pair_tab.py b/deepmd_utils/utils/pair_tab.py deleted file mode 100644 index 4451f53379..0000000000 --- a/deepmd_utils/utils/pair_tab.py +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env python3 - -# SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - Tuple, -) - -import numpy as np -from scipy.interpolate import ( - CubicSpline, -) - - -class PairTab: - """Pairwise tabulated potential. - - Parameters - ---------- - filename - File name for the short-range tabulated potential. - The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. - The first colume is the distance between atoms. - The second to the last columes are energies for pairs of certain types. - For example we have two atom types, 0 and 1. - The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly. 
- """ - - def __init__(self, filename: str) -> None: - """Constructor.""" - self.reinit(filename) - - def reinit(self, filename: str) -> None: - """Initialize the tabulated interaction. - - Parameters - ---------- - filename - File name for the short-range tabulated potential. - The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. - The first colume is the distance between atoms. - The second to the last columes are energies for pairs of certain types. - For example we have two atom types, 0 and 1. - The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly. - """ - self.vdata = np.loadtxt(filename) - self.rmin = self.vdata[0][0] - self.hh = self.vdata[1][0] - self.vdata[0][0] - self.nspline = self.vdata.shape[0] - 1 - ncol = self.vdata.shape[1] - 1 - n0 = (-1 + np.sqrt(1 + 8 * ncol)) * 0.5 - self.ntypes = int(n0 + 0.1) - assert self.ntypes * (self.ntypes + 1) // 2 == ncol, ( - "number of volumes provided in %s does not match guessed number of types %d" - % (filename, self.ntypes) - ) - self.tab_info = np.array([self.rmin, self.hh, self.nspline, self.ntypes]) - self.tab_data = self._make_data() - - def get(self) -> Tuple[np.array, np.array]: - """Get the serialized table.""" - return self.tab_info, self.tab_data - - def _make_data(self): - data = np.zeros([self.ntypes * self.ntypes * 4 * self.nspline]) - stride = 4 * self.nspline - idx_iter = 0 - xx = self.vdata[:, 0] - for t0 in range(self.ntypes): - for t1 in range(t0, self.ntypes): - vv = self.vdata[:, 1 + idx_iter] - cs = CubicSpline(xx, vv) - dd = cs(xx, 1) - dd *= self.hh - dtmp = np.zeros(stride) - for ii in range(self.nspline): - dtmp[ii * 4 + 0] = 2 * vv[ii] - 2 * vv[ii + 1] + dd[ii] + dd[ii + 1] - dtmp[ii * 4 + 1] = ( - -3 * vv[ii] + 3 * vv[ii + 1] - 2 * dd[ii] - dd[ii + 1] - ) - dtmp[ii * 4 + 2] = dd[ii] - dtmp[ii * 4 + 3] = vv[ii] - data[ - (t0 * self.ntypes + t1) * stride : (t0 * self.ntypes + t1) * stride - + stride - ] = dtmp - data[ - (t1 * self.ntypes + t0) * stride : (t1 
* self.ntypes + t0) * stride - + stride - ] = dtmp - idx_iter += 1 - return data diff --git a/deepmd_utils/utils/path.py b/deepmd_utils/utils/path.py deleted file mode 100644 index a8e4bc329f..0000000000 --- a/deepmd_utils/utils/path.py +++ /dev/null @@ -1,358 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -import os -from abc import ( - ABC, - abstractmethod, -) -from functools import ( - lru_cache, -) -from pathlib import ( - Path, -) -from typing import ( - List, - Optional, -) - -import h5py -import numpy as np -from wcmatch.glob import ( - globfilter, -) - - -class DPPath(ABC): - """The path class to data system (DeepmdData). - - Parameters - ---------- - path : str - path - """ - - def __new__(cls, path: str): - if cls is DPPath: - if os.path.isdir(path): - return super().__new__(DPOSPath) - elif os.path.isfile(path.split("#")[0]): - # assume h5 if it is not dir - # TODO: check if it is a real h5? or just check suffix? - return super().__new__(DPH5Path) - raise FileNotFoundError("%s not found" % path) - return super().__new__(cls) - - @abstractmethod - def load_numpy(self) -> np.ndarray: - """Load NumPy array. - - Returns - ------- - np.ndarray - loaded NumPy array - """ - - @abstractmethod - def load_txt(self, **kwargs) -> np.ndarray: - """Load NumPy array from text. - - Returns - ------- - np.ndarray - loaded NumPy array - """ - - @abstractmethod - def glob(self, pattern: str) -> List["DPPath"]: - """Search path using the glob pattern. - - Parameters - ---------- - pattern : str - glob pattern - - Returns - ------- - List[DPPath] - list of paths - """ - - @abstractmethod - def rglob(self, pattern: str) -> List["DPPath"]: - """This is like calling :meth:`DPPath.glob()` with `**/` added in front - of the given relative pattern. 
- - Parameters - ---------- - pattern : str - glob pattern - - Returns - ------- - List[DPPath] - list of paths - """ - - @abstractmethod - def is_file(self) -> bool: - """Check if self is file.""" - - @abstractmethod - def is_dir(self) -> bool: - """Check if self is directory.""" - - @abstractmethod - def __truediv__(self, key: str) -> "DPPath": - """Used for / operator.""" - - @abstractmethod - def __lt__(self, other: "DPPath") -> bool: - """Whether this DPPath is less than other for sorting.""" - - @abstractmethod - def __str__(self) -> str: - """Represent string.""" - - def __repr__(self) -> str: - return f"{type(self)} ({self!s})" - - def __eq__(self, other) -> bool: - return str(self) == str(other) - - def __hash__(self): - return hash(str(self)) - - -class DPOSPath(DPPath): - """The OS path class to data system (DeepmdData) for real directories. - - Parameters - ---------- - path : str - path - """ - - def __init__(self, path: str) -> None: - super().__init__() - if isinstance(path, Path): - self.path = path - else: - self.path = Path(path) - - def load_numpy(self) -> np.ndarray: - """Load NumPy array. - - Returns - ------- - np.ndarray - loaded NumPy array - """ - return np.load(str(self.path)) - - def load_txt(self, **kwargs) -> np.ndarray: - """Load NumPy array from text. - - Returns - ------- - np.ndarray - loaded NumPy array - """ - return np.loadtxt(str(self.path), **kwargs) - - def glob(self, pattern: str) -> List["DPPath"]: - """Search path using the glob pattern. - - Parameters - ---------- - pattern : str - glob pattern - - Returns - ------- - List[DPPath] - list of paths - """ - # currently DPOSPath will only derivative DPOSPath - # TODO: discuss if we want to mix DPOSPath and DPH5Path? - return [type(self)(p) for p in self.path.glob(pattern)] - - def rglob(self, pattern: str) -> List["DPPath"]: - """This is like calling :meth:`DPPath.glob()` with `**/` added in front - of the given relative pattern. 
- - Parameters - ---------- - pattern : str - glob pattern - - Returns - ------- - List[DPPath] - list of paths - """ - return [type(self)(p) for p in self.path.rglob(pattern)] - - def is_file(self) -> bool: - """Check if self is file.""" - return self.path.is_file() - - def is_dir(self) -> bool: - """Check if self is directory.""" - return self.path.is_dir() - - def __truediv__(self, key: str) -> "DPPath": - """Used for / operator.""" - return type(self)(self.path / key) - - def __lt__(self, other: "DPOSPath") -> bool: - """Whether this DPPath is less than other for sorting.""" - return self.path < other.path - - def __str__(self) -> str: - """Represent string.""" - return str(self.path) - - -class DPH5Path(DPPath): - """The path class to data system (DeepmdData) for HDF5 files. - - Notes - ----- - OS - HDF5 relationship: - directory - Group - file - Dataset - - Parameters - ---------- - path : str - path - """ - - def __init__(self, path: str) -> None: - super().__init__() - # we use "#" to split path - # so we do not support file names containing #... - s = path.split("#") - self.root_path = s[0] - self.root = self._load_h5py(s[0]) - # h5 path: default is the root path - self.name = s[1] if len(s) > 1 else "/" - - @classmethod - @lru_cache(None) - def _load_h5py(cls, path: str) -> h5py.File: - """Load hdf5 file. - - Parameters - ---------- - path : str - path to hdf5 file - """ - # this method has cache to avoid duplicated - # loading from different DPH5Path - # However the file will be never closed? - return h5py.File(path, "r") - - def load_numpy(self) -> np.ndarray: - """Load NumPy array. - - Returns - ------- - np.ndarray - loaded NumPy array - """ - return self.root[self.name][:] - - def load_txt(self, dtype: Optional[np.dtype] = None, **kwargs) -> np.ndarray: - """Load NumPy array from text. 
- - Returns - ------- - np.ndarray - loaded NumPy array - """ - arr = self.load_numpy() - if dtype: - arr = arr.astype(dtype) - return arr - - def glob(self, pattern: str) -> List["DPPath"]: - """Search path using the glob pattern. - - Parameters - ---------- - pattern : str - glob pattern - - Returns - ------- - List[DPPath] - list of paths - """ - # got paths starts with current path first, which is faster - subpaths = [ii for ii in self._keys if ii.startswith(self.name)] - return [ - type(self)(f"{self.root_path}#{pp}") - for pp in globfilter(subpaths, self._connect_path(pattern)) - ] - - def rglob(self, pattern: str) -> List["DPPath"]: - """This is like calling :meth:`DPPath.glob()` with `**/` added in front - of the given relative pattern. - - Parameters - ---------- - pattern : str - glob pattern - - Returns - ------- - List[DPPath] - list of paths - """ - return self.glob("**" + pattern) - - @property - def _keys(self) -> List[str]: - """Walk all groups and dataset.""" - return self._file_keys(self.root) - - @classmethod - @lru_cache(None) - def _file_keys(cls, file: h5py.File) -> List[str]: - """Walk all groups and dataset.""" - l = [] - file.visit(lambda x: l.append("/" + x)) - return l - - def is_file(self) -> bool: - """Check if self is file.""" - if self.name not in self._keys: - return False - return isinstance(self.root[self.name], h5py.Dataset) - - def is_dir(self) -> bool: - """Check if self is directory.""" - if self.name not in self._keys: - return False - return isinstance(self.root[self.name], h5py.Group) - - def __truediv__(self, key: str) -> "DPPath": - """Used for / operator.""" - return type(self)(f"{self.root_path}#{self._connect_path(key)}") - - def _connect_path(self, path: str) -> str: - """Connect self with path.""" - if self.name.endswith("/"): - return f"{self.name}{path}" - return f"{self.name}/{path}" - - def __lt__(self, other: "DPH5Path") -> bool: - """Whether this DPPath is less than other for sorting.""" - if self.root_path == 
other.root_path: - return self.name < other.name - return self.root_path < other.root_path - - def __str__(self) -> str: - """Returns path of self.""" - return f"{self.root_path}#{self.name}" diff --git a/deepmd_utils/utils/plugin.py b/deepmd_utils/utils/plugin.py deleted file mode 100644 index 2a77b744c5..0000000000 --- a/deepmd_utils/utils/plugin.py +++ /dev/null @@ -1,95 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -"""Base of plugin systems.""" -# copied from https://github.com/deepmodeling/dpdata/blob/a3e76d75de53f6076254de82d18605a010dc3b00/dpdata/plugin.py - -from abc import ( - ABCMeta, -) -from typing import ( - Callable, -) - - -class Plugin: - """A class to register and restore plugins. - - Attributes - ---------- - plugins : Dict[str, object] - plugins - - Examples - -------- - >>> plugin = Plugin() - >>> @plugin.register("xx") - def xxx(): - pass - >>> print(plugin.plugins['xx']) - """ - - def __init__(self): - self.plugins = {} - - def __add__(self, other) -> "Plugin": - self.plugins.update(other.plugins) - return self - - def register(self, key: str) -> Callable[[object], object]: - """Register a plugin. - - Parameters - ---------- - key : str - key of the plugin - - Returns - ------- - Callable[[object], object] - decorator - """ - - def decorator(object: object) -> object: - self.plugins[key] = object - return object - - return decorator - - def get_plugin(self, key) -> object: - """Visit a plugin by key. 
- - Parameters - ---------- - key : str - key of the plugin - - Returns - ------- - object - the plugin - """ - return self.plugins[key] - - -class VariantMeta: - def __call__(cls, *args, **kwargs): - """Remove `type` and keys that starts with underline.""" - obj = cls.__new__(cls, *args, **kwargs) - kwargs.pop("type", None) - to_pop = [] - for kk in kwargs: - if kk[0] == "_": - to_pop.append(kk) - for kk in to_pop: - kwargs.pop(kk, None) - obj.__init__(*args, **kwargs) - return obj - - -class VariantABCMeta(VariantMeta, ABCMeta): - pass - - -class PluginVariant(metaclass=VariantABCMeta): - """A class to remove `type` from input arguments.""" - - pass diff --git a/deepmd_utils/utils/random.py b/deepmd_utils/utils/random.py deleted file mode 100644 index 8944419412..0000000000 --- a/deepmd_utils/utils/random.py +++ /dev/null @@ -1,67 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - Optional, -) - -import numpy as np - -_RANDOM_GENERATOR = np.random.RandomState() - - -def choice(a: np.ndarray, p: Optional[np.ndarray] = None): - """Generates a random sample from a given 1-D array. - - Parameters - ---------- - a : np.ndarray - A random sample is generated from its elements. - p : np.ndarray - The probabilities associated with each entry in a. - - Returns - ------- - np.ndarray - arrays with results and their shapes - """ - return _RANDOM_GENERATOR.choice(a, p=p) - - -def random(size=None): - """Return random floats in the half-open interval [0.0, 1.0). - - Parameters - ---------- - size - Output shape. - - Returns - ------- - np.ndarray - Arrays with results and their shapes. - """ - return _RANDOM_GENERATOR.random_sample(size) - - -def seed(val: Optional[int] = None): - """Seed the generator. - - Parameters - ---------- - val : int - Seed. - """ - _RANDOM_GENERATOR.seed(val) - - -def shuffle(x: np.ndarray): - """Modify a sequence in-place by shuffling its contents. 
- - Parameters - ---------- - x : np.ndarray - The array or list to be shuffled. - """ - _RANDOM_GENERATOR.shuffle(x) - - -__all__ = ["choice", "random", "seed", "shuffle"] diff --git a/deepmd_utils/utils/weight_avg.py b/deepmd_utils/utils/weight_avg.py deleted file mode 100644 index b344d3bb75..0000000000 --- a/deepmd_utils/utils/weight_avg.py +++ /dev/null @@ -1,48 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -from collections import ( - defaultdict, -) -from typing import ( - Dict, - List, - Tuple, -) - -import numpy as np - - -def weighted_average(errors: List[Dict[str, Tuple[float, float]]]) -> Dict: - """Compute wighted average of prediction errors (MAE or RMSE) for model. - - Parameters - ---------- - errors : List[Dict[str, Tuple[float, float]]] - List: the error of systems - Dict: the error of quantities, name given by the key - str: the name of the quantity, must starts with 'mae' or 'rmse' - Tuple: (error, weight) - - Returns - ------- - Dict - weighted averages - """ - sum_err = defaultdict(float) - sum_siz = defaultdict(int) - for err in errors: - for kk, (ee, ss) in err.items(): - if kk.startswith("mae"): - sum_err[kk] += ee * ss - elif kk.startswith("rmse"): - sum_err[kk] += ee * ee * ss - else: - raise RuntimeError("unknown error type") - sum_siz[kk] += ss - for kk in sum_err.keys(): - if kk.startswith("mae"): - sum_err[kk] = sum_err[kk] / sum_siz[kk] - elif kk.startswith("rmse"): - sum_err[kk] = np.sqrt(sum_err[kk] / sum_siz[kk]) - else: - raise RuntimeError("unknown error type") - return sum_err diff --git a/doc/_static/css/custom.css b/doc/_static/css/custom.css index 1569dc4a38..d0b761e71d 100644 --- a/doc/_static/css/custom.css +++ b/doc/_static/css/custom.css @@ -1,14 +1,22 @@ /* * SPDX-License-Identifier: LGPL-3.0-or-later */ -pre{ - overflow: auto; +pre { + overflow: auto; } -.wy-side-nav-search .wy-dropdown > a img.logo, .wy-side-nav-search > a img.logo { - width: 275px; +.wy-side-nav-search .wy-dropdown > a img.logo, 
+.wy-side-nav-search > a img.logo { + width: 275px; +} +img.platform-icon { + height: 2ex; } @media (prefers-color-scheme: dark) { - .wy-side-nav-search .wy-dropdown > a img.logo, .wy-side-nav-search > a img.logo { - content: url("../logo-dark.svg"); - } + .wy-side-nav-search .wy-dropdown > a img.logo, + .wy-side-nav-search > a img.logo { + content: url("../logo-dark.svg"); + } + img.platform-icon { + filter: invert(1); + } } diff --git a/doc/_static/logo_icon.svg b/doc/_static/logo_icon.svg new file mode 100644 index 0000000000..d8f6893355 --- /dev/null +++ b/doc/_static/logo_icon.svg @@ -0,0 +1 @@ + diff --git a/doc/_static/pytorch.svg b/doc/_static/pytorch.svg new file mode 100644 index 0000000000..04aae0c2a3 --- /dev/null +++ b/doc/_static/pytorch.svg @@ -0,0 +1 @@ +PyTorch icon diff --git a/doc/_static/tensorflow.svg b/doc/_static/tensorflow.svg new file mode 100644 index 0000000000..48746104ec --- /dev/null +++ b/doc/_static/tensorflow.svg @@ -0,0 +1 @@ +TensorFlow icon diff --git a/doc/api_op.rst b/doc/api_op.rst index 9f4c650497..d620ec6ef5 100644 --- a/doc/api_op.rst +++ b/doc/api_op.rst @@ -4,7 +4,7 @@ OP API op_module --------- -.. automodule:: deepmd.env.op_module +.. automodule:: deepmd.tf.env.op_module :members: :imported-members: :show-inheritance: @@ -13,7 +13,7 @@ op_module op_grads_module --------------- -.. automodule:: deepmd.env.op_grads_module +.. automodule:: deepmd.tf.env.op_grads_module :members: :imported-members: :show-inheritance: diff --git a/doc/backend.md b/doc/backend.md new file mode 100644 index 0000000000..2f0bc7ed20 --- /dev/null +++ b/doc/backend.md @@ -0,0 +1,57 @@ +# Backend + +## Supported backends + +DeePMD-kit supports multiple backends: TensorFlow and PyTorch. +To use DeePMD-kit, you must install at least one backend. +Each backend does not support all features. +In the documentation, TensorFlow {{ tensorflow_icon }} and PyTorch {{ pytorch_icon }} icons are used to mark whether a backend supports a feature. 
+ +### TensorFlow {{ tensorflow_icon }} + +- Model filename extension: `.pb` +- Checkpoint filename extension: `.meta`, `.index`, `.data-00000-of-00001` + +[TensorFlow](https://tensorflow.org) 2.2 or above is required. +DeePMD-kit does not use the TensorFlow v2 API but uses the TensorFlow v1 API (`tf.compat.v1`) in the graph mode. + +### PyTorch {{ pytorch_icon }} + +- Model filename extension: `.pth` +- Checkpoint filename extension: `.pt` + +[PyTorch](https://pytorch.org/) 2.0 or above is required. +While `.pth` and `.pt` are the same in the PyTorch package, they have different meanings in DeePMD-kit to distinguish the model and the checkpoint. + +### DP {{ dpmodel_icon }} + +:::{note} +This backend is only for development and should not be used in production. +::: + +- Model filename extension: `.dp` + +DP is a reference backend for development, which uses pure [NumPy](https://numpy.org/) to implement models without using any heavy deep-learning frameworks. +Due to the limitations of NumPy, it doesn't support gradient calculation and thus cannot be used for training. +As a reference backend, it is not aimed at the best performance, but only the correct results. +The DP backend uses [HDF5](https://docs.h5py.org/) to store model serialization data, which is backend-independent. +Only the Python inference interface can load this format. + +## Switch the backend + +### Training + +When training and freezing a model, you can use `dp --tf` or `dp --pt` in the command line to switch the backend. + +### Inference + +When doing inference, DeePMD-kit detects the backend from the model filename. +For example, when the model filename ends with `.pb` (the ProtoBuf file), DeePMD-kit will treat it as a TensorFlow backend model. + +## Convert model files between backends + +If a model is supported by two backends, one can use [`dp convert-backend`](./cli.rst) to convert the model file between these two backends. 
+ +:::{warning} +Currently, only the `se_e2_a` model fully supports the backend conversion between TensorFlow {{ tensorflow_icon }} and PyTorch {{ pytorch_icon }}. +::: diff --git a/doc/cli.rst b/doc/cli.rst index 668a2df2e3..15891369e3 100644 --- a/doc/cli.rst +++ b/doc/cli.rst @@ -4,6 +4,6 @@ Command line interface ====================== .. argparse:: - :module: deepmd.entrypoints.main + :module: deepmd.tf.entrypoints.main :func: main_parser :prog: dp diff --git a/doc/conf.py b/doc/conf.py index 63af974a86..58181f9e1c 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -17,107 +17,15 @@ date, ) -from deepmd.common import ( +from deepmd.utils.argcheck import ( ACTIVATION_FN_DICT, PRECISION_DICT, -) -from deepmd.utils.argcheck import ( list_to_doc, ) sys.path.append(os.path.dirname(__file__)) import sphinx_contrib_exhale_multiproject # noqa: F401 - -def mkindex(dirname): - dirname = dirname + "/" - oldfindex = open(dirname + "index.md") - oldlist = oldfindex.readlines() - oldfindex.close() - - oldnames = [] - for entry in oldlist: - _name = entry[entry.find("(") + 1 : entry.find(")")] - oldnames.append(_name) - - newfindex = open(dirname + "index.md", "a") - for root, dirs, files in os.walk(dirname, topdown=False): - newnames = [ - name for name in files if "index.md" not in name and name not in oldnames - ] - for name in newnames: - f = open(dirname + name) - _lines = f.readlines() - for _headline in _lines: - _headline = _headline.strip("#") - headline = _headline.strip() - if len(headline) == 0 or headline[0] == "." 
or headline[0] == "=": - continue - else: - break - longname = "- [" + headline + "]" + "(" + name + ")\n" - newfindex.write(longname) - - newfindex.close() - - -def classify_index_TS(): - dirname = "troubleshooting/" - oldfindex = open(dirname + "index.md") - oldlist = oldfindex.readlines() - oldfindex.close() - - oldnames = [] - sub_titles = [] - heads = [] - while len(oldlist) > 0: - entry = oldlist.pop(0) - if entry.find("(") >= 0: - _name = entry[entry.find("(") + 1 : entry.find(")")] - oldnames.append(_name) - continue - if entry.find("##") >= 0: - _name = entry[entry.find("##") + 3 : -1] - sub_titles.append(_name) - continue - entry.strip() - if entry != "\n": - heads.append(entry) - - newfindex = open(dirname + "index.md", "w") - for entry in heads: - newfindex.write(entry) - newfindex.write("\n") - sub_lists = [[], []] - for root, dirs, files in os.walk(dirname, topdown=False): - newnames = [name for name in files if "index.md" not in name] - for name in newnames: - f = open(dirname + name) - _lines = f.readlines() - f.close() - for _headline in _lines: - _headline = _headline.strip("#") - headline = _headline.strip() - if len(headline) == 0 or headline[0] == "." 
or headline[0] == "=": - continue - else: - break - longname = "- [" + headline + "]" + "(" + name + ")\n" - if "howtoset_" in name: - sub_lists[1].append(longname) - else: - sub_lists[0].append(longname) - - newfindex.write("## Trouble shooting\n") - for entry in sub_lists[0]: - newfindex.write(entry) - newfindex.write("\n") - newfindex.write("## Parameters setting\n") - for entry in sub_lists[1]: - newfindex.write(entry) - newfindex.close() - - # -- Project information ----------------------------------------------------- project = "DeePMD-kit" @@ -169,10 +77,6 @@ def setup(app): # 'sphinx.ext.autosummary' # ] -# mkindex("troubleshooting") -# mkindex("development") -# classify_index_TS() - extensions = [ "deepmodeling_sphinx", "dargs.sphinx", @@ -188,6 +92,7 @@ def setup(app): "breathe", "exhale", "sphinxcontrib.bibtex", + "sphinx_design", ] # breathe_domain_by_extension = { @@ -213,7 +118,10 @@ def setup(app): exhale_projects_args = { "cc": { "containmentFolder": "./API_CC", - "exhaleDoxygenStdin": "INPUT = ../source/api_cc/include/", + "exhaleDoxygenStdin": """INPUT = ../source/api_cc/include/ + PREDEFINED += BUILD_TENSORFLOW + BUILD_PYTORCH + """, "rootFileTitle": "C++ API", "rootFileName": "api_cc.rst", }, @@ -275,6 +183,12 @@ def setup(app): .. |PRECISION| replace:: {list_to_doc(PRECISION_DICT.keys())} """ +myst_substitutions = { + "tensorflow_icon": """![TensorFlow](/_static/tensorflow.svg){class=platform-icon}""", + "pytorch_icon": """![PyTorch](/_static/pytorch.svg){class=platform-icon}""", + "dpmodel_icon": """![DP](/_static/logo_icon.svg){class=platform-icon}""", +} + # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. 
See the documentation for @@ -298,6 +212,8 @@ def setup(app): myst_enable_extensions = [ "dollarmath", "colon_fence", + "substitution", + "attrs_inline", ] myst_fence_as_directive = ("math",) # fix emoji issue in pdf diff --git a/doc/credits.rst b/doc/credits.rst index 3fbe1d56d8..64880d9035 100644 --- a/doc/credits.rst +++ b/doc/credits.rst @@ -49,6 +49,13 @@ Cite DeePMD-kit and methods Zhang_2022_DPA1 +- If DPA-2 descriptor (`dpa2`) is used, + +.. bibliography:: + :filter: False + + Zhang_2023_DPA2 + - If frame-specific parameters (`fparam`, e.g. electronic temperature) is used, .. bibliography:: diff --git a/doc/data/data-conv.md b/doc/data/data-conv.md index e8464b1ea9..7634daf5e6 100644 --- a/doc/data/data-conv.md +++ b/doc/data/data-conv.md @@ -5,6 +5,7 @@ Two binary formats, NumPy and HDF5, are supported for training. The raw format i ## NumPy format In a system with the Numpy format, the system properties are stored as text files ending with `.raw`, such as `type.raw` and `type_map.raw`, under the system directory. If one needs to train a non-periodic system, an empty `nopbc` file should be put under the system directory. Both input and labeled frame properties are saved as the [NumPy binary data (NPY) files](https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#npy-format) ending with `.npy` in each of the `set.*` directories. Take an example, a system may contain the following files: + ``` type.raw type_map.raw @@ -18,16 +19,19 @@ set.001/force.npy ``` We assume that the atom types do not change in all frames. It is provided by `type.raw`, which has one line with the types of atoms written one by one. The atom types should be integers. For example the `type.raw` of a system that has 2 atoms with 0 and 1: + ```bash $ cat type.raw 0 1 ``` Sometimes one needs to map the integer types to atom names. The mapping can be given by the file `type_map.raw`. 
For example + ```bash $ cat type_map.raw O H ``` + The type `0` is named by `"O"` and the type `1` is named by `"H"`. For training models with descriptor `se_atten`, a [new system format](../model/train-se-atten.md#data-format) is supported to put together the frame-sparse systems with the same atom number. @@ -35,9 +39,11 @@ For training models with descriptor `se_atten`, a [new system format](../model/t ## HDF5 format A system with the HDF5 format has the same structure as the Numpy format, but in an HDF5 file, a system is organized as an [HDF5 group](https://docs.h5py.org/en/stable/high/group.html). The file name of a Numpy file is the key in an HDF5 file, and the data is the value of the key. One needs to use `#` in a DP path to divide the path to the HDF5 file and the HDF5 path: + ``` /path/to/data.hdf5#/H2O ``` + Here, `/path/to/data.hdf5` is the file path and `/H2O` is the HDF5 path. All HDF5 paths should start with `/`. There should be some data in the `H2O` group, such as `/H2O/type.raw` and `/H2O/set.000/force.npy`. An HDF5 file with a large number of systems has better performance than multiple NumPy files in a large cluster. @@ -47,15 +53,18 @@ An HDF5 file with a large number of systems has better performance than multiple A raw file is a plain text file with each information item written in one file and one frame written on one line. **It's not directly supported**, but we provide a tool to convert them. In the raw format, the property of one frame is provided per line, ending with `.raw`. Take an example, the default files that provide box, coordinate, force, energy and virial are `box.raw`, `coord.raw`, `force.raw`, `energy.raw` and `virial.raw`, respectively. 
Here is an example of `force.raw`: + ```bash $ cat force.raw -0.724 2.039 -0.951 0.841 -0.464 0.363 6.737 1.554 -5.587 -2.803 0.062 2.222 -1.968 -0.163 1.020 -0.225 -0.789 0.343 ``` + This `force.raw` contains 3 frames with each frame having the forces of 2 atoms, thus it has 3 lines and 6 columns. Each line provides all the 3 force components of 2 atoms in 1 frame. The first three numbers are the 3 force components of the first atom, while the second three numbers are the 3 force components of the second atom. Other files are organized similarly. The number of lines of all raw files should be identical. One can use the script `$deepmd_source_dir/data/raw/raw_to_set.sh` to convert the prepared raw files to the NumPy format. For example, if we have a raw file that contains 6000 frames, + ```bash $ ls box.raw coord.raw energy.raw force.raw type.raw virial.raw @@ -69,4 +78,5 @@ making set 2 ... $ ls box.raw coord.raw energy.raw force.raw set.000 set.001 set.002 type.raw virial.raw ``` + It generates three sets `set.000`, `set.001` and `set.002`, with each set containing 2000 frames in the Numpy format. diff --git a/doc/data/dpdata.md b/doc/data/dpdata.md index 9b1a27ce82..63fe4f39c3 100644 --- a/doc/data/dpdata.md +++ b/doc/data/dpdata.md @@ -3,16 +3,19 @@ One can use a convenient tool [`dpdata`](https://github.com/deepmodeling/dpdata) to convert data directly from the output of first principle packages to the DeePMD-kit format. 
To install one can execute + ```bash pip install dpdata ``` An example of converting data [VASP](https://www.vasp.at/) data in `OUTCAR` format to DeePMD-kit data can be found at + ``` $deepmd_source_dir/examples/data_conv ``` Switch to that directory, then one can convert data by using the following python script + ```python import dpdata diff --git a/doc/data/index.md b/doc/data/index.md deleted file mode 100644 index 838265427b..0000000000 --- a/doc/data/index.md +++ /dev/null @@ -1,9 +0,0 @@ -# Data - -In this section, we will introduce how to convert the DFT-labeled data into the data format used by DeePMD-kit. - -The DeePMD-kit organizes data in `systems`. Each `system` is composed of a number of `frames`. One may roughly view a `frame` as a snapshot of an MD trajectory, but it does not necessarily come from an MD simulation. A `frame` records the coordinates and types of atoms, cell vectors if the periodic boundary condition is assumed, energy, atomic forces and virials. It is noted that the `frames` in one `system` share the same number of atoms with the same type. - -- [System](system.md) -- [Formats of a system](data-conv.md) -- [Prepare data with dpdata](dpdata.md) diff --git a/doc/data/system.md b/doc/data/system.md index 0ecd0e9119..6ca044f1c9 100644 --- a/doc/data/system.md +++ b/doc/data/system.md @@ -4,44 +4,44 @@ DeePMD-kit takes a **system** as the data structure. A snapshot of a system is c A system should contain system properties, input frame properties, and labeled frame properties. The system property contains the following property: -ID | Property | Raw file | Required/Optional | Shape | Description --------- | ---------------------- | ------------ | -------------------- | ----------------------- | ----------- -type | Atom type indexes | type.raw | Required | Natoms | Integers that start with 0. 
If both the training parameter {ref}`type_map ` is set and `type_map.raw` is provided, the system atom type should be mapped to `type_map.raw` in `type.raw` and will be mapped to the model atom type when training; otherwise, the system atom type will be always mapped to the model atom type (whether {ref}`type_map ` is set or not) -type_map | Atom type names | type_map.raw | Optional | Ntypes | Atom names that map to atom type, which is unnecessary to be contained in the periodic table. Only works when the training parameter {ref}`type_map ` is set -nopbc | Non-periodic system | nopbc | Optional | 1 | If True, this system is non-periodic; otherwise it's periodic +| ID | Property | Raw file | Required/Optional | Shape | Description | +| -------- | ------------------- | ------------ | ----------------- | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| type | Atom type indexes | type.raw | Required | Natoms | Integers that start with 0. If both the training parameter {ref}`type_map ` is set and `type_map.raw` is provided, the system atom type should be mapped to `type_map.raw` in `type.raw` and will be mapped to the model atom type when training; otherwise, the system atom type will be always mapped to the model atom type (whether {ref}`type_map ` is set or not) | +| type_map | Atom type names | type_map.raw | Optional | Ntypes | Atom names that map to atom type, which is unnecessary to be contained in the periodic table. 
Only works when the training parameter {ref}`type_map ` is set | +| nopbc | Non-periodic system | nopbc | Optional | 1 | If True, this system is non-periodic; otherwise it's periodic | The input frame properties contain the following property, the first axis of which is the number of frames: -ID | Property | Raw file | Unit | Required/Optional | Shape | Description --------- | ---------------------- | -------------- | ---- | -------------------- | ----------------------- | ----------- -coord | Atomic coordinates | coord.raw | Å | Required | Nframes \* Natoms \* 3 | -box | Boxes | box.raw | Å | Required if periodic | Nframes \* 3 \* 3 | in the order `XX XY XZ YX YY YZ ZX ZY ZZ` -fparam | Extra frame parameters | fparam.raw | Any | Optional | Nframes \* Any | -aparam | Extra atomic parameters | aparam.raw | Any | Optional | Nframes \* aparam \* Any | -numb_copy | Each frame is copied by the `numb_copy` (int) times | prob.raw | 1 | Optional | Nframes | Integer; Default is 1 for all frames +| ID | Property | Raw file | Unit | Required/Optional | Shape | Description | +| --------- | --------------------------------------------------- | ---------- | ---- | -------------------- | ------------------------ | ----------------------------------------- | +| coord | Atomic coordinates | coord.raw | Å | Required | Nframes \* Natoms \* 3 | +| box | Boxes | box.raw | Å | Required if periodic | Nframes \* 3 \* 3 | in the order `XX XY XZ YX YY YZ ZX ZY ZZ` | +| fparam | Extra frame parameters | fparam.raw | Any | Optional | Nframes \* Any | +| aparam | Extra atomic parameters | aparam.raw | Any | Optional | Nframes \* aparam \* Any | +| numb_copy | Each frame is copied by the `numb_copy` (int) times | prob.raw | 1 | Optional | Nframes | Integer; Default is 1 for all frames | The labeled frame properties are listed as follows, all of which will be used for training if and only if the loss function contains such property: -ID | Property | Raw file | Unit | Shape | Description 
----------------------- | ----------------------- | ------------------------ | ---- | ----------------------- | ----------- -energy | Frame energies | energy.raw | eV | Nframes | -force | Atomic forces | force.raw | eV/Å | Nframes \* Natoms \* 3 | -virial | Frame virial | virial.raw | eV | Nframes \* 9 | in the order `XX XY XZ YX YY YZ ZX ZY ZZ` -atom_ener | Atomic energies | atom_ener.raw | eV | Nframes \* Natoms | -atom_pref | Weights of atomic forces | atom_pref.raw | 1 | Nframes \* Natoms | -dipole | Frame dipole | dipole.raw | Any | Nframes \* 3 | -atomic_dipole | Atomic dipole | atomic_dipole.raw | Any | Nframes \* Natoms \* 3 | -polarizability | Frame polarizability | polarizability.raw | Any | Nframes \* 9 | in the order `XX XY XZ YX YY YZ ZX ZY ZZ` -atomic_polarizability | Atomic polarizability | atomic_polarizability.raw| Any | Nframes \* Natoms \* 9 | in the order `XX XY XZ YX YY YZ ZX ZY ZZ` -drdq | Partial derivative of atomic coordinates with respect to generalized coordinates | drdq.raw | 1 | Nframes \* Natoms \* 3 \* Ngen_coords | +| ID | Property | Raw file | Unit | Shape | Description | +| --------------------- | -------------------------------------------------------------------------------- | ------------------------- | ---- | ------------------------------------- | ----------------------------------------- | +| energy | Frame energies | energy.raw | eV | Nframes | +| force | Atomic forces | force.raw | eV/Å | Nframes \* Natoms \* 3 | +| virial | Frame virial | virial.raw | eV | Nframes \* 9 | in the order `XX XY XZ YX YY YZ ZX ZY ZZ` | +| atom_ener | Atomic energies | atom_ener.raw | eV | Nframes \* Natoms | +| atom_pref | Weights of atomic forces | atom_pref.raw | 1 | Nframes \* Natoms | +| dipole | Frame dipole | dipole.raw | Any | Nframes \* 3 | +| atomic_dipole | Atomic dipole | atomic_dipole.raw | Any | Nframes \* Natoms \* 3 | +| polarizability | Frame polarizability | polarizability.raw | Any | Nframes \* 9 | in the order `XX XY XZ YX YY 
YZ ZX ZY ZZ` | +| atomic_polarizability | Atomic polarizability | atomic_polarizability.raw | Any | Nframes \* Natoms \* 9 | in the order `XX XY XZ YX YY YZ ZX ZY ZZ` | +| drdq | Partial derivative of atomic coordinates with respect to generalized coordinates | drdq.raw | 1 | Nframes \* Natoms \* 3 \* Ngen_coords | In general, we always use the following convention of units: -Property | Unit ----------| ---- -Time | ps -Length | Å -Energy | eV -Force | eV/Å -Virial | eV -Pressure | Bar +| Property | Unit | +| -------- | ---- | +| Time | ps | +| Length | Å | +| Energy | eV | +| Force | eV/Å | +| Virial | eV | +| Pressure | Bar | diff --git a/doc/development/cmake.md b/doc/development/cmake.md index 3073327856..f8508d8992 100644 --- a/doc/development/cmake.md +++ b/doc/development/cmake.md @@ -9,11 +9,13 @@ find_package(DeePMD REQUIRED) Note that you may need to add ${deepmd_root} to the cached CMake variable `CMAKE_PREFIX_PATH`. To link against the C interface library, using + ```cmake target_link_libraries(some_library PRIVATE DeePMD::deepmd_c) ``` To link against the C++ interface library, using + ```cmake target_link_libraries(some_library PRIVATE DeePMD::deepmd_cc) ``` diff --git a/doc/development/coding-conventions.rst b/doc/development/coding-conventions.rst index ad4203ee4f..137b0d0d51 100644 --- a/doc/development/coding-conventions.rst +++ b/doc/development/coding-conventions.rst @@ -30,7 +30,7 @@ Rules ----- The code must be compatible with the oldest supported version of python -which is 3.7 +which is 3.8. 
The project follows the generic coding conventions as specified in the `Style Guide for Python Code`_, `Docstring diff --git a/doc/development/create-a-model-pt.md b/doc/development/create-a-model-pt.md new file mode 100644 index 0000000000..35d81b364a --- /dev/null +++ b/doc/development/create-a-model-pt.md @@ -0,0 +1,163 @@ +# Create a model in PyTorch + +If you'd like to create a new model that isn't covered by the existing DeePMD-kit library, but reuse DeePMD-kit's other efficient modules such as data processing, trainer, etc., you may want to read this section. + +To incorporate your custom model you'll need to: + +1. Register and implement new components (e.g. descriptor) in a Python file. +2. Register new arguments for user inputs. +3. Package new codes into a Python package. +4. Test new models. + +## Design a new component + +With DeePMD-kit v3, we have expanded support to include two additional backends alongside TensorFlow: the PyTorch backend and the framework-independent backend (dpmodel). The PyTorch backend adopts a highly modularized design to provide flexibility and extensibility. It ensures a consistent experience for both training and inference, aligning with the TensorFlow backend. + +The framework-independent backend is implemented in pure NumPy, serving as a reference backend to ensure consistency in tests. Its design pattern closely parallels that of the PyTorch backend. + +### New descriptors + +When creating a new descriptor, it is essential to inherit from both the {py:class}`deepmd.pt.model.descriptor.base_descriptor.BaseDescriptor` class and the {py:class}`torch.nn.Module` class. Abstract methods, including {py:class}`deepmd.pt.model.descriptor.base_descriptor.BaseDescriptor.forward`, must be implemented, while others remain optional. It is crucial to adhere to the original method arguments without any modifications.
Once the implementation is complete, the next step involves registering the component with a designated key: + +```py +from deepmd.pt.model.descriptor.base_descriptor import ( + BaseDescriptor, +) + + +@BaseDescriptor.register("some_descrpt") +class SomeDescript(BaseDescriptor, torch.nn.Module): + def __init__(self, arg1: bool, arg2: float) -> None: + pass + + def get_rcut(self) -> float: + pass + + def get_nnei(self) -> int: + pass + + def get_ntypes(self) -> int: + pass + + def get_dim_out(self) -> int: + pass + + def get_dim_emb(self) -> int: + pass + + def mixed_types(self) -> bool: + pass + + def forward( + self, + coord_ext: torch.Tensor, + atype_ext: torch.Tensor, + nlist: torch.Tensor, + mapping: Optional[torch.Tensor] = None, + ): + pass + + def serialize(self) -> dict: + pass + + def deserialize(cls, data: dict) -> "SomeDescript": + pass + + def update_sel(cls, global_jdata: dict, local_jdata: dict): + pass +``` + +The serialize and deserialize methods are important for cross-backend model conversion. + +### New fitting nets + +In many instances, there is no requirement to create a new fitting net. For fitting user-defined scalar properties, the {py:class}`deepmd.pt.model.task.ener.InvarFitting` class can be utilized. However, if there is a need for a new fitting net, one should inherit from both the {py:class}`deepmd.pt.model.task.base_fitting.BaseFitting` class and the {py:class}`torch.nn.Module` class. Alternatively, for a more straightforward approach, inheritance from the {py:class}`deepmd.pt.model.task.fitting.GeneralFitting` class is also an option. 
+ +```py +from deepmd.pt.model.task.fitting import ( + GeneralFitting, +) +from deepmd.dpmodel import ( + FittingOutputDef, + fitting_check_output, +) + + +@GeneralFitting.register("some_fitting") +@fitting_check_output +class SomeFittingNet(GeneralFitting): + def __init__(self, arg1: bool, arg2: float) -> None: + pass + + def forward( + self, + descriptor: torch.Tensor, + atype: torch.Tensor, + gr: Optional[torch.Tensor] = None, + g2: Optional[torch.Tensor] = None, + h2: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + ): + pass + + def output_def(self) -> FittingOutputDef: + pass +``` + +### New models + +The PyTorch backend's model architecture is meticulously structured with multiple layers of abstraction, ensuring a high degree of flexibility. Typically, the process commences with an atomic model responsible for atom-wise property calculations. This atomic model inherits from both the {py:class}`deepmd.pt.model.atomic_model.base_atomic_model.BaseAtomicModel` class and the {py:class}`torch.nn.Module` class. + +Subsequently, the `AtomicModel` is encapsulated using the `make_model(AtomicModel)` function, which leverages the `deepmd.pt.model.model.make_model.make_model` function. The purpose of the `make_model` wrapper is to facilitate the translation between atomic property predictions and the extended property predictions and differentiation, e.g. the reduction of atomic energy contribution and the autodiff for calculating the forces and virial. The developers usually need to implement an `AtomicModel`, not a `Model`.
+ +```py +from deepmd.pt.model.atomic_model.base_atomic_model import ( + BaseAtomicModel, +) + + +class SomeAtomicModel(BaseAtomicModel, torch.nn.Module): + def __init__(self, arg1: bool, arg2: float) -> None: + pass + + def forward_atomic(self): + pass +``` + +## Register new arguments + +To let someone use your new component in their input file, you need to create a new method that returns some `Argument` of your new component, and then register new arguments. For example, the code below + +```py +from typing import List + +from dargs import Argument +from deepmd.utils.argcheck import descrpt_args_plugin + + +@descrpt_args_plugin.register("some_descrpt") +def descrpt_some_args() -> List[Argument]: + return [ + Argument("arg1", bool, optional=False, doc="balabala"), + Argument("arg2", float, optional=True, default=6.0, doc="haha"), + ] +``` + +allows one to use your new descriptor as below: + +```json +"descriptor" :{ + "type": "some_descrpt", + "arg1": true, + "arg2": 6.0 +} +``` + +The arguments here should be consistent with the class arguments of your new component. + +## Unit tests + +When transferring features from another backend to the PyTorch backend, it is essential to include a regression test in `/source/tests/consistent` to validate the consistency of the PyTorch backend with other backends. Presently, the regression tests cover self-consistency and cross-backend consistency between TensorFlow, PyTorch, and DP (NumPy) through the serialization/deserialization technique. + +During the development of new components within the PyTorch backend, it is necessary to provide a DP (NumPy) implementation and incorporate corresponding regression tests. For PyTorch components, developers are also required to include a unit test using `torch.jit`.
diff --git a/doc/development/create-a-model.md b/doc/development/create-a-model-tf.md similarity index 83% rename from doc/development/create-a-model.md rename to doc/development/create-a-model-tf.md index 6634403021..b39313a8d3 100644 --- a/doc/development/create-a-model.md +++ b/doc/development/create-a-model-tf.md @@ -1,8 +1,9 @@ -# Create a model +# Create a model in TensorFlow If you'd like to create a new model that isn't covered by the existing DeePMD-kit library, but reuse DeePMD-kit's other efficient modules such as data processing, trainner, etc, you may want to read this section. To incorporate your custom model you'll need to: + 1. Register and implement new components (e.g. descriptor) in a Python file. You may also want to register new TensorFlow OPs if necessary. 2. Register new arguments for user inputs. 3. Package new codes into a Python package. @@ -10,11 +11,12 @@ To incorporate your custom model you'll need to: ## Design a new component -When creating a new component, take descriptor as the example, you should inherit {py:class}`deepmd.descriptor.descriptor.Descriptor` class and override several methods. Abstract methods such as {py:class}`deepmd.descriptor.descriptor.Descriptor.build` must be implemented and others are not. You should keep arguments of these methods unchanged. +When creating a new component, take descriptor as the example, one should inherit from the {py:class}`deepmd.tf.descriptor.descriptor.Descriptor` class and override several methods. Abstract methods such as {py:class}`deepmd.tf.descriptor.descriptor.Descriptor.build` must be implemented and others are not. You should keep arguments of these methods unchanged. 
After implementation, you need to register the component with a key: + ```py -from deepmd.descriptor import Descriptor +from deepmd.tf.descriptor import Descriptor @Descriptor.register("some_descrpt") diff --git a/doc/development/type-embedding.md b/doc/development/type-embedding.md index 5919d6c944..10eeed6ee9 100644 --- a/doc/development/type-embedding.md +++ b/doc/development/type-embedding.md @@ -1,11 +1,15 @@ # Atom Type Embedding + ## Overview + Here is an overview of the DeePMD-kit algorithm. Given a specific centric atom, we can obtain the matrix describing its local environment, named $\mathcal R$. It consists of the distance between the centric atom and its neighbors, as well as a direction vector. We can embed each distance into a vector of $M_1$ dimension by an `embedding net`, so the environment matrix $\mathcal R$ can be embedded into matrix $\mathcal G$. We can thus extract a descriptor vector (of $M_1 \times M_2$ dim) of the centric atom from the $\mathcal G$ by some matrix multiplication, and put the descriptor into `fitting net` to get the predicted energy $E$. The vanilla version of DeePMD-kit builds `embedding net` and `fitting net` relying on the atom type, resulting in $O(N)$ memory usage. After applying atom type embedding, in DeePMD-kit v2.0, we can share one `embedding net` and one `fitting net` in total, which reduces training complexity largely. ## Preliminary + In the following chart, you can find the meaning of symbols used to clarify the atom-type embedding algorithm. + $i$: Type of centric atom $j$: Type of neighbor atom @@ -40,8 +44,10 @@ $$E = F( [ \text{Multi}( \mathcal G( [s_{ij}, A(j)] ) ), A(j)] )$$ The difference between the two variants above is whether using the information of centric atom when generating the descriptor. Users can choose by modifying the `type_one_side` hyper-parameter in the input JSON file. ## How to use + A detailed introduction can be found at [`se_e2_a_tebd`](../model/train-se-e2-a-tebd.md). 
Looking for a fast start-up, you can simply add a `type_embedding` section in the input JSON file as displayed in the following, and the algorithm will adopt the atom type embedding algorithm automatically. An example of `type_embedding` is like + ```json "type_embedding":{ "neuron": [2, 4, 8], @@ -50,19 +56,26 @@ An example of `type_embedding` is like } ``` - ## Code Modification + Atom-type embedding can be applied to varied `embedding net` and `fitting net`, as a result, we build a class `TypeEmbedNet` to support this free combination. In the following, we will go through the execution process of the code to explain our code modification. ### trainer (train/trainer.py) + In trainer.py, it will parse the parameter from the input JSON file. If a `type_embedding` section is detected, it will build a `TypeEmbedNet`, which will be later input in the `model`. `model` will be built in the function `_build_network`. + ### model (model/ener.py) + When building the operation graph of the `model` in `model.build`. If a `TypeEmbedNet` is detected, it will build the operation graph of `type embed net`, `embedding net` and `fitting net` by order. The building process of `type embed net` can be found in `TypeEmbedNet.build`, which output the type embedding vector of each atom type (of [$\text{ntypes} \times \text{nchanl}$] dimensions). We then save the type embedding vector into `input_dict`, so that they can be fetched later in `embedding net` and `fitting net`. -### embedding net (descriptor/se*.py) + +### embedding net (descriptor/se\*.py) + In `embedding net`, we shall take local environment $\mathcal R$ as input and output matrix $\mathcal G$. Functions called in this process by the order is + ``` build -> _pass_filter -> _filter -> _filter_lower ``` + `_pass_filter`: It will first detect whether an atom type embedding exists, if so, it will apply atom type embedding algorithm and doesn't divide the input by type. 
`_filter`: It will call `_filter_lower` function to obtain the result of matrix multiplication ($\mathcal G^T\cdot \mathcal R$), do further multiplication involved in $\text{Multi}(\cdot)$, and finally output the result of descriptor vector of $M_1 \times M_2$ dim. @@ -70,8 +83,8 @@ build -> _pass_filter -> _filter -> _filter_lower `_filter_lower`: The main function handling input modification. If type embedding exists, it will call `_concat_type_embedding` function to concat the first column of input $\mathcal R$ (the column of $s_{ij}$) with the atom type embedding information. It will decide whether to use the atom type embedding vector of the centric atom according to the value of `type_one_side` (if set **True**, then we only use the vector of the neighbor atom). The modified input will be put into the `fitting net` to get $\mathcal G$ for further matrix multiplication stage. ### fitting net (fit/ener.py) -In `fitting net`, it takes the descriptor vector as input, whose dimension is [natoms, $M_1\times M_2$]. Because we need to involve information on the centric atom in this step, we need to generate a matrix named `atype_embed` (of dim [natoms, nchanl]), in which each row is the type embedding vector of the specific centric atom. The input is sorted by type of centric atom, we also know the number of a particular atom type (stored in `natoms[2+i]`), thus we get the type vector of the centric atom. In the build phase of the fitting net, it will check whether type embedding exists in `input_dict` and fetch them. After that, call `embed_atom_type` function to look up the embedding vector for the type vector of the centric atom to obtain `atype_embed`, and concat input with it ([input, atype_embed]). The modified input goes through `fitting` net` to get predicted energy. +In `fitting net`, it takes the descriptor vector as input, whose dimension is [natoms, $M_1\times M_2$]. 
Because we need to involve information on the centric atom in this step, we need to generate a matrix named `atype_embed` (of dim [natoms, nchanl]), in which each row is the type embedding vector of the specific centric atom. The input is sorted by type of centric atom, we also know the number of a particular atom type (stored in `natoms[2+i]`), thus we get the type vector of the centric atom. In the build phase of the fitting net, it will check whether type embedding exists in `input_dict` and fetch them. After that, call `embed_atom_type` function to look up the embedding vector for the type vector of the centric atom to obtain `atype_embed`, and concat input with it ([input, atype_embed]). The modified input goes through `fitting net` to get predicted energy. :::{note} You can't apply the compression method while using atom-type embedding. diff --git a/doc/environment.yml b/doc/environment.yml index 97060c3004..85d5a97c5b 100644 --- a/doc/environment.yml +++ b/doc/environment.yml @@ -7,7 +7,7 @@ dependencies: - python=3.9 - pip>=20.1 - pip: - - ..[docs,cpu] - - "exhale @ https://github.com/svenevs/exhale/archive/2759a394268307b88f5440487ae0920ee4ebf81e.zip" - # https://github.com/mcmtroffaes/sphinxcontrib-bibtex/issues/309 - - docutils!=0.18.*,!=0.19.* + - ..[docs,cpu,torch] + - "exhale @ https://github.com/svenevs/exhale/archive/2759a394268307b88f5440487ae0920ee4ebf81e.zip" + # https://github.com/mcmtroffaes/sphinxcontrib-bibtex/issues/309 + - docutils!=0.18.*,!=0.19.* diff --git a/doc/freeze/compress.md b/doc/freeze/compress.md index 7394f77143..01cc9fa3a8 100644 --- a/doc/freeze/compress.md +++ b/doc/freeze/compress.md @@ -1,4 +1,8 @@ -# Compress a model +# Compress a model {{ tensorflow_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }} +::: ## Theory @@ -7,37 +11,46 @@ The compression of the DP model uses three techniques, tabulated inference, oper For better performance, the NN inference can be replaced by tabulated function
evaluations if the input of the NN is of dimension one. The idea is to approximate the output of the NN by a piece-wise polynomial fitting. The input domain (a compact domain in $\mathbb R$) is divided into $L_c$ equally spaced intervals, in which we apply a fifth-order polynomial $g^l_m(x)$ approximation of the $m$-th output component of the NN function: + ```math g^l_m(x) = a^l_m x^5 + b^l_m x^4 + c^l_m x^3 + d^l_m x^2 + e^l_m x + f^l_m,\quad x \in [x_l, x_{l+1}), ``` + where $l=1,2,\dots,L_c$ is the index of the intervals, $x_1, \dots, x_{L_c}, x_{L_c+1}$ are the endpoints of the intervals, and $a^l_m$, $b^l_m$, $c^l_m$, $d^l_m$, $e^l_m$, and $f^l_m$ are the fitting parameters. The fitting parameters can be computed by the equations below: + ```math a^l_m = \frac{1}{2\Delta x_l^5}[12h_{m,l}-6(y'_{m,l+1}+y'_{m,l})\Delta x_l + (y''_{m,l+1}-y''_{m,l})\Delta x_l^2], ``` + ```math b^l_m = \frac{1}{2\Delta x_l^4}[-30h_{m,l} +(14y'_{m,l+1}+16y'_{m,l})\Delta x_l + (-2y''_{m,l+1}+3y''_{m,l})\Delta x_l^2], ``` + ```math c^l_m = \frac{1}{2\Delta x_l^3}[20h_{m,l}-(8y'_{m,l+1}+12y'_{m,l})\Delta x_l + (y''_{m,l+1}-3y''_{m,l})\Delta x_l^2], ``` + ```math d^l_m = \frac{1}{2}y''_{m,l}, ``` + ```math e^l_m = y_{m,l}', ``` + ```math f^l_m = y_{m,l}, ``` + where $\Delta x_l=x_{l+1}-x_l$ denotes the size of the interval. $h_{m,l}=y_{m,l+1}-y_{m,l}$. $y_{m,l} = y_m(x_l)$, $y'_{m,l} = y'_m(x_l)$ and $y''_{m,l} = y''_m(x_l)$ are the value, the first-order derivative, and the second-order derivative of the $m$-th component of the target NN function at the interval point $x_l$, respectively. The first and second-order derivatives are easily calculated by the back-propagation of the NN functions. 
-In the standard DP model inference, taking the [two-body embedding descriptor](../model/train-se-e2-a.md) as an example, the matrix product $(\mathcal G^i)^T \mathcal R$ requires the transfer of the tensor $\mathcal G^i$ between the register and the host/device memories, which usually becomes the bottle-neck of the computation due to the relatively small memory bandwidth of the GPUs. +In the standard DP model inference, taking the [two-body embedding descriptor](../model/train-se-e2-a.md) as an example, the matrix product $(\mathcal G^i)^T \mathcal R$ requires the transfer of the tensor $\mathcal G^i$ between the register and the host/device memories, which usually becomes the bottle-neck of the computation due to the relatively small memory bandwidth of the GPUs. The compressed DP model merges the matrix multiplication $(\mathcal G^i)^T \mathcal R$ with the tabulated inference step. More specifically, once one column of the $(\mathcal G^i)^T$ is evaluated, it is immediately multiplied with one row of the environment matrix in the register, and the outer product is deposited to the result of $(\mathcal G^i)^T \mathcal R$. -By the operator merging technique, the allocation of $\mathcal G^i$ and the memory movement between register and host/device memories is avoided. +By the operator merging technique, the allocation of $\mathcal G^i$ and the memory movement between register and host/device memories is avoided. The operator merging of the three-body embedding can be derived analogously. The first dimension, $N_c$, of the environment ($\mathcal R^i$) and embedding ($\mathcal G^i$) matrices is the expected maximum number of neighbors. @@ -45,19 +58,24 @@ If the number of neighbors of an atom is smaller than $N_c$, the corresponding p In practice, if the real number of neighbors is significantly smaller than $N_c$, a notable operation is spent on the multiplication of padding zeros. 
In the compressed DP model, the number of neighbors is precisely indexed at the tabulated inference stage, further saving computational costs.[^1] -[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). +[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). ## Instructions Once the frozen model is obtained from DeePMD-kit, we can get the neural network structure and its parameters (weights, biases, etc.) 
from the trained model, and compress it in the following way: + ```bash dp compress -i graph.pb -o graph-compress.pb ``` + where `-i` gives the original frozen model, `-o` gives the compressed model. Several other command line options can be passed to `dp compress`, which can be checked with + ```bash $ dp compress --help ``` + An explanation will be provided + ``` usage: dp compress [-h] [-v {DEBUG,3,INFO,2,WARNING,1,ERROR,0}] [-l LOG_PATH] [-m {master,collect,workers}] [-i INPUT] [-o OUTPUT] @@ -114,11 +132,12 @@ optional arguments: The training script of the input frozen model (default: None) ``` + **Parameter explanation** Model compression, which includes tabulating the embedding net. -The table is composed of fifth-order polynomial coefficients and is assembled from two sub-tables. For model descriptor with `se_e2_a` type, the first sub-table takes the stride(parameter) as its uniform stride, while the second sub-table takes 10 * stride as its uniform stride; For model descriptor with `se_e3` type, the first sub-table takes 10 * stride as it's uniform stride, while the second sub-table takes 100 * stride as it's uniform stride. -The range of the first table is automatically detected by DeePMD-kit, while the second table ranges from the first table's upper boundary(upper) to the extrapolate(parameter) * upper. +The table is composed of fifth-order polynomial coefficients and is assembled from two sub-tables. For a model descriptor of the `se_e2_a` type, the first sub-table takes the stride (parameter) as its uniform stride, while the second sub-table takes `10 * stride` as its uniform stride; for a model descriptor of the `se_e3` type, the first sub-table takes `10 * stride` as its uniform stride, while the second sub-table takes `100 * stride` as its uniform stride. +The range of the first table is automatically detected by DeePMD-kit, while the second table ranges from the first table's upper boundary (upper) to `extrapolate * upper`, where `extrapolate` is a parameter.
Finally, we added a check frequency parameter. It indicates how often the program checks for overflow(if the input environment matrix overflows the first or second table range) during the MD inference. **Justification of model compression** @@ -127,14 +146,14 @@ Model compression, with little loss of accuracy, can greatly speed up MD inferen **Acceptable original model version** -The model compression interface requires the version of DeePMD-kit used in the original model generation should be `2.0.0-alpha.0` or above. If one has a frozen 1.2 or 1.3 model, one can upgrade it through the `dp convert-from` interface. (eg: ```dp convert-from 1.2/1.3 -i old_frozen_model.pb -o new_frozen_model.pb```) +The model compression interface requires the version of DeePMD-kit used in the original model generation should be `2.0.0-alpha.0` or above. If one has a frozen 1.2 or 1.3 model, one can upgrade it through the `dp convert-from` interface. (eg: `dp convert-from 1.2/1.3 -i old_frozen_model.pb -o new_frozen_model.pb`) **Acceptable descriptor type** Descriptors with `se_e2_a`, `se_e3`, `se_e2_r` and `se_atten_v2` types are supported by the model compression feature. `Hybrid` mixed with the above descriptors is also supported. - **Available activation functions for descriptor:** + - tanh - gelu - relu diff --git a/doc/freeze/freeze.md b/doc/freeze/freeze.md index ba0cd44606..b80928a119 100644 --- a/doc/freeze/freeze.md +++ b/doc/freeze/freeze.md @@ -1,14 +1,35 @@ # Freeze a model The trained neural network is extracted from a checkpoint and dumped into a protobuf(.pb) file. This process is called "freezing" a model. The idea and part of our code are from [Morgan](https://blog.metaflow.fr/tensorflow-how-to-freeze-a-model-and-serve-it-with-a-python-api-d4f3596b3adc). To freeze a model, typically one does + +::::{tab-set} + +:::{tab-item} TensorFlow {{ tensorflow_icon }} + ```bash -$ dp freeze -o graph.pb +$ dp freeze -o model.pb ``` -in the folder where the model is trained. 
The output model is called `graph.pb`. + +in the folder where the model is trained. The output model is called `model.pb`. + +::: + +:::{tab-item} PyTorch {{ pytorch_icon }} + +```bash +$ dp --pt freeze -o model.pth +``` + +in the folder where the model is trained. The output model is called `model.pth`. + +::: + +:::: In [multi-task mode](../train/multi-task-training.md): + - This process will in default output several models, each of which contains the common descriptor and -one of the user-defined fitting nets in {ref}`fitting_net_dict `, let's name it `fitting_key`, together frozen in `graph_{fitting_key}.pb`. -Those frozen models are exactly the same as single-task output with fitting net `fitting_key`. + one of the user-defined fitting nets in {ref}`fitting_net_dict `, let's name it `fitting_key`, together frozen in `graph_{fitting_key}.pb`. + Those frozen models are exactly the same as single-task output with fitting net `fitting_key`. - If you add `--united-model` option in this situation, -the total multi-task model will be frozen into one unit `graph.pb`, which is mainly for multi-task initialization and can not be used directly for inference. + the total multi-task model will be frozen into one unit `graph.pb`, which is mainly for multi-task initialization and can not be used directly for inference. diff --git a/doc/freeze/index.md b/doc/freeze/index.md deleted file mode 100644 index 0bc3664144..0000000000 --- a/doc/freeze/index.md +++ /dev/null @@ -1,4 +0,0 @@ -# Freeze and Compress - -- [Freeze a model](freeze.md) -- [Compress a model](compress.md) diff --git a/doc/getting-started/quick_start.ipynb b/doc/getting-started/quick_start.ipynb index ec939265fd..1c53665b7d 100644 --- a/doc/getting-started/quick_start.ipynb +++ b/doc/getting-started/quick_start.ipynb @@ -239,7 +239,7 @@ "id": "a999f41b-e343-4dc2-8499-84fee6e52221", "metadata": {}, "source": [ - "The DeePMD-kit adopts a compressed data format. 
All training data should first be converted into this format and can then be used by DeePMD-kit. The data format is explained in detail in the DeePMD-kit manual that can be found in [the DeePMD-kit Data Introduction](../data/index.md)." + "The DeePMD-kit adopts a compressed data format. All training data should first be converted into this format and can then be used by DeePMD-kit. The data format is explained in detail in the DeePMD-kit manual that can be found in [the DeePMD-kit Data Introduction](../data/system.md)." ] }, { @@ -1001,7 +1001,7 @@ "WARNING:tensorflow:From /opt/mamba/lib/python3.10/site-packages/deepmd/utils/batch_size.py:61: is_gpu_available (from tensorflow.python.framework.test_util) is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "Use `tf.config.list_physical_devices('GPU')` instead.\n", - "WARNING:deepmd.utils.batch_size:You can use the environment variable DP_INFER_BATCH_SIZE tocontrol the inference batch size (nframes * natoms). The default value is 1024.\n" + "WARNING:deepmd.tf.utils.batch_size:You can use the environment variable DP_INFER_BATCH_SIZE tocontrol the inference batch size (nframes * natoms). 
The default value is 1024.\n" ] } ], diff --git a/doc/index.rst b/doc/index.rst index b60430b566..7bff8d3957 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -34,6 +34,7 @@ DeePMD-kit is a package written in Python/C++, designed to minimize the effort r :numbered: :caption: Advanced + backend install/index data/index model/index @@ -63,7 +64,8 @@ DeePMD-kit is a package written in Python/C++, designed to minimize the effort r :caption: Developer Guide development/cmake - development/create-a-model + development/create-a-model-tf + development/create-a-model-pt development/type-embedding development/coding-conventions development/cicd diff --git a/doc/inference/cxx.md b/doc/inference/cxx.md index 6188daba4c..58c74df068 100644 --- a/doc/inference/cxx.md +++ b/doc/inference/cxx.md @@ -1,6 +1,9 @@ # C/C++ interface + ## C++ interface + The C++ interface of DeePMD-kit is also available for the model interface, which is considered faster than the Python interface. An example `infer_water.cpp` is given below: + ```cpp #include "deepmd/DeepPot.h" @@ -14,14 +17,18 @@ int main(){ dp.compute (e, f, v, coord, atype, cell); } ``` + where `e`, `f` and `v` are predicted energy, force and virial of the system, respectively. See {cpp:class}`deepmd::DeepPot` for details. You can compile `infer_water.cpp` using `gcc`: + ```sh gcc infer_water.cpp -L $deepmd_root/lib -L $tensorflow_root/lib -I $deepmd_root/include -Wl,--no-as-needed -ldeepmd_cc -lstdc++ -ltensorflow_cc -Wl,-rpath=$deepmd_root/lib -Wl,-rpath=$tensorflow_root/lib -o infer_water ``` + and then run the program: + ```sh ./infer_water ``` @@ -31,6 +38,7 @@ and then run the program: Although C is harder to write, the C library will not be affected by different versions of C++ compilers. 
An example `infer_water.c` is given below: + ```cpp #include #include @@ -62,7 +70,7 @@ int main(){ free(v); free(ae); free(av); - free(dp); + DP_DeleteDeepPot(dp); } ``` @@ -71,10 +79,13 @@ where `e`, `f` and `v` are predicted energy, force and virial of the system, res See {cpp:func}`DP_DeepPotCompute` for details. You can compile `infer_water.c` using `gcc`: + ```sh gcc infer_water.c -L $deepmd_root/lib -L $tensorflow_root/lib -I $deepmd_root/include -Wl,--no-as-needed -ldeepmd_c -Wl,-rpath=$deepmd_root/lib -Wl,-rpath=$tensorflow_root/lib -o infer_water ``` + and then run the program: + ```sh ./infer_water ``` @@ -103,10 +114,13 @@ Note that the feature of the header-only C++ library is still limited compared t See {cpp:class}`deepmd::hpp::DeepPot` for details. You can compile `infer_water_hpp.cpp` using `gcc`: + ```sh gcc infer_water_hpp.cpp -L $deepmd_root/lib -L $tensorflow_root/lib -I $deepmd_root/include -Wl,--no-as-needed -ldeepmd_c -Wl,-rpath=$deepmd_root/lib -Wl,-rpath=$tensorflow_root/lib -o infer_water_hpp ``` + and then run the program: + ```sh ./infer_water_hpp ``` diff --git a/doc/inference/index.md b/doc/inference/index.md deleted file mode 100644 index fa0a747eb4..0000000000 --- a/doc/inference/index.md +++ /dev/null @@ -1,7 +0,0 @@ -# Inference - -Note that the model for inference is required to be compatible with the DeePMD-kit package. See [Model compatibility](../troubleshooting/model-compatability.html) for details. 
- -- [Python interface](python.md) -- [C++ interface](cxx.md) -- [Node.js interface](nodejs.md) diff --git a/doc/inference/nodejs.md b/doc/inference/nodejs.md index 72bfa6f9d9..8d58881898 100644 --- a/doc/inference/nodejs.md +++ b/doc/inference/nodejs.md @@ -9,9 +9,9 @@ const deepmd = require("deepmd-kit"); const dp = new deepmd.DeepPot("graph.pb"); -const coord = [1., 0., 0., 0., 0., 1.5, 1., 0., 3.]; +const coord = [1, 0, 0, 0, 0, 1.5, 1, 0, 3]; const atype = [1, 0, 1]; -const cell = [10., 0., 0., 0., 10., 0., 0., 0., 10.]; +const cell = [10, 0, 0, 0, 10, 0, 0, 0, 10]; const v_coord = new deepmd.vectord(coord.length); const v_atype = new deepmd.vectori(atype.length); @@ -20,15 +20,21 @@ for (var i = 0; i < coord.length; i++) v_coord.set(i, coord[i]); for (var i = 0; i < atype.length; i++) v_atype.set(i, atype[i]); for (var i = 0; i < cell.length; i++) v_cell.set(i, cell[i]); -var energy = 0.0 +var energy = 0.0; var v_forces = new deepmd.vectord(); var v_virials = new deepmd.vectord(); energy = dp.compute(energy, v_forces, v_virials, v_coord, v_atype, v_cell); console.log("energy:", energy); -console.log("forces:", [...Array(v_forces.size()).keys()].map(i => v_forces.get(i))); -console.log("virials:", [...Array(v_virials.size()).keys()].map(i => v_virials.get(i))); +console.log( + "forces:", + [...Array(v_forces.size()).keys()].map((i) => v_forces.get(i)), +); +console.log( + "virials:", + [...Array(v_virials.size()).keys()].map((i) => v_virials.get(i)), +); ``` Energy, forces, and virials will be printed to the screen. 
diff --git a/doc/inference/python.md b/doc/inference/python.md index b5d3ca1efc..73faa2b329 100644 --- a/doc/inference/python.md +++ b/doc/inference/python.md @@ -1,6 +1,7 @@ # Python interface One may use the python interface of DeePMD-kit for model inference, an example is given as follows + ```python from deepmd.infer import DeepPot import numpy as np @@ -11,9 +12,11 @@ cell = np.diag(10 * np.ones(3)).reshape([1, -1]) atype = [1, 0, 1] e, f, v = dp.eval(coord, cell, atype) ``` + where `e`, `f` and `v` are predicted energy, force and virial of the system, respectively. Furthermore, one can use the python interface to calculate model deviation. + ```python from deepmd.infer import calc_model_devi from deepmd.infer import DeepPot as DP @@ -26,9 +29,14 @@ graphs = [DP("graph.000.pb"), DP("graph.001.pb")] model_devi = calc_model_devi(coord, cell, atype, graphs) ``` -Note that if the model inference or model deviation is performed cyclically, one should avoid calling the same model multiple times. Otherwise, tensorFlow will never release the memory and this may lead to an out-of-memory (OOM) error. +Note that if the model inference or model deviation is performed cyclically, one should avoid calling the same model multiple times. +Otherwise, TensorFlow or PyTorch will never release the memory, and this may lead to an out-of-memory (OOM) error. + +## External neighbor list algorithm {{ tensorflow_icon }} -## External neighbor list algorithm +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }} +::: The native neighbor list algorithm of the DeePMD-kit is in $O(N^2)$ complexity ($N$ is the number of atoms). While this is not a problem for small systems that quantum methods can afford, the large systems for molecular dynamics have slow performance. 
diff --git a/doc/install/build-conda.md b/doc/install/build-conda.md index 41c9f90a6e..14dee5c263 100644 --- a/doc/install/build-conda.md +++ b/doc/install/build-conda.md @@ -1,5 +1,12 @@ # Building conda packages +::::{danger} +:::{deprecated} 3.0.0 +The official channel has been deprecated since 3.0.0. +Refer to [conda-forge documentation](https://conda-forge.org/docs/maintainer/adding_pkgs/) for how to contribute and build packages locally. +::: +:::: + One may want to keep both convenience and personalization of the DeePMD-kit. To achieve this goal, one can consider building conda packages. We provide building scripts in [deepmd-kit-recipes organization](https://github.com/deepmd-kit-recipes/). These building tools are driven by [conda-build](https://github.com/conda/conda-build) and [conda-smithy](https://github.com/conda-forge/conda-smithy). For example, if one wants to turn on `MPIIO` package in LAMMPS, go to [`lammps-feedstock`](https://github.com/deepmd-kit-recipes/lammps-feedstock/) repository and modify `recipe/build.sh`. `-D PKG_MPIIO=OFF` should be changed to `-D PKG_MPIIO=ON`. Then go to the main directory and execute @@ -9,6 +16,7 @@ For example, if one wants to turn on `MPIIO` package in LAMMPS, go to [`lammps-f ``` This requires that Docker has been installed. After the building, the packages will be generated in `build_artifacts/linux-64` and `build_artifacts/noarch`, and then one can install then executing + ```sh conda create -n deepmd lammps -c file:///path/to/build_artifacts -c https://conda.deepmodeling.com -c nvidia ``` diff --git a/doc/install/easy-install-dev.md b/doc/install/easy-install-dev.md index f3cf52c1f5..bb68272ace 100644 --- a/doc/install/easy-install-dev.md +++ b/doc/install/easy-install-dev.md @@ -19,15 +19,24 @@ For CUDA 11.8 support, use the `devel_cu11` tag. 
Below is a one-line shell command to download the [artifact](https://nightly.link/deepmodeling/deepmd-kit/workflows/build_wheel/devel/artifact.zip) containing wheels and install it with `pip`: ```sh -pip install -U --pre deepmd-kit[gpu,cu12,lmp] --extra-index-url https://deepmodeling.github.io/deepmd-kit/simple +pip install -U --pre deepmd-kit[gpu,cu12,lmp,torch] --extra-index-url https://deepmodeling.github.io/deepmd-kit/simple ``` `cu12` and `lmp` are optional, which is the same as the stable version. -## Download pre-compiled C Library +## Download pre-compiled C Library {{ tensorflow_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }} +::: The [pre-compiled C library](./install-from-c-library.md) can be downloaded from [here](https://nightly.link/deepmodeling/deepmd-kit/workflows/package_c/devel/libdeepmd_c-0-libdeepmd_c.tar.gz.zip), or via a shell command: ```sh wget https://nightly.link/deepmodeling/deepmd-kit/workflows/package_c/devel/libdeepmd_c-0-libdeepmd_c.tar.gz.zip && unzip libdeepmd_c-0-libdeepmd_c.tar.gz.zip ``` + +## Pre-release conda-forge packages + +Pre-release conda-forge packages are in `conda-forge/label/deepmd-kit_dev` or `conda-forge/label/deepmd-kit_rc` channels, rather than the `conda-forge` channel. +See [conda-forge documentation](https://conda-forge.org/docs/maintainer/knowledge_base/#pre-release-builds) for more information. diff --git a/doc/install/easy-install.md b/doc/install/easy-install.md index 3bc1f4b944..0c56fdb0c5 100644 --- a/doc/install/easy-install.md +++ b/doc/install/easy-install.md @@ -6,6 +6,11 @@ After your easy installation, DeePMD-kit (`dp`) and LAMMPS (`lmp`) will be avail :::{note} Note: The off-line packages and conda packages require the [GNU C Library](https://www.gnu.org/software/libc/) 2.17 or above. The GPU version requires [compatible NVIDIA driver](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#minor-version-compatibility) to be installed in advance.
It is possible to force conda to [override detection](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-virtual.html#overriding-detected-packages) when installation, but these requirements are still necessary during runtime. +You can refer to [DeepModeling conda FAQ](https://docs.deepmodeling.com/faq/conda.html) for more information. +::: + +:::{note} +Python 3.8 or above is required for Python interface. ::: - [Install off-line packages](#install-off-line-packages) @@ -13,73 +18,88 @@ Note: The off-line packages and conda packages require the [GNU C Library](https - [Install with docker](#install-with-docker) - [Install Python interface with pip](#install-python-interface-with-pip) - ## Install off-line packages -Both CPU and GPU version offline packages are available in [the Releases page](https://github.com/deepmodeling/deepmd-kit/releases). -Some packages are splited into two files due to size limit of GitHub. One may merge them into one after downloading: +Both CPU and GPU version offline packages are available on [the Releases page](https://github.com/deepmodeling/deepmd-kit/releases). + +Some packages are split into two files due to the size limit of GitHub. One may merge them into one after downloading: + ```bash -cat deepmd-kit-2.1.1-cuda11.6_gpu-Linux-x86_64.sh.0 deepmd-kit-2.1.1-cuda11.6_gpu-Linux-x86_64.sh.1 > deepmd-kit-2.1.1-cuda11.6_gpu-Linux-x86_64.sh +cat deepmd-kit-2.2.9-cuda118-Linux-x86_64.sh.0 deepmd-kit-2.2.9-cuda118-Linux-x86_64.sh.1 > deepmd-kit-2.2.9-cuda118-Linux-x86_64.sh ``` One may enable the environment using + ```bash conda activate /path/to/deepmd-kit ``` ## Install with conda -DeePMD-kit is available with [conda](https://github.com/conda/conda). Install [Anaconda](https://www.anaconda.com/distribution/#download-section) or [Miniconda](https://docs.conda.io/en/latest/miniconda.html) first. -### Official channel +DeePMD-kit is available with [conda](https://github.com/conda/conda). 
Install [Anaconda](https://www.anaconda.com/distribution/#download-section), [Miniconda](https://docs.conda.io/en/latest/miniconda.html), or [miniforge](https://conda-forge.org/download/) first. +You can refer to [DeepModeling conda FAQ](https://docs.deepmodeling.com/faq/conda.html) for how to setup a conda environment. + +### conda-forge channel + +DeePMD-kit is available on the [conda-forge](https://conda-forge.org/) channel: + +```bash +conda create -n deepmd deepmd-kit lammps horovod -c conda-forge +``` + +The supported platforms include Linux x86-64, macOS x86-64, and macOS arm64. +Read [conda-forge FAQ](https://conda-forge.org/docs/user/tipsandtricks.html#installing-cuda-enabled-packages-like-tensorflow-and-pytorch) to learn how to install CUDA-enabled packages. + +### Official channel (deprecated) + +::::{danger} +:::{deprecated} 3.0.0 +The official channel has been deprecated since 3.0.0, due to the challenging work of building dependencies for [multiple backends](../backend.md). +Old packages will still be available at https://conda.deepmodeling.com. +Maintainers will build packages in the conda-forge organization together with other conda-forge members. +::: +:::: One may create an environment that contains the CPU version of DeePMD-kit and LAMMPS: + ```bash conda create -n deepmd deepmd-kit=*=*cpu libdeepmd=*=*cpu lammps -c https://conda.deepmodeling.com -c defaults ``` Or one may want to create a GPU environment containing [CUDA Toolkit](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#binary-compatibility__table-toolkit-driver): + ```bash conda create -n deepmd deepmd-kit=*=*gpu libdeepmd=*=*gpu lammps cudatoolkit=11.6 horovod -c https://conda.deepmodeling.com -c defaults ``` + One could change the CUDA Toolkit version from `10.2` or `11.6`. 
-One may specify the DeePMD-kit version such as `2.1.1` using -```bash -conda create -n deepmd deepmd-kit=2.1.1=*cpu libdeepmd=2.1.1=*cpu lammps horovod -c https://conda.deepmodeling.com -c defaults -``` +One may specify the DeePMD-kit version such as `2.2.9` using -One may enable the environment using ```bash -conda activate deepmd +conda create -n deepmd deepmd-kit=2.2.9=*cpu libdeepmd=2.2.9=*cpu lammps horovod -c https://conda.deepmodeling.com -c defaults ``` -### conda-forge channel - -DeePMD-kit is also available on the [conda-forge](https://conda-forge.org/) channel: +One may enable the environment using ```bash -conda create -n deepmd deepmd-kit lammps horovod -c conda-forge +conda activate deepmd ``` -The supported platform includes Linux x86-64, macOS x86-64, and macOS arm64. -Read [conda-forge FAQ](https://conda-forge.org/docs/user/tipsandtricks.html#installing-cuda-enabled-packages-like-tensorflow-and-pytorch) to learn how to install CUDA-enabled packages. - ## Install with docker -A docker for installing the DeePMD-kit is available [here](https://github.com/orgs/deepmodeling/packages/container/package/deepmd-kit). + +A docker for installing the DeePMD-kit is available [here](https://github.com/deepmodeling/deepmd-kit/pkgs/container/deepmd-kit). 
To pull the CPU version: + ```bash -docker pull ghcr.io/deepmodeling/deepmd-kit:2.1.1_cpu +docker pull ghcr.io/deepmodeling/deepmd-kit:2.2.8_cpu ``` To pull the GPU version: -```bash -docker pull ghcr.io/deepmodeling/deepmd-kit:2.1.1_cuda11.6_gpu -``` -To pull the ROCm version: ```bash -docker pull deepmodeling/dpmdkit-rocm:dp2.0.3-rocm4.5.2-tf2.6-lmp29Sep2021 +docker pull ghcr.io/deepmodeling/deepmd-kit:2.2.8_cuda12.0_gpu ``` ## Install Python interface with pip @@ -87,7 +107,7 @@ docker pull deepmodeling/dpmdkit-rocm:dp2.0.3-rocm4.5.2-tf2.6-lmp29Sep2021 If you have no existing TensorFlow installed, you can use `pip` to install the pre-built package of the Python interface with CUDA 12 supported: ```bash -pip install deepmd-kit[gpu,cu12] +pip install deepmd-kit[gpu,cu12,torch] ``` `cu12` is required only when CUDA Toolkit and cuDNN were not installed. @@ -95,24 +115,29 @@ pip install deepmd-kit[gpu,cu12] To install the package built against CUDA 11.8, use ```bash +pip install torch --index-url https://download.pytorch.org/whl/cu118 pip install deepmd-kit-cu11[gpu,cu11] ``` Or install the CPU version without CUDA supported: + ```bash +pip install torch --index-url https://download.pytorch.org/whl/cpu pip install deepmd-kit[cpu] ``` -[The LAMMPS module](../third-party/lammps-command.md) and [the i-Pi driver](../third-party/ipi.md) are only provided on Linux and macOS. To install LAMMPS and/or i-Pi, add `lmp` and/or `ipi` to extras: +[The LAMMPS module](../third-party/lammps-command.md) and [the i-Pi driver](../third-party/ipi.md) are only provided on Linux and macOS for the TensorFlow backend. To install LAMMPS and/or i-Pi, add `lmp` and/or `ipi` to extras: + ```bash -pip install deepmd-kit[gpu,cu12,lmp,ipi] +pip install deepmd-kit[gpu,cu12,torch,lmp,ipi] ``` + MPICH is required for parallel running. (The macOS arm64 package doesn't support MPI yet.) It is suggested to install the package into an isolated environment. 
The supported platform includes Linux x86-64 and aarch64 with GNU C Library 2.28 or above, macOS x86-64 and arm64, and Windows x86-64. -A specific version of TensorFlow which is compatible with DeePMD-kit will be also installed. +A specific version of TensorFlow and PyTorch which is compatible with DeePMD-kit will be also installed. :::{Warning} -If your platform is not supported, or want to build against the installed TensorFlow, or want to enable ROCM support, please [build from source](install-from-source.md). +If your platform is not supported, or you want to build against the installed TensorFlow, or you want to enable ROCM support, please [build from source](install-from-source.md). ::: diff --git a/doc/install/index.md b/doc/install/index.md deleted file mode 100644 index 8428255f5a..0000000000 --- a/doc/install/index.md +++ /dev/null @@ -1,11 +0,0 @@ -# Installation - -- [Easy install](easy-install.md) -- [Install from source code](install-from-source.md) -- [Install from pre-compiled C library](doc/install/install-from-c-library.md) -- [Install LAMMPS](install-lammps.md) -- [Install i-PI](install-ipi.md) -- [Install GROMACS](install-gromacs.md) -- [Building conda packages](build-conda.md) -- [Install Node.js interface](install-nodejs.md) -- [Easy install the latest development version](easy-install-dev.md) diff --git a/doc/install/install-from-c-library.md b/doc/install/install-from-c-library.md index 7613fdb772..f1a5496b59 100644 --- a/doc/install/install-from-c-library.md +++ b/doc/install/install-from-c-library.md @@ -1,4 +1,8 @@ -# Install from pre-compiled C library +# Install from pre-compiled C library {{ tensorflow_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }} +::: DeePMD-kit provides pre-compiled C library package (`libdeepmd_c.tar.gz`) in each [release](https://github.com/deepmodeling/deepmd-kit/releases). 
It can be used to build the [LAMMPS plugin](./install-lammps.md) and [GROMACS patch](./install-gromacs.md), as well as many [third-party software packages](../third-party/out-of-deepmd-kit.md), without building TensorFlow and DeePMD-kit on one's own. It can be downloaded via the shell command: @@ -10,7 +14,7 @@ tar xzf libdeepmd_c.tar.gz The library is built in Linux (GLIBC 2.17) with CUDA 12.2 (`libdeepmd_c.tar.gz`) or 11.8 (`libdeepmd_c_cu11.tar.gz`). It's noted that this package does not contain CUDA Toolkit and cuDNN, so one needs to download them from the NVIDIA website. -## Use Pre-compiled C Library to build the LAMMPS plugin and GROMACS patch +## Use Pre-compiled C Library to build the LAMMPS plugin, i-PI driver, and GROMACS patch When one [installs DeePMD-kit's C++ interface](./install-from-source.md#install-deepmd-kits-c-interface), one can use the CMake argument `DEEPMD_C_ROOT` to the path `libdeepmd_c`. @@ -23,4 +27,5 @@ make -j8 make install ``` -Then one can follow the manual [Install LAMMPS](./install-lammps.md) and/or [Install GROMACS](./install-gromacs.md). +Then the i-PI driver `dp_ipi` will be built and installed. +One can also follow the manual [Install LAMMPS](./install-lammps.md) and/or [Install GROMACS](./install-gromacs.md). diff --git a/doc/install/install-from-source.md b/doc/install/install-from-source.md index 4f94b9c793..5195992853 100644 --- a/doc/install/install-from-source.md +++ b/doc/install/install-from-source.md @@ -3,54 +3,102 @@ Please follow our [GitHub](https://github.com/deepmodeling/deepmd-kit) webpage to download the [latest released version](https://github.com/deepmodeling/deepmd-kit/tree/master) and [development version](https://github.com/deepmodeling/deepmd-kit/tree/devel). 
Or get the DeePMD-kit source code by `git clone` + ```bash cd /some/workspace git clone https://github.com/deepmodeling/deepmd-kit.git deepmd-kit ``` For convenience, you may want to record the location of the source to a variable, saying `deepmd_source_dir` by + ```bash cd deepmd-kit deepmd_source_dir=`pwd` ``` -## Install the python interface -### Install Tensorflow's python interface -First, check the python version on your machine +## Install the Python interface + +### Install Backend's Python interface + +First, check the Python version on your machine. +Python 3.8 or above is required. + ```bash python --version ``` -We follow the virtual environment approach to install TensorFlow's Python interface. The full instruction can be found on the official [TensorFlow website](https://www.tensorflow.org/install/pip). TensorFlow 1.8 or later is supported. Now we assume that the Python interface will be installed to the virtual environment directory `$tensorflow_venv` +We follow the virtual environment approach to install the backend's Python interface. +Now we assume that the Python interface will be installed in the virtual environment directory `$deepmd_venv`: + ```bash -virtualenv -p python3 $tensorflow_venv -source $tensorflow_venv/bin/activate +virtualenv -p python3 $deepmd_venv +source $deepmd_venv/bin/activate pip install --upgrade pip +``` + +::::{tab-set} + +:::{tab-item} TensorFlow {{ tensorflow_icon }} + +The full instruction to install TensorFlow can be found on the official [TensorFlow website](https://www.tensorflow.org/install/pip). TensorFlow 2.2 or later is supported. 
+ +```bash pip install --upgrade tensorflow ``` -It is important that every time a new shell is started and one wants to use `DeePMD-kit`, the virtual environment should be activated by + +If one does not need the GPU support of DeePMD-kit and is concerned about package size, the CPU-only version of TensorFlow should be installed by + ```bash -source $tensorflow_venv/bin/activate +pip install --upgrade tensorflow-cpu ``` -if one wants to skip out of the virtual environment, he/she can do + +One can also [use conda](https://docs.deepmodeling.org/faq/conda.html) to install TensorFlow from [conda-forge](https://conda-forge.org). + +To verify the installation, run + ```bash -deactivate +python -c "import tensorflow as tf;print(tf.reduce_sum(tf.random.normal([1000, 1000])))" ``` -If one has multiple python interpreters named something like python3.x, it can be specified by, for example + +One can also [build the TensorFlow Python interface from source](https://www.tensorflow.org/install/source) for customized hardware optimization, such as CUDA, ROCM, or OneDNN support. + +::: + +:::{tab-item} PyTorch {{ pytorch_icon }} + +To install PyTorch, run + +```sh +pip install torch +``` + +Follow [PyTorch documentation](https://pytorch.org/get-started/locally/) to install PyTorch built against different CUDA versions or without CUDA. + +One can also [use conda](https://docs.deepmodeling.org/faq/conda.html) to install PyTorch from [conda-forge](https://conda-forge.org). 
+ +::: + +:::: + +It is important that every time a new shell is started and one wants to use `DeePMD-kit`, the virtual environment should be activated by + ```bash -virtualenv -p python3.8 $tensorflow_venv +source $deepmd_venv/bin/activate ``` -If one does not need the GPU support of DeePMD-kit and is concerned about package size, the CPU-only version of TensorFlow should be installed by + +If one wants to leave the virtual environment, he/she can do + ```bash -pip install --upgrade tensorflow-cpu +deactivate ``` -To verify the installation, run + +If one has multiple Python interpreters named something like python3.x, it can be specified by, for example + ```bash -python -c "import tensorflow as tf;print(tf.reduce_sum(tf.random.normal([1000, 1000])))" +virtualenv -p python3.8 $deepmd_venv ``` -One should remember to activate the virtual environment every time he/she uses DeePMD-kit. -One can also [build the TensorFlow Python interface from source](https://www.tensorflow.org/install/source) for custom hardware optimization, such as CUDA, ROCM, or OneDNN support. +One should remember to activate the virtual environment every time he/she uses DeePMD-kit. ### Install the DeePMD-kit's python interface @@ -60,9 +108,30 @@ Check the compiler version on your machine gcc --version ``` -The compiler GCC 4.8 or later is supported in the DeePMD-kit. Note that TensorFlow may have specific requirements for the compiler version to support the C++ standard version and [`_GLIBCXX_USE_CXX11_ABI`](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html) used by TensorFlow. It is recommended to use [the same compiler version as TensorFlow](https://www.tensorflow.org/install/source#tested_build_configurations), which can be printed by `python -c "import tensorflow;print(tensorflow.version.COMPILER_VERSION)"`. +The compiler GCC 4.8 or later is supported in the DeePMD-kit.
+ +::::{tab-set} + +:::{tab-item} TensorFlow {{ tensorflow_icon }} + +Note that TensorFlow may have specific requirements for the compiler version to support the C++ standard version and [`_GLIBCXX_USE_CXX11_ABI`](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html) used by TensorFlow. It is recommended to use [the same compiler version as TensorFlow](https://www.tensorflow.org/install/source#tested_build_configurations), which can be printed by `python -c "import tensorflow;print(tensorflow.version.COMPILER_VERSION)"`. + +::: + +:::{tab-item} PyTorch {{ pytorch_icon }} + +You can set the environment variable `export DP_ENABLE_PYTORCH=1` to enable customized C++ OPs in the PyTorch backend. +Note that PyTorch may have specific requirements for the compiler version to support the C++ standard version and [`_GLIBCXX_USE_CXX11_ABI`](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html) used by PyTorch. + +The customized C++ OPs are not enabled by default because TensorFlow and PyTorch packages from the PyPI use different `_GLIBCXX_USE_CXX11_ABI` flags. +We recommend conda-forge packages in this case. + +::: + +:::: Execute + ```bash cd $deepmd_source_dir pip install . @@ -70,25 +139,32 @@ pip install . One may set the following environment variables before executing `pip`: -| Environment variables | Allowed value | Default value | Usage | -| --------------------- | ---------------------- | ------------- | -------------------------- | -| DP_VARIANT | `cpu`, `cuda`, `rocm` | `cpu` | Build CPU variant or GPU variant with CUDA or ROCM support. | -| CUDAToolkit_ROOT | Path | Detected automatically | The path to the CUDA toolkit directory. CUDA 9.0 or later is supported. NVCC is required. | -| ROCM_ROOT | Path | Detected automatically | The path to the ROCM toolkit directory. | -| TENSORFLOW_ROOT | Path | Detected automatically | The path to TensorFlow Python library. 
By default the installer only finds TensorFlow under user site-package directory (`site.getusersitepackages()`) or system site-package directory (`sysconfig.get_path("purelib")`) due to limitation of [PEP-517](https://peps.python.org/pep-0517/). If not found, the latest TensorFlow (or the environment variable `TENSORFLOW_VERSION` if given) from PyPI will be built against.| -| DP_ENABLE_NATIVE_OPTIMIZATION | 0, 1 | 0 | Enable compilation optimization for the native machine's CPU type. Do not enable it if generated code will run on different CPUs. | -| CMAKE_ARGS | str | - | Additional CMake arguments | -| <LANG>FLAGS (``=`CXX`, `CUDA` or `HIP`) | str | - | Default compilation flags to be used when compiling `` files. See [CMake documentation](https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_FLAGS.html). | +| Environment variables | Allowed value | Default value | Usage | +| --------------------------------------------------- | --------------------- | ---------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DP_VARIANT | `cpu`, `cuda`, `rocm` | `cpu` | Build CPU variant or GPU variant with CUDA or ROCM support. | +| CUDAToolkit_ROOT | Path | Detected automatically | The path to the CUDA toolkit directory. CUDA 9.0 or later is supported. NVCC is required. | +| ROCM_ROOT | Path | Detected automatically | The path to the ROCM toolkit directory. | +| DP_ENABLE_TENSORFLOW | 0, 1 | 1 | {{ tensorflow_icon }} Enable the TensorFlow backend. | +| DP_ENABLE_PYTORCH | 0, 1 | 0 | {{ pytorch_icon }} Enable customized C++ OPs for the PyTorch backend. 
PyTorch can still run without customized C++ OPs, but features will be limited. | +| TENSORFLOW_ROOT | Path | Detected automatically | {{ tensorflow_icon }} The path to TensorFlow Python library. By default the installer only finds TensorFlow under user site-package directory (`site.getusersitepackages()`) or system site-package directory (`sysconfig.get_path("purelib")`) due to limitation of [PEP-517](https://peps.python.org/pep-0517/). If not found, the latest TensorFlow (or the environment variable `TENSORFLOW_VERSION` if given) from PyPI will be built against. | +| DP_ENABLE_NATIVE_OPTIMIZATION | 0, 1 | 0 | Enable compilation optimization for the native machine's CPU type. Do not enable it if generated code will run on different CPUs. | +| CMAKE_ARGS | str | - | Additional CMake arguments | +| <LANG>FLAGS (`<LANG>`=`CXX`, `CUDA` or `HIP`) | str | - | Default compilation flags to be used when compiling `<LANG>` files. See [CMake documentation](https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_FLAGS.html). | To test the installation, one should first jump out of the source directory + ``` cd /some/other/workspace ``` + then execute + ```bash dp -h ``` + It will print the help information like + ```text usage: dp [-h] {train,freeze,test} ... @@ -105,15 +181,17 @@ Valid subcommands: test test the model ``` -### Install horovod and mpi4py +### Install horovod and mpi4py {{ tensorflow_icon }} [Horovod](https://github.com/horovod/horovod) and [mpi4py](https://github.com/mpi4py/mpi4py) are used for parallel training. For better performance on GPU, please follow the tuning steps in [Horovod on GPU](https://github.com/horovod/horovod/blob/master/docs/gpus.rst). + ```bash # With GPU, prefer NCCL as a communicator. HOROVOD_WITHOUT_GLOO=1 HOROVOD_WITH_TENSORFLOW=1 HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_NCCL_HOME=/path/to/nccl pip install horovod mpi4py ``` If your work in a CPU environment, please prepare runtime as below: + ```bash # By default, MPI is used as communicator.
HOROVOD_WITHOUT_GLOO=1 HOROVOD_WITH_TENSORFLOW=1 pip install horovod mpi4py @@ -151,7 +229,11 @@ If you don't install Horovod, DeePMD-kit will fall back to serial mode. If one does not need to use DeePMD-kit with Lammps or I-Pi, then the python interface installed in the previous section does everything and he/she can safely skip this section. -### Install Tensorflow's C++ interface (optional) +### Install Backends' C++ interface (optional) + +::::{tab-set} + +:::{tab-item} TensorFlow {{ tensorflow_icon }} Since TensorFlow 2.12, TensorFlow C++ library (`libtensorflow_cc`) is packaged inside the Python library. Thus, you can skip building TensorFlow C++ library manually. If that does not work for you, you can still build it manually. @@ -159,9 +241,21 @@ The C++ interface of DeePMD-kit was tested with compiler GCC >= 4.8. It is notic First, the C++ interface of Tensorflow should be installed. It is noted that the version of Tensorflow should be consistent with the python interface. You may follow [the instruction](install-tf.2.12.md) or run the script `$deepmd_source_dir/source/install/build_tf.py` to install the corresponding C++ interface. +::: + +:::{tab-item} PyTorch {{ pytorch_icon }} + +If you have installed PyTorch using pip, you can use libtorch inside the PyTorch Python package. +You can also download libtorch prebuilt library from the [PyTorch website](https://pytorch.org/get-started/locally/). + +::: + +:::: + ### Install DeePMD-kit's C++ interface Now go to the source code directory of DeePMD-kit and make a building place. + ```bash cd $deepmd_source_dir/source mkdir build @@ -174,36 +268,72 @@ The installation requires CMake 3.16 or later for the CPU version, CMake 3.23 or pip install -U cmake ``` +You must enable at least one backend. +If you enable two or more backends, these backend libraries must be built in a compatible way, e.g. using the same `_GLIBCXX_USE_CXX11_ABI` flag. 
+We recommend using [conda packages](https://docs.deepmodeling.org/faq/conda.html) from [conda-forge](https://conda-forge.org), which are usually compatible with each other. + +::::{tab-set} + +:::{tab-item} TensorFlow {{ tensorflow_icon }} + I assume you have activated the TensorFlow Python environment and want to install DeePMD-kit into path `$deepmd_root`, then execute CMake + ```bash -cmake -DUSE_TF_PYTHON_LIBS=TRUE -DCMAKE_INSTALL_PREFIX=$deepmd_root .. +cmake -DENABLE_TENSORFLOW=TRUE -DUSE_TF_PYTHON_LIBS=TRUE -DCMAKE_INSTALL_PREFIX=$deepmd_root .. ``` If you specify `-DUSE_TF_PYTHON_LIBS=FALSE`, you need to give the location where TensorFlow's C++ interface is installed to `-DTENSORFLOW_ROOT=${tensorflow_root}`. +::: + +:::{tab-item} PyTorch {{ pytorch_icon }} + +I assume you have installed PyTorch (either Python or C++ interface) to `$torch_root`, then execute CMake + +```bash +cmake -DENABLE_PYTORCH=TRUE -DCMAKE_PREFIX_PATH=$torch_root -DCMAKE_INSTALL_PREFIX=$deepmd_root .. +``` + +You can specify `-DUSE_PT_PYTHON_LIBS=TRUE` to use libtorch from the Python installation, +but you need to be careful that [PyTorch PyPI packages are still built using `_GLIBCXX_USE_CXX11_ABI=0`](https://github.com/pytorch/pytorch/issues/51039), which may not be compatible with other libraries. + +```bash +cmake -DENABLE_PYTORCH=TRUE -DUSE_PT_PYTHON_LIBS=TRUE -DCMAKE_INSTALL_PREFIX=$deepmd_root .. +``` + +::: + +:::: + One may add the following arguments to `cmake`: -| CMake Aurgements | Allowed value | Default value | Usage | -| ------------------------ | ------------------- | ------------- | ------------------------| -| -DTENSORFLOW_ROOT=<value> | Path | - | The Path to TensorFlow's C++ interface. | -| -DCMAKE_INSTALL_PREFIX=<value> | Path | - | The Path where DeePMD-kit will be installed. | -| -DUSE_CUDA_TOOLKIT=<value> | `TRUE` or `FALSE` | `FALSE` | If `TRUE`, Build GPU support with CUDA toolkit.
| -| -DCUDAToolkit_ROOT=<value> | Path | Detected automatically | The path to the CUDA toolkit directory. CUDA 9.0 or later is supported. NVCC is required. | -| -DUSE_ROCM_TOOLKIT=<value> | `TRUE` or `FALSE` | `FALSE` | If `TRUE`, Build GPU support with ROCM toolkit. | -| -DCMAKE_HIP_COMPILER_ROCM_ROOT=<value> | Path | Detected automatically | The path to the ROCM toolkit directory. | -| -DLAMMPS_SOURCE_ROOT=<value> | Path | - | Only neccessary for LAMMPS plugin mode. The path to the [LAMMPS source code](install-lammps.md). LAMMPS 8Apr2021 or later is supported. If not assigned, the plugin mode will not be enabled. | -| -DUSE_TF_PYTHON_LIBS=<value> | `TRUE` or `FALSE` | `FALSE` | If `TRUE`, Build C++ interface with TensorFlow's Python libraries(TensorFlow's Python Interface is required). And there's no need for building TensorFlow's C++ interface.| -| -DENABLE_NATIVE_OPTIMIZATION=<value> | `TRUE` or `FALSE` | `FALSE` | Enable compilation optimization for the native machine's CPU type. Do not enable it if generated code will run on different CPUs. | -| -DCMAKE_<LANG>_FLAGS=<value> (`<LANG>`=`CXX`, `CUDA` or `HIP`) | str | - | Default compilation flags to be used when compiling `<LANG>` files. See [CMake documentation](https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_FLAGS.html). | +| CMake Arguments | Allowed value | Default value | Usage | +| ---------------------------------------------------------------------------- | ----------------- | ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| -DENABLE_TENSORFLOW=<value> | `TRUE` or `FALSE` | `FALSE` | {{ tensorflow_icon }} Whether to build the TensorFlow backend. | +| -DENABLE_PYTORCH=<value> | `TRUE` or `FALSE` | `FALSE` | {{ pytorch_icon }} Whether to build the PyTorch backend.
| +| -DTENSORFLOW_ROOT=<value> | Path | - | {{ tensorflow_icon }} The path to TensorFlow's C++ interface. | +| -DCMAKE_INSTALL_PREFIX=<value> | Path | - | The path where DeePMD-kit will be installed. | +| -DUSE_CUDA_TOOLKIT=<value> | `TRUE` or `FALSE` | `FALSE` | If `TRUE`, build GPU support with CUDA toolkit. | +| -DCUDAToolkit_ROOT=<value> | Path | Detected automatically | The path to the CUDA toolkit directory. CUDA 9.0 or later is supported. NVCC is required. | +| -DUSE_ROCM_TOOLKIT=<value> | `TRUE` or `FALSE` | `FALSE` | If `TRUE`, build GPU support with ROCM toolkit. | +| -DCMAKE_HIP_COMPILER_ROCM_ROOT=<value> | Path | Detected automatically | The path to the ROCM toolkit directory. | +| -DLAMMPS_SOURCE_ROOT=<value> | Path | - | Only necessary for LAMMPS plugin mode. The path to the [LAMMPS source code](install-lammps.md). LAMMPS 8Apr2021 or later is supported. If not assigned, the plugin mode will not be enabled. | +| -DUSE_TF_PYTHON_LIBS=<value> | `TRUE` or `FALSE` | `FALSE` | {{ tensorflow_icon }} If `TRUE`, build the C++ interface with TensorFlow's Python libraries (TensorFlow's Python interface is required). In this case, there is no need to build TensorFlow's C++ interface. | +| -DUSE_PT_PYTHON_LIBS=<value> | `TRUE` or `FALSE` | `FALSE` | {{ pytorch_icon }} If `TRUE`, build the C++ interface with PyTorch's Python libraries (PyTorch's Python interface is required). In this case, there is no need to download PyTorch's C++ libraries. | +| -DENABLE_NATIVE_OPTIMIZATION=<value> | `TRUE` or `FALSE` | `FALSE` | Enable compilation optimization for the native machine's CPU type. Do not enable it if generated code will run on different CPUs. | +| -DCMAKE\_<LANG>\_FLAGS=<value> (`<LANG>`=`CXX`, `CUDA` or `HIP`) | str | - | Default compilation flags to be used when compiling `<LANG>` files. See [CMake documentation](https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_FLAGS.html).
| If the CMake has been executed successfully, then run the following make commands to build the package: + ```bash make -j4 make install ``` + Option `-j4` means using 4 processes in parallel. You may want to use a different number according to your hardware. If everything works fine, you will have the executable and libraries installed in `$deepmd_root/bin` and `$deepmd_root/lib` + ```bash $ ls $deepmd_root/bin $ ls $deepmd_root/lib diff --git a/doc/install/install-gromacs.md b/doc/install/install-gromacs.md index 758ad7784a..147822cf17 100644 --- a/doc/install/install-gromacs.md +++ b/doc/install/install-gromacs.md @@ -3,11 +3,14 @@ Before following this section, [DeePMD-kit C++ interface](install-from-source.md) should have be installed. ## Patch source code of GROMACS + Download the source code of a supported GROMACS version (2020.2) from https://manual.gromacs.org/2020.2/download.html. Run the following command: + ```bash export PATH=$PATH:$deepmd_kit_root/bin dp_gmx_patch -d $gromacs_root -v $version -p ``` + where `deepmd_kit_root` is the directory where the latest version of DeePMD-kit is installed, and `gromacs_root` refers to the source code directory of GROMACS. And `version` represents the version of GROMACS, where **only 2020.2 is supported now**. If attempting to patch another version of GROMACS you will still need to set `version` to `2020.2` as this is the only supported version, we cannot guarantee that patching other versions of GROMACS will work. ## Compile GROMACS with deepmd-kit + The C++ interface of `Deepmd-kit 2.x` and `TensorFlow 2.x` are required. And be aware that only DeePMD-kit with **high precision** is supported now since we cannot ensure single precision is enough for a GROMACS simulation. 
Here is a sample compile script: + ```bash #!/bin/bash export CC=/usr/bin/gcc diff --git a/doc/install/install-ipi.md b/doc/install/install-ipi.md index 1f4de7474c..3dd45d6749 100644 --- a/doc/install/install-ipi.md +++ b/doc/install/install-ipi.md @@ -1,11 +1,14 @@ # Install i-PI + The i-PI works in a client-server model. The i-PI provides the server for integrating the replica positions of atoms, while the DeePMD-kit provides a client named `dp_ipi` that computes the interactions (including energy, forces and virials). The server and client communicate via the Unix domain socket or the Internet socket. Full documentation for i-PI can be found [here](http://ipi-code.org/). The source code and a complete installation guide for i-PI can be found [here](https://github.com/i-pi/i-pi). To use i-PI with already existing drivers, install and update using Pip: + ```bash pip install -U i-PI ``` Test with Pytest: + ```bash pip install pytest pytest --pyargs ipi.tests diff --git a/doc/install/install-lammps.md b/doc/install/install-lammps.md index 5dbf690c67..c24bfac06b 100644 --- a/doc/install/install-lammps.md +++ b/doc/install/install-lammps.md @@ -3,6 +3,7 @@ There are two ways to install LAMMPS: the built-in mode and the plugin mode. The built-in mode builds LAMMPS along with the DeePMD-kit and DeePMD-kit will be loaded automatically when running LAMMPS. The plugin mode builds LAMMPS and a plugin separately, so one needs to use `plugin load` command to load the DeePMD-kit's LAMMPS plugin library. ## Install LAMMPS's DeePMD-kit module (built-in mode) + Before following this section, [DeePMD-kit C++ interface](install-from-source.md) should have be installed. DeePMD-kit provides a module for running MD simulations with LAMMPS. Now make the DeePMD-kit module for LAMMPS. @@ -11,37 +12,45 @@ DeePMD-kit provides a module for running MD simulations with LAMMPS. 
Now make th cd $deepmd_source_dir/source/build make lammps ``` + DeePMD-kit will generate a module called `USER-DEEPMD` in the `build` directory, which supports either double or single float precision interface. Now download the LAMMPS code, and uncompress it. + ```bash cd /some/workspace -wget https://github.com/lammps/lammps/archive/stable_2Aug2023_update2.tar.gz -tar xf stable_2Aug2023_update2.tar.gz +wget https://github.com/lammps/lammps/archive/stable_2Aug2023_update3.tar.gz +tar xf stable_2Aug2023_update3.tar.gz ``` -The source code of LAMMPS is stored in the directory `lammps-stable_2Aug2023_update2`. + +The source code of LAMMPS is stored in the directory `lammps-stable_2Aug2023_update3`. Then, you can [build LAMMPS](https://docs.lammps.org/Build.html) with either make or CMake. ### With make Now go into the LAMMPS code and copy the DeePMD-kit module like this + ```bash -cd lammps-stable_2Aug2023_update2/src/ +cd lammps-stable_2Aug2023_update3/src/ cp -r $deepmd_source_dir/source/build/USER-DEEPMD . make yes-kspace make yes-extra-fix make yes-user-deepmd ``` + You can enable any other package you want. Now build LAMMPS + ```bash make mpi -j4 ``` If everything works fine, you will end up with an executable `lmp_mpi`. + ```bash ./lmp_mpi -h ``` The DeePMD-kit module can be removed from the LAMMPS source code by + ```bash make no-user-deepmd ``` @@ -51,8 +60,8 @@ make no-user-deepmd Now go into the LAMMPS directory and create a directory called `build`: ```bash -mkdir -p lammps-stable_2Aug2023_update2/build/ -cd lammps-stable_2Aug2023_update2/build/ +mkdir -p lammps-stable_2Aug2023_update3/build/ +cd lammps-stable_2Aug2023_update3/build/ ``` Patch the LAMMPS `CMakeLists.txt` file: @@ -64,6 +73,7 @@ echo "include(${deepmd_source_dir}/source/lmp/builtin.cmake)" >> ../cmake/CMakeL It's expected to see one extra line in the end of `CMakeLists.txt`. Now build LAMMPS. You can install any other package you want. 
+ ```bash cmake -D LAMMPS_INSTALL_RPATH=ON -D BUILD_SHARED_LIBS=yes -D CMAKE_INSTALL_PREFIX=${deepmd_root} -DCMAKE_PREFIX_PATH=${deepmd_root} ../cmake make -j4 @@ -71,27 +81,32 @@ make install ``` If everything works fine, you will end up with an executable `${deepmd_root}/bin/lmp`. + ```bash ${deepmd_root}/bin/lmp -h ``` ## Install LAMMPS (plugin mode) + Starting from `8Apr2021`, LAMMPS also provides a plugin mode, allowing one to build LAMMPS and a plugin separately. Now download the LAMMPS code (`8Apr2021` or later), and uncompress it: + ```bash cd /some/workspace -wget https://github.com/lammps/lammps/archive/stable_2Aug2023_update2.tar.gz -tar xf stable_2Aug2023_update2.tar.gz +wget https://github.com/lammps/lammps/archive/stable_2Aug2023_update3.tar.gz +tar xf stable_2Aug2023_update3.tar.gz ``` -The source code of LAMMPS is stored in the directory `lammps-stable_2Aug2023_update2`. The directory of the source code should be specified as the CMAKE argument `LAMMPS_SOURCE_ROOT` during installation of the DeePMD-kit C++ interface. Now go into the LAMMPS directory and create a directory called `build` +The source code of LAMMPS is stored in the directory `lammps-stable_2Aug2023_update3`. The directory of the source code should be specified as the CMAKE argument `LAMMPS_SOURCE_ROOT` during installation of the DeePMD-kit C++ interface. Now go into the LAMMPS directory and create a directory called `build` ```bash -mkdir -p lammps-stable_2Aug2023_update2/build/ -cd lammps-stable_2Aug2023_update2/build/ +mkdir -p lammps-stable_2Aug2023_update3/build/ +cd lammps-stable_2Aug2023_update3/build/ ``` + Now build LAMMPS. Note that `PLUGIN` must be enabled, and `BUILD_SHARED_LIBS` must be set to `yes`. You can install any other package you want. 
+ ```bash cmake -D PKG_PLUGIN=ON -D LAMMPS_INSTALL_RPATH=ON -D BUILD_SHARED_LIBS=yes -D CMAKE_INSTALL_PREFIX=${deepmd_root} -D CMAKE_INSTALL_LIBDIR=lib -D CMAKE_INSTALL_FULL_LIBDIR=${deepmd_root}/lib ../cmake make -j4 @@ -99,6 +114,7 @@ make install ``` If everything works fine, you will end up with an executable `${deepmd_root}/bin/lmp`. + ```bash ${deepmd_root}/bin/lmp -h ``` @@ -109,4 +125,5 @@ If `${tensorflow_root}`, `${deepmd_root}`, or the path to TensorFlow Python pack ```sh patchelf --add-rpath "${tensorflow_root}/lib" liblammps.so ``` + ::: diff --git a/doc/install/install-tf.1.12.md b/doc/install/install-tf.1.12.md index f4009405d7..13abd8f7a7 100644 --- a/doc/install/install-tf.1.12.md +++ b/doc/install/install-tf.1.12.md @@ -1,5 +1,7 @@ # Install TensorFlow's C++ interface + The TensorFlow's C++ interface will be compiled from the source code. Firstly one installs bazel. It is highly recommended that the bazel version 0.15.0 is used. A full instruction of bazel installation can be found [here](https://docs.bazel.build/versions/master/install.html). + ```bash cd /some/workspace wget https://github.com/bazelbuild/bazel/releases/download/0.15.0/bazel-0.15.0-dist.zip @@ -11,6 +13,7 @@ export PATH=`pwd`/output:$PATH ``` Firstly get the source code of the TensorFlow + ```bash cd /some/workspace git clone https://github.com/tensorflow/tensorflow tensorflow -b v1.12.0 --depth=1 @@ -18,26 +21,35 @@ cd tensorflow ``` DeePMD-kit is compiled by CMake, so we need to compile and integrate TensorFlow with CMake projects. The rest of this section follows [the instruction provided by Tuatini](http://tuatini.me/building-tensorflow-as-a-standalone-project/). Now execute + ```bash ./configure ``` + You will answer a list of questions that help configure the building of TensorFlow. It is recommended to build for Python3. 
You may want to answer the question like this (please replace `$tensorflow_venv` with the virtual environment directory): + ```bash Please specify the location of python. [Default is $tensorflow_venv/bin/python]: ``` + The library path for Python should be set accordingly. Now build the shared library of TensorFlow: + ```bash bazel build -c opt --verbose_failures //tensorflow:libtensorflow_cc.so ``` -You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. + +You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. Now I assume you want to install TensorFlow in directory `$tensorflow_root`. Create the directory if it does not exist + ```bash mkdir -p $tensorflow_root ``` + Before moving on, we need to compile the dependencies of TensorFlow, including Protobuf, Eigen, nsync and absl. 
Firstly, protobuf + ```bash mkdir /tmp/proto sed -i 's;PROTOBUF_URL=.*;PROTOBUF_URL=\"https://mirror.bazel.build/github.com/google/protobuf/archive/v3.6.0.tar.gz\";g' tensorflow/contrib/makefile/download_dependencies.sh @@ -48,7 +60,9 @@ cd tensorflow/contrib/makefile/downloads/protobuf/ make make install ``` + Then Eigen + ```bash mkdir /tmp/eigen cd ../eigen @@ -57,7 +71,9 @@ cd build_dir cmake -DCMAKE_INSTALL_PREFIX=/tmp/eigen/ ../ make install ``` + nsync + ```bash mkdir /tmp/nsync cd ../../nsync @@ -67,7 +83,9 @@ cmake -DCMAKE_INSTALL_PREFIX=/tmp/nsync/ ../ make make install ``` + And absl + ```bash cd ../../absl bazel build @@ -75,7 +93,9 @@ mkdir -p $tensorflow_root/include/ rsync -avzh --include '*/' --include '*.h' --exclude '*' absl $tensorflow_root/include/ cd ../../../../.. ``` + Now, copy the libraries to the tensorflow's installation directory: + ```bash mkdir $tensorflow_root/lib cp bazel-bin/tensorflow/libtensorflow_cc.so $tensorflow_root/lib/ @@ -83,7 +103,9 @@ cp bazel-bin/tensorflow/libtensorflow_framework.so $tensorflow_root/lib/ cp /tmp/proto/lib/libprotobuf.a $tensorflow_root/lib/ cp /tmp/nsync/lib64/libnsync.a $tensorflow_root/lib/ ``` + Then copy the headers + ```bash mkdir -p $tensorflow_root/include/tensorflow cp -r bazel-genfiles/* $tensorflow_root/include/ @@ -94,12 +116,16 @@ cp -r /tmp/proto/include/* $tensorflow_root/include cp -r /tmp/eigen/include/eigen3/* $tensorflow_root/include cp -r /tmp/nsync/include/*h $tensorflow_root/include ``` + Now clean up the source files in the header directories: + ```bash cd $tensorflow_root/include find . 
-name "*.cc" -type f -delete ``` + The temporary installation directories for the dependencies can be removed: + ```bash rm -fr /tmp/proto /tmp/eigen /tmp/nsync ``` diff --git a/doc/install/install-tf.1.14-gpu.md b/doc/install/install-tf.1.14-gpu.md index 4e9fcaf7fc..5850af24ba 100644 --- a/doc/install/install-tf.1.14-gpu.md +++ b/doc/install/install-tf.1.14-gpu.md @@ -1,5 +1,7 @@ # Install TensorFlow-GPU's C++ interface + TensorFlow's C++ interface will be compiled from the source code. Firstly one installs Bazel. It is highly recommended that the Bazel version 0.24.1 is used. Full instructions on Bazel installation can be found [here](https://docs.bazel.build/versions/master/install.html). + ```bash cd /some/workspace wget https://github.com/bazelbuild/bazel/releases/download/0.24.1/bazel-0.24.1-dist.zip @@ -11,6 +13,7 @@ export PATH=`pwd`/output:$PATH ``` Firstly get the source code of the TensorFlow + ```bash cd /some/workspace git clone https://github.com/tensorflow/tensorflow tensorflow -b v1.14.0 --depth=1 @@ -20,6 +23,7 @@ cd tensorflow DeePMD-kit is compiled by CMake, so we need to compile and integrate TensorFlow with CMake projects. The rest of this section follows [the instruction provided by Tuatini](http://tuatini.me/building-tensorflow-as-a-standalone-project/). Now execute You will answer a list of questions that help configure the building of TensorFlow. It is recommended to build for Python3. You may want to answer the question like this (please replace `$tensorflow_venv` with the virtual environment directory): + ```bash ./configure Please specify the location of python. [Default is xxx]: @@ -93,23 +97,30 @@ Configuration finished The library path for Python should be set accordingly. 
Now build the shared library of TensorFlow: + ```bash bazel build -c opt --verbose_failures //tensorflow:libtensorflow_cc.so ``` -You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. + +You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. Now I assume you want to install TensorFlow in directory `$tensorflow_root`. Create the directory if it does not exist + ```bash mkdir -p $tensorflow_root ``` + Now, copy the libraries to the TensorFlow's installation directory: + ```bash mkdir $tensorflow_root/lib cp -d bazel-bin/tensorflow/libtensorflow_cc.so* $tensorflow_root/lib/ cp -d bazel-bin/tensorflow/libtensorflow_framework.so* $tensorflow_root/lib/ cp -d $tensorflow_root/lib/libtensorflow_framework.so.1 $tensorflow_root/lib/libtensorflow_framework.so ``` + Then copy the headers + ```bash mkdir -p $tensorflow_root/include/tensorflow cp -r bazel-genfiles/* $tensorflow_root/include/ @@ -121,16 +132,20 @@ cp -r bazel-tensorflow/external/eigen_archive/unsupported/ $tensorflow_root/incl rsync -avzh --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel-tensorflow/external/protobuf_archive/src/ $tensorflow_root/include/ rsync -avzh --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel-tensorflow/external/com_google_absl/absl/ $tensorflow_root/include/absl ``` + Now clean up the source files in the header directories: + ```bash cd 
$tensorflow_root/include find . -name "*.cc" -type f -delete ``` # Troubleshooting + ```bash git: unknown command -C ... ``` + This may be your git version issue because the low version of Git does not support this command. Upgrading your Git may be helpful. ```bash @@ -139,9 +154,11 @@ Please set them or make sure they are set and tested correctly in the CMake file FFTW_LIB (ADVANCED) linked by target "FFTW" in directory xxx ``` + Currently, when building the Eigen package, you can delete the FFTW in the CMake file. ```bash fatal error: absl/numeric/int128_have_intrinsic.inc: No such file or directory ``` + Basically, you could build an empty file named "int128_have_intrinsic.inc" in the same directory of "int128.h". diff --git a/doc/install/install-tf.1.14.md b/doc/install/install-tf.1.14.md index 065df9cad9..6457d484ad 100644 --- a/doc/install/install-tf.1.14.md +++ b/doc/install/install-tf.1.14.md @@ -1,5 +1,7 @@ # Install tensorflow's C++ interface + The tensorflow's C++ interface will be compiled from the source code. Firstly one installs bazel. It is highly recommended that the bazel version 0.24.1 is used. A full instruction of bazel installation can be found [here](https://docs.bazel.build/versions/master/install.html). + ```bash cd /some/workspace wget https://github.com/bazelbuild/bazel/releases/download/0.24.1/bazel-0.24.1-dist.zip @@ -11,6 +13,7 @@ export PATH=`pwd`/output:$PATH ``` Firstly get the source code of the tensorflow + ```bash cd /some/workspace git clone https://github.com/tensorflow/tensorflow tensorflow -b v1.14.0 --depth=1 @@ -18,33 +21,44 @@ cd tensorflow ``` DeePMD-kit is compiled by cmake, so we need to compile and integrate tensorflow with cmake projects. The rest of this section basically follows [the instruction provided by Tuatini](http://tuatini.me/building-tensorflow-as-a-standalone-project/). Now execute + ```bash ./configure ``` + You will answer a list of questions that help configure the building of tensorflow. 
It is recommended to build for Python3. You may want to answer the question like this (please replace `$tensorflow_venv` by the virtual environment directory): + ```bash Please specify the location of python. [Default is $tensorflow_venv/bin/python]: ``` + The library path for Python should be set accordingly. Now build the shared library of tensorflow: + ```bash bazel build -c opt --verbose_failures //tensorflow:libtensorflow_cc.so ``` -You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue of your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. + +You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue of your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. Now I assume you want to install tensorflow in directory `$tensorflow_root`. 
Create the directory if it does not exists + ```bash mkdir -p $tensorflow_root ``` + Now, copy the libraries to the tensorflow's installation directory: + ```bash mkdir $tensorflow_root/lib cp -d bazel-bin/tensorflow/libtensorflow_cc.so* $tensorflow_root/lib/ cp -d bazel-bin/tensorflow/libtensorflow_framework.so* $tensorflow_root/lib/ cp -d $tensorflow_root/lib/libtensorflow_framework.so.1 $tensorflow_root/lib/libtensorflow_framework.so ``` + Then copy the headers + ```bash mkdir -p $tensorflow_root/include/tensorflow cp -r bazel-genfiles/* $tensorflow_root/include/ @@ -56,7 +70,9 @@ cp -r bazel-tensorflow/external/eigen_archive/unsupported/ $tensorflow_root/incl rsync -avzh --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel-tensorflow/external/protobuf_archive/src/ $tensorflow_root/include/ rsync -avzh --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel-tensorflow/external/com_google_absl/absl/ $tensorflow_root/include/absl ``` + Now clean up the source files in the header directories: + ```bash cd $tensorflow_root/include find . -name "*.cc" -type f -delete diff --git a/doc/install/install-tf.1.8.md b/doc/install/install-tf.1.8.md index bfc1a616d4..f9554f9348 100644 --- a/doc/install/install-tf.1.8.md +++ b/doc/install/install-tf.1.8.md @@ -1,5 +1,7 @@ # Install tensorflow's C++ interface + The tensorflow's C++ interface will be compiled from the source code. Firstly one installs bazel. It is highly recommended that the bazel version 0.10.0 is used. A full instruction of bazel installation can be found [here](https://docs.bazel.build/versions/master/install.html). 
+ ```bash cd /some/workspace wget https://github.com/bazelbuild/bazel/releases/download/0.10.0/bazel-0.10.0-dist.zip @@ -11,6 +13,7 @@ export PATH=`pwd`/output:$PATH ``` Firstly get the source code of the TensorFlow + ```bash cd /some/workspace git clone https://github.com/tensorflow/tensorflow tensorflow -b v1.8.0 --depth=1 @@ -18,26 +21,35 @@ cd tensorflow ``` DeePMD-kit is compiled by CMake, so we need to compile and integrate TensorFlow with CMake projects. The rest of this section basically follows [the instruction provided by Tuatini](http://tuatini.me/building-tensorflow-as-a-standalone-project/). Now execute + ```bash ./configure ``` + You will answer a list of questions that help configure the building of TensorFlow. It is recommended to build for Python3. You may want to answer the question like this (please replace `$tensorflow_venv` with the virtual environment directory): + ```bash Please specify the location of python. [Default is $tensorflow_venv/bin/python]: ``` + The library path for Python should be set accordingly. Now build the shared library of TensorFlow: + ```bash bazel build -c opt --verbose_failures //tensorflow:libtensorflow_cc.so ``` -You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue of your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. + +You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue of your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. Now I assume you want to install TensorFlow in directory `$tensorflow_root`. 
Create the directory if it does not exist + ```bash mkdir -p $tensorflow_root ``` + Before moving on, we need to compile the dependencies of TensorFlow, including Protobuf, Eigen and nsync. Firstly, protobuf + ```bash mkdir /tmp/proto tensorflow/contrib/makefile/download_dependencies.sh @@ -47,7 +59,9 @@ cd tensorflow/contrib/makefile/downloads/protobuf/ make make install ``` + Then Eigen + ```bash mkdir /tmp/eigen cd ../eigen @@ -56,7 +70,9 @@ cd build_dir cmake -DCMAKE_INSTALL_PREFIX=/tmp/eigen/ ../ make install ``` + And nsync + ```bash mkdir /tmp/nsync cd ../../nsync @@ -67,7 +83,9 @@ make make install cd ../../../../../.. ``` + Now, copy the libraries to the TensorFlow's installation directory: + ```bash mkdir $tensorflow_root/lib cp bazel-bin/tensorflow/libtensorflow_cc.so $tensorflow_root/lib/ @@ -75,7 +93,9 @@ cp bazel-bin/tensorflow/libtensorflow_framework.so $tensorflow_root/lib/ cp /tmp/proto/lib/libprotobuf.a $tensorflow_root/lib/ cp /tmp/nsync/lib/libnsync.a $tensorflow_root/lib/ ``` + Then copy the headers + ```bash mkdir -p $tensorflow_root/include/tensorflow cp -r bazel-genfiles/* $tensorflow_root/include/ @@ -86,12 +106,16 @@ cp -r /tmp/proto/include/* $tensorflow_root/include cp -r /tmp/eigen/include/eigen3/* $tensorflow_root/include cp -r /tmp/nsync/include/*h $tensorflow_root/include ``` + Now clean up the source files in the header directories: + ```bash cd $tensorflow_root/include find . -name "*.cc" -type f -delete ``` + The temporary installation directories for the dependencies can be removed: + ```bash rm -fr /tmp/proto /tmp/eigen /tmp/nsync ``` diff --git a/doc/install/install-tf.2.12.md b/doc/install/install-tf.2.12.md index dce0c224d5..8523345d3d 100644 --- a/doc/install/install-tf.2.12.md +++ b/doc/install/install-tf.2.12.md @@ -1,4 +1,5 @@ # Install TensorFlow's C++ interface + TensorFlow's C++ interface will be compiled from the source code. In this manual, we install TensorFlow 2.12.0. 
It is noted that the source code of TensorFlow 2.12.0 uses C++ 17, so one needs a C++ compiler that supports C++ 17. Firstly one installs Bazel. [bazelisk](https://github.com/bazelbuild/bazelisk) can be lanuched to use [bazel](https://github.com/bazelbuild/bazel). @@ -10,6 +11,7 @@ export PATH=/some/workspace/bazel/bin:$PATH ``` Firstly get the source code of the TensorFlow + ```bash git clone https://github.com/tensorflow/tensorflow tensorflow -b v2.12.0 --depth=1 cd tensorflow @@ -76,23 +78,30 @@ Configuration finished The library path for Python should be set accordingly. Now build the shared library of TensorFlow: + ```bash bazel build -c opt --verbose_failures //tensorflow:libtensorflow_cc.so ``` -You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. If you want to enable [oneDNN optimization](https://www.oneapi.io/blog/tensorflow-and-onednn-in-partnership/), add `--config=mkl`. + +You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. If you want to enable [oneDNN optimization](https://www.oneapi.io/blog/tensorflow-and-onednn-in-partnership/), add `--config=mkl`. Now I assume you want to install TensorFlow in directory `$tensorflow_root`. 
Create the directory if it does not exist + ```bash mkdir -p $tensorflow_root ``` + Now, copy the libraries to the TensorFlow's installation directory: + ```bash mkdir -p $tensorflow_root/lib cp -d bazel-bin/tensorflow/libtensorflow_cc.so* $tensorflow_root/lib/ cp -d bazel-bin/tensorflow/libtensorflow_framework.so* $tensorflow_root/lib/ cp -d $tensorflow_root/lib/libtensorflow_framework.so.2 $tensorflow_root/lib/libtensorflow_framework.so ``` + Then copy the headers + ```bash mkdir -p $tensorflow_root/include/tensorflow rsync -avzh --exclude '_virtual_includes/' --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel-bin/ $tensorflow_root/include/ @@ -107,12 +116,15 @@ rsync -avzh --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel ``` If you've enabled oneDNN, also copy `libiomp5.so`: + ```bash cp -d bazel-out/k8-opt/bin/external/llvm_openmp/libiomp5.so $tensorflow_root/lib/ ``` # Troubleshooting + ```bash git: unknown command -C ... ``` + This may be an issue with your Git version issue. Early versions of Git do not support this command, in this case upgrading your Git to a newer version may resolve any issues. diff --git a/doc/install/install-tf.2.3.md b/doc/install/install-tf.2.3.md index e538607db0..2fc7b35f2c 100644 --- a/doc/install/install-tf.2.3.md +++ b/doc/install/install-tf.2.3.md @@ -1,5 +1,7 @@ # Install TensorFlow's C++ interface + The tensorflow's C++ interface will be compiled from the source code. Firstly one installs bazel. The bazel version 3.1.0 should be used. A full instruction of bazel installation can be found [here](https://docs.bazel.build/versions/master/install.html). 
+ ```bash cd /some/workspace wget https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel-3.1.0-installer-linux-x86_64.sh @@ -9,6 +11,7 @@ export PATH=/some/workspace/bazel/bin:$PATH ``` Firstly get the source code of the TensorFlow + ```bash git clone https://github.com/tensorflow/tensorflow tensorflow -b v2.3.0 --depth=1 cd tensorflow @@ -75,23 +78,30 @@ Configuration finished The library path for Python should be set accordingly. Now build the shared library of tensorflow: + ```bash bazel build -c opt --verbose_failures //tensorflow:libtensorflow_cc.so ``` -You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue of your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. + +You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue of your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. Now I assume you want to install TensorFlow in directory `$tensorflow_root`. 
Create the directory if it does not exist + ```bash mkdir -p $tensorflow_root ``` + Now, copy the libraries to the tensorflow's installation directory: + ```bash mkdir -p $tensorflow_root/lib cp -d bazel-bin/tensorflow/libtensorflow_cc.so* $tensorflow_root/lib/ cp -d bazel-bin/tensorflow/libtensorflow_framework.so* $tensorflow_root/lib/ cp -d $tensorflow_root/lib/libtensorflow_framework.so.2 $tensorflow_root/lib/libtensorflow_framework.so ``` + Then copy the headers + ```bash mkdir -p $tensorflow_root/include/tensorflow rsync -avzh --exclude '_virtual_includes/' --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel-bin/ $tensorflow_root/include/ @@ -105,7 +115,9 @@ rsync -avzh --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel ``` # Troubleshooting + ```bash git: unknown command -C ... ``` + This may be an issue with your git version issue. Early versions of git do not support this command, in this case upgrading your git to a newer version may resolve any issues. diff --git a/doc/install/install-tf.2.8.md b/doc/install/install-tf.2.8.md index da1f299131..4145ba01d1 100644 --- a/doc/install/install-tf.2.8.md +++ b/doc/install/install-tf.2.8.md @@ -1,4 +1,5 @@ # Install TensorFlow's C++ interface + TensorFlow's C++ interface will be compiled from the source code. Firstly one installs Bazel. [bazelisk](https://github.com/bazelbuild/bazelisk) can be lanuched to use [bazel](https://github.com/bazelbuild/bazel). ```bash @@ -8,6 +9,7 @@ export PATH=/some/workspace/bazel/bin:$PATH ``` Firstly get the source code of the TensorFlow + ```bash git clone https://github.com/tensorflow/tensorflow tensorflow -b v2.8.0 --depth=1 cd tensorflow @@ -74,23 +76,30 @@ Configuration finished The library path for Python should be set accordingly. 
Now build the shared library of TensorFlow: + ```bash bazel build -c opt --verbose_failures //tensorflow:libtensorflow_cc.so ``` -You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. If you want to enable [oneDNN optimization](https://www.oneapi.io/blog/tensorflow-and-onednn-in-partnership/), add `--config=mkl`. + +You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. If you want to enable [oneDNN optimization](https://www.oneapi.io/blog/tensorflow-and-onednn-in-partnership/), add `--config=mkl`. Now I assume you want to install TensorFlow in directory `$tensorflow_root`. 
Create the directory if it does not exist + ```bash mkdir -p $tensorflow_root ``` + Now, copy the libraries to the TensorFlow's installation directory: + ```bash mkdir -p $tensorflow_root/lib cp -d bazel-bin/tensorflow/libtensorflow_cc.so* $tensorflow_root/lib/ cp -d bazel-bin/tensorflow/libtensorflow_framework.so* $tensorflow_root/lib/ cp -d $tensorflow_root/lib/libtensorflow_framework.so.2 $tensorflow_root/lib/libtensorflow_framework.so ``` + Then copy the headers + ```bash mkdir -p $tensorflow_root/include/tensorflow rsync -avzh --exclude '_virtual_includes/' --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel-bin/ $tensorflow_root/include/ @@ -104,12 +113,15 @@ rsync -avzh --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel ``` If you've enabled oneDNN, also copy `libiomp5.so`: + ```bash cp -d bazel-out/k8-opt/bin/external/llvm_openmp/libiomp5.so $tensorflow_root/lib/ ``` # Troubleshooting + ```bash git: unknown command -C ... ``` + This may be an issue with your Git version issue. Early versions of Git do not support this command, in this case upgrading your Git to a newer version may resolve any issues. diff --git a/doc/logo.md b/doc/logo.md index 420f378336..67c303f651 100644 --- a/doc/logo.md +++ b/doc/logo.md @@ -1,5 +1,5 @@ -# Logo - -DeePMD-kit logo - -The logo of DeePMD-kit is a beaver. Beavers were widely distributed in Europe and Asia but became nearly extinct due to hunting. Listed as a first-class state-protected animal in China, the population of beavers in China is less than the giant pandas. We hope that users of DeePMD-kit can enhance the awareness to protect beavers. +# Logo + +DeePMD-kit logo + +The logo of DeePMD-kit is a beaver. Beavers were widely distributed in Europe and Asia but became nearly extinct due to hunting. Listed as a first-class state-protected animal in China, the population of beavers in China is less than the giant pandas. 
We hope that users of DeePMD-kit can enhance the awareness to protect beavers. diff --git a/doc/model/dpa2.md b/doc/model/dpa2.md new file mode 100644 index 0000000000..e295f6b6bb --- /dev/null +++ b/doc/model/dpa2.md @@ -0,0 +1,5 @@ +# Descriptor DPA-2 {{ pytorch_icon }} + +:::{note} +**Supported backends**: PyTorch {{ pytorch_icon }} +::: diff --git a/doc/model/dplr.md b/doc/model/dplr.md index feea84e562..ec95f9f424 100644 --- a/doc/model/dplr.md +++ b/doc/model/dplr.md @@ -1,4 +1,8 @@ -# Deep potential long-range (DPLR) +# Deep potential long-range (DPLR) {{ tensorflow_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }} +::: Notice: **The interfaces of DPLR are not stable and subject to change** @@ -9,33 +13,42 @@ In the following, we take the DPLR model for example to introduce the training a ## Theory The Deep Potential Long Range (DPLR) model adds the electrostatic energy to the total energy: + ```math E=E_{\text{DP}} + E_{\text{ele}}, ``` + where $E_{\text{DP}}$ is the short-range contribution constructed as the [standard energy model](./train-energy.md) that is fitted against $(E^\ast-E_{\text{ele}})$. $E_{\text{ele}}$ is the electrostatic energy introduced by a group of Gaussian distributions that is an approximation of the electronic structure of the system, and is calculated in Fourier space by + ```math E_{\text{ele}} = \frac{1}{2\pi V}\sum_{m \neq 0, \|m\|\leq L} \frac{\exp({-\pi ^2 m^2/\beta ^2})}{m^2}S^2(m), ``` + where $\beta$ is a freely tunable parameter that controls the spread of the Gaussians. 
$L$ is the cutoff in Fourier space and $S(m)$, the structure factor, is given by + ```math S(m)=\sum_i q_i e^{-2\pi \imath m \boldsymbol r_i} + \sum_n q_n e^{-2\pi \imath m \boldsymbol W_n}, ``` + where $\imath = \sqrt{-1}$ denotes the imaginary unit, $\boldsymbol r_i$ indicates ion coordinates, $q_i$ is the charge of the ion $i$, and $W_n$ is the $n$-th Wannier centroid (WC) which can be obtained from a separated [dipole model](./train-fitting-tensor.md). It can be proved that the error in the electrostatic energy introduced by the Gaussian approximations is dominated by a summation of dipole-quadrupole interactions that decay as $r^{-4}$, where $r$ is the distance between the dipole and quadrupole.[^1] -[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). +[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. 
Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). ## Train a deep Wannier model for Wannier centroids We use the deep Wannier model (DW) to represent the relative position of the Wannier centroid (WC) with the atom with which it is associated. One may consult the introduction of the [dipole model](train-fitting-tensor.md) for a detailed introduction. An example input `wc.json` and a small dataset `data` for tutorial purposes can be found in + ```bash $deepmd_source_dir/examples/water/dplr/train/ ``` + It is noted that **the tutorial dataset is not enough for training a productive model**. Two settings make the training input script different from an energy training input: + ```json "fitting_net": { "type": "dipole", @@ -44,8 +57,10 @@ Two settings make the training input script different from an energy training in "seed": 1 }, ``` + The type of fitting is set to {ref}`dipole `. The dipole is associated with type 0 atoms (oxygens), by the setting `"dipole_type": [0]`. What we trained is the displacement of the WC from the corresponding oxygen atom. It shares the same training input as the atomic dipole because both are 3-dimensional vectors defined on atoms. The loss section is provided as follows + ```json "loss": { "type": "tensor", @@ -53,9 +68,11 @@ The loss section is provided as follows "pref_atomic": 1.0 }, ``` + so that the atomic dipole is trained as labels. Note that the NumPy compressed file `atomic_dipole.npy` should be provided in each dataset. 
The training and freezing can be started from the example directory by + ```bash dp train dw.json && dp freeze -o dw.pb ``` @@ -63,6 +80,7 @@ dp train dw.json && dp freeze -o dw.pb ## Train the DPLR model The training of the DPLR model is very similar to the standard short-range DP models. An example input script can be found in the example directory. The following section is introduced to compute the long-range energy contribution of the DPLR model, and modify the short-range DP model by this part. + ```json "modifier": { "type": "dipole_charge", @@ -73,8 +91,10 @@ The training of the DPLR model is very similar to the standard short-range DP mo "ewald_beta": 0.40 }, ``` -The {ref}`model_name ` specifies which DW model is used to predict the position of WCs. {ref}`model_charge_map ` gives the amount of charge assigned to WCs. {ref}`sys_charge_map ` provides the nuclear charge of oxygen (type 0) and hydrogen (type 1) atoms. {ref}`ewald_beta ` (unit $\text{Å}^{-1}$) gives the spread parameter controls the spread of Gaussian charges, and {ref}`ewald_h ` (unit Å) assigns the grid size of Fourier transformation. + +The {ref}`model_name ` specifies which DW model is used to predict the position of WCs. {ref}`model_charge_map ` gives the amount of charge assigned to WCs. {ref}`sys_charge_map ` provides the nuclear charge of oxygen (type 0) and hydrogen (type 1) atoms. {ref}`ewald_beta ` (unit $\text{Å}^{-1}$) gives the spread parameter controls the spread of Gaussian charges, and {ref}`ewald_h ` (unit Å) assigns the grid size of Fourier transformation. The DPLR model can be trained and frozen by (from the example directory) + ```bash dp train ener.json && dp freeze -o ener.pb ``` @@ -84,11 +104,13 @@ dp train ener.json && dp freeze -o ener.pb In MD simulations, the long-range part of the DPLR is calculated by the LAMMPS `kspace` support. Then the long-range interaction is back-propagated to atoms by DeePMD-kit. 
This setup is commonly used in classical molecular dynamics simulations as the "virtual site". Unfortunately, LAMMPS does not natively support virtual sites, so we have to hack the LAMMPS code, which makes the input configuration and script a little wired. An example of an input configuration file and script can be found in + ```bash $deepmd_source_dir/examples/water/dplr/lmp/ ``` We use `atom_style full` for DPLR simulations. the coordinates of the WCs are explicitly written in the configuration file. Moreover, a virtual bond is established between the oxygens and the WCs to indicate they are associated together. The configuration file containing 128 H2O molecules is thus written as + ``` 512 atoms @@ -123,13 +145,17 @@ Bonds 2 1 2 386 ... ``` + The oxygens and hydrogens are assigned with atom types 1 and 2 (corresponding to training atom types 0 and 1), respectively. The WCs are assigned with atom type 3. We want to simulate heavy water so the mass of hydrogens is set to 2. An example input script is provided in + ```bash $deepmd_source_dir/examples/water/dplr/lmp/in.lammps ``` + Here are some explanations + ```lammps # groups of real and virtual atoms group real_atom type 1 2 @@ -144,6 +170,7 @@ bond_style zero bond_coeff * special_bonds lj/coul 1 1 1 angle no ``` + Type 1 and 2 (O and H) are `real_atom`s, while type 3 (WCs) are `virtual_atom`s. The model file `ener.pb` stores both the DW and DPLR models, so the position of WCs and the energy can be inferred from it. A virtual bond type is specified by `bond_style zero`. The `special_bonds` command switches off the exclusion of intramolecular interactions. ```lammps @@ -153,19 +180,22 @@ Type 1 and 2 (O and H) are `real_atom`s, while type 3 (WCs) are `virtual_atom`s. kspace_style pppm/dplr 1e-5 kspace_modify gewald ${BETA} diff ik mesh ${KMESH} ${KMESH} ${KMESH} ``` + The long-range part is calculated by the `kspace` support of LAMMPS. The `kspace_style` `pppm/dplr` is required. 
The spread parameter set by variable `BETA` should be set the same as that used in training. The `KMESH` should be set dense enough so the long-range calculation is converged. ### fix dplr command **Syntax** - ``` fix ID group-ID style_name keyword value ... ``` -* ID, group-ID are documented in :doc:`fix ` command -* style_name = *dplr* -* three or more keyword/value pairs may be appended + + + +- ID, group-ID are documented in :doc:`fix ` command +- style\_name = _dplr_ +- three or more keyword/value pairs may be appended ``` keyword = *model* or *type_associate* or *bond_type* or *efield* @@ -197,6 +227,7 @@ The atom names specified in [pair_style `deepmd`](../third-party/lammps-command. If it is not set, the training parameter {ref}`type_map ` will be mapped to LAMMPS atom types. To use a time-dependent electric field, LAMMPS's `variable` feature can be utilized: + ```lammps variable EFIELD_Z equal 2*sin(2*PI*time/0.006) fix 0 all dplr model ener.pb type_associate 1 3 bond_type 1 efield 0 0 v_EFIELD_Z @@ -212,21 +243,23 @@ compute real_press all pressure real_temp fix 1 real_atom nvt temp ${TEMP} ${TEMP} ${TAU_T} fix_modify 1 temp real_temp ``` + The temperature of the system should be computed from the real atoms. The kinetic contribution in the pressure tensor is also computed from the real atoms. The thermostat is applied to only real atoms. The computed temperature and pressure of real atoms can be accessed by, e.g. + ```lammps fix thermo_print all print ${THERMO_FREQ} "$(step) $(pe) $(ke) $(etotal) $(enthalpy) $(c_real_temp) $(c_real_press) $(vol) $(c_real_press[1]) $(c_real_press[2]) $(c_real_press[3])" append thermo.out screen no title "# step pe ke etotal enthalpy temp press vol pxx pyy pzz" ``` The LAMMPS simulation can be started from the example directory by + ```bash lmp -i in.lammps ``` + If LAMMPS complains that no model file `ener.pb` exists, it can be copied from the training example directory. The MD simulation lasts for only 20 steps. 
If one runs a longer simulation, it will blow up, because the model is trained with a very limited dataset for very short training steps, thus is of poor quality. Another restriction that should be noted is that the energies printed at the zero steps are not correct. This is because at the zero steps the position of the WC has not been updated with the DW model. The energies printed in later steps are correct. - - [1]: https://arxiv.org/abs/2112.13327 diff --git a/doc/model/dprc.md b/doc/model/dprc.md index c7547a769f..33dde237d7 100644 --- a/doc/model/dprc.md +++ b/doc/model/dprc.md @@ -1,4 +1,8 @@ -# Deep Potential - Range Correction (DPRc) +# Deep Potential - Range Correction (DPRc) {{ tensorflow_icon }} {{ pytorch_icon }} {{ dpmodel_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, DP {{ dpmodel_icon }} +::: Deep Potential - Range Correction (DPRc) is designed to combine with QM/MM method, and corrects energies from a low-level QM/MM method to a high-level QM/MM method: @@ -11,6 +15,7 @@ E=E_\text{QM}(\mathbf R; \mathbf P) + E_\text{QM/MM}(\mathbf R; \mathbf P) + E_ Deep Potential - Range Correction (DPRc) was initially designed to correct the potential energy from a fast, linear-scaling low-level semiempirical QM/MM theory to a high-level ''ab initio'' QM/MM theory in a range-correction way to quantitatively correct short and mid-range non-bonded interactions leveraging the non-bonded lists routinely used in molecular dynamics simulations using molecular mechanical force fields such as AMBER. In this way, long-ranged electrostatic interactions can be modeled efficiently using the particle mesh Ewald method or its extensions for multipolar and QM/MM potentials. 
In a DPRc model, the switch function is modified to disable MM-MM interaction: + ```math s_\text{DPRc}(r_{ij}) = \begin{cases} @@ -18,12 +23,16 @@ In a DPRc model, the switch function is modified to disable MM-MM interaction: s(r_{ij}), &\text{otherwise}, \end{cases} ``` + where $s_\text{DPRc}(r_{ij})$ is the new switch function and $s(r_{ij})$ is the old one. This ensures the forces between MM atoms are zero, i.e. + ```math {\boldsymbol F}_{ij} = - \frac{\partial E}{\partial \boldsymbol r_{ij}} = 0, \quad i \in \text{MM} \land j \in \text{MM}. ``` + The fitting network is revised to remove energy bias from MM atoms: + ```math E_i= \begin{cases} @@ -31,10 +40,11 @@ The fitting network is revised to remove energy bias from MM atoms: \mathcal{F}_0(\mathcal{D}^i) - \mathcal{F}_0(\mathbf{0}), &\text{if $i \in \text{MM}$}, \end{cases} ``` + where $\mathbf{0}$ is a zero matrix. It is worth mentioning that usage of DPRc is not limited to its initial design for QM/MM correction and can be expanded to any similar interaction.[^1] -[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). 
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). See the [JCTC paper](https://doi.org/10.1021/acs.jctc.1c00201) for details. @@ -58,6 +68,10 @@ In a DPRc model, QM atoms and MM atoms have different atom types. Assuming we ha As described in the paper, the DPRc model only corrects $E_\text{QM}$ and $E_\text{QM/MM}$ within the cutoff, so we use a hybrid descriptor to describe them separatedly: +::::{tab-set} + +:::{tab-item} TensorFlow {{ tensorflow_icon }} + ```json "descriptor" :{ "type": "hybrid", @@ -87,7 +101,47 @@ As described in the paper, the DPRc model only corrects $E_\text{QM}$ and $E_\te } ``` +::: + +:::{tab-item} PyTorch {{ pytorch_icon }} + +```json +"descriptor" :{ + "type": "hybrid", + "list" : [ + { + "type": "se_e2_a", + "sel": [6, 11, 0, 6, 0, 1], + "rcut_smth": 1.00, + "rcut": 9.00, + "neuron": [12, 25, 50], + "exclude_types": [[2, 2], [2, 4], [4, 4], [0, 2], [0, 4], [1, 2], [1, 4], [3, 2], [3, 4], [5, 2], [5, 4]], + "axis_neuron": 12, + "type_one_side": true, + "_comment": " QM/QM interaction" + }, + { + "type": "se_e2_a", + "sel": [6, 11, 100, 6, 50, 1], + "rcut_smth": 0.50, + "rcut": 6.00, + "neuron": [12, 25, 50], + "exclude_types": [[0, 0], [0, 1], [0, 3], [0, 5], [1, 1], [1, 3], [1, 5], 
[3, 3], [3, 5], [5, 5], [2, 2], [2, 4], [4, 4]], + "axis_neuron": 12, + "set_davg_zero": true, + "type_one_side": true, + "_comment": " QM/MM interaction" + } + ] +} +``` + +::: + +:::: + {ref}`exclude_types ` can be generated by the following Python script: + ```py from itertools import combinations_with_replacement, product @@ -127,10 +181,14 @@ The DPRc model has the best practices with the [AMBER](../third-party/out-of-dee ## Pairwise DPRc +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }} +::: + If one wants to correct from a low-level method into a full DFT level, and the system is too large to do full DFT calculation, one may try the experimental pairwise DPRc model. In a pairwise DPRc model, the total energy is divided into QM internal energy and the sum of QM/MM energy for each MM residue $l$: -$$ E = E_\text{QM} + \sum_{l} E_{\text{QM/MM},l} $$ +$$ E = E_\text{QM} + \sum_{l} E_{\text{QM/MM},l} $$ In this way, the interaction between the QM region and each MM fragmentation can be computed and trained separately. Thus, the pairwise DPRc model is divided into two sub-[DPRc models](./dprc.md). 
@@ -142,32 +200,19 @@ It is noted that the [`se_atten` descriptor](./train-se-atten.md) should be used { "model": { "type": "pairwise_dprc", - "type_map": [ - "C", - "P", - "O", - "H", - "OW", - "HW" - ], + "type_map": ["C", "P", "O", "H", "OW", "HW"], "type_embedding": { - "neuron": [ - 8 - ], + "neuron": [8], "precision": "float32" }, "qm_model": { "descriptor": { "type": "se_atten_v2", "sel": 24, - "rcut_smth": 0.50, - "rcut": 9.00, + "rcut_smth": 0.5, + "rcut": 9.0, "attn_layer": 0, - "neuron": [ - 25, - 50, - 100 - ], + "neuron": [25, 50, 100], "resnet_dt": false, "axis_neuron": 12, "precision": "float32", @@ -175,21 +220,10 @@ It is noted that the [`se_atten` descriptor](./train-se-atten.md) should be used }, "fitting_net": { "type": "ener", - "neuron": [ - 240, - 240, - 240 - ], + "neuron": [240, 240, 240], "resnet_dt": true, "precision": "float32", - "atom_ener": [ - null, - null, - null, - null, - 0.0, - 0.0 - ], + "atom_ener": [null, null, null, null, 0.0, 0.0], "seed": 1 } }, @@ -197,92 +231,38 @@ It is noted that the [`se_atten` descriptor](./train-se-atten.md) should be used "descriptor": { "type": "se_atten_v2", "sel": 27, - "rcut_smth": 0.50, - "rcut": 6.00, + "rcut_smth": 0.5, + "rcut": 6.0, "attn_layer": 0, - "neuron": [ - 25, - 50, - 100 - ], + "neuron": [25, 50, 100], "resnet_dt": false, "axis_neuron": 12, "set_davg_zero": true, "exclude_types": [ - [ - 0, - 0 - ], - [ - 0, - 1 - ], - [ - 0, - 2 - ], - [ - 0, - 3 - ], - [ - 1, - 1 - ], - [ - 1, - 2 - ], - [ - 1, - 3 - ], - [ - 2, - 2 - ], - [ - 2, - 3 - ], - [ - 3, - 3 - ], - [ - 4, - 4 - ], - [ - 4, - 5 - ], - [ - 5, - 5 - ] + [0, 0], + [0, 1], + [0, 2], + [0, 3], + [1, 1], + [1, 2], + [1, 3], + [2, 2], + [2, 3], + [3, 3], + [4, 4], + [4, 5], + [5, 5] ], "precision": "float32", "seed": 1 }, "fitting_net": { "type": "ener", - "neuron": [ - 240, - 240, - 240 - ], + "neuron": [240, 240, 240], "resnet_dt": true, "seed": 1, "precision": "float32", - "atom_ener": [ - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 
0.0 - ] + "atom_ener": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] } } } diff --git a/doc/model/index.md b/doc/model/index.md deleted file mode 100644 index 589b39b2b5..0000000000 --- a/doc/model/index.md +++ /dev/null @@ -1,20 +0,0 @@ -# Model - -- [Overall](overall.md) -- [Descriptor `"se_e2_a"`](train-se-e2-a.md) -- [Descriptor `"se_e2_r"`](train-se-e2-r.md) -- [Descriptor `"se_e3"`](train-se-e3.md) -- [Descriptor `"se_atten"`](train-se-atten.md) -- [Descriptor `"se_atten_v2"`](train-se-atten.md#descriptor-se_atten_v2) -- [Descriptor `"se_a_mask"`](train-se-a-mask.md) -- [Descriptor `"hybrid"`](train-hybrid.md) -- [Descriptor `sel`](sel.md) -- [Fit energy](train-energy.md) -- [Fit spin energy](train-energy-spin.md) -- [Fit `tensor` like `Dipole` and `Polarizability`](train-fitting-tensor.md) -- [Fit electronic density of states (DOS)](train-fitting-dos.md) -- [Train a Deep Potential model using `type embedding` approach](train-se-e2-a-tebd.md) -- [Deep potential long-range](dplr.md) -- [Deep Potential - Range Correction (DPRc)](dprc.md) -- [Linear model](linear.md) -- [Interpolation or combination with a pairwise potential](pairtab.md) diff --git a/doc/model/index.rst b/doc/model/index.rst index 1e850cac67..7b7fb082f1 100644 --- a/doc/model/index.rst +++ b/doc/model/index.rst @@ -9,6 +9,7 @@ Model train-se-e2-r train-se-e3 train-se-atten + dpa2 train-hybrid sel train-energy diff --git a/doc/model/linear.md b/doc/model/linear.md index b5e7c5c76a..3891559d90 100644 --- a/doc/model/linear.md +++ b/doc/model/linear.md @@ -1,4 +1,8 @@ -## Linear model +## Linear model {{ tensorflow_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }} +::: One can linearly combine existing models with arbitrary coefficients: diff --git a/doc/model/overall.md b/doc/model/overall.md index f8fb2fa151..102a8fc671 100644 --- a/doc/model/overall.md +++ b/doc/model/overall.md @@ -16,17 +16,20 @@ The indices of the neighboring atoms (i.e. 
atoms within a certain cutoff radius) Note that the Cartesian coordinates can be either under the periodic boundary condition (PBC) or in vacuum (under the open boundary condition). The network parameters are denoted by $\boldsymbol \theta = \{\boldsymbol \theta_d, \boldsymbol \theta_f\}$, where $\boldsymbol \theta_d$ and $\boldsymbol\theta_f$ yield the network parameters of the descriptor (if any) and those of the fitting network, respectively. From the above equation, one may compute the global property of the system by + ```math \boldsymbol y = \sum_{i=1}^N \boldsymbol y_i, ``` + where $N$ is the number of atoms in a frame. For example, if $y_i$ represents the potential energy contribution of atom $i$, then $y$ gives the total potential energy of the frame.[^1] -[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). 
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). ## Instructions A model has two parts, a descriptor that maps atomic configuration to a set of symmetry invariant features, and a fitting net that takes descriptor as input and predicts the atomic contribution to the target physical property. It's defined in the {ref}`model ` section of the `input.json`, for example, + ```json "model": { "type_map": ["O", "H"], @@ -38,11 +41,13 @@ A model has two parts, a descriptor that maps atomic configuration to a set of s } } ``` + The two subsections, {ref}`descriptor ` and {ref}`fitting_net `, define the descriptor and the fitting net, respectively. The {ref}`type_map ` is optional, which provides the element names (but not necessarily same as the actual name of the element) of the corresponding atom types. A water model, as in this example, has two kinds of atoms. The atom types are internally recorded as integers, e.g., `0` for oxygen and `1` for hydrogen here. A mapping from the atom type to their names is provided by {ref}`type_map `. DeePMD-kit implements the following descriptors: + 1. [`se_e2_a`](train-se-e2-a.md): DeepPot-SE constructed from all information (both angular and radial) of atomic configurations. 
The embedding takes the distance between atoms as input. 2. [`se_e2_r`](train-se-e2-r.md): DeepPot-SE constructed from radial information of atomic configurations. The embedding takes the distance between atoms as input. 3. [`se_e3`](train-se-e3.md): DeepPot-SE constructed from all information (both angular and radial) of atomic configurations. The embedding takes angles between two neighboring atoms as input. @@ -51,6 +56,7 @@ DeePMD-kit implements the following descriptors: 6. [`hybrid`](train-hybrid.md): Concate a list of descriptors to form a new descriptor. The fitting of the following physical properties is supported + 1. [`ener`](train-energy.md): Fit the energy of the system. The force (derivative with atom positions) and the virial (derivative with the box tensor) can also be trained. 2. [`dipole`](train-fitting-tensor.md): The dipole moment. 3. [`polar`](train-fitting-tensor.md): The polarizability. diff --git a/doc/model/pairtab.md b/doc/model/pairtab.md index 115345796a..c8763705f7 100644 --- a/doc/model/pairtab.md +++ b/doc/model/pairtab.md @@ -1,17 +1,27 @@ -# Interpolation or combination with a pairwise potential +# Interpolation or combination with a pairwise potential {{ tensorflow_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }} +::: ## Theory + In applications like the radiation damage simulation, the interatomic distance may become too close, so that the DFT calculations fail. In such cases, the DP model that is an approximation of the DFT potential energy surface is usually replaced by an empirical potential, like the Ziegler-Biersack-Littmark (ZBL) screened nuclear repulsion potential in the radiation damage simulations. 
The DeePMD-kit package supports the interpolation between DP and an empirical pairwise potential + ```math E_i = (1-w_i) E_i^{\mathrm{DP}} + w_i (E_i^0 + E_i^{\mathrm{pair}}), ``` + where the $w_i$ is the interpolation weight and the $E_i^{\mathrm{pair}} $ is the atomic contribution due to the pairwise potential $u^{\mathrm{pair}}(r)$, i.e. + ```math E_i^{\mathrm{pair}} = \sum_{j\in n(i)} u^{\mathrm{pair}}(r_{ij}). ``` + The interpolation weight $w_i$ is defined by + ```math w_i = \begin{cases} @@ -20,19 +30,22 @@ The interpolation weight $w_i$ is defined by 0, & \sigma_i \geq r_b, \end{cases} ``` + where $u_i = (\sigma_i - r_a ) / (r_b - r_a)$. $E_i^0$ is the atom energy bias. In the range $[r_a, r_b]$, the DP model smoothly switched off and the pairwise potential smoothly switched on from $r_b$ to $r_a$. The $\sigma_i$ is the softmin of the distance between atom $i$ and its neighbors, + ```math \sigma_i = \dfrac {\sum\limits_{j\in n(i)} r_{ij} e^{-r_{ij} / \alpha_s}} {\sum\limits_{j\in n(i)} e^{-r_{ij} / \alpha_s}}, ``` + where the scale $\alpha_s$ is a tunable scale of the interatomic distance $r_{ij}$. The pairwise potential $u^{\textrm{pair}}(r)$ is defined by a user-defined table that provides the value of $u^{\textrm{pair}}$ on an evenly discretized grid from 0 to the cutoff distance.[^1] -[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 
159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). +[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). DeePMD-kit also supports combination with a pairwise potential: @@ -49,6 +62,10 @@ in the order of Type_0-Type_0, Type_0-Type_1, ..., Type_0-Type_N, Type_1-Type_1, The interaction should be smooth at the cut-off distance. +:::{note} +In instances where the interaction at the cut-off distance is not delineated within the table file, extrapolation will be conducted utilizing the available interaction data. This extrapolative procedure guarantees a smooth transition from the table-provided value to `0` whenever feasible. +::: + ## Interpolation with a short-range pairwise potential ```json diff --git a/doc/model/sel.md b/doc/model/sel.md index f4a3cf6c09..8455c242a9 100644 --- a/doc/model/sel.md +++ b/doc/model/sel.md @@ -5,9 +5,11 @@ All descriptors require to set `sel`, which means the expected maximum number of `sel` should not be too large or too small. If `sel` is too large, the computing will become much slower and cost more memory. 
If `sel` is not enough, the energy will be not conserved, making the accuracy of the model worse. To determine a proper `sel`, one can calculate the neighbor stat of the training data before training: + ```sh dp neighbor-stat -s data -r 6.0 -t O H ``` + where `data` is the directory of data, `6.0` is the cutoff radius, and `O` and `H` is the type map. The program will give the `max_nbor_size`. For example, `max_nbor_size` of the water example is `[38, 72]`, meaning an atom may have 38 O neighbors and 72 H neighbors in the training data. The `sel` should be set to a higher value than that of the training data, considering there may be some extreme geometries during MD simulations. As a result, we set `sel` to `[46, 92]` in the water example. diff --git a/doc/model/train-energy-spin.md b/doc/model/train-energy-spin.md index d155ec977d..3eb589590b 100644 --- a/doc/model/train-energy-spin.md +++ b/doc/model/train-energy-spin.md @@ -1,10 +1,15 @@ -# Fit spin energy +# Fit spin energy {{ tensorflow_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }} +::: In this section, we will take `$deepmd_source_dir/examples/NiO/se_e2_a/input.json` as an example of the input file. ## Spin The construction of the fitting net is give by section {ref}`spin ` + ```json "spin" : { "use_spin": [true, false], @@ -12,9 +17,10 @@ The construction of the fitting net is give by section {ref}`spin ` "spin_norm": [1.2737], }, ``` -* {ref}`use_spin ` determines whether to turn on the magnetism of the atoms.The index of this option matches option `type_map `. -* {ref}`virtual_len ` specifies the distance between virtual atom and the belonging real atom. -* {ref}`spin_norm ` gives the magnitude of the magnetic moment for each magnatic atom. + +- {ref}`use_spin ` determines whether to turn on the magnetism of the atoms.The index of this option matches option `type_map `. +- {ref}`virtual_len ` specifies the distance between virtual atom and the belonging real atom. 
+- {ref}`spin_norm ` gives the magnitude of the magnetic moment for each magnetic atom. ## Spin Loss @@ -29,11 +35,13 @@ The prefectors may not be a constant, rather it changes linearly with the learni $$p_{fr}(t) = p_{fr}^0 \frac{ \alpha(t) }{ \alpha(0) } + p_{fr}^\infty ( 1 - \frac{ \alpha(t) }{ \alpha(0) })$$ where $\alpha(t)$ denotes the learning rate at step $t$. $p_{fr}^0$ and $p_{fr}^\infty$ specifies the $p_f$ at the start of the training and at the limit of $t \to \infty$ (set by {ref}`start_pref_fr ` and {ref}`limit_pref_f `, respectively), i.e. + ```math pref_fr(t) = start_pref_fr * ( lr(t) / start_lr ) + limit_pref_fr * ( 1 - lr(t) / start_lr ) ``` The {ref}`loss ` section in the `input.json` is + ```json "loss" :{ "type": "ener_spin", @@ -47,6 +55,7 @@ The {ref}`loss ` section in the `input.json` is "limit_pref_v": 0, }, ``` + The options {ref}`start_pref_e `, {ref}`limit_pref_e `, {ref}`start_pref_fr `, {ref}`limit_pref_fm `, {ref}`start_pref_v ` and {ref}`limit_pref_v ` determine the start and limit prefactors of energy, atomic force, magnatic force and virial, respectively. If one does not want to train with virial, then he/she may set the virial prefactors {ref}`start_pref_v ` and {ref}`limit_pref_v ` to 0. diff --git a/doc/model/train-energy.md b/doc/model/train-energy.md index 90e027d7a0..c1da1f4c1f 100644 --- a/doc/model/train-energy.md +++ b/doc/model/train-energy.md @@ -1,66 +1,86 @@ -# Fit energy +# Fit energy {{ tensorflow_icon }} {{ pytorch_icon }} {{ dpmodel_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, DP {{ dpmodel_icon }} +::: In this section, we will take `$deepmd_source_dir/examples/water/se_e2_a/input.json` as an example of the input file. ## Theory -In the DP model, we let the fitting network $\mathcal{F}_ 0$ maps the descriptor $\mathcal{D}^i$ to a scalar, where the subscript $0$ means that the output is a zero-order tensor (i.e. scalar). 
The model can then be used to predict the total potential energy of the system by +In the DP model, we let the fitting network $\mathcal{F}_ 0$ maps the descriptor $\mathcal{D}^i$ to a scalar, where the subscript $0$ means that the output is a zero-order tensor (i.e. scalar). The model can then be used to predict the total potential energy of the system by + ```math E = \sum_i E_i = \sum_i \mathcal F_0 (\mathcal D^i), ``` + where the output of the fitting network is treated as the atomic potential energy contribution, i.e. $E_i$. The output scalar can also be treated as other scalar properties defined on an atom, for example, the partial charge of atom $i$. -In some cases, atomic-specific or frame-specific parameters, such as electron temperature, may be treated as extra input to the fitting network. +In some cases, atomic-specific or frame-specific parameters, such as electron temperature, may be treated as extra input to the fitting network. We denote the atomic and frame-specific parameters by $\boldsymbol{P}^i\in \mathbb{R}^{N_p}$ (with $N_p$ being the dimension) and $\boldsymbol{Q}\in \mathbb{R}^{N_q}$ (with $N_q$ being the dimension), respectively. + ```math E_i=\mathcal{F}_0(\{\mathcal{D}^i, \boldsymbol{P}^i, \boldsymbol Q\}). ``` The atomic force $\boldsymbol{F}_ {i}$ and the virial tensor $\boldsymbol{\Xi} = (\Xi_{\alpha\beta})$ (if PBC is applied) can be derived from the potential energy $E$: + ```math F_{i,\alpha}=-\frac{\partial E}{\partial r_{i,\alpha}}, ``` + ```math \Xi_{\alpha\beta}=-\sum_{\gamma} \frac{\partial E}{\partial h_{\gamma\alpha}} h_{\gamma\beta}, ``` + where $r_{i,\alpha}$ and $F_{i,\alpha}$ denotes the $\alpha$-th component of the coordinate and force of atom $i$. $h_{\alpha\beta}$ is the $\beta$-th component of the $\alpha$-th basis vector of the simulation region. 
The properties $\eta$ of the energy loss function could be energy $E$, force $\boldsymbol{F}$, virial $\boldsymbol{\Xi}$, relative energy $\Delta E$, or any combination among them, and the loss functions of them are + ```math L_E(\boldsymbol{x};\boldsymbol{\theta})=\frac{1}{N}(E(\boldsymbol{x};\boldsymbol{\theta})-E^*)^2, ``` + ```math L_F(\boldsymbol{x};\boldsymbol{\theta})=\frac{1}{3N}\sum_{k=1}^{N}\sum_{\alpha=1}^3(F_{k,\alpha}(\boldsymbol{x};\boldsymbol{\theta})-F_{k,\alpha}^*)^2, ``` + ```math L_\Xi(\boldsymbol{x};\boldsymbol{\theta})=\frac{1}{9N}\sum_{\alpha,\beta=1}^{3}(\Xi_{\alpha\beta}(\boldsymbol{x};\boldsymbol{\theta})-\Xi_{\alpha\beta}^*)^2, ``` + ```math L_{\Delta E}(\boldsymbol{x};\boldsymbol{\theta})=\frac{1}{N}({\Delta E}(\boldsymbol{x};\boldsymbol{\theta})-{\Delta E}^*)^2, ``` + where $F_{k,\alpha}$ is the $\alpha$-th component of the force on atom $k$, and the superscript $\ast$ indicates the label of the property that should be provided in advance. Using $N$ ensures that each loss of fitting property is averaged over atomic contributions before they contribute to the total loss by weight. If part of atoms is more important than others, for example, certain atoms play an essential role when calculating free energy profiles or kinetic isotope effects, the MSE of atomic forces with prefactors $q_{k}$ can also be used as the loss function: + ```math L_F^p(\mathbf{x};\boldsymbol{\theta})=\frac{1}{3N}\sum_{k=1}^{N} \sum_{\alpha} q_{k} (F_{k,\alpha}(\mathbf{x};\boldsymbol{\theta})-F_{k,\alpha}^*)^2. ``` + The atomic forces with larger prefactors will be fitted more accurately than those in other atoms. 
If some forces are quite large, for example, forces can be greater than 60 eV/Å in high-temperature reactive simulations, one may also prefer the force loss is relative to the magnitude: + ```math L^r_F(\boldsymbol{x};\boldsymbol{\theta})=\frac{1}{3N}\sum_{k=1}^{N}\sum_\alpha \left(\frac{F_{k,\alpha}(\boldsymbol{x};\boldsymbol{\theta})-F_{k,\alpha}^*}{\lvert\boldsymbol{F}^\ast_k\lvert + \nu}\right)^2. ``` + where $\nu$ is a small constant used to protect an atom where the magnitude of $\boldsymbol{F}^\ast_k$ is small from having a large $L^r_F$. Benefiting from the relative force loss, small forces can be fitted more accurately.[^1] -[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). +[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. 
Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). ## The fitting network The construction of the fitting net is given by section {ref}`fitting_net ` + ```json "fitting_net" : { "neuron": [240, 240, 240], @@ -68,9 +88,10 @@ The construction of the fitting net is given by section {ref}`fitting_net ` specifies the size of the fitting net. If two neighboring layers are of the same size, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them. -* If the option {ref}`resnet_dt ` is set to `true`, then a timestep is used in the ResNet. -* {ref}`seed ` gives the random seed that is used to generate random numbers when initializing the model parameters. + +- {ref}`neuron ` specifies the size of the fitting net. If two neighboring layers are of the same size, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them. +- If the option {ref}`resnet_dt ` is set to `true`, then a timestep is used in the ResNet. +- {ref}`seed ` gives the random seed that is used to generate random numbers when initializing the model parameters. ## Loss @@ -83,11 +104,13 @@ where $L_e$, $L_f$, and $L_v$ denote the loss in energy, forces and virials, res $$p_f(t) = p_f^0 \frac{ \alpha(t) }{ \alpha(0) } + p_f^\infty ( 1 - \frac{ \alpha(t) }{ \alpha(0) })$$ where $\alpha(t)$ denotes the learning rate at step $t$. $p_f^0$ and $p_f^\infty$ specifies the $p_f$ at the start of the training and the limit of $t \to \infty$ (set by {ref}`start_pref_f ` and {ref}`limit_pref_f `, respectively), i.e. 
+ ```math pref_f(t) = start_pref_f * ( lr(t) / start_lr ) + limit_pref_f * ( 1 - lr(t) / start_lr ) ``` The {ref}`loss ` section in the `input.json` is + ```json "loss" : { "start_pref_e": 0.02, @@ -98,6 +121,7 @@ The {ref}`loss ` section in the `input.json` is "limit_pref_v": 0 } ``` + The options {ref}`start_pref_e `, {ref}`limit_pref_e `, {ref}`start_pref_f `, {ref}`limit_pref_f `, {ref}`start_pref_v ` and {ref}`limit_pref_v ` determine the start and limit prefactors of energy, force and virial, respectively. If one does not want to train with virial, then he/she may set the virial prefactors {ref}`start_pref_v ` and {ref}`limit_pref_v ` to 0. diff --git a/doc/model/train-fitting-dos.md b/doc/model/train-fitting-dos.md index bbe5b50690..7b68525a45 100644 --- a/doc/model/train-fitting-dos.md +++ b/doc/model/train-fitting-dos.md @@ -1,4 +1,8 @@ -# Fit electronic density of states (DOS) +# Fit electronic density of states (DOS) {{ tensorflow_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }} +::: Here we present an API to DeepDOS model, which can be used to fit electronic density of state (DOS) (which is a vector). @@ -32,9 +36,9 @@ The JSON of `dos` type should be provided like }, ``` -- `type` specifies which type of fitting net should be used. It should be `dos`. -- `numb_dos` specifies the length of output vector (density of states), which the same as the `NEDOS` set in VASP software, this argument defines the output length of the neural network. We note that the length of `dos` provided in training set should be the same. -- The rest arguments have the same meaning as they do in `ener` mode. +- `type` specifies which type of fitting net should be used. It should be `dos`. +- `numb_dos` specifies the length of output vector (density of states), which the same as the `NEDOS` set in VASP software, this argument defines the output length of the neural network. We note that the length of `dos` provided in training set should be the same. 
+- The rest arguments have the same meaning as they do in `ener` mode. ## Loss @@ -62,13 +66,12 @@ The loss section should be provided like }, ``` -- {ref}`type ` should be written as `dos` as a distinction from `ener` mode. -- `pref_dos` and `pref_ados`, respectively specify the weight of global and atomic loss. If set to 0, the corresponding label will not be included in the training process. -- We also provides a combination training of vector and its cumulative distribution function `cdf`, which can be defined as +- {ref}`type ` should be written as `dos` as a distinction from `ener` mode. +- `pref_dos` and `pref_ados`, respectively specify the weight of global and atomic loss. If set to 0, the corresponding label will not be included in the training process. +- We also provides a combination training of vector and its cumulative distribution function `cdf`, which can be defined as $$D(\epsilon) = \int_{e_{min}}^{\epsilon} g(\epsilon')d\epsilon'$$ - ## Training Data Preparation The global label should be named `dos.npy/raw`, while the atomic label should be named `atomic_dos.npy/raw`. If wrongly named, DP will report an error. diff --git a/doc/model/train-fitting-tensor.md b/doc/model/train-fitting-tensor.md index 90370adfcf..4d5cb22707 100644 --- a/doc/model/train-fitting-tensor.md +++ b/doc/model/train-fitting-tensor.md @@ -1,157 +1,243 @@ -# Fit `tensor` like `Dipole` and `Polarizability` - -Unlike `energy`, which is a scalar, one may want to fit some high dimensional physical quantity, like `dipole` (vector) and `polarizability` (matrix, shorted as `polar`). Deep Potential has provided different APIs to do this. In this example, we will show you how to train a model to fit a water system. 
A complete training input script of the examples can be found in - -```bash -$deepmd_source_dir/examples/water_tensor/dipole/dipole_input.json -$deepmd_source_dir/examples/water_tensor/polar/polar_input.json -``` - -The training and validation data are also provided our examples. But note that **the data provided along with the examples are of limited amount, and should not be used to train a production model.** - -Similar to the `input.json` used in `ener` mode, training JSON is also divided into {ref}`model `, {ref}`learning_rate `, {ref}`loss ` and {ref}`training `. Most keywords remain the same as `ener` mode, and their meaning can be found [here](train-se-e2-a.md). To fit a tensor, one needs to modify {ref}`model/fitting_net ` and {ref}`loss `. - -## Theory - -To represent the first-order tensorial properties (i.e. vector properties), we let the fitting network, denoted by $\mathcal F_{1}$, output an $M$-dimensional vector; then we have the representation, - -```math -(T_i^{(1)})_\alpha = -\frac{1}{N_c} -\sum_{j=1}^{N_c}\sum_{m=1}^M (\mathcal G^i)_{jm} (\mathcal R^i)_{j,\alpha+1} -(\mathcal F_{1}(\mathcal D^i))_m, \ \alpha=1,2,3. -``` -We let the fitting network $\mathcal F_{2}$ output an $M$-dimensional vector, and the second-order tensorial properties (matrix properties) are formulated as -```math -(T_i^{(2)})_{\alpha\beta} = -\frac{1}{N_c^2} -\sum_{j=1}^{N_c}\sum_{k=1}^{N_c}\sum_{m=1}^M -(\mathcal G^i)_{jm} -(\mathcal R^i)_{j,\alpha+1} -(\mathcal R^i)_{k,\beta+1} -(\mathcal G^i)_{km} -(\mathcal F_{2}(\mathcal D^i))_m, -\ \alpha,\beta=1,2,3, -``` - -where $\mathcal{G}^i$ and $\mathcal{R}^i$ can be found in [`se_e2_a`](./train-se-e2-a.md). -Thus, the tensor fitting network requires the descriptor to have the same or similar form as the DeepPot-SE descriptor. -$\mathcal{F}_1$ and $\mathcal F_2$ are the neural network functions. 
-The total tensor $\boldsymbol{T}$ (total dipole $\boldsymbol{T}^{(1)}$ or total polarizability $\boldsymbol{T}^{(2)}$) is the sum of the atomic tensor: -```math - \boldsymbol{T} = \sum_i \boldsymbol{T}_i. -``` -The tensorial models can be used to calculate IR spectrum and Raman spectrum.[^1] - -[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). - -## The fitting Network - -The {ref}`fitting_net ` section tells DP which fitting net to use. - -The JSON of `dipole` type should be provided like - -```json - "fitting_net" : { - "type": "dipole", - "sel_type": [0], - "neuron": [100,100,100], - "resnet_dt": true, - "seed": 1, - }, -``` - -The JSON of `polar` type should be provided like - -```json - "fitting_net" : { - "type": "polar", - "sel_type": [0], - "neuron": [100,100,100], - "resnet_dt": true, - "seed": 1, - }, -``` - -- `type` specifies which type of fitting net should be used. It should be either `dipole` or `polar`. Note that `global_polar` mode in version 1.x is already **deprecated** and is merged into `polar`. To specify whether a system is global or atomic, please see [here](train-se-e2-a.md). -- `sel_type` is a list specifying which type of atoms have the quantity you want to fit. 
For example, in the water system, `sel_type` is `[0]` since `0` represents atom `O`. If left unset, all types of atoms will be fitted. -- The rest arguments have the same meaning as they do in `ener` mode. - -## Loss - -DP supports a combinational training of the global system (only a global `tensor` label, i.e. dipole or polar, is provided in a frame) and atomic system (labels for **each** atom included in `sel_type` are provided). In a global system, each frame has just **one** `tensor` label. For example, when fitting `polar`, each frame will just provide a `1 x 9` vector which gives the elements of the polarizability tensor of that frame in order XX, XY, XZ, YX, YY, YZ, XZ, ZY, ZZ. By contrast, in an atomic system, each atom in `sel_type` has a `tensor` label. For example, when fitting a dipole, each frame will provide a `#sel_atom x 3` matrices, where `#sel_atom` is the number of atoms whose type are in `sel_type`. - -The {ref}`loss ` section tells DP the weight of these two kinds of loss, i.e. - -```python -loss = pref * global_loss + pref_atomic * atomic_loss -``` - -The loss section should be provided like - -```json - "loss" : { - "type": "tensor", - "pref": 1.0, - "pref_atomic": 1.0 - }, -``` - -- {ref}`type ` should be written as `tensor` as a distinction from `ener` mode. -- {ref}`pref ` and {ref}`pref_atomic ` respectively specify the weight of global loss and atomic loss. It can not be left unset. If set to 0, the corresponding label will NOT be included in the training process. - -## Training Data Preparation - -In tensor mode, the identification of the label's type (global or atomic) is derived from the file name. The global label should be named `dipole.npy/raw` or `polarizability.npy/raw`, while the atomic label should be named `atomic_dipole.npy/raw` or `atomic_polarizability.npy/raw`. If wrongly named, DP will report an error - -```bash -ValueError: cannot reshape array of size xxx into shape (xx,xx). 
This error may occur when your label mismatch it's name, i.e. you might store global tensor in `atomic_tensor.npy` or atomic tensor in `tensor.npy`. -``` - -In this case, please check the file name of the label. - -## Train the Model - -The training command is the same as `ener` mode, i.e. - -```bash -dp train input.json -``` - -The detailed loss can be found in `lcurve.out`: - -``` -# step rmse_val rmse_trn rmse_lc_val rmse_lc_trn rmse_gl_val rmse_gl_trn lr - 0 8.34e+00 8.26e+00 8.34e+00 8.26e+00 0.00e+00 0.00e+00 1.0e-02 - 100 3.51e-02 8.55e-02 0.00e+00 8.55e-02 4.38e-03 0.00e+00 5.0e-03 - 200 4.77e-02 5.61e-02 0.00e+00 5.61e-02 5.96e-03 0.00e+00 2.5e-03 - 300 5.68e-02 1.47e-02 0.00e+00 0.00e+00 7.10e-03 1.84e-03 1.3e-03 - 400 3.73e-02 3.48e-02 1.99e-02 0.00e+00 2.18e-03 4.35e-03 6.3e-04 - 500 2.77e-02 5.82e-02 1.08e-02 5.82e-02 2.11e-03 0.00e+00 3.2e-04 - 600 2.81e-02 5.43e-02 2.01e-02 0.00e+00 1.01e-03 6.79e-03 1.6e-04 - 700 2.97e-02 3.28e-02 2.03e-02 0.00e+00 1.17e-03 4.10e-03 7.9e-05 - 800 2.25e-02 6.19e-02 9.05e-03 0.00e+00 1.68e-03 7.74e-03 4.0e-05 - 900 3.18e-02 5.54e-02 9.93e-03 5.54e-02 2.74e-03 0.00e+00 2.0e-05 - 1000 2.63e-02 5.02e-02 1.02e-02 5.02e-02 2.01e-03 0.00e+00 1.0e-05 - 1100 3.27e-02 5.89e-02 2.13e-02 5.89e-02 1.43e-03 0.00e+00 5.0e-06 - 1200 2.85e-02 2.42e-02 2.85e-02 0.00e+00 0.00e+00 3.02e-03 2.5e-06 - 1300 3.47e-02 5.71e-02 1.07e-02 5.71e-02 3.00e-03 0.00e+00 1.3e-06 - 1400 3.13e-02 5.76e-02 3.13e-02 5.76e-02 0.00e+00 0.00e+00 6.3e-07 - 1500 3.34e-02 1.11e-02 2.09e-02 0.00e+00 1.57e-03 1.39e-03 3.2e-07 - 1600 3.11e-02 5.64e-02 3.11e-02 5.64e-02 0.00e+00 0.00e+00 1.6e-07 - 1700 2.97e-02 5.05e-02 2.97e-02 5.05e-02 0.00e+00 0.00e+00 7.9e-08 - 1800 2.64e-02 7.70e-02 1.09e-02 0.00e+00 1.94e-03 9.62e-03 4.0e-08 - 1900 3.28e-02 2.56e-02 3.28e-02 0.00e+00 0.00e+00 3.20e-03 2.0e-08 - 2000 2.59e-02 5.71e-02 1.03e-02 5.71e-02 1.94e-03 0.00e+00 1.0e-08 -``` - -One may notice that in each step, some of the local loss and global loss will be `0.0`. 
This is because our training data and validation data consist of the global system and atomic system, i.e. -``` - --training_data - >atomic_system - >global_system - --validation_data - >atomic_system - >global_system -``` -During training, at each step when the `lcurve.out` is printed, the system used for evaluating the training (validation) error may be either with only global or only atomic labels, thus the corresponding atomic or global errors are missing and are printed as zeros. +# Fit `tensor` like `Dipole` and `Polarizability` {{ tensorflow_icon }} {{ pytorch_icon }} {{ dpmodel_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, DP {{ dpmodel_icon }} +::: + +Unlike `energy`, which is a scalar, one may want to fit some high-dimensional physical quantity, like `dipole` (vector) and `polarizability` (matrix, shortened to `polar`). Deep Potential has provided different APIs to do this. In this example, we will show you how to train a model to fit a water system. A complete training input script of the examples can be found in + +::::{tab-set} + +:::{tab-item} TensorFlow {{ tensorflow_icon }} + +```bash +$deepmd_source_dir/examples/water_tensor/dipole/dipole_input.json +$deepmd_source_dir/examples/water_tensor/polar/polar_input.json +``` + +::: + +:::{tab-item} PyTorch {{ pytorch_icon }} + +```bash +$deepmd_source_dir/examples/water_tensor/dipole/dipole_input_torch.json +$deepmd_source_dir/examples/water_tensor/polar/polar_input_torch.json +``` + +::: + +:::: + +The training and validation data are also provided in our examples. But note that **the data provided along with the examples are of limited amount, and should not be used to train a production model.** + +Similar to the `input.json` used in `ener` mode, training JSON is also divided into {ref}`model `, {ref}`learning_rate `, {ref}`loss ` and {ref}`training `. Most keywords remain the same as `ener` mode, and their meaning can be found [here](train-se-e2-a.md).
To fit a tensor, one needs to modify {ref}`model/fitting_net ` and {ref}`loss `. + +## Theory + +To represent the first-order tensorial properties (i.e. vector properties), we let the fitting network, denoted by $\mathcal F_{1}$, output an $M$-dimensional vector; then we have the representation, + +```math +(T_i^{(1)})_\alpha = +\frac{1}{N_c} +\sum_{j=1}^{N_c}\sum_{m=1}^M (\mathcal G^i)_{jm} (\mathcal R^i)_{j,\alpha+1} +(\mathcal F_{1}(\mathcal D^i))_m, \ \alpha=1,2,3. +``` + +We let the fitting network $\mathcal F_{2}$ output an $M$-dimensional vector, and the second-order tensorial properties (matrix properties) are formulated as + +```math +(T_i^{(2)})_{\alpha\beta} = +\frac{1}{N_c^2} +\sum_{j=1}^{N_c}\sum_{k=1}^{N_c}\sum_{m=1}^M +(\mathcal G^i)_{jm} +(\mathcal R^i)_{j,\alpha+1} +(\mathcal R^i)_{k,\beta+1} +(\mathcal G^i)_{km} +(\mathcal F_{2}(\mathcal D^i))_m, +\ \alpha,\beta=1,2,3, +``` + +where $\mathcal{G}^i$ and $\mathcal{R}^i$ can be found in [`se_e2_a`](./train-se-e2-a.md). +Thus, the tensor fitting network requires the descriptor to have the same or similar form as the DeepPot-SE descriptor. +$\mathcal{F}_1$ and $\mathcal F_2$ are the neural network functions. +The total tensor $\boldsymbol{T}$ (total dipole $\boldsymbol{T}^{(1)}$ or total polarizability $\boldsymbol{T}^{(2)}$) is the sum of the atomic tensor: + +```math + \boldsymbol{T} = \sum_i \boldsymbol{T}_i. +``` + +The tensorial models can be used to calculate IR spectrum and Raman spectrum.[^1] + +[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. 
Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). + +## The fitting Network + +The {ref}`fitting_net ` section tells DP which fitting net to use. + +::::{tab-set} + +:::{tab-item} TensorFlow {{ tensorflow_icon }} + +The JSON of `dipole` type should be provided like + +```json + "fitting_net" : { + "type": "dipole", + "sel_type": [0], + "neuron": [100,100,100], + "resnet_dt": true, + "seed": 1, + }, +``` + +The JSON of `polar` type should be provided like + +```json + "fitting_net" : { + "type": "polar", + "sel_type": [0], + "neuron": [100,100,100], + "resnet_dt": true, + "seed": 1, + }, +``` + +- `type` specifies which type of fitting net should be used. It should be either `dipole` or `polar`. Note that `global_polar` mode in version 1.x is already **deprecated** and is merged into `polar`. To specify whether a system is global or atomic, please see [here](train-se-e2-a.md). +- `sel_type` is a list specifying which type of atoms have the quantity you want to fit. For example, in the water system, `sel_type` is `[0]` since `0` represents atom `O`. If left unset, all types of atoms will be fitted. +- The rest arguments have the same meaning as they do in `ener` mode. 
+ +::: + +:::{tab-item} PyTorch {{ pytorch_icon }} + +The JSON of `dipole` type should be provided like + +```json + "atom_exclude_types": [ + 1 + ], + "fitting_net" : { + "type": "dipole", + "neuron": [100,100,100], + "resnet_dt": true, + "seed": 1, + }, +``` + +The JSON of `polar` type should be provided like + +```json + "atom_exclude_types": [ + 1 + ], + "fitting_net" : { + "type": "polar", + "neuron": [100,100,100], + "resnet_dt": true, + "seed": 1, + }, +``` + +- `type` specifies which type of fitting net should be used. It should be either `dipole` or `polar`. Note that `global_polar` mode in version 1.x is already **deprecated** and is merged into `polar`. To specify whether a system is global or atomic, please see [here](train-se-e2-a.md). +- `atom_exclude_types` is a list specifying which types of atoms have the quantity you want to set to zero. For example, in the water system, `atom_exclude_types` is `[1]` since `1` represents atom `H`. +- The rest arguments have the same meaning as they do in `ener` mode. + ::: + +:::: + +## Loss + +DP supports a combinational training of the global system (only a global `tensor` label, i.e. dipole or polar, is provided in a frame) and atomic system (labels for **each** atom included in `sel_type`/ not included in `atom_exclude_types` are provided). In a global system, each frame has just **one** `tensor` label. For example, when fitting `polar`, each frame will just provide a `1 x 9` vector which gives the elements of the polarizability tensor of that frame in order XX, XY, XZ, YX, YY, YZ, ZX, ZY, ZZ. By contrast, in an atomic system, each atom in `sel_type` has a `tensor` label. For example, when fitting a dipole, each frame will provide a `#sel_atom x 3` matrix, where `#sel_atom` is the number of atoms whose types are in `sel_type`. + +The {ref}`loss ` section tells DP the weight of these two kinds of loss, i.e.
+ +```python +loss = pref * global_loss + pref_atomic * atomic_loss +``` + +The loss section should be provided like + +```json + "loss" : { + "type": "tensor", + "pref": 1.0, + "pref_atomic": 1.0 + }, +``` + +- {ref}`type ` should be written as `tensor` as a distinction from `ener` mode. +- {ref}`pref ` and {ref}`pref_atomic ` respectively specify the weight of global loss and atomic loss. It can not be left unset. If set to 0, the corresponding label will NOT be included in the training process. + +## Training Data Preparation + +In tensor mode, the identification of the label's type (global or atomic) is derived from the file name. The global label should be named `dipole.npy/raw` or `polarizability.npy/raw`, while the atomic label should be named `atomic_dipole.npy/raw` or `atomic_polarizability.npy/raw`. If wrongly named, DP will report an error + +```bash +ValueError: cannot reshape array of size xxx into shape (xx,xx). This error may occur when your label mismatch it's name, i.e. you might store global tensor in `atomic_tensor.npy` or atomic tensor in `tensor.npy`. +``` + +In this case, please check the file name of the label. + +## Train the Model + +The training command is the same as `ener` mode, i.e. 
+ +::::{tab-set} + +:::{tab-item} TensorFlow {{ tensorflow_icon }} + +```bash +dp train input.json +``` + +::: + +:::{tab-item} PyTorch {{ pytorch_icon }} + +```bash +dp --pt train input.json +``` + +::: + +:::: + +The detailed loss can be found in `lcurve.out`: + +``` +# step rmse_val rmse_trn rmse_lc_val rmse_lc_trn rmse_gl_val rmse_gl_trn lr + 0 8.34e+00 8.26e+00 8.34e+00 8.26e+00 0.00e+00 0.00e+00 1.0e-02 + 100 3.51e-02 8.55e-02 0.00e+00 8.55e-02 4.38e-03 0.00e+00 5.0e-03 + 200 4.77e-02 5.61e-02 0.00e+00 5.61e-02 5.96e-03 0.00e+00 2.5e-03 + 300 5.68e-02 1.47e-02 0.00e+00 0.00e+00 7.10e-03 1.84e-03 1.3e-03 + 400 3.73e-02 3.48e-02 1.99e-02 0.00e+00 2.18e-03 4.35e-03 6.3e-04 + 500 2.77e-02 5.82e-02 1.08e-02 5.82e-02 2.11e-03 0.00e+00 3.2e-04 + 600 2.81e-02 5.43e-02 2.01e-02 0.00e+00 1.01e-03 6.79e-03 1.6e-04 + 700 2.97e-02 3.28e-02 2.03e-02 0.00e+00 1.17e-03 4.10e-03 7.9e-05 + 800 2.25e-02 6.19e-02 9.05e-03 0.00e+00 1.68e-03 7.74e-03 4.0e-05 + 900 3.18e-02 5.54e-02 9.93e-03 5.54e-02 2.74e-03 0.00e+00 2.0e-05 + 1000 2.63e-02 5.02e-02 1.02e-02 5.02e-02 2.01e-03 0.00e+00 1.0e-05 + 1100 3.27e-02 5.89e-02 2.13e-02 5.89e-02 1.43e-03 0.00e+00 5.0e-06 + 1200 2.85e-02 2.42e-02 2.85e-02 0.00e+00 0.00e+00 3.02e-03 2.5e-06 + 1300 3.47e-02 5.71e-02 1.07e-02 5.71e-02 3.00e-03 0.00e+00 1.3e-06 + 1400 3.13e-02 5.76e-02 3.13e-02 5.76e-02 0.00e+00 0.00e+00 6.3e-07 + 1500 3.34e-02 1.11e-02 2.09e-02 0.00e+00 1.57e-03 1.39e-03 3.2e-07 + 1600 3.11e-02 5.64e-02 3.11e-02 5.64e-02 0.00e+00 0.00e+00 1.6e-07 + 1700 2.97e-02 5.05e-02 2.97e-02 5.05e-02 0.00e+00 0.00e+00 7.9e-08 + 1800 2.64e-02 7.70e-02 1.09e-02 0.00e+00 1.94e-03 9.62e-03 4.0e-08 + 1900 3.28e-02 2.56e-02 3.28e-02 0.00e+00 0.00e+00 3.20e-03 2.0e-08 + 2000 2.59e-02 5.71e-02 1.03e-02 5.71e-02 1.94e-03 0.00e+00 1.0e-08 +``` + +One may notice that in each step, some of the local loss and global loss will be `0.0`. This is because our training data and validation data consist of the global system and atomic system, i.e. 
+ +``` + --training_data + >atomic_system + >global_system + --validation_data + >atomic_system + >global_system +``` + +During training, at each step when the `lcurve.out` is printed, the system used for evaluating the training (validation) error may be either with only global or only atomic labels, thus the corresponding atomic or global errors are missing and are printed as zeros. diff --git a/doc/model/train-hybrid.md b/doc/model/train-hybrid.md index 58b66f25e0..c0a55d9eb5 100644 --- a/doc/model/train-hybrid.md +++ b/doc/model/train-hybrid.md @@ -1,10 +1,15 @@ -# Descriptor `"hybrid"` +# Descriptor `"hybrid"` {{ tensorflow_icon }} {{ pytorch_icon }} {{ dpmodel_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, DP {{ dpmodel_icon }} +::: This descriptor hybridizes multiple descriptors to form a new descriptor. For example, we have a list of descriptors denoted by $\mathcal D_1$, $\mathcal D_2$, ..., $\mathcal D_N$, the hybrid descriptor this the concatenation of the list, i.e. $\mathcal D = (\mathcal D_1, \mathcal D_2, \cdots, \mathcal D_N)$. ## Theory A hybrid descriptor $\mathcal{D}^i_\text{hyb}$ concatenates multiple kinds of descriptors into one descriptor: + ```math \mathcal{D}^{i}_\text{hyb} = \{ \begin{array}{cccc} @@ -12,14 +17,16 @@ A hybrid descriptor $\mathcal{D}^i_\text{hyb}$ concatenates multiple kinds of de \end{array} \}. ``` + The list of descriptors can be different types or the same descriptors with different parameters. 
This way, one can set the different cutoff radii for different descriptors.[^1] -[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). +[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). 
## Instructions To use the descriptor in DeePMD-kit, one firstly set the {ref}`type ` to {ref}`hybrid `, then provide the definitions of the descriptors by the items in the `list`, + ```json "descriptor" :{ "type": "hybrid", @@ -37,6 +44,7 @@ To use the descriptor in DeePMD-kit, one firstly set the {ref}`type `. An example of the descriptor is provided as follows + ```json "descriptor" :{ "type": "se_a_mask", @@ -35,15 +42,17 @@ The construction of the descriptor is given by section {ref}`descriptor ` of the descriptor is set to `"se_a_mask"`. -* {ref}`sel ` gives the maximum number of atoms in input coordinates. It is a list, the length of which is the same as the number of atom types in the system, and `sel[i]` denotes the maximum number of atoms with type `i`. -* The {ref}`neuron ` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them. -* The {ref}`axis_neuron ` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003) -* If the option {ref}`type_one_side ` is set to `true`, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters. -* If the option {ref}`resnet_dt ` is set to `true`, then a timestep is used in the ResNet. -* {ref}`seed ` gives the random seed that is used to generate random numbers when initializing the model parameters. + +- The {ref}`type ` of the descriptor is set to `"se_a_mask"`. 
+- {ref}`sel ` gives the maximum number of atoms in input coordinates. It is a list, the length of which is the same as the number of atom types in the system, and `sel[i]` denotes the maximum number of atoms with type `i`. +- The {ref}`neuron ` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them. +- The {ref}`axis_neuron ` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003) +- If the option {ref}`type_one_side ` is set to `true`, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters. +- If the option {ref}`resnet_dt ` is set to `true`, then a timestep is used in the ResNet. +- {ref}`seed ` gives the random seed that is used to generate random numbers when initializing the model parameters. To make the `aparam.npy` used for descriptor `se_a_mask`, two variables in `fitting_net` section are needed. + ```json "fitting_net" :{ "neuron": [240, 240, 240], @@ -53,14 +62,16 @@ To make the `aparam.npy` used for descriptor `se_a_mask`, two variables in `fitt "use_aparam_as_mask": true } ``` -* `neuron`, `resnet_dt` and `seed` are the same as the {ref}`fitting_net ` section for fitting energy. -* {ref}`numb_aparam ` gives the dimesion of the `aparam.npy` file. In this example, it is set to 1 and stores the real/virtual sign of the atoms. For real/virtual atoms, the corresponding sign in `aparam.npy` is set to 1/0. 
-* {ref}`use_aparam_as_mask ` is set to `true` to use the `aparam.npy` as the mask of the atoms in the descriptor `se_a_mask`. + +- `neuron`, `resnet_dt` and `seed` are the same as the {ref}`fitting_net ` section for fitting energy. +- {ref}`numb_aparam ` gives the dimension of the `aparam.npy` file. In this example, it is set to 1 and stores the real/virtual sign of the atoms. For real/virtual atoms, the corresponding sign in `aparam.npy` is set to 1/0. +- {ref}`use_aparam_as_mask ` is set to `true` to use the `aparam.npy` as the mask of the atoms in the descriptor `se_a_mask`. Finally, to make a reasonable fitting task with `se_a_mask` descriptor for DP/MM simulations, the loss function with `se_a_mask` is designed to include the atomic forces difference in specific atoms of the input particles only. More details about the selection of the specific atoms can be found in paper [DP/MM](left to be filled). Thus, `atom_pref.npy` ( [ nframes * natoms ] ) is required as the indicator of the specific atoms in the input particles. And the `loss` section in the training input script should be set as follows.
+ ```json "loss": { "type": "ener", diff --git a/doc/model/train-se-atten.md b/doc/model/train-se-atten.md index 7480ddbc12..364d35805b 100644 --- a/doc/model/train-se-atten.md +++ b/doc/model/train-se-atten.md @@ -1,4 +1,8 @@ -# Descriptor `"se_atten"` +# Descriptor `"se_atten"` {{ tensorflow_icon }} {{ pytorch_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }} +::: ## DPA-1: Pretraining of Attention-based Deep Potential Model for Molecular Simulation @@ -15,43 +19,53 @@ Attention-based descriptor $\mathcal{D}^i \in \mathbb{R}^{M \times M_{<}}$, whic ```math \mathcal{D}^i = \frac{1}{N_c^2}(\hat{\mathcal{G}}^i)^T \mathcal{R}^i (\mathcal{R}^i)^T \hat{\mathcal{G}}^i_<, ``` + where $\hat{\mathcal{G}}^i$ represents the embedding matrix $\mathcal{G}^i$ after additional self-attention mechanism and $\mathcal{R}^i$ is defined by the full case in the [`se_e2_a`](./train-se-e2-a.md). Note that we obtain $\mathcal{G}^i$ using the type embedding method by default in this descriptor. To perform the self-attention mechanism, the queries $\mathcal{Q}^{i,l} \in \mathbb{R}^{N_c\times d_k}$, keys $\mathcal{K}^{i,l} \in \mathbb{R}^{N_c\times d_k}$, and values $\mathcal{V}^{i,l} \in \mathbb{R}^{N_c\times d_v}$ are first obtained: + ```math \left(\mathcal{Q}^{i,l}\right)_{j}=Q_{l}\left(\left(\mathcal{G}^{i,l-1}\right)_{j}\right), ``` + ```math \left(\mathcal{K}^{i,l}\right)_{j}=K_{l}\left(\left(\mathcal{G}^{i,l-1}\right)_{j}\right), ``` + ```math \left(\mathcal{V}^{i,l}\right)_{j}=V_{l}\left(\left(\mathcal{G}^{i,l-1}\right)_{j}\right), ``` + where $Q_{l}$, $K_{l}$, $V_{l}$ represent three trainable linear transformations that output the queries and keys of dimension $d_k$ and values of dimension $d_v$, and $l$ is the index of the attention layer. -The input embedding matrix to the attention layers, denoted by $\mathcal{G}^{i,0}$, is chosen as the two-body embedding matrix. 
+The input embedding matrix to the attention layers, denoted by $\mathcal{G}^{i,0}$, is chosen as the two-body embedding matrix. Then the scaled dot-product attention method is adopted: + ```math A(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l}, \mathcal{V}^{i,l}, \mathcal{R}^{i,l})=\varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l},\mathcal{R}^{i,l}\right)\mathcal{V}^{i,l}, ``` + where $\varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l},\mathcal{R}^{i,l}\right) \in \mathbb{R}^{N_c\times N_c}$ is attention weights. In the original attention method, one typically has $\varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l}\right)=\mathrm{softmax}\left(\frac{\mathcal{Q}^{i,l} (\mathcal{K}^{i,l})^{T}}{\sqrt{d_{k}}}\right)$, with $\sqrt{d_{k}}$ being the normalization temperature. This is slightly modified to incorporate the angular information: + ```math \varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l},\mathcal{R}^{i,l}\right) = \mathrm{softmax}\left(\frac{\mathcal{Q}^{i,l} (\mathcal{K}^{i,l})^{T}}{\sqrt{d_{k}}}\right) \odot \hat{\mathcal{R}}^{i}(\hat{\mathcal{R}}^{i})^{T}, ``` + where $\hat{\mathcal{R}}^{i} \in \mathbb{R}^{N_c\times 3}$ denotes normalized relative coordinates , $\hat{\mathcal{R}}^{i}_{j} = \frac{\boldsymbol{r}_{ij}}{\lVert \boldsymbol{r}_{ij} \lVert}$ and $\odot$ means element-wise multiplication. Then layer normalization is added in a residual way to finally obtain the self-attention local embedding matrix $\hat{\mathcal{G}}^{i} = \mathcal{G}^{i,L_a}$ after $L_a$ attention layers:[^1] + ```math \mathcal{G}^{i,l} = \mathcal{G}^{i,l-1} + \mathrm{LayerNorm}(A(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l}, \mathcal{V}^{i,l}, \mathcal{R}^{i,l})). 
``` -[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). - +[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). ## Introduction to new features of DPA-1 + Next, we will list the detailed settings in input.json and the data format, especially for large systems with dozens of elements. An example of DPA-1 input can be found [here](../../examples/water/se_atten/input.json). 
### Descriptor `"se_atten"` @@ -59,13 +73,20 @@ Next, we will list the detailed settings in input.json and the data format, espe The notation of `se_atten` is short for the smooth edition of Deep Potential with an attention mechanism. This descriptor was described in detail in [the DPA-1 paper](https://arxiv.org/abs/2208.08236) and the images above. -In this example, we will train a DPA-1 model for a water system. A complete training input script of this example can be found in the directory: +In this example, we will train a DPA-1 model for a water system. A complete training input script of this example can be found in the directory: + ```bash $deepmd_source_dir/examples/water/se_atten/input.json ``` + With the training input script, data are also provided in the example directory. One may train the model with the DeePMD-kit from the directory. An example of the DPA-1 descriptor is provided as follows + +::::{tab-set} + +:::{tab-item} TensorFlow {{ tensorflow_icon }} + ```json "descriptor" :{ "type": "se_atten", @@ -82,33 +103,76 @@ An example of the DPA-1 descriptor is provided as follows "seed": 1 } ``` -* The {ref}`type ` of the descriptor is set to `"se_atten"`, which will use DPA-1 structures. -* {ref}`rcut ` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth ` gives where the smoothing starts. -* **{ref}`sel `** gives the maximum possible number of neighbors in the cut-off radius. It is an int. Note that this number highly affects the efficiency of training, which we usually use less than 200. (We use 120 for training 56 elements in [OC2M dataset](https://github.com/Open-Catalyst-Project/ocp/blob/main/DATASET.md)) -* The {ref}`neuron ` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. 
If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them. -* The {ref}`axis_neuron ` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003) -* If the option {ref}`resnet_dt ` is set to `true`, then a timestep is used in the ResNet. -* {ref}`seed ` gives the random seed that is used to generate random numbers when initializing the model parameters. -* {ref}`attn ` sets the length of a hidden vector during scale-dot attention computation. -* {ref}`attn_layer ` sets the number of layers in attention mechanism. -* {ref}`attn_mask ` determines whether to mask the diagonal in the attention weights and False is recommended. -* {ref}`attn_dotr ` determines whether to dot the relative coordinates on the attention weights as a gated scheme, True is recommended. + +- The {ref}`type ` of the descriptor is set to `"se_atten"`, which will use DPA-1 structures. +- {ref}`rcut ` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth ` gives where the smoothing starts. +- **{ref}`sel `** gives the maximum possible number of neighbors in the cut-off radius. It is an int. Note that this number highly affects the efficiency of training, which we usually use less than 200. (We use 120 for training 56 elements in [OC2M dataset](https://github.com/Open-Catalyst-Project/ocp/blob/main/DATASET.md)) +- The {ref}`neuron ` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them. 
+- The {ref}`axis_neuron ` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003) +- If the option {ref}`resnet_dt ` is set to `true`, then a timestep is used in the ResNet. +- {ref}`seed ` gives the random seed that is used to generate random numbers when initializing the model parameters. +- {ref}`attn ` sets the length of a hidden vector during scale-dot attention computation. +- {ref}`attn_layer ` sets the number of layers in attention mechanism. +- {ref}`attn_mask ` determines whether to mask the diagonal in the attention weights and False is recommended. +- {ref}`attn_dotr ` determines whether to dot the relative coordinates on the attention weights as a gated scheme, True is recommended. + +::: + +:::{tab-item} PyTorch {{ pytorch_icon }} + +```json + "descriptor" :{ + "type": "dpa1", + "rcut_smth": 0.50, + "rcut": 6.00, + "sel": 120, + "neuron": [25, 50, 100], + "tebd_dim": 8, + "axis_neuron": 16, + "attn": 128, + "attn_layer": 2, + "attn_mask": false, + "attn_dotr": true, + "post_ln": true + } +``` + +- The {ref}`type ` of the descriptor is set to `"dpa1"`, which will use DPA-1 structures. +- {ref}`rcut ` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth ` gives where the smoothing starts. +- **{ref}`sel `** gives the maximum possible number of neighbors in the cut-off radius. It is an int. Note that this number highly affects the efficiency of training, which we usually use less than 200. (We use 120 for training 56 elements in [OC2M dataset](https://github.com/Open-Catalyst-Project/ocp/blob/main/DATASET.md)) +- The {ref}`neuron ` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. 
If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them. +- The {ref}`tebd_dim ` specifies the dimension of the type embedding. +- The {ref}`axis_neuron ` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003) +- {ref}`attn ` sets the length of a hidden vector during scale-dot attention computation. +- {ref}`attn_layer ` sets the number of layers in attention mechanism. +- {ref}`attn_mask ` determines whether to mask the diagonal in the attention weights and False is recommended. +- {ref}`attn_dotr ` determines whether to dot the relative coordinates on the attention weights as a gated scheme, True is recommended. +- {ref}`post_ln ` determines whether to perform post layer norm. + +::: + +:::: ### Descriptor `"se_atten_v2"` + We highly recommend using the version 2.0 of the attention-based descriptor `"se_atten_v2"`, which is inherited from `"se_atten"` but with the following parameter modifications: + ```json "stripped_type_embedding": true, "smooth_type_embdding": true, "set_davg_zero": false ``` -Practical evidence demonstrates that `"se_atten_v2"` offers better and more stable performance compared to `"se_atten"`. +Practical evidence demonstrates that `"se_atten_v2"` offers better and more stable performance compared to `"se_atten"`. ### Fitting `"ener"` + DPA-1 only supports `"ener"` fitting type, and you can refer [here](train-energy.md) for detailed information. ### Type embedding + DPA-1 only supports models with type embeddings. And the default setting is as follows: + ```json "type_embedding":{ "neuron": [8], @@ -116,11 +180,13 @@ DPA-1 only supports models with type embeddings. 
And the default setting is as f "seed": 1 } ``` -You can add these settings in input.json if you want to change the default ones, see [here](train-se-e2-a-tebd.md) for detailed information. +You can add these settings in input.json if you want to change the default ones, see [here](train-se-e2-a-tebd.md) for detailed information. ### Type map + For training large systems, especially those with dozens of elements, the {ref}`type ` determines the element index of training data: + ```json "type_map": [ "Mg", @@ -128,8 +194,11 @@ For training large systems, especially those with dozens of elements, the {ref}` "Cu" ] ``` + which should include all the elements in the dataset you want to train on. + ## Data format + DPA-1 supports the standard data format, which is detailed in [data-conv.md](../data/data-conv.md) and [system.md](../data/system.md). Note that in this format, only those frames with the same fingerprint (i.e. the number of atoms of different elements) can be put together as a unified system. This may lead to sparse frame numbers in those rare systems. @@ -137,6 +206,7 @@ This may lead to sparse frame numbers in those rare systems. An ideal way is to put systems with the same total number of atoms together, which is the way we trained DPA-1 on [OC2M](https://github.com/Open-Catalyst-Project/ocp/blob/main/DATASET.md). This system format, which is called `mixed_type`, is proper to put frame-sparse systems together and is slightly different from the standard one. Take an example, a `mixed_type` may contain the following files: + ``` type.raw type_map.raw @@ -146,13 +216,14 @@ set.*/energy.npy set.*/force.npy set.*/real_atom_types.npy ``` + This system contains `Nframes` frames with the same atom number `Natoms`, the total number of element types contained in all frames is `Ntypes`. 
Most files are the same as those in [standard formats](../data/system.md), here we only list the distinct ones: -ID | Property | File | Required/Optional | Shape | Description ----------- | -------------------------------- | ------------------- | -------------------- | ----------------------- | ----------- -/ | Atom type indexes (place holder) | type.raw | Required | Natoms | All zeros to fake the type input -type_map | Atom type names | type_map.raw | Required | Ntypes | Atom names that map to atom type contained in all the frames, which is unnecessart to be contained in the periodic table -type | Atom type indexes of each frame | real_atom_types.npy | Required | Nframes \* Natoms | Integers that describe atom types in each frame, corresponding to indexes in type_map. `-1` means virtual atoms. +| ID | Property | File | Required/Optional | Shape | Description | +| -------- | -------------------------------- | ------------------- | ----------------- | ----------------- | ------------------------------------------------------------------------------------------------------------------------ | +| / | Atom type indexes (place holder) | type.raw | Required | Natoms | All zeros to fake the type input | +| type_map | Atom type names | type_map.raw | Required | Ntypes | Atom names that map to atom type contained in all the frames, which is unnecessary to be contained in the periodic table | +| type | Atom type indexes of each frame | real_atom_types.npy | Required | Nframes \* Natoms | Integers that describe atom types in each frame, corresponding to indexes in type_map. `-1` means virtual atoms. | With these edited files, one can put together frames with the same `Natoms`, instead of the same formula (like `H2O`). Note that this `mixed_type` format only supports `se_atten` descriptor. 
@@ -161,6 +232,7 @@ To put frames with different `Natoms` into the same system, one can pad systems The API to generate or transfer to `mixed_type` format is available on [dpdata](https://github.com/deepmodeling/dpdata) for a more convenient experience. ## Training example + Here we upload the AlMgCu example shown in the paper, you can download it here: [Baidu disk](https://pan.baidu.com/s/1Mk9CihPHCmf8quwaMhT-nA?pwd=d586); [Google disk](https://drive.google.com/file/d/11baEpRrvHoqxORFPSdJiGWusb3Y4AnRE/view?usp=sharing). diff --git a/doc/model/train-se-e2-a-tebd.md b/doc/model/train-se-e2-a-tebd.md index cb6ce6674f..a6291bb238 100644 --- a/doc/model/train-se-e2-a-tebd.md +++ b/doc/model/train-se-e2-a-tebd.md @@ -1,4 +1,8 @@ -# Type embedding approach +# Type embedding approach {{ tensorflow_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }} +::: We generate specific a type embedding vector for each atom type so that we can share one descriptor embedding net and one fitting net in total, which decline training complexity largely. @@ -12,6 +16,7 @@ Usually, when the type embedding approach is not enabled, for a system with mult (\mathcal{G}^i)_j = \mathcal{N}^{\alpha_i, \alpha_j}_{e,2}(s(r_{ij})) \quad \mathrm{or}\quad (\mathcal{G}^i)_j = \mathcal{N}^{ \alpha_j}_{e,2}(s(r_{ij})), ``` + ```math (\mathcal{G}^i)_{jk} =\mathcal{N}^{\alpha_j, \alpha_k}_{e,3}((\theta_i)_{jk}). ``` @@ -24,6 +29,7 @@ The limitation of this approach is that when there are large numbers of chemical Similar to the embedding networks, if the type embedding approach is not used, the fitting network parameters are chemical-species-wise, and there are $N_t$ sets of fitting network parameters. For performance, atoms are sorted by their chemical species $\alpha_i$ in advance. Take an example, the atomic energy $E_i$ is represented as follows: + ```math E_i=\mathcal{F}_0^{\alpha_i}(\mathcal{D}^i). 
``` @@ -42,21 +48,25 @@ The type embeddings of central and neighboring atoms $\mathcal{A}^i$ and $\mathc (\mathcal{G}^i)_j = \mathcal{N}_{e,2}(\{s(r_{ij}), \mathcal{A}^i, \mathcal{A}^j\}) \quad \mathrm{or}\quad (\mathcal{G}^i)_j = \mathcal{N}_{e,2}(\{s(r_{ij}), \mathcal{A}^j\}) , ``` + ```math (\mathcal{G}^i)_{jk} =\mathcal{N}_{e,3}(\{(\theta_i)_{jk}, \mathcal{A}^j, \mathcal{A}^k\}). ``` In fitting networks, the type embedding is inserted into the input of the fitting networks: + ```math E_i=\mathcal{F}_0(\{\mathcal{D}^i, \mathcal{A}^i\}). ``` In this way, all chemical species share the same network parameters through the type embedding.[^1] -[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). +[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. 
Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). ## Instructions + The {ref}`model ` defines how the model is constructed, adding a section of type embedding net: + ```json "model": { "type_map": ["O", "H"], @@ -71,9 +81,11 @@ The {ref}`model ` defines how the model is constructed, adding a section } } ``` + The model will automatically apply the type embedding approach and generate type embedding vectors. If the type embedding vector is detected, the descriptor and fitting net would take it as a part of the input. The construction of type embedding net is given by {ref}`type_embedding `. An example of {ref}`type_embedding ` is provided as follows + ```json "type_embedding":{ "neuron": [2, 4, 8], @@ -81,15 +93,17 @@ The construction of type embedding net is given by {ref}`type_embedding ` specifies the size of the type embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. It takes a one-hot vector as input and output dimension equals to the last dimension of the {ref}`neuron ` list. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them. -* If the option {ref}`resnet_dt ` is set to `true`, then a timestep is used in the ResNet. -* {ref}`seed ` gives the random seed that is used to generate random numbers when initializing the model parameters. +- The {ref}`neuron ` specifies the size of the type embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. 
It takes a one-hot vector as input and output dimension equals to the last dimension of the {ref}`neuron ` list. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them. +- If the option {ref}`resnet_dt ` is set to `true`, then a timestep is used in the ResNet. +- {ref}`seed ` gives the random seed that is used to generate random numbers when initializing the model parameters. A complete training input script of this example can be found in the directory. + ```bash $deepmd_source_dir/examples/water/se_e2_a_tebd/input.json ``` + See [here](../development/type-embedding.md) for further explanation of `type embedding`. :::{note} diff --git a/doc/model/train-se-e2-a.md b/doc/model/train-se-e2-a.md index 537253a6d9..2412bbc64e 100644 --- a/doc/model/train-se-e2-a.md +++ b/doc/model/train-se-e2-a.md @@ -1,4 +1,8 @@ -# Descriptor `"se_e2_a"` +# Descriptor `"se_e2_a"` {{ tensorflow_icon }} {{ pytorch_icon }} {{ dpmodel_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, DP {{ dpmodel_icon }} +::: The notation of `se_e2_a` is short for the Deep Potential Smooth Edition (DeepPot-SE) constructed from all information (both angular and radial) of atomic configurations. The `e2` stands for the embedding with two-atoms information. This descriptor was described in detail in [the DeepPot-SE paper](https://arxiv.org/abs/1805.09003). @@ -39,10 +43,10 @@ where $\boldsymbol{r}_{ij}=\boldsymbol{r}_j-\boldsymbol{r}_i = (x_{ij}, y_{ij}, \end{cases} ``` -where $x=\frac{r - r_s}{ r_c - r_s}$ switches from 1 at $r_s$ to 0 at the cutoff radius $r_c$. +where $x=\frac{r - r_s}{ r_c - r_s}$ switches from 1 at $r_s$ to 0 at the cutoff radius $r_c$. The switching function $s(r)$ is smooth in the sense that the second-order derivative is continuous. 
-Each row of the embedding matrix $\mathcal{G}^i \in \mathbb{R}^{N_c \times M}$ consists of $M$ nodes from the output layer of an NN function $\mathcal{N}_ {g}$ of $s(r_{ij})$: +Each row of the embedding matrix $\mathcal{G}^i \in \mathbb{R}^{N_c \times M}$ consists of $M$ nodes from the output layer of an NN function $\mathcal{N}_ {g}$ of $s(r_{ij})$: ```math (\mathcal{G}^i)_j = \mathcal{N}_{e,2}(s(r_{ij})), @@ -54,17 +58,20 @@ $\mathcal{G}^i_< \in \mathbb{R}^{N_c \times M_<}$ only takes first $M_<$ columns $r_s$, $r_c$, $M$ and $M_<$ are hyperparameters provided by the user. The DeepPot-SE is continuous up to the second-order derivative in its domain.[^1] -[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). +[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. 
Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). ## Instructions -In this example, we will train a DeepPot-SE model for a water system. A complete training input script of this example can be found in the directory. +In this example, we will train a DeepPot-SE model for a water system. A complete training input script of this example can be found in the directory. + ```bash $deepmd_source_dir/examples/water/se_e2_a/input.json ``` + With the training input script, data are also provided in the example directory. One may train the model with the DeePMD-kit from the directory. The construction of the descriptor is given by section {ref}`descriptor `. An example of the descriptor is provided as follows + ```json "descriptor" :{ "type": "se_e2_a", @@ -78,11 +85,12 @@ The construction of the descriptor is given by section {ref}`descriptor ` of the descriptor is set to `"se_e2_a"`. -* {ref}`rcut ` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth ` gives where the smoothing starts. -* {ref}`sel ` gives the maximum possible number of neighbors in the cut-off radius. It is a list, the length of which is the same as the number of atom types in the system, and `sel[i]` denotes the maximum possible number of neighbors with type `i`. -* The {ref}`neuron ` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them. 
-* If the option {ref}`type_one_side ` is set to `true`, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters. -* The {ref}`axis_neuron ` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003) -* If the option {ref}`resnet_dt ` is set to `true`, then a timestep is used in the ResNet. -* {ref}`seed ` gives the random seed that is used to generate random numbers when initializing the model parameters. + +- The {ref}`type ` of the descriptor is set to `"se_e2_a"`. +- {ref}`rcut ` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth ` gives where the smoothing starts. +- {ref}`sel ` gives the maximum possible number of neighbors in the cut-off radius. It is a list, the length of which is the same as the number of atom types in the system, and `sel[i]` denotes the maximum possible number of neighbors with type `i`. +- The {ref}`neuron ` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them. +- If the option {ref}`type_one_side ` is set to `true`, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters. 
+- The {ref}`axis_neuron ` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003) +- If the option {ref}`resnet_dt ` is set to `true`, then a timestep is used in the ResNet. +- {ref}`seed ` gives the random seed that is used to generate random numbers when initializing the model parameters. diff --git a/doc/model/train-se-e2-r.md b/doc/model/train-se-e2-r.md index f2f990b16a..f427310196 100644 --- a/doc/model/train-se-e2-r.md +++ b/doc/model/train-se-e2-r.md @@ -1,4 +1,8 @@ -# Descriptor `"se_e2_r"` +# Descriptor `"se_e2_r"` {{ tensorflow_icon }} {{ pytorch_icon }} {{ dpmodel_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, DP {{ dpmodel_icon }} +::: The notation of `se_e2_r` is short for the Deep Potential Smooth Edition (DeepPot-SE) constructed from the radial information of atomic configurations. The `e2` stands for the embedding with two-atom information. @@ -14,7 +18,7 @@ where $N_c$ is the expected maximum number of neighboring atoms, which is the same constant for all atoms over all frames. A matrix with a dimension of $N_c$ will be padded if the number of neighboring atoms is less than $N_c$. -Each row of the embedding matrix $\mathcal{G}^i \in \mathbb{R}^{N_c \times M}$ consists of $M$ nodes from the output layer of an NN function $\mathcal{N}_ {g}$ of $s(r_{ij})$: +Each row of the embedding matrix $\mathcal{G}^i \in \mathbb{R}^{N_c \times M}$ consists of $M$ nodes from the output layer of an NN function $\mathcal{N}_ {g}$ of $s(r_{ij})$: ```math (\mathcal{G}^i)_j = \mathcal{N}_{e,2}(s(r_{ij})), @@ -31,23 +35,25 @@ where $\boldsymbol{r}_ {ij}=\boldsymbol{r}_ j-\boldsymbol{r}_ i = (x_{ij}, y_{ij \end{cases} ``` -where $x=\frac{r - r_s}{ r_c - r_s}$ switches from 1 at $r_s$ to 0 at the cutoff radius $r_c$. +where $x=\frac{r - r_s}{ r_c - r_s}$ switches from 1 at $r_s$ to 0 at the cutoff radius $r_c$. 
The switching function $s(r)$ is smooth in the sense that the second-order derivative is continuous. In the above equations, the network parameters are not explicitly written. $r_s$, $r_c$ and $M$ are hyperparameters provided by the user. The DeepPot-SE is continuous up to the second-order derivative in its domain.[^1] -[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). +[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). 
## Instructions A complete training input script of this example can be found in the directory + ```bash $deepmd_source_dir/examples/water/se_e2_r/input.json ``` The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.md). The only difference lies in the {ref}`descriptor ` section + ```json "descriptor": { "type": "se_e2_r", @@ -55,9 +61,11 @@ The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.m "rcut_smth": 0.50, "rcut": 6.00, "neuron": [5, 10, 20], + "type_one_side": true, "resnet_dt": false, "seed": 1, "_comment": " that's all" }, ``` + The type of the descriptor is set by the key {ref}`type `. diff --git a/doc/model/train-se-e3.md b/doc/model/train-se-e3.md index 5b0710a389..3a0c1a9547 100644 --- a/doc/model/train-se-e3.md +++ b/doc/model/train-se-e3.md @@ -1,13 +1,19 @@ -# Descriptor `"se_e3"` +# Descriptor `"se_e3"` {{ tensorflow_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }} +::: The notation of `se_e3` is short for the Deep Potential Smooth Edition (DeepPot-SE) constructed from all information (both angular and radial) of atomic configurations. The embedding takes bond angles between a central atom and its two neighboring atoms as input (denoted by `e3`). ## Theory The three-body embedding DeepPot-SE descriptor incorporates bond-angle information, making the model more accurate. The descriptor $\mathcal{D}^i$ can be represented as + ```math \mathcal{D}^i = \frac{1}{N_c^2}(\mathcal{R}^i(\mathcal{R}^i)^T):\mathcal{G}^i, ``` + where $N_c$ is the expected maximum number of neighboring atoms, which is the same constant for all atoms over all frames. $\mathcal{R}^i$ is constructed as @@ -20,6 +26,7 @@ $\mathcal{R}^i$ is constructed as \end{array} \}, ``` + Currently, only the full information case of $\mathcal{R}^i$ is supported by the three-body embedding. 
Each element of $\mathcal{G}^i \in \mathbb{R}^{N_c \times N_c \times M}$ comes from $M$ nodes from the output layer of an NN $\mathcal{N}_{e,3}$ function: @@ -30,16 +37,18 @@ Each element of $\mathcal{G}^i \in \mathbb{R}^{N_c \times N_c \times M}$ comes f where $(\theta_i)_ {jk} = (\mathcal{R}^i)_ {j,\\{2,3,4\\}}\cdot (\mathcal{R}^i)_ {k,\\{2,3,4\\}}$ considers the angle form of two neighbours ($j$ and $k$). The notation $:$ in the equation indicates the contraction between matrix $\mathcal{R}^i(\mathcal{R}^i)^T$ and the first two dimensions of tensor $\mathcal{G}^i$.[^1] -[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). +[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. 
York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). ## Instructions A complete training input script of this example can be found in the directory + ```bash $deepmd_source_dir/examples/water/se_e3/input.json ``` The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.md). The only difference lies in the `descriptor ` section + ```json "descriptor": { "type": "se_e3", @@ -52,4 +61,5 @@ The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.m "_comment": " that's all" }, ``` + The type of the descriptor is set by the key {ref}`type `. diff --git a/doc/nvnmd/nvnmd.md b/doc/nvnmd/nvnmd.md index c11fee0bc9..67cfb5e22d 100644 --- a/doc/nvnmd/nvnmd.md +++ b/doc/nvnmd/nvnmd.md @@ -1,4 +1,8 @@ -# Introduction +# Introduction {{ tensorflow_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }} +::: NVNMD stands for non-von Neumann molecular dynamics. @@ -29,7 +33,6 @@ where `$dataset` is the path to the data set and `$workspace` is the path to the Create and go to the training directory. 
- ```bash mkdir train cd train @@ -46,10 +49,10 @@ The structure of the input script is as follows ```json { - "nvnmd" : {}, - "learning_rate" : {}, - "loss" : {}, - "training": {} + "nvnmd": {}, + "learning_rate": {}, + "loss": {}, + "training": {} } ``` @@ -59,29 +62,30 @@ The "nvnmd" section is defined as ```json { - "version": 0, - "max_nnei":128, - "net_size":128, - "sel":[60, 60], - "rcut":6.0, - "rcut_smth":0.5, - "type_map": ["Ge", "Te"] + "version": 0, + "max_nnei": 128, + "net_size": 128, + "sel": [60, 60], + "rcut": 6.0, + "rcut_smth": 0.5, + "type_map": ["Ge", "Te"] } ``` where items are defined as: -| Item | Mean | Optional Value | -| --------- | --------------------------- | --------------------------------------------- | -| version | the version of network structure | 0 or 1 | -| max_nnei | the maximum number of neighbors that do not distinguish element types | 128 or 256 | -| net_size | the size of nueral network | 128 | -| sel | the number of neighbors | version 0: integer list of lengths 1 to 4 are acceptable; version 1: integer | -| rcut | the cutoff radial | (0, 8.0] | -| rcut_smth | the smooth cutoff parameter | (0, 8.0] | -| type_map | mapping atom type to the name (str) of the type | string list, optional | +| Item | Mean | Optional Value | +| --------- | --------------------------------------------------------------------- | ---------------------------------------------------------------------------- | +| version | the version of network structure | 0 or 1 | +| max_nnei | the maximum number of neighbors that do not distinguish element types | 128 or 256 | +| net_size | the size of neural network | 128 | +| sel | the number of neighbors | version 0: integer list of lengths 1 to 4 are acceptable; version 1: integer | +| rcut | the cutoff radius | (0, 8.0] | +| rcut_smth | the smooth cutoff parameter | (0, 8.0] | +| type_map | mapping atom type to the name (str) of the type | string list, optional | Multiple versions of the nvnmd model 
correspond to different network structures. `nvnmd-v0` and `nvnmd-v1` differ in the following ways: + 1. `nvnmd-v0` and `nvnmd-v1` use the `se_a` descriptor and `se_atten` descriptor, respectively 2. `nvnmd-v0` has 1 set of parameters for each element and supports up to 4 element types. `nvnmd-v1` shares 1 set of parameters for each element and supports up to 31 types. 3. `nvnmd-v0` distinguishes between neighboring atoms, so `sel` is a list of integers. `nvnmd-v1` does not distinguish between neighboring atoms, so `sel` is an integer. @@ -92,20 +96,20 @@ The "learning_rate" section is defined as ```json { - "type":"exp", - "start_lr": 1e-3, - "stop_lr": 3e-8, - "decay_steps": 5000 + "type": "exp", + "start_lr": 1e-3, + "stop_lr": 3e-8, + "decay_steps": 5000 } ``` where items are defined as: -| Item | Mean | Optional Value | -| ----------- | ------------------------------------------------------------ | ---------------------- | -| type | learning rate variant type | exp | -| start_lr | the learning rate at the beginning of the training | a positive real number | -| stop_lr | the desired learning rate at the end of the training | a positive real number | +| Item | Mean | Optional Value | +| ----------- | ---------------------------------------------------------------- | ---------------------- | +| type | learning rate variant type | exp | +| start_lr | the learning rate at the beginning of the training | a positive real number | +| stop_lr | the desired learning rate at the end of the training | a positive real number | | decay_stops | the learning rate is decaying every {decay_stops} training steps | a positive integer | ### loss @@ -114,12 +118,12 @@ The "loss" section is defined as ```json { - "start_pref_e": 0.02, - "limit_pref_e": 2, - "start_pref_f": 1000, - "limit_pref_f": 1, - "start_pref_v": 0, - "limit_pref_v": 0 + "start_pref_e": 0.02, + "limit_pref_e": 2, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0 } ``` @@ 
-141,17 +145,17 @@ The "training" section is defined as ```json { "seed": 1, - "stop_batch": 1000000, - "numb_test": 1, - "disp_file": "lcurve.out", - "disp_freq": 1000, - "save_ckpt": "model.ckpt", - "save_freq": 10000, - "training_data":{ - "systems":["system1_path", "system2_path", "..."], - "set_prefix": "set", - "batch_size": ["batch_size_of_system1", "batch_size_of_system2", "..."] - } + "stop_batch": 1000000, + "numb_test": 1, + "disp_file": "lcurve.out", + "disp_freq": 1000, + "save_ckpt": "model.ckpt", + "save_freq": 10000, + "training_data": { + "systems": ["system1_path", "system2_path", "..."], + "set_prefix": "set", + "batch_size": ["batch_size_of_system1", "batch_size_of_system2", "..."] + } } ``` @@ -185,20 +189,19 @@ After the training process, you will get two folders: `nvnmd_cnn` and `nvnmd_qnn You can also restart the CNN training from the path prefix of checkpoint files (`nvnmd_cnn/model.ckpt`) by -``` bash +```bash dp train-nvnmd train_cnn.json -r nvnmd_cnn/model.ckpt -s s1 ``` You can also initialize the CNN model and train it by -``` bash +```bash mv nvnmd_cnn nvnmd_cnn_bck cp train_cnn.json train_cnn2.json # please edit train_cnn2.json dp train-nvnmd train_cnn2.json -s s1 -i nvnmd_cnn_bck/model.ckpt ``` - # Testing The frozen model can be used in many ways. The most straightforward testing can be invoked by @@ -211,6 +214,7 @@ dp test -m ./nvnmd_qnn/frozen_model.pb -s path/to/system -d ./test/detail -n 999 where the frozen model file to import is given via the `-m` command line flag, the path to the testing data set is given via the `-s` command line flag, and the file containing details of energy, forces and virials accuracy is given via the `-d` command line flag, the amount of data for testing is given via the `-n` command line flag. # Running MD in Bohrium + After CNN and QNN training, you can upload the ML model to our online NVNMD system and run MD there through Bohrium (https://bohrium.dp.tech). 
Bohrium is a research platfrom designed for AI for Science Era. For more information, please refer to [Bohrium Introduction](https://bohrium-doc.dp.tech/en/docs/WhatIsBohrium/). ## Registration @@ -247,30 +251,30 @@ Then you need prepare the configuration file `job.json`, the configuration file ```json { - "job_name": "test", - "command": "/usr/bin/lmp_mpi < in.lmp;", - "log_file": "OUTCAR", - "machine_type": "c4_m16_cpu", - "job_type": "container", - "image_name": "lammps_dp:29Sep2021", - "platform": "hnugba", - "region": "default", - "project_id": 0000 + "job_name": "test", + "command": "/usr/bin/lmp_mpi < in.lmp;", + "log_file": "OUTCAR", + "machine_type": "c4_m16_cpu", + "job_type": "container", + "image_name": "lammps_dp:29Sep2021", + "platform": "hnugba", + "region": "default", + "project_id": 0000 } ``` where items are defined as: -| Item | Mean | Optional Value | -| ------------ | -------------------------------------------------------------------------------------------------------------------------- | -------------- | -| job_name | the name of computing job, which can be named freely | a string | -| command | the command to be executed on the computing node | a string | -| log_file | the log file that can be viewed at any time during the calculation process, which can be viewed on the Bohrium "Jobs" page | a string | -| machine_type | the machine type used for the job | "c1_m4_cpu", "c4_m16_cpu", "c8_m32_cpu" | -| job_type | the job type | "container" | -| image_name | the image name used for the job | "lammps_dp:29Sep2021"| -| platform | resource provider | "hnugba" | -| project_id | the project ID to which the job belongs, which can be viewed on the "Projects" page | a integer | +| Item | Mean | Optional Value | +| ------------ | -------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- | +| job_name | the name of computing job, which can be named 
freely | a string | +| command | the command to be executed on the computing node | a string | +| log_file | the log file that can be viewed at any time during the calculation process, which can be viewed on the Bohrium "Jobs" page | a string | +| machine_type | the machine type used for the job | "c1_m4_cpu", "c4_m16_cpu", "c8_m32_cpu" | +| job_type | the job type | "container" | +| image_name | the image name used for the job | "lammps_dp:29Sep2021" | +| platform | resource provider | "hnugba" | +| project_id | the project ID to which the job belongs, which can be viewed on the "Projects" page | an integer | Notice:The task will use 4 CPU cores for computation, so do not repeatedly use the `mpirun` command, otherwise an error will be reported. All 0000 after "project_id" need to be replaced with your own project ID, which can be viewed on the "Projects" page. Also, the JSON file format requires that no commas be added after the last field within the {}, otherwise, there will be a syntax error. Please check the [documentation](https://github.com/LiuGroupHNU/md-data/blob/master/code/doc/mdpu/hardware.md) for the latest hardware configuration information. diff --git a/doc/sphinx_contrib_exhale_multiproject.py b/doc/sphinx_contrib_exhale_multiproject.py index e05cf88ba2..e26cc158a4 100644 --- a/doc/sphinx_contrib_exhale_multiproject.py +++ b/doc/sphinx_contrib_exhale_multiproject.py @@ -103,11 +103,11 @@ def exhale_environment_ready(app): app.config.exhale_args["containmentFolder"] = os.path.realpath( app.config.exhale_args["containmentFolder"] ) - print("=" * 75) - print(project) - print("-" * 50) - pprint(app.config.exhale_args) - print("=" * 75) + print("=" * 75) # noqa: T201 + print(project) # noqa: T201 + print("-" * 50) # noqa: T201 + pprint(app.config.exhale_args) # noqa: T203 + print("=" * 75) # noqa: T201 # First, setup the extension and verify all of the configurations.
exhale.configs.apply_sphinx_configurations(app) diff --git a/doc/test/index.md b/doc/test/index.md deleted file mode 100644 index 4a502123d9..0000000000 --- a/doc/test/index.md +++ /dev/null @@ -1,4 +0,0 @@ -# Test - -- [Test a model](test.md) -- [Calculate Model Deviation](model-deviation.md) diff --git a/doc/test/model-deviation.md b/doc/test/model-deviation.md index a59696c5ee..441d1aabc6 100644 --- a/doc/test/model-deviation.md +++ b/doc/test/model-deviation.md @@ -6,50 +6,61 @@ Model deviation $\epsilon_y$ is the standard deviation of properties $\boldsymbo The DeePMD-kit supports $\boldsymbol y$ to be the atomic force $\boldsymbol F_i$ and the virial tensor $\boldsymbol \Xi$. The model deviation is used to estimate the error of a model at a certain data frame, denoted by $\boldsymbol x$, containing the coordinates and chemical species of all atoms. We present the model deviation of the atomic force and the virial tensor + ```math \epsilon_{\boldsymbol{F},i} (\boldsymbol x)= \sqrt{\langle \lVert \boldsymbol F_i(\boldsymbol x; \boldsymbol \theta_k)-\langle \boldsymbol F_i(\boldsymbol x; \boldsymbol \theta_k) \rangle \rVert^2 \rangle}, ``` + ```math \epsilon_{\boldsymbol{\Xi},{\alpha \beta}} (\boldsymbol x)= \frac{1}{N} \sqrt{\langle ( {\Xi}_{\alpha \beta}(\boldsymbol x; \boldsymbol \theta_k)-\langle {\Xi}_{\alpha \beta}(\boldsymbol x; \boldsymbol \theta_k) \rangle )^2 \rangle}, ``` + where $\boldsymbol \theta_k$ is the parameters of the model $\mathcal M_k$, and the ensemble average $\langle\cdot\rangle$ is estimated by + ```math \langle \boldsymbol y(\boldsymbol x; \boldsymbol \theta_k) \rangle = \frac{1}{n_m} \sum_{k=1}^{n_m} \boldsymbol y(\boldsymbol x; \boldsymbol \theta_k). ``` + Small $\epsilon_{\boldsymbol{F},i}$ means the model has learned the given data; otherwise, it is not covered, and the training data needs to be expanded. 
If the magnitude of $\boldsymbol F_i$ or $\boldsymbol \Xi$ is quite large, a relative model deviation $\epsilon_{\boldsymbol{F},i,\text{rel}}$ or $\epsilon_{\boldsymbol{\Xi},\alpha\beta,\text{rel}}$ can be used instead of the absolute model deviation: + ```math \epsilon_{\boldsymbol{F},i,\text{rel}} (\boldsymbol x) = \frac{\lvert \epsilon_{\boldsymbol{F},i} (\boldsymbol x) \lvert} {\lvert \langle \boldsymbol F_i (\boldsymbol x; \boldsymbol \theta_k) \rangle \lvert + \nu}, ``` + ```math \epsilon_{\boldsymbol{\Xi},\alpha\beta,\text{rel}} (\boldsymbol x) = \frac{ \epsilon_{\boldsymbol{\Xi},\alpha\beta} (\boldsymbol x) } {\lvert \langle \boldsymbol \Xi (\boldsymbol x; \boldsymbol \theta_k) \rangle \lvert + \nu}, ``` + where $\nu$ is a small constant used to protect an atom where the magnitude of $\boldsymbol{F}_i$ or $\boldsymbol{\Xi}$ is small from having a large model deviation. Statistics of $\epsilon_{\boldsymbol{F},i}$ and $\epsilon_{\boldsymbol{\Xi},{\alpha \beta}}$ can be provided, including the maximum, average, and minimal model deviation over the atom index $i$ and over the component index $\alpha,\beta$, respectively. The maximum model deviation of forces $\epsilon_{\boldsymbol F,\text{max}}$ in a frame was found to be the best error indicator in a concurrent or active learning algorithm.[^1] -[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 
159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). +[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). ## Instructions One can also use a subcommand to calculate the deviation of predicted forces or virials for a bunch of models in the following way: + ```bash dp model-devi -m graph.000.pb graph.001.pb graph.002.pb graph.003.pb -s ./data -o model_devi.out ``` + where `-m` specifies graph files to be calculated, `-s` gives the data to be evaluated, `-o` the file to which model deviation results is dumped. Here is more information on this sub-command: + ```bash usage: dp model-devi [-h] [-v {DEBUG,3,INFO,2,WARNING,1,ERROR,0}] [-l LOG_PATH] [-m MODELS [MODELS ...]] [-s SYSTEM] diff --git a/doc/test/test.md b/doc/test/test.md index c206e8d777..251a12c7e2 100644 --- a/doc/test/test.md +++ b/doc/test/test.md @@ -1,14 +1,19 @@ # Test a model The frozen model can be used in many ways. The most straightforward test can be performed using `dp test`. 
A typical usage of `dp test` is + ```bash dp test -m graph.pb -s /path/to/system -n 30 ``` + where `-m` gives the tested model, `-s` the path to the tested system and `-n` the number of tested frames. Several other command line options can be passed to `dp test`, which can be checked with + ```bash $ dp test --help ``` + An explanation will be provided + ``` usage: dp test [-h] [-m MODEL] [-s SYSTEM] [-S SET_PREFIX] [-n NUMB_TEST] [-r RAND_SEED] [--shuffle-test] [-d DETAIL_FILE] diff --git a/doc/third-party/ase.md b/doc/third-party/ase.md index ac65fc926e..76371a3197 100644 --- a/doc/third-party/ase.md +++ b/doc/third-party/ase.md @@ -1,6 +1,7 @@ # Use deep potential with ASE Deep potential can be set up as a calculator with ASE to obtain potential energies and forces. + ```python from ase import Atoms from deepmd.calculator import DP @@ -16,6 +17,7 @@ print(water.get_forces()) ``` Optimization is also available: + ```python from ase.optimize import BFGS diff --git a/doc/third-party/dpdata.md b/doc/third-party/dpdata.md new file mode 100644 index 0000000000..05e0f6fb40 --- /dev/null +++ b/doc/third-party/dpdata.md @@ -0,0 +1,12 @@ +# Use deep potential with dpdata + +DeePMD-kit provides a driver for [dpdata](https://github.com/deepmodeling/dpdata) >=0.2.7 via the plugin mechanism, making it possible to call the `predict` method for `System` class: + +```py +import dpdata + +dsys = dpdata.LabeledSystem("OUTCAR") +dp_sys = dsys.predict("frozen_model_compressed.pb", driver="dp") +``` + +By inferring with the DP model `frozen_model_compressed.pb`, dpdata will generate a new labeled system `dp_sys` with inferred energies, forces, and virials. 
diff --git a/doc/third-party/gromacs.md b/doc/third-party/gromacs.md index 672fb693b9..c9779611e7 100644 --- a/doc/third-party/gromacs.md +++ b/doc/third-party/gromacs.md @@ -1,10 +1,15 @@ # Running MD with GROMACS + ## DP/MM Simulation + This part gives a simple tutorial on how to run a DP/MM simulation for methane in water, which means using DP for methane and TIP3P for water. All relevant files can be found in `examples/methane`. + ### Topology Preparation + Similar to QM/MM simulation, the internal interactions (including bond, angle, dihedrals, LJ, Columb) of the region described by a neural network potential (NNP) have to be **turned off**. In GROMACS, bonded interactions can be turned off by modifying `[ bonds ]`, `[ angles ]`, `[ dihedrals ]` and `[ pairs ]` sections. And LJ and Columb interactions must be turned off by `[ exclusions ]` section. For example, if one wants to simulate ethane in water, using DeepPotential for methane and TIP3P for water, the topology of methane should be like the following (as presented in `examples/methane/methane.itp`): + ``` [ atomtypes ] ;name btype mass charge ptype sigma epsilon @@ -38,7 +43,9 @@ For example, if one wants to simulate ethane in water, using DeepPotential for m 4 1 2 3 5 5 1 2 3 4 ``` + For comparison, the original topology file generated by `acpype` will be: + ``` ; methane_GMX.itp created by acpype (v: 2021-02-05T22:15:50CET) on Wed Sep 8 01:21:53 2021 @@ -75,45 +82,60 @@ For comparison, the original topology file generated by `acpype` will be: 3 1 5 1 1.0758e+02 3.2635e+02 ; H2 - C1 - H4 4 1 5 1 1.0758e+02 3.2635e+02 ; H3 - C1 - H4 ``` + ### DeepMD Settings + Before running simulations, we need to tell GROMACS to use DeepPotential by setting the environment variable `GMX_DEEPMD_INPUT_JSON`: + ```bash export GMX_DEEPMD_INPUT_JSON=input.json ``` + Then, in your working directories, we have to write `input.json` file: + ```json { - "graph_file": "/path/to/graph.pb", - "type_file": "type.raw", - 
"index_file": "index.raw", - "lambda": 1.0, - "pbc": false + "graph_file": "/path/to/graph.pb", + "type_file": "type.raw", + "index_file": "index.raw", + "lambda": 1.0, + "pbc": false } ``` + Here is an explanation for these settings: -+ `graph_file` : The graph file (with suffix .pb) generated by `dp freeze` command -+ `type_file` : File to specify DP atom types (in space-separated format). Here, `type.raw` looks like + +- `graph_file` : The graph file (with suffix .pb) generated by `dp freeze` command +- `type_file` : File to specify DP atom types (in space-separated format). Here, `type.raw` looks like + ``` 1 0 0 0 0 ``` -+ `index_file` : File containing indices of DP atoms (in space-separated format), which should be consistent with the indices' order in .gro file but **starting from zero**. Here, `index.raw` looks like + +- `index_file` : File containing indices of DP atoms (in space-separated format), which should be consistent with the indices' order in .gro file but **starting from zero**. Here, `index.raw` looks like + ``` 0 1 2 3 4 ``` -+ `lambda`: Optional, default 1.0. Used in alchemical calculations. -+ `pbc`: Optional, default true. If true, the GROMACS periodic condition is passed to DeepMD. + +- `lambda`: Optional, default 1.0. Used in alchemical calculations. +- `pbc`: Optional, default true. If true, the GROMACS periodic condition is passed to DeepMD. ### Run Simulation + Finally, you can run GROMACS using `gmx mdrun` as usual. ## All-atom DP Simulation + This part gives an example of how to simulate all atoms described by a DeepPotential with Gromacs, taking water as an example. Instead of using `[ exclusions ]` to turn off the non-bonded energies, we can simply do this by setting LJ parameters (i.e. 
epsilon and sigma) and partial charges to 0, as shown in `examples/water/gmx/water.top`: + ``` [ atomtypes ] ; name at.num mass charge ptype sigma epsilon HW 1 1.008 0.0000 A 0.00000e+00 0.00000e+00 OW 8 16.00 0.0000 A 0.00000e+00 0.00000e+00 ``` + As mentioned in the above section, `input.json` and relevant files (`index.raw`, `type.raw`) should also be created. Then, we can start the simulation under the NVT ensemble and plot the radial distribution function (RDF) by `gmx rdf` command. We can see that the RDF given by Gromacs+DP matches perfectly with Lammps+DP, which further provides an evidence on the validity of our simulation. ![rdf](../../examples/water/gmx/rdf.png) diff --git a/doc/third-party/index.md b/doc/third-party/index.md deleted file mode 100644 index 235337974c..0000000000 --- a/doc/third-party/index.md +++ /dev/null @@ -1,9 +0,0 @@ -# Integrate with third-party packages - -Note that the model for inference is required to be compatible with the DeePMD-kit package. See [Model compatibility](../troubleshooting/model-compatability.html) for details. - -- [Use deep potential with ASE](ase.md) -- [Run MD with LAMMPS](lammps-command.md) -- [Run path-integral MD with i-PI](ipi.md) -- [Run MD with GROMACS](gromacs.md) -- [Interfaces out of DeePMD-kit](out-of-deepmd-kit.md) diff --git a/doc/third-party/index.rst b/doc/third-party/index.rst index f88a477fc7..cd0726a4bb 100644 --- a/doc/third-party/index.rst +++ b/doc/third-party/index.rst @@ -6,6 +6,7 @@ Note that the model for inference is required to be compatible with the DeePMD-k .. toctree:: :maxdepth: 1 + dpdata ase lammps-command ipi diff --git a/doc/third-party/ipi.md b/doc/third-party/ipi.md index 59decdf3bb..84a972d885 100644 --- a/doc/third-party/ipi.md +++ b/doc/third-party/ipi.md @@ -1,30 +1,36 @@ # Run path-integral MD with i-PI + The i-PI works in a client-server model. 
The i-PI provides the server for integrating the replica positions of atoms, while the DeePMD-kit provides a client named `dp_ipi` that computes the interactions (including energy, forces and virials). The server and client communicate via the Unix domain socket or the Internet socket. Installation instructions for i-PI can be found [here](../install/install-ipi.md). The client can be started by + ```bash i-pi input.xml & dp_ipi water.json ``` + It is noted that multiple instances of the client allow for computing, in parallel, the interactions of multiple replicas of the path-integral MD. `water.json` is the parameter file for the client `dp_ipi`, and an example is provided: + ```json { - "verbose": false, - "use_unix": true, - "port": 31415, - "host": "localhost", - "graph_file": "graph.pb", - "coord_file": "conf.xyz", - "atom_type" : { - "OW": 0, - "HW1": 1, - "HW2": 1 - } + "verbose": false, + "use_unix": true, + "port": 31415, + "host": "localhost", + "graph_file": "graph.pb", + "coord_file": "conf.xyz", + "atom_type": { + "OW": 0, + "HW1": 1, + "HW2": 1 + } } ``` + The option **`use_unix`** is set to `true` to activate the Unix domain socket, otherwise, the Internet socket is used. The option **`port`** should be the same as that in input.xml: + ```xml 31415 ``` diff --git a/doc/third-party/lammps-command.md b/doc/third-party/lammps-command.md index 150d755795..63f9d8e3bd 100644 --- a/doc/third-party/lammps-command.md +++ b/doc/third-party/lammps-command.md @@ -1,6 +1,7 @@ # Run MD with LAMMPS ## units + All units in LAMMPS except `lj` are supported. `lj` is not supported. The most commonly used units are `metal`, since the internal units of distance, energy, force, and charge in DeePMD-kit are `\AA`, `eV`, `eV / \AA`, and `proton charge`, respectively. These units are consistent with the `metal` units in LAMMPS. @@ -34,11 +35,12 @@ The DeePMD-kit package provides the pair_style `deepmd` ```lammps pair_style deepmd models ... keyword value ... 
``` + - deepmd = style of this pair_style - models = frozen model(s) to compute the interaction. -If multiple models are provided, then only the first model serves to provide energy and force prediction for each timestep of molecular dynamics, -and the model deviation will be computed among all models every `out_freq` timesteps. -- keyword = *out_file* or *out_freq* or *fparam* or *fparam_from_compute* or *aparam_from_compute* or *atomic* or *relative* or *relative_v* or *aparam* or *ttm* + If multiple models are provided, then only the first model serves to provide energy and force prediction for each timestep of molecular dynamics, + and the model deviation will be computed among all models every `out_freq` timesteps. +- keyword = _out_file_ or _out_freq_ or _fparam_ or _fparam_from_compute_ or _aparam_from_compute_ or _atomic_ or _relative_ or _relative_v_ or _aparam_ or _ttm_
     out_file value = filename
         filename = The file name for the model deviation output. Default is model_devi.out
@@ -63,6 +65,7 @@ and the model deviation will be computed among all models every `out_freq` times
 
### Examples + ```lammps pair_style deepmd graph.pb pair_style deepmd graph.pb fparam 1.2 @@ -77,6 +80,7 @@ compute 1 all ke/atom ``` ### Description + Evaluate the interaction of the system by using [Deep Potential][DP] or [Deep Potential Smooth Edition][DP-SE]. It is noticed that deep potential is not a "pairwise" interaction, but a multi-body interaction. This pair style takes the deep potential defined in a model file that usually has the .pb extension. The model can be trained and frozen by package [DeePMD-kit](https://github.com/deepmodeling/deepmd-kit), which can have either double or single float precision interface. @@ -107,8 +111,8 @@ If the training parameter {ref}`type_map ` is not set, atom name Spin is specified by keywords `virtual_len` and `spin_norm`. If the keyword `virtual_len` is set, the distance between virtual atom and its corresponding real atom for each type of magnetic atoms will be fed to the model as the spin parameters. If the keyword `spin_norm` is set, the magnitude of the magnetic moment for each type of magnetic atoms will be fed to the model as the spin parameters. ### Restrictions -- The `deepmd` pair style is provided in the USER-DEEPMD package, which is compiled from the DeePMD-kit, visit the [DeePMD-kit website](https://github.com/deepmodeling/deepmd-kit) for more information. +- The `deepmd` pair style is provided in the USER-DEEPMD package, which is compiled from the DeePMD-kit, visit the [DeePMD-kit website](https://github.com/deepmodeling/deepmd-kit) for more information. 
## Compute tensorial properties @@ -117,6 +121,7 @@ The DeePMD-kit package provides the compute `deeptensor/atom` for computing atom ```lammps compute ID group-ID deeptensor/atom model_file ``` + - ID: user-assigned name of the computation - group-ID: ID of the group of atoms to compute - deeptensor/atom: the style of this compute @@ -125,27 +130,33 @@ compute ID group-ID deeptensor/atom model_file At this time, the training parameter {ref}`type_map ` will be mapped to LAMMPS atom types. ### Examples + ```lammps compute dipole all deeptensor/atom dipole.pb ``` + The result of the compute can be dumped to trajectory file by + ```lammps dump 1 all custom 100 water.dump id type c_dipole[1] c_dipole[2] c_dipole[3] ``` ### Restrictions + - The `deeptensor/atom` compute is provided in the USER-DEEPMD package, which is compiled from the DeePMD-kit, visit the [DeePMD-kit website](https://github.com/deepmodeling/deepmd-kit) for more information. - For the issue of using a unit style for `compute deeptensor/atom`, refer to the discussions in [units](#units) of this page. - ## Long-range interaction + The reciprocal space part of the long-range interaction can be calculated by LAMMPS command `kspace_style`. To use it with DeePMD-kit, one writes + ```lammps pair_style deepmd graph.pb pair_coeff * * kspace_style pppm 1.0e-5 kspace_modify gewald 0.45 ``` + Please notice that the DeePMD does nothing to the direct space part of the electrostatic interaction, because this part is assumed to be fitted in the DeePMD model (the direct space cut-off is thus the cut-off of the DeePMD model). The splitting parameter `gewald` is modified by the `kspace_modify` command. 
## Use of the centroid/stress/atom to get the full 3x3 "atomic-virial" @@ -157,9 +168,11 @@ $$dvatom=-\sum_{m}( \mathbf{r}_n- \mathbf{r}_m) \frac{de_m}{d\mathbf{r}_n}$$ Where $\mathbf{r}_n$ is the atomic position of nth atom, $\mathbf{v}_n$ velocity of the atom and $\frac{de_m}{d\mathbf{r}_n}$ the derivative of the atomic energy. In LAMMPS one can get the per-atom stress using the command `centroid/stress/atom`: + ```lammps compute ID group-ID centroid/stress/atom NULL virial ``` + see [LAMMPS doc page](https://docs.lammps.org/compute_stress_atom.html#thompson2) for more details on the meaning of the keywords. :::{versionchanged} v2.2.3 @@ -167,20 +180,25 @@ v2.2.2 or previous versions passed per-atom stress (`cvatom`) with the per-atom ::: ### Examples + In order of computing the 9-component per-atom stress + ```lammps compute stress all centroid/stress/atom NULL virial ``` + Thus `c_stress` is an array with 9 components in the order `xx,yy,zz,xy,xz,yz,yx,zx,zy`. If you use this feature please cite [D. Tisi, L. Zhang, R. Bertossa, H. Wang, R. Car, S. Baroni - arXiv preprint arXiv:2108.10850, 2021](https://arxiv.org/abs/2108.10850) ## Computation of heat flux + Using a per-atom stress tensor one can, for example, compute the heat flux defined as: $$\mathbf J = \sum_n e_n \mathbf v_n + \sum_{n,m} ( \mathbf r_m- \mathbf r_n) \frac{de_m}{d\mathbf r_n} \mathbf v_n$$ to compute the heat flux with LAMMPS: + ```lammps compute ke_ID all ke/atom compute pe_ID all pe/atom @@ -196,10 +214,10 @@ compute pe all pe/atom compute stress all centroid/stress/atom NULL virial compute flux all heat/flux ke pe stress ``` + `c_flux` is a global vector of length 6. The first three components are the $x$, $y$ and $z$ components of the full heat flux vector. The others are the components of the so-called convective portion, see [LAMMPS doc page](https://docs.lammps.org/compute_heat_flux.html) for more detailes. If you use these features please cite [D. Tisi, L. Zhang, R. Bertossa, H. 
Wang, R. Car, S. Baroni - arXiv preprint arXiv:2108.10850, 2021](https://arxiv.org/abs/2108.10850) - -[DP]:https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.120.143001 -[DP-SE]:https://dl.acm.org/doi/10.5555/3327345.3327356 +[DP]: https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.120.143001 +[DP-SE]: https://dl.acm.org/doi/10.5555/3327345.3327356 diff --git a/doc/third-party/out-of-deepmd-kit.md b/doc/third-party/out-of-deepmd-kit.md index 71dc9adb23..3eb722c040 100644 --- a/doc/third-party/out-of-deepmd-kit.md +++ b/doc/third-party/out-of-deepmd-kit.md @@ -2,19 +2,6 @@ The codes of the following interfaces are not a part of the DeePMD-kit package and maintained by other repositories. We list these interfaces here for user convenience. -## dpdata - -[dpdata](https://github.com/deepmodeling/dpdata) provides the `predict` method for `System` class: - -```py -import dpdata - -dsys = dpdata.LabeledSystem("OUTCAR") -dp_sys = dsys.predict("frozen_model_compressed.pb") -``` - -By inferring with the DP model `frozen_model_compressed.pb`, dpdata will generate a new labeled system `dp_sys` with inferred energies, forces, and virials. - ## OpenMM plugin for DeePMD-kit An [OpenMM](https://github.com/openmm/openmm) plugin is provided from [JingHuangLab/openmm_deepmd_plugin](https://github.com/JingHuangLab/openmm_deepmd_plugin), written by the [Huang Lab](http://www.compbiophysics.org/) at Westlake University. @@ -24,6 +11,7 @@ An [OpenMM](https://github.com/openmm/openmm) plugin is provided from [JingHuang Starting from [AmberTools24](https://ambermd.org/), `sander` includes an interface to the DeePMD-kit, which implements the [Deep Potential Range Corrected (DPRc) correction](../model/dprc.md). The DPRc model and the interface were developed by the [York Lab](https://theory.rutgers.edu/) from Rutgers University. 
More details are available in + - [Amber Reference Manuals](https://ambermd.org/Manuals.php), providing documentation for how to enable the interface and the `&dprc` namelist; - [GitLab RutgersLBSR/AmberDPRc](https://gitlab.com/RutgersLBSR/AmberDPRc/), providing examples mdin files; - [DP-Amber](https://github.com/njzjz/dpamber/), a tiny tool to convert Amber trajectory to DPRc training data; diff --git a/doc/train-input-auto.rst b/doc/train-input-auto.rst deleted file mode 100644 index a3b69eade9..0000000000 --- a/doc/train-input-auto.rst +++ /dev/null @@ -1,1502 +0,0 @@ -.. _`model`: - -model: - | type: ``dict`` - | argument path: ``model`` - - .. _`model/type_map`: - - type_map: - | type: ``list``, optional - | argument path: ``model/type_map`` - - A list of strings. Give the name to each type of atoms. It is noted that the number of atom type of training system must be less than 128 in a GPU environment. - - .. _`model/data_stat_nbatch`: - - data_stat_nbatch: - | type: ``int``, optional, default: ``10`` - | argument path: ``model/data_stat_nbatch`` - - The model determines the normalization from the statistics of the data. This key specifies the number of `frames` in each `system` used for statistics. - - .. _`model/data_stat_protect`: - - data_stat_protect: - | type: ``float``, optional, default: ``0.01`` - | argument path: ``model/data_stat_protect`` - - Protect parameter for atomic energy regression. - - .. _`model/use_srtab`: - - use_srtab: - | type: ``str``, optional - | argument path: ``model/use_srtab`` - - The table for the short-range pairwise interaction added on top of DP. The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. The first colume is the distance between atoms. The second to the last columes are energies for pairs of certain types. For example we have two atom types, 0 and 1. The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly. - - .. 
_`model/smin_alpha`: - - smin_alpha: - | type: ``float``, optional - | argument path: ``model/smin_alpha`` - - The short-range tabulated interaction will be swithed according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided. - - .. _`model/sw_rmin`: - - sw_rmin: - | type: ``float``, optional - | argument path: ``model/sw_rmin`` - - The lower boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided. - - .. _`model/sw_rmax`: - - sw_rmax: - | type: ``float``, optional - | argument path: ``model/sw_rmax`` - - The upper boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided. - - .. _`model/type_embedding`: - - type_embedding: - | type: ``dict``, optional - | argument path: ``model/type_embedding`` - - The type embedding. - - .. _`model/type_embedding/neuron`: - - neuron: - | type: ``list``, optional, default: ``[2, 4, 8]`` - | argument path: ``model/type_embedding/neuron`` - - Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built. - - .. _`model/type_embedding/activation_function`: - - activation_function: - | type: ``str``, optional, default: ``tanh`` - | argument path: ``model/type_embedding/activation_function`` - - The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu". - - .. _`model/type_embedding/resnet_dt`: - - resnet_dt: - | type: ``bool``, optional, default: ``False`` - | argument path: ``model/type_embedding/resnet_dt`` - - Whether to use a "Timestep" in the skip connection - - .. 
_`model/type_embedding/precision`: - - precision: - | type: ``str``, optional, default: ``float64`` - | argument path: ``model/type_embedding/precision`` - - The precision of the embedding net parameters, supported options are "default", "float16", "float32", "float64". - - .. _`model/type_embedding/trainable`: - - trainable: - | type: ``bool``, optional, default: ``True`` - | argument path: ``model/type_embedding/trainable`` - - If the parameters in the embedding net are trainable - - .. _`model/type_embedding/seed`: - - seed: - | type: ``int`` | ``NoneType``, optional - | argument path: ``model/type_embedding/seed`` - - Random seed for parameter initialization - - .. _`model/descriptor`: - - descriptor: - | type: ``dict`` - | argument path: ``model/descriptor`` - - The descriptor of atomic environment. - - - Depending on the value of *type*, different sub args are accepted. - - .. _`model/descriptor/type`: - - type: - | type: ``str`` (flag key) - | argument path: ``model/descriptor/type`` - | possible choices: |code:model/descriptor[loc_frame]|_, |code:model/descriptor[se_e2_a]|_, |code:model/descriptor[se_e2_r]|_, |code:model/descriptor[se_e3]|_, |code:model/descriptor[se_a_tpe]|_, |code:model/descriptor[hybrid]|_ - - The type of the descritpor. See explanation below. - - - `loc_frame`: Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame. - - - `se_e2_a`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. - - - `se_e2_r`: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor. - - - `se_e3`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor. - - - `se_a_tpe`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. 
Type embedding will be used by this descriptor. - - - `hybrid`: Concatenate of a list of descriptors as a new descriptor. - - .. |code:model/descriptor[loc_frame]| replace:: ``loc_frame`` - .. _`code:model/descriptor[loc_frame]`: `model/descriptor[loc_frame]`_ - .. |code:model/descriptor[se_e2_a]| replace:: ``se_e2_a`` - .. _`code:model/descriptor[se_e2_a]`: `model/descriptor[se_e2_a]`_ - .. |code:model/descriptor[se_e2_r]| replace:: ``se_e2_r`` - .. _`code:model/descriptor[se_e2_r]`: `model/descriptor[se_e2_r]`_ - .. |code:model/descriptor[se_e3]| replace:: ``se_e3`` - .. _`code:model/descriptor[se_e3]`: `model/descriptor[se_e3]`_ - .. |code:model/descriptor[se_a_tpe]| replace:: ``se_a_tpe`` - .. _`code:model/descriptor[se_a_tpe]`: `model/descriptor[se_a_tpe]`_ - .. |code:model/descriptor[hybrid]| replace:: ``hybrid`` - .. _`code:model/descriptor[hybrid]`: `model/descriptor[hybrid]`_ - - .. |flag:model/descriptor/type| replace:: *type* - .. _`flag:model/descriptor/type`: `model/descriptor/type`_ - - - .. _`model/descriptor[loc_frame]`: - - When |flag:model/descriptor/type|_ is set to ``loc_frame``: - - .. _`model/descriptor[loc_frame]/sel_a`: - - sel_a: - | type: ``list`` - | argument path: ``model/descriptor[loc_frame]/sel_a`` - - A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_a[i]` gives the selected number of type-i neighbors. The full relative coordinates of the neighbors are used by the descriptor. - - .. _`model/descriptor[loc_frame]/sel_r`: - - sel_r: - | type: ``list`` - | argument path: ``model/descriptor[loc_frame]/sel_r`` - - A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_r[i]` gives the selected number of type-i neighbors. Only relative distance of the neighbors are used by the descriptor. sel_a[i] + sel_r[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. - - .. 
_`model/descriptor[loc_frame]/rcut`: - - rcut: - | type: ``float``, optional, default: ``6.0`` - | argument path: ``model/descriptor[loc_frame]/rcut`` - - The cut-off radius. The default value is 6.0 - - .. _`model/descriptor[loc_frame]/axis_rule`: - - axis_rule: - | type: ``list`` - | argument path: ``model/descriptor[loc_frame]/axis_rule`` - - A list of integers. The length should be 6 times of the number of types. - - - axis_rule[i*6+0]: class of the atom defining the first axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance. - - - axis_rule[i*6+1]: type of the atom defining the first axis of type-i atom. - - - axis_rule[i*6+2]: index of the axis atom defining the first axis. Note that the neighbors with the same class and type are sorted according to their relative distance. - - - axis_rule[i*6+3]: class of the atom defining the first axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance. - - - axis_rule[i*6+4]: type of the atom defining the second axis of type-i atom. - - - axis_rule[i*6+5]: class of the atom defining the second axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance. - - - .. _`model/descriptor[se_e2_a]`: - - When |flag:model/descriptor/type|_ is set to ``se_e2_a`` (or its alias ``se_a``): - - .. _`model/descriptor[se_e2_a]/sel`: - - sel: - | type: ``list`` | ``str``, optional, default: ``auto`` - | argument path: ``model/descriptor[se_e2_a]/sel`` - - This parameter set the number of selected neighbors for each type of atom. It can be: - - - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment. 
- - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1". - - .. _`model/descriptor[se_e2_a]/rcut`: - - rcut: - | type: ``float``, optional, default: ``6.0`` - | argument path: ``model/descriptor[se_e2_a]/rcut`` - - The cut-off radius. - - .. _`model/descriptor[se_e2_a]/rcut_smth`: - - rcut_smth: - | type: ``float``, optional, default: ``0.5`` - | argument path: ``model/descriptor[se_e2_a]/rcut_smth`` - - Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth` - - .. _`model/descriptor[se_e2_a]/neuron`: - - neuron: - | type: ``list``, optional, default: ``[10, 20, 40]`` - | argument path: ``model/descriptor[se_e2_a]/neuron`` - - Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built. - - .. _`model/descriptor[se_e2_a]/axis_neuron`: - - axis_neuron: - | type: ``int``, optional, default: ``4``, alias: *n_axis_neuron* - | argument path: ``model/descriptor[se_e2_a]/axis_neuron`` - - Size of the submatrix of G (embedding matrix). - - .. _`model/descriptor[se_e2_a]/activation_function`: - - activation_function: - | type: ``str``, optional, default: ``tanh`` - | argument path: ``model/descriptor[se_e2_a]/activation_function`` - - The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu". - - .. _`model/descriptor[se_e2_a]/resnet_dt`: - - resnet_dt: - | type: ``bool``, optional, default: ``False`` - | argument path: ``model/descriptor[se_e2_a]/resnet_dt`` - - Whether to use a "Timestep" in the skip connection - - .. 
_`model/descriptor[se_e2_a]/type_one_side`: - - type_one_side: - | type: ``bool``, optional, default: ``False`` - | argument path: ``model/descriptor[se_e2_a]/type_one_side`` - - Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets - - .. _`model/descriptor[se_e2_a]/precision`: - - precision: - | type: ``str``, optional, default: ``float64`` - | argument path: ``model/descriptor[se_e2_a]/precision`` - - The precision of the embedding net parameters, supported options are "default", "float16", "float32", "float64". - - .. _`model/descriptor[se_e2_a]/trainable`: - - trainable: - | type: ``bool``, optional, default: ``True`` - | argument path: ``model/descriptor[se_e2_a]/trainable`` - - If the parameters in the embedding net is trainable - - .. _`model/descriptor[se_e2_a]/seed`: - - seed: - | type: ``int`` | ``NoneType``, optional - | argument path: ``model/descriptor[se_e2_a]/seed`` - - Random seed for parameter initialization - - .. _`model/descriptor[se_e2_a]/exclude_types`: - - exclude_types: - | type: ``list``, optional, default: ``[]`` - | argument path: ``model/descriptor[se_e2_a]/exclude_types`` - - The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. - - .. _`model/descriptor[se_e2_a]/set_davg_zero`: - - set_davg_zero: - | type: ``bool``, optional, default: ``False`` - | argument path: ``model/descriptor[se_e2_a]/set_davg_zero`` - - Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used - - - .. _`model/descriptor[se_e2_r]`: - - When |flag:model/descriptor/type|_ is set to ``se_e2_r`` (or its alias ``se_r``): - - .. _`model/descriptor[se_e2_r]/sel`: - - sel: - | type: ``list`` | ``str``, optional, default: ``auto`` - | argument path: ``model/descriptor[se_e2_r]/sel`` - - This parameter set the number of selected neighbors for each type of atom. It can be: - - - `List[int]`. 
The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment. - - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1". - - .. _`model/descriptor[se_e2_r]/rcut`: - - rcut: - | type: ``float``, optional, default: ``6.0`` - | argument path: ``model/descriptor[se_e2_r]/rcut`` - - The cut-off radius. - - .. _`model/descriptor[se_e2_r]/rcut_smth`: - - rcut_smth: - | type: ``float``, optional, default: ``0.5`` - | argument path: ``model/descriptor[se_e2_r]/rcut_smth`` - - Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth` - - .. _`model/descriptor[se_e2_r]/neuron`: - - neuron: - | type: ``list``, optional, default: ``[10, 20, 40]`` - | argument path: ``model/descriptor[se_e2_r]/neuron`` - - Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built. - - .. _`model/descriptor[se_e2_r]/activation_function`: - - activation_function: - | type: ``str``, optional, default: ``tanh`` - | argument path: ``model/descriptor[se_e2_r]/activation_function`` - - The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu". - - .. 
_`model/descriptor[se_e2_r]/resnet_dt`: - - resnet_dt: - | type: ``bool``, optional, default: ``False`` - | argument path: ``model/descriptor[se_e2_r]/resnet_dt`` - - Whether to use a "Timestep" in the skip connection - - .. _`model/descriptor[se_e2_r]/type_one_side`: - - type_one_side: - | type: ``bool``, optional, default: ``False`` - | argument path: ``model/descriptor[se_e2_r]/type_one_side`` - - Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets - - .. _`model/descriptor[se_e2_r]/precision`: - - precision: - | type: ``str``, optional, default: ``float64`` - | argument path: ``model/descriptor[se_e2_r]/precision`` - - The precision of the embedding net parameters, supported options are "default", "float16", "float32", "float64". - - .. _`model/descriptor[se_e2_r]/trainable`: - - trainable: - | type: ``bool``, optional, default: ``True`` - | argument path: ``model/descriptor[se_e2_r]/trainable`` - - If the parameters in the embedding net are trainable - - .. _`model/descriptor[se_e2_r]/seed`: - - seed: - | type: ``int`` | ``NoneType``, optional - | argument path: ``model/descriptor[se_e2_r]/seed`` - - Random seed for parameter initialization - - .. _`model/descriptor[se_e2_r]/exclude_types`: - - exclude_types: - | type: ``list``, optional, default: ``[]`` - | argument path: ``model/descriptor[se_e2_r]/exclude_types`` - - The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. - - .. _`model/descriptor[se_e2_r]/set_davg_zero`: - - set_davg_zero: - | type: ``bool``, optional, default: ``False`` - | argument path: ``model/descriptor[se_e2_r]/set_davg_zero`` - - Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used - - - .. _`model/descriptor[se_e3]`: - - When |flag:model/descriptor/type|_ is set to ``se_e3`` (or its aliases ``se_at``, ``se_a_3be``, ``se_t``): - - .. 
_`model/descriptor[se_e3]/sel`: - - sel: - | type: ``list`` | ``str``, optional, default: ``auto`` - | argument path: ``model/descriptor[se_e3]/sel`` - - This parameter set the number of selected neighbors for each type of atom. It can be: - - - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment. - - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1". - - .. _`model/descriptor[se_e3]/rcut`: - - rcut: - | type: ``float``, optional, default: ``6.0`` - | argument path: ``model/descriptor[se_e3]/rcut`` - - The cut-off radius. - - .. _`model/descriptor[se_e3]/rcut_smth`: - - rcut_smth: - | type: ``float``, optional, default: ``0.5`` - | argument path: ``model/descriptor[se_e3]/rcut_smth`` - - Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth` - - .. _`model/descriptor[se_e3]/neuron`: - - neuron: - | type: ``list``, optional, default: ``[10, 20, 40]`` - | argument path: ``model/descriptor[se_e3]/neuron`` - - Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built. - - .. 
_`model/descriptor[se_e3]/activation_function`: - - activation_function: - | type: ``str``, optional, default: ``tanh`` - | argument path: ``model/descriptor[se_e3]/activation_function`` - - The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu". - - .. _`model/descriptor[se_e3]/resnet_dt`: - - resnet_dt: - | type: ``bool``, optional, default: ``False`` - | argument path: ``model/descriptor[se_e3]/resnet_dt`` - - Whether to use a "Timestep" in the skip connection - - .. _`model/descriptor[se_e3]/precision`: - - precision: - | type: ``str``, optional, default: ``float64`` - | argument path: ``model/descriptor[se_e3]/precision`` - - The precision of the embedding net parameters, supported options are "default", "float16", "float32", "float64". - - .. _`model/descriptor[se_e3]/trainable`: - - trainable: - | type: ``bool``, optional, default: ``True`` - | argument path: ``model/descriptor[se_e3]/trainable`` - - If the parameters in the embedding net are trainable - - .. _`model/descriptor[se_e3]/seed`: - - seed: - | type: ``int`` | ``NoneType``, optional - | argument path: ``model/descriptor[se_e3]/seed`` - - Random seed for parameter initialization - - .. _`model/descriptor[se_e3]/set_davg_zero`: - - set_davg_zero: - | type: ``bool``, optional, default: ``False`` - | argument path: ``model/descriptor[se_e3]/set_davg_zero`` - - Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used - - - .. _`model/descriptor[se_a_tpe]`: - - When |flag:model/descriptor/type|_ is set to ``se_a_tpe`` (or its alias ``se_a_ebd``): - - .. _`model/descriptor[se_a_tpe]/sel`: - - sel: - | type: ``list`` | ``str``, optional, default: ``auto`` - | argument path: ``model/descriptor[se_a_tpe]/sel`` - - This parameter set the number of selected neighbors for each type of atom. It can be: - - - `List[int]`. 
The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment. - - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1". - - .. _`model/descriptor[se_a_tpe]/rcut`: - - rcut: - | type: ``float``, optional, default: ``6.0`` - | argument path: ``model/descriptor[se_a_tpe]/rcut`` - - The cut-off radius. - - .. _`model/descriptor[se_a_tpe]/rcut_smth`: - - rcut_smth: - | type: ``float``, optional, default: ``0.5`` - | argument path: ``model/descriptor[se_a_tpe]/rcut_smth`` - - Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth` - - .. _`model/descriptor[se_a_tpe]/neuron`: - - neuron: - | type: ``list``, optional, default: ``[10, 20, 40]`` - | argument path: ``model/descriptor[se_a_tpe]/neuron`` - - Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built. - - .. _`model/descriptor[se_a_tpe]/axis_neuron`: - - axis_neuron: - | type: ``int``, optional, default: ``4``, alias: *n_axis_neuron* - | argument path: ``model/descriptor[se_a_tpe]/axis_neuron`` - - Size of the submatrix of G (embedding matrix). - - .. 
_`model/descriptor[se_a_tpe]/activation_function`: - - activation_function: - | type: ``str``, optional, default: ``tanh`` - | argument path: ``model/descriptor[se_a_tpe]/activation_function`` - - The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu". - - .. _`model/descriptor[se_a_tpe]/resnet_dt`: - - resnet_dt: - | type: ``bool``, optional, default: ``False`` - | argument path: ``model/descriptor[se_a_tpe]/resnet_dt`` - - Whether to use a "Timestep" in the skip connection - - .. _`model/descriptor[se_a_tpe]/type_one_side`: - - type_one_side: - | type: ``bool``, optional, default: ``False`` - | argument path: ``model/descriptor[se_a_tpe]/type_one_side`` - - Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets - - .. _`model/descriptor[se_a_tpe]/precision`: - - precision: - | type: ``str``, optional, default: ``float64`` - | argument path: ``model/descriptor[se_a_tpe]/precision`` - - The precision of the embedding net parameters, supported options are "default", "float16", "float32", "float64". - - .. _`model/descriptor[se_a_tpe]/trainable`: - - trainable: - | type: ``bool``, optional, default: ``True`` - | argument path: ``model/descriptor[se_a_tpe]/trainable`` - - If the parameters in the embedding net is trainable - - .. _`model/descriptor[se_a_tpe]/seed`: - - seed: - | type: ``int`` | ``NoneType``, optional - | argument path: ``model/descriptor[se_a_tpe]/seed`` - - Random seed for parameter initialization - - .. _`model/descriptor[se_a_tpe]/exclude_types`: - - exclude_types: - | type: ``list``, optional, default: ``[]`` - | argument path: ``model/descriptor[se_a_tpe]/exclude_types`` - - The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. - - .. 
_`model/descriptor[se_a_tpe]/set_davg_zero`: - - set_davg_zero: - | type: ``bool``, optional, default: ``False`` - | argument path: ``model/descriptor[se_a_tpe]/set_davg_zero`` - - Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used - - .. _`model/descriptor[se_a_tpe]/type_nchanl`: - - type_nchanl: - | type: ``int``, optional, default: ``4`` - | argument path: ``model/descriptor[se_a_tpe]/type_nchanl`` - - number of channels for type embedding - - .. _`model/descriptor[se_a_tpe]/type_nlayer`: - - type_nlayer: - | type: ``int``, optional, default: ``2`` - | argument path: ``model/descriptor[se_a_tpe]/type_nlayer`` - - number of hidden layers of type embedding net - - .. _`model/descriptor[se_a_tpe]/numb_aparam`: - - numb_aparam: - | type: ``int``, optional, default: ``0`` - | argument path: ``model/descriptor[se_a_tpe]/numb_aparam`` - - dimension of atomic parameter. if set to a value > 0, the atomic parameters are embedded. - - - .. _`model/descriptor[hybrid]`: - - When |flag:model/descriptor/type|_ is set to ``hybrid``: - - .. _`model/descriptor[hybrid]/list`: - - list: - | type: ``list`` - | argument path: ``model/descriptor[hybrid]/list`` - - A list of descriptor definitions - - .. _`model/fitting_net`: - - fitting_net: - | type: ``dict`` - | argument path: ``model/fitting_net`` - - The fitting of physical properties. - - - Depending on the value of *type*, different sub args are accepted. - - .. _`model/fitting_net/type`: - - type: - | type: ``str`` (flag key), default: ``ener`` - | argument path: ``model/fitting_net/type`` - | possible choices: |code:model/fitting_net[ener]|_, |code:model/fitting_net[dipole]|_, |code:model/fitting_net[polar]|_ - - The type of the fitting. See explanation below. - - - `ener`: Fit an energy model (potential energy surface). - - - `dipole`: Fit an atomic dipole model. 
Global dipole labels or atomic dipole labels for all the selected atoms (see `sel_type`) should be provided by `dipole.npy` in each data system. The file either has number of frames lines and 3 times of number of selected atoms columns, or has number of frames lines and 3 columns. See `loss` parameter. - - - `polar`: Fit an atomic polarizability model. Global polarizazbility labels or atomic polarizability labels for all the selected atoms (see `sel_type`) should be provided by `polarizability.npy` in each data system. The file eith has number of frames lines and 9 times of number of selected atoms columns, or has number of frames lines and 9 columns. See `loss` parameter. - - - - .. |code:model/fitting_net[ener]| replace:: ``ener`` - .. _`code:model/fitting_net[ener]`: `model/fitting_net[ener]`_ - .. |code:model/fitting_net[dipole]| replace:: ``dipole`` - .. _`code:model/fitting_net[dipole]`: `model/fitting_net[dipole]`_ - .. |code:model/fitting_net[polar]| replace:: ``polar`` - .. _`code:model/fitting_net[polar]`: `model/fitting_net[polar]`_ - - .. |flag:model/fitting_net/type| replace:: *type* - .. _`flag:model/fitting_net/type`: `model/fitting_net/type`_ - - - .. _`model/fitting_net[ener]`: - - When |flag:model/fitting_net/type|_ is set to ``ener``: - - .. _`model/fitting_net[ener]/numb_fparam`: - - numb_fparam: - | type: ``int``, optional, default: ``0`` - | argument path: ``model/fitting_net[ener]/numb_fparam`` - - The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams. - - .. _`model/fitting_net[ener]/numb_aparam`: - - numb_aparam: - | type: ``int``, optional, default: ``0`` - | argument path: ``model/fitting_net[ener]/numb_aparam`` - - The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams. - - .. 
_`model/fitting_net[ener]/neuron`: - - neuron: - | type: ``list``, optional, default: ``[120, 120, 120]``, alias: *n_neuron* - | argument path: ``model/fitting_net[ener]/neuron`` - - The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built. - - .. _`model/fitting_net[ener]/activation_function`: - - activation_function: - | type: ``str``, optional, default: ``tanh`` - | argument path: ``model/fitting_net[ener]/activation_function`` - - The activation function in the fitting net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu". - - .. _`model/fitting_net[ener]/precision`: - - precision: - | type: ``str``, optional, default: ``float64`` - | argument path: ``model/fitting_net[ener]/precision`` - - The precision of the fitting net parameters, supported options are "default", "float16", "float32", "float64". - - .. _`model/fitting_net[ener]/resnet_dt`: - - resnet_dt: - | type: ``bool``, optional, default: ``True`` - | argument path: ``model/fitting_net[ener]/resnet_dt`` - - Whether to use a "Timestep" in the skip connection - - .. _`model/fitting_net[ener]/trainable`: - - trainable: - | type: ``list`` | ``bool``, optional, default: ``True`` - | argument path: ``model/fitting_net[ener]/trainable`` - - Whether the parameters in the fitting net are trainable. This option can be - - - bool: True if all parameters of the fitting net are trainable, False otherwise. - - - list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of tihs list should be equal to len(`neuron`)+1. - - .. _`model/fitting_net[ener]/rcond`: - - rcond: - | type: ``float``, optional, default: ``0.001`` - | argument path: ``model/fitting_net[ener]/rcond`` - - The condition number used to determine the inital energy shift for each type of atoms. - - .. 
_`model/fitting_net[ener]/seed`: - - seed: - | type: ``int`` | ``NoneType``, optional - | argument path: ``model/fitting_net[ener]/seed`` - - Random seed for parameter initialization of the fitting net - - .. _`model/fitting_net[ener]/atom_ener`: - - atom_ener: - | type: ``list``, optional, default: ``[]`` - | argument path: ``model/fitting_net[ener]/atom_ener`` - - Specify the atomic energy in vacuum for each type - - - .. _`model/fitting_net[dipole]`: - - When |flag:model/fitting_net/type|_ is set to ``dipole``: - - .. _`model/fitting_net[dipole]/neuron`: - - neuron: - | type: ``list``, optional, default: ``[120, 120, 120]``, alias: *n_neuron* - | argument path: ``model/fitting_net[dipole]/neuron`` - - The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built. - - .. _`model/fitting_net[dipole]/activation_function`: - - activation_function: - | type: ``str``, optional, default: ``tanh`` - | argument path: ``model/fitting_net[dipole]/activation_function`` - - The activation function in the fitting net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu". - - .. _`model/fitting_net[dipole]/resnet_dt`: - - resnet_dt: - | type: ``bool``, optional, default: ``True`` - | argument path: ``model/fitting_net[dipole]/resnet_dt`` - - Whether to use a "Timestep" in the skip connection - - .. _`model/fitting_net[dipole]/precision`: - - precision: - | type: ``str``, optional, default: ``float64`` - | argument path: ``model/fitting_net[dipole]/precision`` - - The precision of the fitting net parameters, supported options are "default", "float16", "float32", "float64". - - .. _`model/fitting_net[dipole]/sel_type`: - - sel_type: - | type: ``list`` | ``int`` | ``NoneType``, optional, alias: *dipole_type* - | argument path: ``model/fitting_net[dipole]/sel_type`` - - The atom types for which the atomic dipole will be provided. If not set, all types will be selected. 
- - .. _`model/fitting_net[dipole]/seed`: - - seed: - | type: ``int`` | ``NoneType``, optional - | argument path: ``model/fitting_net[dipole]/seed`` - - Random seed for parameter initialization of the fitting net - - - .. _`model/fitting_net[polar]`: - - When |flag:model/fitting_net/type|_ is set to ``polar``: - - .. _`model/fitting_net[polar]/neuron`: - - neuron: - | type: ``list``, optional, default: ``[120, 120, 120]``, alias: *n_neuron* - | argument path: ``model/fitting_net[polar]/neuron`` - - The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built. - - .. _`model/fitting_net[polar]/activation_function`: - - activation_function: - | type: ``str``, optional, default: ``tanh`` - | argument path: ``model/fitting_net[polar]/activation_function`` - - The activation function in the fitting net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu". - - .. _`model/fitting_net[polar]/resnet_dt`: - - resnet_dt: - | type: ``bool``, optional, default: ``True`` - | argument path: ``model/fitting_net[polar]/resnet_dt`` - - Whether to use a "Timestep" in the skip connection - - .. _`model/fitting_net[polar]/precision`: - - precision: - | type: ``str``, optional, default: ``float64`` - | argument path: ``model/fitting_net[polar]/precision`` - - The precision of the fitting net parameters, supported options are "default", "float16", "float32", "float64". - - .. _`model/fitting_net[polar]/fit_diag`: - - fit_diag: - | type: ``bool``, optional, default: ``True`` - | argument path: ``model/fitting_net[polar]/fit_diag`` - - Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to normal polarizability matrix by contracting with the rotation matrix. - - .. 
_`model/fitting_net[polar]/scale`: - - scale: - | type: ``float`` | ``list``, optional, default: ``1.0`` - | argument path: ``model/fitting_net[polar]/scale`` - - The output of the fitting net (polarizability matrix) will be scaled by ``scale`` - - .. _`model/fitting_net[polar]/shift_diag`: - - shift_diag: - | type: ``bool``, optional, default: ``True`` - | argument path: ``model/fitting_net[polar]/shift_diag`` - - Whether to shift the diagonal of polar, which is beneficial to training. Default is true. - - .. _`model/fitting_net[polar]/sel_type`: - - sel_type: - | type: ``list`` | ``int`` | ``NoneType``, optional, alias: *pol_type* - | argument path: ``model/fitting_net[polar]/sel_type`` - - The atom types for which the atomic polarizability will be provided. If not set, all types will be selected. - - .. _`model/fitting_net[polar]/seed`: - - seed: - | type: ``int`` | ``NoneType``, optional - | argument path: ``model/fitting_net[polar]/seed`` - - Random seed for parameter initialization of the fitting net - - .. _`model/modifier`: - - modifier: - | type: ``dict``, optional - | argument path: ``model/modifier`` - - The modifier of model output. - - - Depending on the value of *type*, different sub args are accepted. - - .. _`model/modifier/type`: - - type: - | type: ``str`` (flag key) - | argument path: ``model/modifier/type`` - | possible choices: |code:model/modifier[dipole_charge]|_ - - The type of modifier. See explanation below. - - -`dipole_charge`: Use WFCC to model the electronic structure of the system. Correct the long-range interaction - - .. |code:model/modifier[dipole_charge]| replace:: ``dipole_charge`` - .. _`code:model/modifier[dipole_charge]`: `model/modifier[dipole_charge]`_ - - .. |flag:model/modifier/type| replace:: *type* - .. _`flag:model/modifier/type`: `model/modifier/type`_ - - - .. _`model/modifier[dipole_charge]`: - - When |flag:model/modifier/type|_ is set to ``dipole_charge``: - - .. 
_`model/modifier[dipole_charge]/model_name`: - - model_name: - | type: ``str`` - | argument path: ``model/modifier[dipole_charge]/model_name`` - - The name of the frozen dipole model file. - - .. _`model/modifier[dipole_charge]/model_charge_map`: - - model_charge_map: - | type: ``list`` - | argument path: ``model/modifier[dipole_charge]/model_charge_map`` - - The charge of the WFCC. The list length should be the same as the `sel_type `_. - - .. _`model/modifier[dipole_charge]/sys_charge_map`: - - sys_charge_map: - | type: ``list`` - | argument path: ``model/modifier[dipole_charge]/sys_charge_map`` - - The charge of real atoms. The list length should be the same as the `type_map `_ - - .. _`model/modifier[dipole_charge]/ewald_beta`: - - ewald_beta: - | type: ``float``, optional, default: ``0.4`` - | argument path: ``model/modifier[dipole_charge]/ewald_beta`` - - The splitting parameter of Ewald sum. Unit is A^-1 - - .. _`model/modifier[dipole_charge]/ewald_h`: - - ewald_h: - | type: ``float``, optional, default: ``1.0`` - | argument path: ``model/modifier[dipole_charge]/ewald_h`` - - The grid spacing of the FFT grid. Unit is A - - .. _`model/compress`: - - compress: - | type: ``dict``, optional - | argument path: ``model/compress`` - - Model compression configurations - - - Depending on the value of *type*, different sub args are accepted. - - .. _`model/compress/type`: - - type: - | type: ``str`` (flag key), default: ``se_e2_a`` - | argument path: ``model/compress/type`` - | possible choices: |code:model/compress[se_e2_a]|_ - - The type of model compression, which should be consistent with the descriptor type. - - .. |code:model/compress[se_e2_a]| replace:: ``se_e2_a`` - .. _`code:model/compress[se_e2_a]`: `model/compress[se_e2_a]`_ - - .. |flag:model/compress/type| replace:: *type* - .. _`flag:model/compress/type`: `model/compress/type`_ - - - .. _`model/compress[se_e2_a]`: - - When |flag:model/compress/type|_ is set to ``se_e2_a`` (or its alias ``se_a``): - - .. 
_`model/compress[se_e2_a]/compress`: - - compress: - | type: ``bool`` - | argument path: ``model/compress[se_e2_a]/compress`` - - The name of the frozen model file. - - .. _`model/compress[se_e2_a]/model_file`: - - model_file: - | type: ``str`` - | argument path: ``model/compress[se_e2_a]/model_file`` - - The input model file, which will be compressed by the DeePMD-kit. - - .. _`model/compress[se_e2_a]/table_config`: - - table_config: - | type: ``list`` - | argument path: ``model/compress[se_e2_a]/table_config`` - - The arguments of model compression, including extrapolate(scale of model extrapolation), stride(uniform stride of tabulation's first and second table), and frequency(frequency of tabulation overflow check). - - .. _`model/compress[se_e2_a]/min_nbor_dist`: - - min_nbor_dist: - | type: ``float`` - | argument path: ``model/compress[se_e2_a]/min_nbor_dist`` - - The nearest distance between neighbor atoms saved in the frozen model. - - -.. _`loss`: - -loss: - | type: ``dict``, optional - | argument path: ``loss`` - - The definition of loss function. The loss type should be set to `tensor`, `ener` or left unset. - \. - - - Depending on the value of *type*, different sub args are accepted. - - .. _`loss/type`: - - type: - | type: ``str`` (flag key), default: ``ener`` - | argument path: ``loss/type`` - | possible choices: |code:loss[ener]|_, |code:loss[tensor]|_ - - The type of the loss. When the fitting type is `ener`, the loss type should be set to `ener` or left unset. When the fitting type is `dipole` or `polar`, the loss type should be set to `tensor`. - \. - - .. |code:loss[ener]| replace:: ``ener`` - .. _`code:loss[ener]`: `loss[ener]`_ - .. |code:loss[tensor]| replace:: ``tensor`` - .. _`code:loss[tensor]`: `loss[tensor]`_ - - .. |flag:loss/type| replace:: *type* - .. _`flag:loss/type`: `loss/type`_ - - - .. _`loss[ener]`: - - When |flag:loss/type|_ is set to ``ener``: - - .. 
_`loss[ener]/start_pref_e`: - - start_pref_e: - | type: ``float`` | ``int``, optional, default: ``0.02`` - | argument path: ``loss[ener]/start_pref_e`` - - The prefactor of energy loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the energy label should be provided by file energy.npy in each data system. If both start_pref_energy and limit_pref_energy are set to 0, then the energy will be ignored. - - .. _`loss[ener]/limit_pref_e`: - - limit_pref_e: - | type: ``float`` | ``int``, optional, default: ``1.0`` - | argument path: ``loss[ener]/limit_pref_e`` - - The prefactor of energy loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity. - - .. _`loss[ener]/start_pref_f`: - - start_pref_f: - | type: ``float`` | ``int``, optional, default: ``1000`` - | argument path: ``loss[ener]/start_pref_f`` - - The prefactor of force loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the force label should be provided by file force.npy in each data system. If both start_pref_force and limit_pref_force are set to 0, then the force will be ignored. - - .. _`loss[ener]/limit_pref_f`: - - limit_pref_f: - | type: ``float`` | ``int``, optional, default: ``1.0`` - | argument path: ``loss[ener]/limit_pref_f`` - - The prefactor of force loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity. - - .. _`loss[ener]/start_pref_v`: - - start_pref_v: - | type: ``float`` | ``int``, optional, default: ``0.0`` - | argument path: ``loss[ener]/start_pref_v`` - - The prefactor of virial loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the virial label should be provided by file virial.npy in each data system. If both start_pref_virial and limit_pref_virial are set to 0, then the virial will be ignored. - - .. 
_`loss[ener]/limit_pref_v`: - - limit_pref_v: - | type: ``float`` | ``int``, optional, default: ``0.0`` - | argument path: ``loss[ener]/limit_pref_v`` - - The prefactor of virial loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity. - - .. _`loss[ener]/start_pref_ae`: - - start_pref_ae: - | type: ``float`` | ``int``, optional, default: ``0.0`` - | argument path: ``loss[ener]/start_pref_ae`` - - The prefactor of atom_ener loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the atom_ener label should be provided by file atom_ener.npy in each data system. If both start_pref_atom_ener and limit_pref_atom_ener are set to 0, then the atom_ener will be ignored. - - .. _`loss[ener]/limit_pref_ae`: - - limit_pref_ae: - | type: ``float`` | ``int``, optional, default: ``0.0`` - | argument path: ``loss[ener]/limit_pref_ae`` - - The prefactor of atom_ener loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity. - - .. _`loss[ener]/relative_f`: - - relative_f: - | type: ``float`` | ``NoneType``, optional - | argument path: ``loss[ener]/relative_f`` - - If provided, relative force error will be used in the loss. The difference of force will be normalized by the magnitude of the force in the label with a shift given by `relative_f`, i.e. DF_i / ( || F || + relative_f ) with DF denoting the difference between prediction and label and || F || denoting the L2 norm of the label. - - - .. _`loss[tensor]`: - - When |flag:loss/type|_ is set to ``tensor``: - - .. _`loss[tensor]/pref`: - - pref: - | type: ``float`` | ``int`` - | argument path: ``loss[tensor]/pref`` - - The prefactor of the weight of global loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to global label, i.e. 'polarizability.npy` or `dipole.npy`, whose shape should be #frames x [9 or 3]. 
If it's larger than 0.0, this npy should be included. - - .. _`loss[tensor]/pref_atomic`: - - pref_atomic: - | type: ``float`` | ``int`` - | argument path: ``loss[tensor]/pref_atomic`` - - The prefactor of the weight of atomic loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to atomic label, i.e. `atomic_polarizability.npy` or `atomic_dipole.npy`, whose shape should be #frames x ([9 or 3] x #selected atoms). If it's larger than 0.0, this npy should be included. Both `pref` and `pref_atomic` should be provided, and either can be set to 0.0. - - -.. _`learning_rate`: - -learning_rate: - | type: ``dict`` - | argument path: ``learning_rate`` - - The definitio of learning rate - - - Depending on the value of *type*, different sub args are accepted. - - .. _`learning_rate/type`: - - type: - | type: ``str`` (flag key), default: ``exp`` - | argument path: ``learning_rate/type`` - | possible choices: |code:learning_rate[exp]|_ - - The type of the learning rate. - - .. |code:learning_rate[exp]| replace:: ``exp`` - .. _`code:learning_rate[exp]`: `learning_rate[exp]`_ - - .. |flag:learning_rate/type| replace:: *type* - .. _`flag:learning_rate/type`: `learning_rate/type`_ - - - .. _`learning_rate[exp]`: - - When |flag:learning_rate/type|_ is set to ``exp``: - - .. _`learning_rate[exp]/start_lr`: - - start_lr: - | type: ``float``, optional, default: ``0.001`` - | argument path: ``learning_rate[exp]/start_lr`` - - The learning rate the start of the training. - - .. _`learning_rate[exp]/stop_lr`: - - stop_lr: - | type: ``float``, optional, default: ``1e-08`` - | argument path: ``learning_rate[exp]/stop_lr`` - - The desired learning rate at the end of the training. - - .. _`learning_rate[exp]/decay_steps`: - - decay_steps: - | type: ``int``, optional, default: ``5000`` - | argument path: ``learning_rate[exp]/decay_steps`` - - The learning rate is decaying every this number of training steps. - - -.. 
_`training`: - -training: - | type: ``dict`` - | argument path: ``training`` - - The training options. - - .. _`training/training_data`: - - training_data: - | type: ``dict`` - | argument path: ``training/training_data`` - - Configurations of training data. - - .. _`training/training_data/systems`: - - systems: - | type: ``list`` | ``str`` - | argument path: ``training/training_data/systems`` - - The data systems for training. This key can be provided with a list that specifies the systems, or be provided with a string by which the prefix of all systems are given and the list of the systems is automatically generated. - - .. _`training/training_data/set_prefix`: - - set_prefix: - | type: ``str``, optional, default: ``set`` - | argument path: ``training/training_data/set_prefix`` - - The prefix of the sets in the `systems `_. - - .. _`training/training_data/batch_size`: - - batch_size: - | type: ``list`` | ``int`` | ``str``, optional, default: ``auto`` - | argument path: ``training/training_data/batch_size`` - - This key can be - - - list: the length of which is the same as the `systems `_. The batch size of each system is given by the elements of the list. - - - int: all `systems `_ use the same batch size. - - - string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32. - - - string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N. - - .. _`training/training_data/auto_prob`: - - auto_prob: - | type: ``str``, optional, default: ``prob_sys_size``, alias: *auto_prob_style* - | argument path: ``training/training_data/auto_prob`` - - Determine the probability of systems automatically. 
The method is assigned by this key and can be - - - "prob_uniform" : the probability all the systems are equal, namely 1.0/self.get_nsystems() - - - "prob_sys_size" : the probability of a system is proportional to the number of batches in the system - - - "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system. - - .. _`training/training_data/sys_probs`: - - sys_probs: - | type: ``list`` | ``NoneType``, optional, default: ``None``, alias: *sys_weights* - | argument path: ``training/training_data/sys_probs`` - - A list of float if specified. Should be of the same length as `systems`, specifying the probability of each system. - - .. _`training/validation_data`: - - validation_data: - | type: ``dict`` | ``NoneType``, optional, default: ``None`` - | argument path: ``training/validation_data`` - - Configurations of validation data. Similar to that of training data, except that a `numb_btch` argument may be configured - - .. _`training/validation_data/systems`: - - systems: - | type: ``list`` | ``str`` - | argument path: ``training/validation_data/systems`` - - The data systems for validation. This key can be provided with a list that specifies the systems, or be provided with a string by which the prefix of all systems are given and the list of the systems is automatically generated. - - .. _`training/validation_data/set_prefix`: - - set_prefix: - | type: ``str``, optional, default: ``set`` - | argument path: ``training/validation_data/set_prefix`` - - The prefix of the sets in the `systems `_. - - .. 
_`training/validation_data/batch_size`: - - batch_size: - | type: ``list`` | ``int`` | ``str``, optional, default: ``auto`` - | argument path: ``training/validation_data/batch_size`` - - This key can be - - - list: the length of which is the same as the `systems `_. The batch size of each system is given by the elements of the list. - - - int: all `systems `_ use the same batch size. - - - string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32. - - - string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N. - - .. _`training/validation_data/auto_prob`: - - auto_prob: - | type: ``str``, optional, default: ``prob_sys_size``, alias: *auto_prob_style* - | argument path: ``training/validation_data/auto_prob`` - - Determine the probability of systems automatically. The method is assigned by this key and can be - - - "prob_uniform" : the probability all the systems are equal, namely 1.0/self.get_nsystems() - - - "prob_sys_size" : the probability of a system is proportional to the number of batches in the system - - - "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system. - - .. _`training/validation_data/sys_probs`: - - sys_probs: - | type: ``list`` | ``NoneType``, optional, default: ``None``, alias: *sys_weights* - | argument path: ``training/validation_data/sys_probs`` - - A list of float if specified. Should be of the same length as `systems`, specifying the probability of each system. - - .. 
_`training/validation_data/numb_btch`: - - numb_btch: - | type: ``int``, optional, default: ``1``, alias: *numb_batch* - | argument path: ``training/validation_data/numb_btch`` - - An integer that specifies the number of systems to be sampled for each validation period. - - .. _`training/numb_steps`: - - numb_steps: - | type: ``int``, alias: *stop_batch* - | argument path: ``training/numb_steps`` - - Number of training batch. Each training uses one batch of data. - - .. _`training/seed`: - - seed: - | type: ``int`` | ``NoneType``, optional - | argument path: ``training/seed`` - - The random seed for getting frames from the training data set. - - .. _`training/disp_file`: - - disp_file: - | type: ``str``, optional, default: ``lcurve.out`` - | argument path: ``training/disp_file`` - - The file for printing learning curve. - - .. _`training/disp_freq`: - - disp_freq: - | type: ``int``, optional, default: ``1000`` - | argument path: ``training/disp_freq`` - - The frequency of printing learning curve. - - .. _`training/numb_test`: - - numb_test: - | type: ``list`` | ``int`` | ``str``, optional, default: ``1`` - | argument path: ``training/numb_test`` - - Number of frames used for the test during training. - - .. _`training/save_freq`: - - save_freq: - | type: ``int``, optional, default: ``1000`` - | argument path: ``training/save_freq`` - - The frequency of saving check point. - - .. _`training/save_ckpt`: - - save_ckpt: - | type: ``str``, optional, default: ``model.ckpt`` - | argument path: ``training/save_ckpt`` - - The file name of saving check point. - - .. _`training/disp_training`: - - disp_training: - | type: ``bool``, optional, default: ``True`` - | argument path: ``training/disp_training`` - - Displaying verbose information during training. - - .. _`training/time_training`: - - time_training: - | type: ``bool``, optional, default: ``True`` - | argument path: ``training/time_training`` - - Timing durining training. - - .. 
_`training/profiling`: - - profiling: - | type: ``bool``, optional, default: ``False`` - | argument path: ``training/profiling`` - - Profiling during training. - - .. _`training/profiling_file`: - - profiling_file: - | type: ``str``, optional, default: ``timeline.json`` - | argument path: ``training/profiling_file`` - - Output file for profiling. - - .. _`training/tensorboard`: - - tensorboard: - | type: ``bool``, optional, default: ``False`` - | argument path: ``training/tensorboard`` - - Enable tensorboard - - .. _`training/tensorboard_log_dir`: - - tensorboard_log_dir: - | type: ``str``, optional, default: ``log`` - | argument path: ``training/tensorboard_log_dir`` - - The log directory of tensorboard outputs - - .. _`training/tensorboard_freq`: - - tensorboard_freq: - | type: ``int``, optional, default: ``1`` - | argument path: ``training/tensorboard_freq`` - - The frequency of writing tensorboard events. diff --git a/doc/train/finetuning.md b/doc/train/finetuning.md index ebc7cda2c9..011db0bf9f 100644 --- a/doc/train/finetuning.md +++ b/doc/train/finetuning.md @@ -1,4 +1,8 @@ -# Finetune the pretrained model +# Finetune the pretrained model {{ tensorflow_icon }} {{ pytorch_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }} +::: Pretraining-and-finetuning is a widely used approach in other fields such as Computer Vision (CV) or Natural Language Processing (NLP) to vastly reduce the training cost, while it's not trivial in potential models. 
@@ -32,6 +36,7 @@ such as {ref}`descriptor `, {ref}`fitting_net ` part in `input.json` to perform finetuning: + ```json "model": { "type_map": ["O", "H"], diff --git a/doc/train/gpu-limitations.md b/doc/train/gpu-limitations.md index 5df76d28c9..92577fd65c 100644 --- a/doc/train/gpu-limitations.md +++ b/doc/train/gpu-limitations.md @@ -1,5 +1,7 @@ -# Known limitations of using GPUs +# Known limitations of using GPUs {{ tensorflow_icon }} + If you use DeePMD-kit in a GPU environment, the acceptable value range of some variables is additionally restricted compared to the CPU environment due to the software's GPU implementations: + 1. The number of atom types of a given system must be less than 128. 2. The maximum distance between an atom and its neighbors must be less than 128. It can be controlled by setting the rcut value of training parameters. 3. Theoretically, the maximum number of atoms that a single GPU can accept is about 10,000,000. However, this value is limited by the GPU memory size currently, usually within 1000,000 atoms even in the model compression mode. 
diff --git a/doc/train/index.md b/doc/train/index.md deleted file mode 100644 index f37c1a55ce..0000000000 --- a/doc/train/index.md +++ /dev/null @@ -1,10 +0,0 @@ -# Training - -- [Training a model](training.md) -- [Advanced options](training-advanced.md) -- [Parallel training](parallel-training.md) -- [multi-task training](multi-task-training.md) -- [TensorBoard Usage](tensorboard.md) -- [Known limitations of using GPUs](gpu-limitations.md) -- [Training Parameters](../train-input-auto.rst) -- [Finetuning the Pretrained Model](finetuning.md) diff --git a/doc/train/index.rst b/doc/train/index.rst index 92e84b3000..78ee31e5cb 100644 --- a/doc/train/index.rst +++ b/doc/train/index.rst @@ -8,7 +8,8 @@ Training training-advanced train-input parallel-training - multi-task-training + multi-task-training-tf + multi-task-training-pt tensorboard gpu-limitations finetuning diff --git a/doc/train/multi-task-training-pt.md b/doc/train/multi-task-training-pt.md new file mode 100644 index 0000000000..284ecf9a27 --- /dev/null +++ b/doc/train/multi-task-training-pt.md @@ -0,0 +1,86 @@ +# Multi-task training {{ pytorch_icon }} + +:::{note} +**Supported backends**: PyTorch {{ pytorch_icon }} +::: + + + +## Theory + +The multi-task training process can simultaneously handle different datasets with properties that cannot be fitted in one network (e.g. properties from DFT calculations under different exchange-correlation functionals or different basis sets). +These datasets are denoted by $\boldsymbol x^{(1)}, \dots, \boldsymbol x^{(n_t)}$. +For each dataset, a training task is defined as + +```math + \min_{\boldsymbol \theta} L^{(t)} (\boldsymbol x^{(t)}; \boldsymbol \theta^{(t)}, \tau), \quad t=1, \dots, n_t. +``` + +In the PyTorch implementation, during the multi-task training process, all tasks can share any portion of the model parameters. 
+A typical scenario is that each task shares the same descriptor with trainable parameters $\boldsymbol{\theta}_ {d}$, while each has its own fitting network with trainable parameters $\boldsymbol{\theta}_ f^{(t)}$, thus +$\boldsymbol{\theta}^{(t)} = \{ \boldsymbol{\theta}_ {d} , \boldsymbol{\theta}_ {f}^{(t)} \}$. +At each training step, a task will be randomly selected from $\{1, \dots, n_t\}$ according to the user-specified probability, +and the Adam optimizer is executed to minimize $L^{(t)}$ for one step to update the parameter $\boldsymbol \theta^{(t)}$. +In the case of multi-GPU parallel training, different GPUs will independently select their tasks. +In the DPA-2 model, this multi-task training framework is adopted.[^1] + +[^1]: Duo Zhang, Xinzijian Liu, Xiangyu Zhang, Chengqian Zhang, Chun Cai, Hangrui Bi, Yiming Du, Xuejian Qin, Jiameng Huang, Bowen Li, Yifan Shan, Jinzhe Zeng, Yuzhi Zhang, Siyuan Liu, Yifan Li, Junhan Chang, Xinyan Wang, Shuo Zhou, Jianchuan Liu, Xiaoshan Luo, Zhenyu Wang, Wanrun Jiang, Jing Wu, Yudi Yang, Jiyuan Yang, Manyi Yang, Fu-Qiang Gong, Linshuang Zhang, Mengchao Shi, Fu-Zhi Dai, Darrin M. York, Shi Liu, Tong Zhu, Zhicheng Zhong, Jian Lv, Jun Cheng, Weile Jia, Mohan Chen, Guolin Ke, Weinan E, Linfeng Zhang, Han Wang, [arXiv preprint arXiv:2312.15492 (2023)](https://arxiv.org/abs/2312.15492) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). + +Compared with the previous TensorFlow implementation, the new support in PyTorch is more flexible and efficient. +In particular, it makes multi-GPU parallel training and even tasks beyond DFT possible, +enabling larger-scale and more general multi-task training to obtain more general pre-trained models. 
+ +## Perform the multi-task training using PyTorch + +Training on multiple data sets (each data set contains several data systems) can be performed in multi-task mode, +typically with one common descriptor and multiple specific fitting nets for each data set. +To proceed, one needs to change the representation of the model definition in the input script. +The core idea is to replace the previous single model definition {ref}`model ` with multiple model definitions {ref}`model/model_dict/model_key `, +define the shared parameters of the model part {ref}`shared_dict `, and then expand other parts for multi-model settings. +Specifically, there are several parts that need to be modified: + +- {ref}`model/shared_dict `: The parameter definition of the shared part, including various descriptors, + type maps (or even fitting nets can be shared). Each module can be defined with a user-defined `part_key`, such as `my_descriptor`. + The content needs to align with the corresponding definition in the single-task training model component, such as the definition of the descriptor. + +- {ref}`model/model_dict `: The core definition of the model part and the explanation of sharing rules, + starting with user-defined model name keys `model_key`, such as `my_model_1`. + Each model part needs to align with the components of the single-task training {ref}`model `, but with the following sharing rules: +- - If you want to share the current model component with other tasks, which should be part of the {ref}`model/shared_dict `, + you can directly fill in the corresponding `part_key`, such as + `"descriptor": "my_descriptor", ` + to replace the previous detailed parameters. Here, you can also specify the shared_level, such as + `"descriptor": "my_descriptor:shared_level", ` + and use the user-defined integer `shared_level` in the code to share the corresponding module to varying degrees + (default is to share all parameters, i.e., `shared_level`=0). 
+ The parts that are exclusive to each model can be written following the previous definition. + +- {ref}`loss_dict `: The loss settings corresponding to each task model, specified by the `model_key`. + Each {ref}`loss_dict/model_key ` contains the corresponding loss settings, + which are the same as the definition in single-task training {ref}`loss <loss>`. + +- {ref}`training/data_dict `: The data settings corresponding to each task model, specified by the `model_key`. + Each `training/data_dict/model_key` contains the corresponding `training_data` and `validation_data` settings, + which are the same as the definition in single-task training {ref}`training_data ` and {ref}`validation_data `. + +- (Optional) {ref}`training/model_prob `: The sampling weight settings corresponding to each `model_key`, i.e., the probability weight in the training step. + You can specify any positive real number weight for each task. The higher the weight, the higher the probability of being sampled in each training. + This setting is optional, and if not set, tasks will be sampled with equal weights. + +An example input for multi-task training two models in water system is shown as follows: + +```{literalinclude} ../../examples/water_multi_task/pytorch_example/input_torch.json +:language: json +:linenos: +``` + +## Finetune from the pretrained multi-task model + +To finetune based on the checkpoint `model.pt` after the multi-task pre-training is completed, +users only need to prepare the normal input for single-task training `input_single.json`, +and then select one of the trained model's task names `model_key`. 
+Run the following command: + +```bash +$ dp --pt train input_single.json --finetune model.pt --model-branch model_key +``` diff --git a/doc/train/multi-task-training.md b/doc/train/multi-task-training-tf.md similarity index 76% rename from doc/train/multi-task-training.md rename to doc/train/multi-task-training-tf.md index c647e6905e..0f745958eb 100644 --- a/doc/train/multi-task-training.md +++ b/doc/train/multi-task-training-tf.md @@ -1,10 +1,17 @@ -# Multi-task training +# Multi-task training {{ tensorflow_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }} +::: + + ## Theory The multi-task training process can simultaneously handle different datasets with properties that cannot be fitted in one network (e.g. properties from DFT calculations under different exchange-correlation functionals or different basis sets). These datasets are denoted by $\boldsymbol x^{(1)}, \dots, \boldsymbol x^{(n_t)}$. For each dataset, a training task is defined as + ```math \min_{\boldsymbol \theta} L^{(t)} (\boldsymbol x^{(t)}; \boldsymbol \theta^{(t)}, \tau), \quad t=1, \dots, n_t. ``` @@ -15,24 +22,26 @@ At each training step, a task is randomly picked from ${1, \dots, n_t}$, and the If different fitting networks have the same architecture, they can share the parameters of some layers to improve training efficiency.[^1] -[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. 
Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). +[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). ## Perform the multi-task training + Training on multiple data sets (each data set contains several data systems) can be performed in multi-task mode, with one common descriptor and multiple specific fitting nets for each data set. One can simply switch the following parameters in training input script to perform multi-task mode: + - {ref}`fitting_net ` --> {ref}`fitting_net_dict `, -each key of which can be one individual fitting net. -- {ref}`training_data `, {ref}`validation_data ` ---> {ref}`data_dict `, each key of which can be one individual data set contains -several data systems for corresponding fitting net, the keys must be consistent with those in -{ref}`fitting_net_dict `. + each key of which can be one individual fitting net. +- {ref}`training_data `, {ref}`validation_data ` + --> {ref}`data_dict `, each key of which can be one individual data set contains + several data systems for corresponding fitting net, the keys must be consistent with those in + {ref}`fitting_net_dict `. 
- {ref}`loss ` --> {ref}`loss_dict `, each key of which can be one individual loss setting -for corresponding fitting net, the keys must be consistent with those in -{ref}`fitting_net_dict `, if not set, the corresponding fitting net will use the default loss. + for corresponding fitting net, the keys must be consistent with those in + {ref}`fitting_net_dict `, if not set, the corresponding fitting net will use the default loss. - (Optional) {ref}`fitting_weight `, each key of which can be a non-negative integer or float, -deciding the chosen probability for corresponding fitting net in training, if not set or invalid, -the corresponding fitting net will not be used. + deciding the chosen probability for corresponding fitting net in training, if not set or invalid, + the corresponding fitting net will not be used. The training procedure will automatically choose single-task or multi-task mode, based on the above parameters. Note that parameters of single-task mode and multi-task mode can not be mixed. @@ -40,6 +49,7 @@ Note that parameters of single-task mode and multi-task mode can not be mixed. An example input for training energy and dipole in water system can be found here: [multi-task input on water](../../examples/water_multi_task/ener_dipole/input.json). The supported descriptors for multi-task mode are listed: + - {ref}`se_a (se_e2_a) ` - {ref}`se_r (se_e2_r) ` - {ref}`se_at (se_e3) ` @@ -48,6 +58,7 @@ The supported descriptors for multi-task mode are listed: - {ref}`hybrid ` The supported fitting nets for multi-task mode are listed: + - {ref}`ener ` - {ref}`dipole ` - {ref}`polar ` @@ -55,12 +66,14 @@ The supported fitting nets for multi-task mode are listed: The output of `dp freeze` command in multi-task mode can be seen in [freeze command](../freeze/freeze.md). 
## Initialization from pretrained multi-task model + For advance training in multi-task mode, one can first train the descriptor on several upstream datasets and then transfer it on new downstream ones with newly added fitting nets. At the second step, you can also inherit some fitting nets trained on upstream datasets, by merely adding fitting net keys in {ref}`fitting_net_dict ` and optional fitting net weights in {ref}`fitting_weight `. Take [multi-task input on water](../../examples/water_multi_task/ener_dipole/input.json) again for example. You can first train a multi-task model using input script with the following {ref}`model ` part: + ```json "model": { "type_map": ["O", "H"], @@ -70,25 +83,30 @@ You can first train a multi-task model using input script with the following {re "rcut_smth": 0.5, "rcut": 6.0, "neuron": [25, 50, 100], + "type_one_side": true }, "fitting_net_dict": { "water_dipole": { "type": "dipole", - "neuron": [100, 100, 100], + "neuron": [100, 100, 100] }, "water_ener": { "neuron": [240, 240, 240], - "resnet_dt": true, + "resnet_dt": true } }, } ``` + After training, you can freeze this multi-task model into one unit graph: + ```bash $ dp freeze -o graph.pb --united-model ``` + Then if you want to transfer the trained descriptor and some fitting nets (take `water_ener` for example) to newly added datasets with new fitting net `water_ener_2`, you can modify the {ref}`model ` part of the new input script in a more simplified way: + ```json "model": { "type_map": ["O", "H"], @@ -102,12 +120,14 @@ you can modify the {ref}`model ` part of the new input script in a more s }, } ``` + It will autocomplete the configurations according to the frozen graph. Note that for newly added fitting net keys, other parts in the input script, including {ref}`data_dict ` and {ref}`loss_dict ` (optionally {ref}`fitting_weight `), should be set explicitly. While for old fitting net keys, it will inherit the old configurations if not set. 
Finally, you can perform the modified multi-task training from the frozen model with command: + ```bash $ dp train input.json --init_frz_model graph.pb ``` @@ -119,6 +139,7 @@ In this situation, one can set {ref}`model/fitting_net[ener]/layer_name>` to sha The architecture of the layers with the same name should be the same. For example, if one want to share the first and the third layers for two three-hidden-layer fitting networks, the following parameters should be set. + ```json "fitting_net_dict": { "ccsd": { diff --git a/doc/train/parallel-training.md b/doc/train/parallel-training.md index 98d12f2b9b..9ea92b4751 100644 --- a/doc/train/parallel-training.md +++ b/doc/train/parallel-training.md @@ -1,15 +1,22 @@ -# Parallel training +# Parallel training {{ tensorflow_icon }} {{ pytorch_icon }} -Currently, parallel training is enabled in a synchronized way with help of [Horovod](https://github.com/horovod/horovod). +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }} +::: + +## TensorFlow Implementation {{ tensorflow_icon }} + +Currently, parallel training in tensorflow version is enabled in a synchronized way with help of [Horovod](https://github.com/horovod/horovod). Depending on the number of training processes (according to MPI context) and the number of GPU cards available, DeePMD-kit will decide whether to launch the training in parallel (distributed) mode or in serial mode. Therefore, no additional options are specified in your JSON/YAML input file. -## Tuning learning rate +### Tuning learning rate Horovod works in the data-parallel mode, resulting in a larger global batch size. For example, the real batch size is 8 when {ref}`batch_size ` is set to 2 in the input file and you launch 4 workers. Thus, {ref}`learning_rate ` is automatically scaled by the number of workers for better convergence. 
Technical details of such heuristic rule are discussed at [Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour](https://arxiv.org/abs/1706.02677). The number of decay steps required to achieve the same accuracy can decrease by the number of cards (e.g., 1/2 of steps in the above case), but needs to be scaled manually in the input file. In some cases, it won't work well when scaling the learning rate by worker count in a `linear` way. Then you can try `sqrt` or `none` by setting argument {ref}`scale_by_worker ` like below. + ```json "learning_rate" :{ "scale_by_worker": "none", @@ -17,18 +24,18 @@ In some cases, it won't work well when scaling the learning rate by worker count } ``` -## Scaling test +### Scaling test Testing `examples/water/se_e2_a` on an 8-GPU host, linear acceleration can be observed with the increasing number of cards. | Num of GPU cards | Seconds every 100 samples | Samples per second | Speed up | -| -- | -- | -- | -- | -| 1 | 1.4515 | 68.89 | 1.00 | -| 2 | 1.5962 | 62.65*2 | 1.82 | -| 4 | 1.7635 | 56.71*4 | 3.29 | -| 8 | 1.7267 | 57.91*8 | 6.72 | +| ---------------- | ------------------------- | ------------------ | -------- | +| 1 | 1.4515 | 68.89 | 1.00 | +| 2 | 1.5962 | 62.65\*2 | 1.82 | +| 4 | 1.7635 | 56.71\*4 | 3.29 | +| 8 | 1.7267 | 57.91\*8 | 6.72 | -## How to use +### How to use Training workers can be launched with `horovodrun`. The following command launches 4 processes on the same host: @@ -42,13 +49,16 @@ Need to mention, the environment variable `CUDA_VISIBLE_DEVICES` must be set to To maximize the performance, one should follow [FAQ: How to control the parallelism of a job](../troubleshooting/howtoset_num_nodes.md) to control the number of threads. When using MPI with Horovod, `horovodrun` is a simple wrapper around `mpirun`. 
In the case where fine-grained control over options is passed to `mpirun`, [`mpirun` can be invoked directly](https://horovod.readthedocs.io/en/stable/mpi_include.html), and it will be detected automatically by Horovod, e.g., + ```bash CUDA_VISIBLE_DEVICES=4,5,6,7 mpirun -l -launcher=fork -hosts=localhost -np 4 \ dp train --mpi-log=workers input.json ``` + this is sometimes necessary for an HPC environment. Whether distributed workers are initiated can be observed in the "Summary of the training" section in the log (`world size` > 1, and `distributed`). + ``` [0] DEEPMD INFO ---Summary of the training--------------------------------------- [0] DEEPMD INFO distributed @@ -64,9 +74,10 @@ Whether distributed workers are initiated can be observed in the "Summary of the [0] DEEPMD INFO ----------------------------------------------------------------- ``` -## Logging +### Logging What's more, 2 command-line arguments are defined to control the logging behavior when performing parallel training with MPI. + ``` optional arguments: -l LOG_PATH, --log-path LOG_PATH @@ -80,3 +91,99 @@ optional arguments: means each process will output its own log (default: master) ``` + +## PyTorch Implementation {{ pytorch_icon }} + +Currently, parallel training in pytorch version is implemented in the form of PyTorch Distributed Data Parallelism [DDP](https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html). +DeePMD-kit will decide whether to launch the training in parallel (distributed) mode or in serial mode depending on your execution command. + +### Dataloader and Dataset + +One of the major differences between two backends during training is that the PyTorch version employs a multi-threaded data loading utility [DataLoader](https://pytorch.org/docs/stable/data.html). +We utilize the PyTorch framework and have designed and implemented a multiprocessing data processing and loading system called DpLoaderSet based on torch DataLoader and Dataset. 
+ +First, we establish a DeepmdData class for each system, which is consistent with the TensorFlow version at this level. Then, we create a dataloader for each system, resulting in the same number of dataloaders as the number of systems. Next, we create a dataset for the dataloaders obtained in the previous step. This allows us to query the data for each system through this dataset, while the iteration pointers for each system are maintained by their respective dataloaders. Finally, a dataloader is created for the outermost dataset. + +We achieve custom sampling methods using a weighted sampler. The length of the sampler is set to total_batch_num \* num_workers. The parameter "num_workers" defines the number of threads involved in multi-threaded loading, which can be modified by setting the environment variable NUM_WORKERS (default: min(8, ncpus)). + +> **Note** The underlying dataloader will use a distributed sampler to ensure that each GPU receives batches with different content in parallel mode, and a sequential sampler in serial mode. In the TensorFlow version, Horovod shuffles the dataset using different random seeds for the same purpose.
+ +```mermaid +flowchart LR + + subgraph systems + subgraph system1 + direction LR + frame1[frame 1] + frame2[frame 2] + end + + subgraph system2 + direction LR + frame3[frame 3] + frame4[frame 4] + frame5[frame 5] + end + end + + subgraph dataset + dataset1[dataset 1] + dataset2[dataset 2] + end + system1 -- frames --> dataset1 + system2 --> dataset2 + + subgraph distributed sampler + ds1[distributed sampler 1] + ds2[distributed sampler 2] + end + dataset1 --> ds1 + dataset2 --> ds2 + + subgraph dataloader + dataloader1[dataloader 1] + dataloader2[dataloader 2] + end + ds1 -- mini batch --> dataloader1 + ds2 --> dataloader2 + + subgraph index[index on Rank 0] + dl11[dataloader 1, entry 1] + dl21[dataloader 2, entry 1] + dl22[dataloader 2, entry 2] + end + dataloader1 --> dl11 + dataloader2 --> dl21 + dataloader2 --> dl22 + + index -- for each step, choose 1 system --> WeightedSampler + --> dploaderset --> bufferedq[buffered queue] --> model +``` + +### How to use + +We use [`torchrun`](https://pytorch.org/docs/stable/elastic/run.html#usage) to launch a DDP training session. + +To start training with multiple GPUs in one node, set the parameter `nproc_per_node` to the number of GPUs: + +```bash +torchrun --nproc_per_node=4 --no-python dp --pt train input.json +# Not setting `nproc_per_node` uses only 1 GPU +torchrun --no-python dp --pt train input.json +``` + +To train a model with a cluster, one can manually launch the task using the commands below (usually this should be done by your job management system). Set `nnodes` as the number of available nodes, `node_rank` as the rank of the current node among all nodes (not the rank of processes!), and `nproc_per_node` as the number of available GPUs in one node. Please make sure that every node can access the rendezvous address and port (`rdzv_endpoint` in the command), and has the same number of GPUs.
+ +```bash +# Running DDP on 2 nodes with 4 GPUs each +# On node 0: +torchrun --rdzv_endpoint=node0:12321 --nnodes=2 --nproc_per_node=4 --node_rank=0 --no_python dp --pt train tests/water/se_e2_a.json +# On node 1: +torchrun --rdzv_endpoint=node0:12321 --nnodes=2 --nproc_per_node=4 --node_rank=1 --no_python dp --pt train tests/water/se_e2_a.json +``` + +> **Note** Set environment variables to tune [CPU specific optimizations](https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html#cpu-specific-optimizations) in advance. + +> **Note** for developers: `torchrun` by default passes settings as environment variables [(list here)](https://pytorch.org/docs/stable/elastic/run.html#environment-variables). + +> To check forward, backward, and communication time, please set env var `TORCH_CPP_LOG_LEVEL=INFO TORCH_DISTRIBUTED_DEBUG=DETAIL`. More details can be found [here](https://pytorch.org/docs/stable/distributed.html#logging). diff --git a/doc/train/tensorboard.md b/doc/train/tensorboard.md index 4846005216..7b41c004ce 100644 --- a/doc/train/tensorboard.md +++ b/doc/train/tensorboard.md @@ -1,4 +1,8 @@ -# TensorBoard Usage +# TensorBoard Usage {{ tensorflow_icon }} {{ pytorch_icon }} + +:::{note} +**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }} +::: TensorBoard provides the visualization and tooling needed for machine learning experimentation. Full instructions for TensorBoard can be found @@ -8,10 +12,10 @@ experimentation. Full instructions for TensorBoard can be found DeePMD-kit can now use most of the interesting features enabled by TensorBoard! 
-* **Tracking and visualizing metrics,** such as l2_loss, l2_energy_loss and l2_force_loss -* **Visualizing the model graph** (ops and layers) -* **Viewing histograms of weights, biases, or other tensors as they change over time.** -* **Viewing summaries of trainable variables** +- **Tracking and visualizing metrics,** such as l2_loss, l2_energy_loss and l2_force_loss +- **Visualizing the model graph** (ops and layers) +- **Viewing histograms of weights, biases, or other tensors as they change over time.** +- **Viewing summaries of trainable variables** @@ -80,6 +84,7 @@ tensorboard --logdir path/to/logs ![DeePMD-kit distribution](../images/tensorboard-distribution.png) ### Viewing summaries of trainable variables + ![DeePMD-kit scalar](../images/tensorboard-scalar.png) ## Attention diff --git a/doc/train/train-input.rst b/doc/train/train-input.rst index 2a32aeb930..04e82451e4 100644 --- a/doc/train/train-input.rst +++ b/doc/train/train-input.rst @@ -4,5 +4,5 @@ Training Parameters One can load, modify, and export the input file by using our effective web-based tool `DP-GUI `_ online or hosted using the :ref:`command line interface ` :code:`dp gui`. All training parameters below can be set in DP-GUI. By clicking "SAVE JSON", one can download the input file for further training. ..
dargs:: - :module: deepmd.utils.argcheck + :module: deepmd.tf.utils.argcheck :func: gen_args diff --git a/doc/train/training-advanced.md b/doc/train/training-advanced.md index 4940b77fa7..a0f6759256 100644 --- a/doc/train/training-advanced.md +++ b/doc/train/training-advanced.md @@ -7,21 +7,26 @@ In this section, we will take `$deepmd_source_dir/examples/water/se_e2_a/input.j ### Theory The learning rate $\gamma$ decays exponentially: + ```math \gamma(\tau) = \gamma^0 r ^ {\lfloor \tau/s \rfloor}, ``` + where $\tau \in \mathbb{N}$ is the index of the training step, $\gamma^0 \in \mathbb{R}$ is the learning rate at the first step, and the decay rate $r$ is given by + ```math r = {\left(\frac{\gamma^{\text{stop}}}{\gamma^0}\right )} ^{\frac{s}{\tau^{\text{stop}}}}, ``` + where $\tau^{\text{stop}} \in \mathbb{N}$, $\gamma^{\text{stop}} \in \mathbb{R}$, and $s \in \mathbb{N}$ are the stopping step, the stopping learning rate, and the decay steps, respectively, all of which are hyperparameters provided in advance. [^1] -[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). 
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/). ### Instructions The {ref}`learning_rate ` section in `input.json` is given as follows + ```json "learning_rate" :{ "type": "exp", @@ -31,17 +36,19 @@ The {ref}`learning_rate ` section in `input.json` is given as fol "_comment": "that's all" } ``` -* {ref}`start_lr ` gives the learning rate at the beginning of the training. -* {ref}`stop_lr ` gives the learning rate at the end of the training. It should be small enough to ensure that the network parameters satisfactorily converge. -* During the training, the learning rate decays exponentially from {ref}`start_lr ` to {ref}`stop_lr ` following the formula: - ``` - lr(t) = start_lr * decay_rate ^ ( t / decay_steps ) - ``` +- {ref}`start_lr ` gives the learning rate at the beginning of the training. +- {ref}`stop_lr ` gives the learning rate at the end of the training. It should be small enough to ensure that the network parameters satisfactorily converge. 
+- During the training, the learning rate decays exponentially from {ref}`start_lr ` to {ref}`stop_lr ` following the formula: + + ``` + lr(t) = start_lr * decay_rate ^ ( t / decay_steps ) + ``` ## Training parameters Other training parameters are given in the {ref}`training ` section. + ```json "training": { "training_data": { @@ -65,15 +72,18 @@ Other training parameters are given in the {ref}`training ` section. "save_freq": 1000 } ``` + The sections {ref}`training_data ` and {ref}`validation_data ` give the training dataset and validation dataset, respectively. Taking the training dataset for example, the keys are explained below: -* {ref}`systems ` provide paths of the training data systems. DeePMD-kit allows you to provide multiple systems with different numbers of atoms. This key can be a `list` or a `str`. - * `list`: {ref}`systems ` gives the training data systems. - * `str`: {ref}`systems ` should be a valid path. DeePMD-kit will recursively search all data systems in this path. -* At each training step, DeePMD-kit randomly picks {ref}`batch_size ` frame(s) from one of the systems. The probability of using a system is by default in proportion to the number of batches in the system. More options are available for automatically determining the probability of using systems. One can set the key {ref}`auto_prob ` to - * `"prob_uniform"` all systems are used with the same probability. - * `"prob_sys_size"` the probability of using a system is proportional to its size (number of frames). - * `"prob_sys_size; sidx_0:eidx_0:w_0; sidx_1:eidx_1:w_1;..."` the `list` of systems is divided into blocks. Block `i` has systems ranging from `sidx_i` to `eidx_i`. The probability of using a system from block `i` is proportional to `w_i`. Within one block, the probability of using a system is proportional to its size. -* An example of using `"auto_prob"` is given below. 
The probability of using `systems[2]` is 0.4, and the sum of the probabilities of using `systems[0]` and `systems[1]` is 0.6. If the number of frames in `systems[1]` is twice of `system[0]`, then the probability of using `system[1]` is 0.4 and that of `system[0]` is 0.2. + +- {ref}`systems ` provide paths of the training data systems. DeePMD-kit allows you to provide multiple systems with different numbers of atoms. This key can be a `list` or a `str`. + - `list`: {ref}`systems ` gives the training data systems. + - `str`: {ref}`systems ` should be a valid path. DeePMD-kit will recursively search all data systems in this path. +- At each training step, DeePMD-kit randomly picks {ref}`batch_size ` frame(s) from one of the systems. The probability of using a system is by default in proportion to the number of batches in the system. More options are available for automatically determining the probability of using systems. One can set the key {ref}`auto_prob ` to + - `"prob_uniform"` all systems are used with the same probability. + - `"prob_sys_size"` the probability of using a system is proportional to its size (number of frames). + - `"prob_sys_size; sidx_0:eidx_0:w_0; sidx_1:eidx_1:w_1;..."` the `list` of systems is divided into blocks. Block `i` has systems ranging from `sidx_i` to `eidx_i`. The probability of using a system from block `i` is proportional to `w_i`. Within one block, the probability of using a system is proportional to its size. +- An example of using `"auto_prob"` is given below. The probability of using `systems[2]` is 0.4, and the sum of the probabilities of using `systems[0]` and `systems[1]` is 0.6. If the number of frames in `systems[1]` is twice of `system[0]`, then the probability of using `system[1]` is 0.4 and that of `system[0]` is 0.2. 
+ ```json "training_data": { "systems": ["../data_water/data_0/", "../data_water/data_1/", "../data_water/data_2/"], @@ -81,7 +91,9 @@ The sections {ref}`training_data ` and {ref}`validation_ "batch_size": "auto" } ``` -* The probability of using systems can also be specified explicitly with key {ref}`sys_probs ` which is a list having the length of the number of systems. For example + +- The probability of using systems can also be specified explicitly with key {ref}`sys_probs ` which is a list having the length of the number of systems. For example + ```json "training_data": { "systems": ["../data_water/data_0/", "../data_water/data_1/", "../data_water/data_2/"], @@ -89,34 +101,40 @@ The sections {ref}`training_data ` and {ref}`validation_ "batch_size": "auto:32" } ``` -* The key {ref}`batch_size ` specifies the number of frames used to train or validate the model in a training step. It can be set to - * `list`: the length of which is the same as the {ref}`systems`. The batch size of each system is given by the elements of the list. - * `int`: all systems use the same batch size. - * `"auto"`: the same as `"auto:32"`, see `"auto:N"` - * `"auto:N"`: automatically determines the batch size so that the {ref}`batch_size ` times the number of atoms in the system is no less than `N`. -* The key {ref}`numb_batch ` in {ref}`validate_data ` gives the number of batches of model validation. Note that the batches may not be from the same system + +- The key {ref}`batch_size ` specifies the number of frames used to train or validate the model in a training step. It can be set to + - `list`: the length of which is the same as the {ref}`systems`. The batch size of each system is given by the elements of the list. + - `int`: all systems use the same batch size. + - `"auto"`: the same as `"auto:32"`, see `"auto:N"` + - `"auto:N"`: automatically determines the batch size so that the {ref}`batch_size ` times the number of atoms in the system is no less than `N`. 
+- The key {ref}`numb_batch ` in {ref}`validate_data ` gives the number of batches of model validation. Note that the batches may not be from the same system The section {ref}`mixed_precision ` specifies the mixed precision settings, which will enable the mixed precision training workflow for DeePMD-kit. The keys are explained below: -* {ref}`output_prec ` precision used in the output tensors, only `float32` is supported currently. -* {ref}`compute_prec ` precision used in the computing tensors, only `float16` is supported currently. -Note there are several limitations about mixed precision training: -* Only {ref}`se_e2_a ` type descriptor is supported by the mixed precision training workflow. -* The precision of the embedding net and the fitting net are forced to be set to `float32`. + +- {ref}`output_prec ` precision used in the output tensors, only `float32` is supported currently. +- {ref}`compute_prec ` precision used in the computing tensors, only `float16` is supported currently. + Note there are several limitations about mixed precision training: +- Only {ref}`se_e2_a ` type descriptor is supported by the mixed precision training workflow. +- The precision of the embedding net and the fitting net are forced to be set to `float32`. Other keys in the {ref}`training ` section are explained below: -* {ref}`numb_steps ` The number of training steps. -* {ref}`seed ` The random seed for getting frames from the training data set. -* {ref}`disp_file ` The file for printing learning curve. -* {ref}`disp_freq ` The frequency of printing learning curve. Set in the unit of training steps -* {ref}`save_freq ` The frequency of saving checkpoint. + +- {ref}`numb_steps ` The number of training steps. +- {ref}`seed ` The random seed for getting frames from the training data set. +- {ref}`disp_file ` The file for printing learning curve. +- {ref}`disp_freq ` The frequency of printing learning curve. 
Set in the unit of training steps +- {ref}`save_freq ` The frequency of saving checkpoint. ## Options and environment variables Several command line options can be passed to `dp train`, which can be checked with + ```bash $ dp train --help ``` + An explanation will be provided + ``` positional arguments: INPUT the input json database @@ -146,16 +164,16 @@ To maximize the performance, one should follow [FAQ: How to control the parallel One can set other environmental variables: -| Environment variables | Allowed value | Default value | Usage | -| --------------------- | ---------------------- | ------------- | -------------------------- | -| DP_INTERFACE_PREC | `high`, `low` | `high` | Control high (double) or low (float) precision of training. | -| DP_AUTO_PARALLELIZATION | 0, 1 | 0 | Enable auto parallelization for CPU operators. | -| DP_JIT | 0, 1 | 0 | Enable JIT. Note that this option may either improve or decrease the performance. Requires TensorFlow supports JIT. | - +| Environment variables | Allowed value | Default value | Usage | +| ----------------------- | ------------- | ------------- | ------------------------------------------------------------------------------------------------------------------- | +| DP_INTERFACE_PREC | `high`, `low` | `high` | Control high (double) or low (float) precision of training. | +| DP_AUTO_PARALLELIZATION | 0, 1 | 0 | Enable auto parallelization for CPU operators. | +| DP_JIT | 0, 1 | 0 | Enable JIT. Note that this option may either improve or decrease the performance. Requires TensorFlow supports JIT. | ## Adjust `sel` of a frozen model One can use `--init-frz-model` features to adjust (increase or decrease) [`sel`](../model/sel.md) of a existing model. Firstly, one needs to adjust [`sel`](./train-input.rst) in `input.json`. For example, adjust from `[46, 92]` to `[23, 46]`. 
+ ```json "model": { "descriptor": { @@ -163,7 +181,9 @@ One can use `--init-frz-model` features to adjust (increase or decrease) [`sel`] } } ``` + To obtain the new model at once, [`numb_steps`](./train-input.rst) should be set to zero: + ```json "training": { "numb_steps": 0 @@ -171,6 +191,7 @@ To obtain the new model at once, [`numb_steps`](./train-input.rst) should be set ``` Then, one can initialize the training from the frozen model and freeze the new model at once: + ```sh dp train input.json --init-frz-model frozen_model.pb dp freeze -o frozen_model_adjusted_sel.pb diff --git a/doc/train/training.md b/doc/train/training.md index c1e5b89a84..5b7bbd32a8 100644 --- a/doc/train/training.md +++ b/doc/train/training.md @@ -1,17 +1,21 @@ # Train a model Several examples of training can be found in the `examples` directory: + ```bash $ cd $deepmd_source_dir/examples/water/se_e2_a/ ``` After switching to that directory, the training can be invoked by + ```bash $ dp train input.json ``` + where `input.json` is the name of the input script. By default, the verbosity level of the DeePMD-kit is `INFO`, one may see a lot of important information on the code and environment showing on the screen. Among them two pieces of information regarding data systems are worth special notice. + ```bash DEEPMD INFO ---Summary of DataSystem: training ----------------------------------------------- DEEPMD INFO found 3 system(s): @@ -26,9 +30,11 @@ DEEPMD INFO system natoms bch_sz n_bc DEEPMD INFO ../data_water/data_3 192 1 80 1.000 T DEEPMD INFO -------------------------------------------------------------------------------------- ``` + The DeePMD-kit prints detailed information on the training and validation data sets. The data sets are defined by {ref}`training_data ` and {ref}`validation_data ` defined in the {ref}`training ` section of the input script. The training data set is composed of three data systems, while the validation data set is composed by one data system. 
The number of atoms, batch size, the number of batches in the system and the probability of using the system are all shown on the screen. The last column presents if the periodic boundary condition is assumed for the system. During the training, the error of the model is tested every {ref}`disp_freq ` training steps with the batch used to train the model and with {ref}`numb_btch ` batches from the validating data. The training error and validation error are printed correspondingly in the file {ref}`disp_file ` (default is `lcurve.out`). The batch size can be set in the input script by the key {ref}`batch_size ` in the corresponding sections for the training and validation data set. An example of the output + ```bash # step rmse_val rmse_trn rmse_e_val rmse_e_trn rmse_f_val rmse_f_trn lr 0 3.33e+01 3.41e+01 1.03e+01 1.03e+01 8.39e-01 8.72e-01 1.0e-03 @@ -38,6 +44,7 @@ During the training, the error of the model is tested every {ref}`disp_freq