diff --git a/.gitattributes b/.gitattributes
index e77d446ba6..82d852900b 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,2 +1,3 @@
 # do not show up detailed difference on GitHub
 source/3rdparty/* linguist-generated=true
+source/3rdparty/README.md linguist-generated=false
diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml
index f13b187dfb..49918e47ac 100644
--- a/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -21,10 +21,10 @@ body:
     validations:
       required: true
   - type: input
-    id: tf-version
+    id: backend-version
     attributes:
-      label: TensorFlow Version
-      description: "The version will be printed when running DeePMD-kit."
+      label: Backend and its version
+      description: "The backend and its version will be printed when running DeePMD-kit, e.g. TensorFlow v2.15.0."
     validations:
       required: true
   - type: dropdown
diff --git a/.github/ISSUE_TEMPLATE/generic-issue.yml b/.github/ISSUE_TEMPLATE/generic-issue.yml
index af9f01c64d..f84097580e 100644
--- a/.github/ISSUE_TEMPLATE/generic-issue.yml
+++ b/.github/ISSUE_TEMPLATE/generic-issue.yml
@@ -21,10 +21,10 @@ body:
     validations:
       required: true
   - type: input
-    id: tf-version
+    id: backend-version
     attributes:
-      label: TensorFlow Version
-      description: "The version will be printed when running DeePMD-kit."
+      label: Backend and its version
+      description: "The backend and its version will be printed when running DeePMD-kit, e.g. TensorFlow v2.15.0."
     validations:
       required: true
   - type: textarea
diff --git a/.github/labeler.yml b/.github/labeler.yml
index b0a85679de..b048574e77 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -1,39 +1,38 @@
-Python:
-- changed-files:
-  - any-glob-to-any-file:
-    - deepmd/**/*
-    - deepmd_utils/**/*
-    - source/tests/**/*
-Docs:
-- changed-files:
-  - any-glob-to-any-file: doc/**/*
-Examples:
-- changed-files:
-  - any-glob-to-any-file: examples/**/*
-Core:
-- changed-files:
-  - any-glob-to-any-file: source/lib/**/*
-CUDA:
-- changed-files:
-  - any-glob-to-any-file: source/lib/src/gpu/**/*
-ROCM:
-- changed-files:
-  - any-glob-to-any-file: source/lib/src/gpu/**/*
-OP:
-- changed-files:
-  - any-glob-to-any-file: source/op/**/*
-C++:
-- changed-files:
-  - any-glob-to-any-file: source/api_cc/**/*
-C:
-- changed-files:
-  - any-glob-to-any-file: source/api_c/**/*
-LAMMPS:
-- changed-files:
-  - any-glob-to-any-file: source/lmp/**/*
-Gromacs:
-- changed-files:
-  - any-glob-to-any-file: source/gmx/**/*
-i-Pi:
-- changed-files:
-  - any-glob-to-any-file: source/ipi/**/*
+Python:
+  - changed-files:
+      - any-glob-to-any-file:
+          - deepmd/**/*
+          - source/tests/**/*
+Docs:
+  - changed-files:
+      - any-glob-to-any-file: doc/**/*
+Examples:
+  - changed-files:
+      - any-glob-to-any-file: examples/**/*
+Core:
+  - changed-files:
+      - any-glob-to-any-file: source/lib/**/*
+CUDA:
+  - changed-files:
+      - any-glob-to-any-file: source/lib/src/gpu/**/*
+ROCM:
+  - changed-files:
+      - any-glob-to-any-file: source/lib/src/gpu/**/*
+OP:
+  - changed-files:
+      - any-glob-to-any-file: source/op/**/*
+C++:
+  - changed-files:
+      - any-glob-to-any-file: source/api_cc/**/*
+C:
+  - changed-files:
+      - any-glob-to-any-file: source/api_c/**/*
+LAMMPS:
+  - changed-files:
+      - any-glob-to-any-file: source/lmp/**/*
+Gromacs:
+  - changed-files:
+      - any-glob-to-any-file: source/gmx/**/*
+i-Pi:
+  - changed-files:
+      - any-glob-to-any-file: source/ipi/**/*
diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml
index f029517d80..adcb615a0a 100644
--- a/.github/workflows/build_cc.yml
+++ b/.github/workflows/build_cc.yml
@@ -1,6 +1,12 @@
 on:
   push:
+    branches-ignore:
+      - "gh-readonly-queue/**"
   pull_request:
+  merge_group:
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: true
 name: Build C++
 jobs:
   buildcc:
@@ -27,6 +33,10 @@ jobs:
         cache: 'pip'
     - uses: lukka/get-cmake@latest
     - run: python -m pip install tensorflow
+    - name: Download libtorch
+      run: |
+         wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.1.2%2Bcpu.zip -O libtorch.zip
+         unzip libtorch.zip
     - run: |
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \
          && sudo dpkg -i cuda-keyring_1.0-1_all.deb \
@@ -48,13 +58,17 @@ jobs:
          && sudo apt-get update \
          && sudo apt-get install -y rocm-dev hipcub-dev
       if: matrix.variant == 'rocm'
-    - run: source/install/build_cc.sh
+    - run: |
+        export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch
+        source/install/build_cc.sh
       env:
         DP_VARIANT: ${{ matrix.dp_variant }}
         DOWNLOAD_TENSORFLOW: "FALSE"
         CMAKE_GENERATOR: Ninja
       if: matrix.variant != 'clang'
-    - run: source/install/build_cc.sh
+    - run: |
+        export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch
+        source/install/build_cc.sh
       env:
         DP_VARIANT: cpu
         DOWNLOAD_TENSORFLOW: "FALSE"
diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index 23076e9bf5..18fd7a1ac1 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -2,7 +2,16 @@ name: Build and upload to PyPI
 
 on:
   push:
+    branches-ignore:
+      - "gh-readonly-queue/**"
+    tags:
+      - "v*"
   pull_request:
+  merge_group:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: true
 
 jobs:
   determine-arm64-runner:
@@ -41,12 +50,12 @@ jobs:
             cuda_version: 11.8
             dp_pkg_name: deepmd-kit-cu11
           # macos-x86-64
-          - os: macos-latest
+          - os: macos-13
             python: 311
             platform_id: macosx_x86_64
             dp_variant: cpu
           # macos-arm64
-          - os: macos-latest
+          - os: macos-14
             python: 311
             platform_id: macosx_arm64
             dp_variant: cpu
@@ -68,8 +77,20 @@ jobs:
       - uses: docker/setup-qemu-action@v3
         name: Setup QEMU
         if: matrix.platform_id == 'manylinux_aarch64' && matrix.os == 'ubuntu-latest'
+      # detect version in advance. See #3168
+      - uses: actions/setup-python@v5
+        name: Install Python
+        with:
+          python-version: '3.11'
+          cache: 'pip'
+        if: matrix.dp_pkg_name == 'deepmd-kit-cu11'
+      - run: |
+          python -m pip install setuptools_scm
+          python -c "from setuptools_scm import get_version;print('SETUPTOOLS_SCM_PRETEND_VERSION='+get_version())" >> $GITHUB_ENV
+          rm -rf .git
+        if: matrix.dp_pkg_name == 'deepmd-kit-cu11'
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.16
+        uses: pypa/cibuildwheel@v2.17
         env:
           CIBW_BUILD_VERBOSITY: 1
           CIBW_ARCHS: all
@@ -136,7 +157,7 @@ jobs:
           path: source/install/docker/dist
           merge-multiple: true
       - name: Log in to the Container registry
-        uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d
+        uses: docker/login-action@v3
         with:
           registry: ghcr.io
           username: ${{ github.actor }}
@@ -144,12 +165,12 @@ jobs:
 
       - name: Extract metadata (tags, labels) for Docker
         id: meta
-        uses: docker/metadata-action@dbef88086f6cef02e264edb7dbf63250c17cef6c
+        uses: docker/metadata-action@v5
         with:
           images: ghcr.io/deepmodeling/deepmd-kit
 
       - name: Build and push Docker image
-        uses: docker/build-push-action@4a13e500e55cf31b7a5d59a38ab2040ab0f42f56
+        uses: docker/build-push-action@v5
         with:
           context: source/install/docker
           push: ${{ github.repository_owner == 'deepmodeling' && github.event_name == 'push' && github.actor != 'dependabot[bot]' }}
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index a9a162432c..c912ece8d5 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -2,10 +2,14 @@ name: "CodeQL"
 
 on:
   push:
+    branches-ignore:
+      - "gh-readonly-queue/**"
   pull_request:
   schedule:
     - cron: '45 2 * * 2'
-
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: true
 jobs:
   analyze:
     name: Analyze
@@ -37,6 +41,8 @@ jobs:
          && sudo apt-get update \
          && sudo apt-get -y install cuda-cudart-dev-12-2 cuda-nvcc-12-2
         python -m pip install tensorflow
+        wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.1.2%2Bcpu.zip -O libtorch.zip
+        unzip libtorch.zip
       env:
         DEBIAN_FRONTEND: noninteractive
     # Initializes the CodeQL tools for scanning.
@@ -46,7 +52,9 @@ jobs:
         languages: ${{ matrix.language }}
         queries: security-extended,security-and-quality
     - name: "Run, Build Application using script"
-      run: source/install/build_cc.sh
+      run: |
+        export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch
+        source/install/build_cc.sh
       env:
         DP_VARIANT: cuda
         DOWNLOAD_TENSORFLOW: "FALSE"
diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
index 877c780f1f..be43c5cff2 100644
--- a/.github/workflows/labeler.yml
+++ b/.github/workflows/labeler.yml
@@ -11,4 +11,4 @@ jobs:
     steps:
     - uses: actions/labeler@v5
       with:
-        repo-token: "${{ secrets.GITHUB_TOKEN }}"
+        repo-token: "${{ secrets.GITHUB_TOKEN }}"
diff --git a/.github/workflows/package_c.yml b/.github/workflows/package_c.yml
index 5594c79181..e11f773b3a 100644
--- a/.github/workflows/package_c.yml
+++ b/.github/workflows/package_c.yml
@@ -2,8 +2,15 @@ name: Build C library
 
 on:
   push:
+    branches-ignore:
+      - "gh-readonly-queue/**"
+    tags:
+      - "v*"
   pull_request:
-
+  merge_group:
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: true
 jobs:
   build_c:
     name: Build C library
@@ -35,7 +42,7 @@ jobs:
       - name: Test C library
         run: ./source/install/docker_test_package_c.sh
       - name: Release
-        uses: softprops/action-gh-release@v1
+        uses: softprops/action-gh-release@v2
         if: startsWith(github.ref, 'refs/tags/')
         with:
           files: ${{ matrix.filename }}
diff --git a/.github/workflows/test_cc.yml b/.github/workflows/test_cc.yml
index ef6fade8e5..5c5d260f42 100644
--- a/.github/workflows/test_cc.yml
+++ b/.github/workflows/test_cc.yml
@@ -1,11 +1,20 @@
 on:
   push:
+    branches-ignore:
+      - "gh-readonly-queue/**"
   pull_request:
+  merge_group:
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: true
 name: Test C++
 jobs:
   testcc:
     name: Test C++
     runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        check_memleak: [true, false]
     steps:
     - uses: actions/checkout@v4
     - uses: actions/setup-python@v5
@@ -18,28 +27,41 @@ jobs:
         mpi: mpich
     - uses: lukka/get-cmake@latest
     - run: python -m pip install tensorflow
-    - run: source/install/test_cc_local.sh
+    - name: Download libtorch
+      run: |
+         wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.1.2%2Bcpu.zip -O libtorch.zip
+         unzip libtorch.zip
+    # https://github.com/actions/runner-images/issues/9491
+    - name: Fix kernel mmap rnd bits
+      run: sudo sysctl vm.mmap_rnd_bits=28
+      if: ${{ matrix.check_memleak }}
+    - run: |
+         export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch
+         source/install/test_cc_local.sh
       env:
         OMP_NUM_THREADS: 1
         TF_INTRA_OP_PARALLELISM_THREADS: 1
         TF_INTER_OP_PARALLELISM_THREADS: 1
         LMP_CXX11_ABI_0: 1
         CMAKE_GENERATOR: Ninja
+        CXXFLAGS: ${{ matrix.check_memleak && '-fsanitize=leak' || '' }}
     # test lammps
     # ASE issue: https://gitlab.com/ase/ase/-/merge_requests/2843
     # TODO: remove ase version when ase has new release
     - run: |
         python -m pip install -U pip
-        python -m pip install -e .[cpu,test,lmp] "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz"
+        python -m pip install -e .[cpu,test,lmp] mpi4py "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz"
       env:
         DP_BUILD_TESTING: 1
+      if: ${{ !matrix.check_memleak }}
     - run: pytest --cov=deepmd source/lmp/tests
       env:
         OMP_NUM_THREADS: 1
         TF_INTRA_OP_PARALLELISM_THREADS: 1
         TF_INTER_OP_PARALLELISM_THREADS: 1
         LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp
-        LD_LIBRARY_PATH: ${{ github.workspace }}/dp_test/lib
+        LD_LIBRARY_PATH: ${{ github.workspace }}/dp_test/lib:${{ github.workspace }}/libtorch/lib
+      if: ${{ !matrix.check_memleak }}
     # test ipi
     - run: pytest --cov=deepmd source/ipi/tests
       env:
@@ -47,10 +69,11 @@ jobs:
         TF_INTRA_OP_PARALLELISM_THREADS: 1
         TF_INTER_OP_PARALLELISM_THREADS: 1
         PATH: ${{ github.workspace }}/dp_test/bin:$PATH
-        LD_LIBRARY_PATH: ${{ github.workspace }}/dp_test/lib
-    - uses: codecov/codecov-action@v3
-      with:
-        gcov: true
+        LD_LIBRARY_PATH: ${{ github.workspace }}/dp_test/lib:${{ github.workspace }}/libtorch/lib
+      if: ${{ !matrix.check_memleak }}
+    - uses: codecov/codecov-action@v4
+      env:
+        CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
   pass:
     name: Pass testing C++
     needs: [testcc]
diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml
index e74c0abde2..15a20d889a 100644
--- a/.github/workflows/test_cuda.yml
+++ b/.github/workflows/test_cuda.yml
@@ -4,6 +4,14 @@ on:
   pull_request:
     types:
       - "labeled"
+      # to let the PR pass the test
+      - "opened"
+      - "reopened"
+      - "synchronize"
+  merge_group:
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: true
 name: Test CUDA
 jobs:
   test_cuda:
@@ -11,9 +19,9 @@ jobs:
     runs-on: nvidia
     # https://github.com/deepmodeling/deepmd-kit/pull/2884#issuecomment-1744216845
     container:
-      image: nvidia/cuda:12.2.0-devel-ubuntu22.04
+      image: nvidia/cuda:12.3.1-devel-ubuntu22.04
       options: --gpus all
-    if: github.repository_owner == 'deepmodeling' && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch'
+    if: github.repository_owner == 'deepmodeling' && (github.event_name == 'pull_request' && github.event.label && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch' || github.event_name == 'merge_group')
     steps:
     - name: Make sudo and git work
       run: apt-get update && apt-get install -y sudo git
@@ -27,24 +35,34 @@ jobs:
       with:
         mpi: mpich
     - uses: lukka/get-cmake@latest
+      with:
+        useLocalCache: true
+        useCloudCache: false
+    - name: Install wget and unzip
+      run: apt-get update && apt-get install -y wget unzip
     - run: |
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \
          && sudo dpkg -i cuda-keyring_1.0-1_all.deb \
          && sudo apt-get update \
-         && sudo apt-get -y install cuda-12-2 libcudnn8=8.9.5.*-1+cuda12.2
+         && sudo apt-get -y install cuda-12-3 libcudnn8=8.9.5.*-1+cuda12.3
       if: false  # skip as we use nvidia image
-    - name: Set PyPI mirror for Aliyun cloud machine
-      run: python -m pip config --user set global.index-url https://mirrors.aliyun.com/pypi/simple/
     - run: python -m pip install -U "pip>=21.3.1,!=23.0.0"
-    - run: python -m pip install "tensorflow>=2.15.0rc0"
-    - run: python -m pip install -v -e .[gpu,test,lmp,cu12] "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz"
+    - run: python -m pip install "tensorflow>=2.15.0rc0" "torch>=2.2.0"
+    - run: python -m pip install -v -e .[gpu,test,lmp,cu12,torch] mpi4py "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz"
       env:
-        DP_BUILD_TESTING: 1
         DP_VARIANT: cuda
-        CUDA_PATH: /usr/local/cuda-12.2
+        DP_ENABLE_NATIVE_OPTIMIZATION: 1
     - run: dp --version
-    - run: python -m pytest -s --cov=deepmd --cov=deepmd_utils source/tests --durations=0
-    - run: source/install/test_cc_local.sh
+    - run: python -m pytest source/tests --durations=0
+      env:
+        NUM_WORKERS: 0
+    - name: Download libtorch
+      run: |
+         wget https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.2.1%2Bcu121.zip -O libtorch.zip
+         unzip libtorch.zip
+    - run: |
+        export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch
+        source/install/test_cc_local.sh
       env:
         OMP_NUM_THREADS: 1
         TF_INTRA_OP_PARALLELISM_THREADS: 1
@@ -53,18 +71,25 @@ jobs:
         CMAKE_GENERATOR: Ninja
         DP_VARIANT: cuda
         DP_USE_MPICH2: 1
-        CUDA_PATH: /usr/local/cuda-12.2
     - run: |
-        export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/dp_test/lib:$CUDA_PATH/lib64:$LD_LIBRARY_PATH
+        export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/dp_test/lib:$GITHUB_WORKSPACE/libtorch/lib:$CUDA_PATH/lib64:$LD_LIBRARY_PATH
         export PATH=$GITHUB_WORKSPACE/dp_test/bin:$PATH
-        python -m pytest -s --cov=deepmd source/lmp/tests
-        python -m pytest -s --cov=deepmd source/ipi/tests
+        python -m pytest source/lmp/tests
+        python -m pytest source/ipi/tests
       env:
         OMP_NUM_THREADS: 1
         TF_INTRA_OP_PARALLELISM_THREADS: 1
         TF_INTER_OP_PARALLELISM_THREADS: 1
         LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp
-        CUDA_PATH: /usr/local/cuda-12.2
-    - uses: codecov/codecov-action@v3
+        CUDA_VISIBLE_DEVICES: 0
+  pass:
+    name: Pass testing on CUDA
+    needs: [test_cuda]
+    runs-on: ubuntu-latest
+    if: always()
+    steps:
+    - name: Decide whether the needed jobs succeeded or failed
+      uses: re-actors/alls-green@release/v1
       with:
-        gcov: true
+        jobs: ${{ toJSON(needs) }}
+        allowed-skips: test_cuda
diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml
index 1bd78bfae0..60b5ecf0e0 100644
--- a/.github/workflows/test_python.yml
+++ b/.github/workflows/test_python.yml
@@ -1,6 +1,12 @@
 on:
   push:
+    branches-ignore:
+      - "gh-readonly-queue/**"
   pull_request:
+  merge_group:
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: true
 name: Test Python
 jobs:
   testpython:
@@ -9,12 +15,12 @@ jobs:
     strategy:
       matrix:
         include:
-          - python: 3.7
-            tf: 1.14
           - python: 3.8
             tf:
+            torch:
           - python: "3.11"
             tf:
+            torch:
 
     steps:
     - uses: actions/checkout@v4
@@ -23,25 +29,26 @@ jobs:
         python-version: ${{ matrix.python }}
         cache: 'pip'
     - uses: mpi4py/setup-mpi@v1
-      if: ${{ matrix.tf == '' }}
       with:
         mpi: openmpi
     # https://github.com/pypa/pip/issues/11770
     - run: python -m pip install -U "pip>=21.3.1,!=23.0.0"
-    - run: pip install -e .[cpu,test]
+    - run: pip install -e .[cpu,test,torch]
       env:
         TENSORFLOW_VERSION: ${{ matrix.tf }}
         DP_BUILD_TESTING: 1
     - run: pip install horovod mpi4py
-      if: ${{ matrix.tf == '' }}
       env:
         HOROVOD_WITH_TENSORFLOW: 1
+        HOROVOD_WITHOUT_PYTORCH: 1
         HOROVOD_WITHOUT_GLOO: 1
     - run: dp --version
-    - run: pytest --cov=deepmd --cov=deepmd_utils source/tests --durations=0
-    - uses: codecov/codecov-action@v3
-      with:
-        gcov: true
+    - run: pytest --cov=deepmd source/tests --durations=0
+      env:
+        NUM_WORKERS: 0
+    - uses: codecov/codecov-action@v4
+      env:
+        CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
   pass:
     name: Pass testing Python
     needs: [testpython]
diff --git a/.github/workflows/todo.yml b/.github/workflows/todo.yml
new file mode 100644
index 0000000000..2608bb1071
--- /dev/null
+++ b/.github/workflows/todo.yml
@@ -0,0 +1,20 @@
+name: TODO workflow
+on:
+  push:
+    branches:
+      - devel
+jobs:
+  build:
+    if: github.repository_owner == 'deepmodeling'
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - name: Run tdg-github-action
+      uses: ribtoks/tdg-github-action@master
+      with:
+        TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        REPO: ${{ github.repository }}
+        SHA: ${{ github.sha }}
+        REF: ${{ github.ref }}
+        EXCLUDE_PATTERN: "(source/3rdparty|.git)/.*"
+        COMMENT_ON_ISSUES: 1
diff --git a/.gitignore b/.gitignore
index 82d3e4a7da..5e30cf3167 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,7 @@
 *.bz2
 *.pyc
 *.pb
+*.DS_Store
 tmp*
 CMakeCache.txt
 CMakeFiles
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d4e89f1129..f75d0db7ae 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,119 +1,121 @@
 # See https://pre-commit.com for more information
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
--   repo: https://github.com/pre-commit/pre-commit-hooks
+  - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.5.0
     hooks:
-    -   id: trailing-whitespace
+      - id: trailing-whitespace
         exclude: "^.+\\.pbtxt$"
-    -   id: end-of-file-fixer
+      - id: end-of-file-fixer
         exclude: "^.+\\.pbtxt$"
-    -   id: check-yaml
-    -   id: check-json
-    -   id: check-added-large-files
-        args: ['--maxkb=1024', '--enforce-all']
-        # TODO: remove the following after resolved
+      - id: check-yaml
+      - id: check-json
+      - id: check-added-large-files
+        args: ["--maxkb=1024", "--enforce-all"]
         exclude: |
-            (?x)^(
-                source/tests/infer/dipolecharge_e.pbtxt|
-                source/tests/infer/deeppolar_new.pbtxt
-            )$
-    -   id: check-merge-conflict
-    -   id: check-symlinks
-    -   id: check-toml
-# Python
--   repo: https://github.com/PyCQA/isort
+          (?x)^(
+              source/tests/infer/dipolecharge_e.pbtxt|
+              source/tests/infer/deeppolar_new.pbtxt
+          )$
+      - id: check-merge-conflict
+      - id: check-symlinks
+      - id: check-toml
+  # Python
+  - repo: https://github.com/PyCQA/isort
     rev: 5.13.2
     hooks:
-    - id: isort
-      files: \.py$
-      exclude: ^source/3rdparty
--   repo: https://github.com/astral-sh/ruff-pre-commit
+      - id: isort
+        files: \.py$
+        exclude: ^source/3rdparty
+  - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.1.13
+    rev: v0.3.4
     hooks:
-    - id: ruff
-      args: ["--fix"]
-      exclude: ^source/3rdparty
-      types_or: [python, pyi, jupyter]
-    - id: ruff-format
-      exclude: ^source/3rdparty
-      types_or: [python, pyi, jupyter]
-# numpydoc
--   repo: https://github.com/Carreau/velin
+      - id: ruff
+        args: ["--fix"]
+        exclude: ^source/3rdparty
+        types_or: [python, pyi, jupyter]
+      - id: ruff-format
+        exclude: ^source/3rdparty
+        types_or: [python, pyi, jupyter]
+  # numpydoc
+  - repo: https://github.com/Carreau/velin
     rev: 0.0.12
     hooks:
-    - id: velin
-      args: ["--write"]
-      exclude: ^source/3rdparty
-# Python inside docs
--   repo: https://github.com/asottile/blacken-docs
+      - id: velin
+        args: ["--write"]
+        exclude: ^source/3rdparty
+  # Python inside docs
+  - repo: https://github.com/asottile/blacken-docs
     rev: 1.16.0
     hooks:
-    -   id: blacken-docs
-# C++
--   repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v17.0.6
+      - id: blacken-docs
+  # C++
+  - repo: https://github.com/pre-commit/mirrors-clang-format
+    rev: v18.1.2
     hooks:
-    -   id: clang-format
+      - id: clang-format
         exclude: ^source/3rdparty|source/lib/src/gpu/cudart/.+\.inc
-# CSS
--   repo: https://github.com/pre-commit/mirrors-csslint
-    rev: v1.0.5
+  # markdown, yaml, CSS, javascript
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: v4.0.0-alpha.8
+    hooks:
+      - id: prettier
+        types_or: [markdown, yaml, css]
+        # workflow files cannot be modified by pre-commit.ci
+        exclude: ^(source/3rdparty|\.github/workflows|\.clang-format)
+  # Shell
+  - repo: https://github.com/scop/pre-commit-shfmt
+    rev: v3.8.0-1
+    hooks:
+      - id: shfmt
+  # CMake
+  - repo: https://github.com/cheshirekow/cmake-format-precommit
+    rev: v0.6.13
+    hooks:
+      - id: cmake-format
+      #- id: cmake-lint
+  # license header
+  - repo: https://github.com/Lucas-C/pre-commit-hooks
+    rev: v1.5.5
     hooks:
-    -   id: csslint
-# Shell
-- repo: https://github.com/scop/pre-commit-shfmt
-  rev: v3.7.0-4
-  hooks:
-    - id: shfmt
-# CMake
-- repo: https://github.com/cheshirekow/cmake-format-precommit
-  rev: v0.6.13
-  hooks:
-    - id: cmake-format
-    #- id: cmake-lint
-# license header
-- repo: https://github.com/Lucas-C/pre-commit-hooks
-  rev: v1.5.4
-  hooks:
-    # C++, js
-    -   id: insert-license
+      # C++, js
+      - id: insert-license
         files: \.(c|cc|cpp|js|ts|h|hpp)$
         args:
-        - --license-filepath
-        - .license-header.txt
-        - --comment-style
-        - //
-        - --no-extra-eol
+          - --license-filepath
+          - .license-header.txt
+          - --comment-style
+          - //
+          - --no-extra-eol
         exclude: ^source/3rdparty|source/lib/src/gpu/cudart/.+\.inc
-    # CSS
-    -   id: insert-license
+      # CSS
+      - id: insert-license
         files: \.(css|scss)$
         args:
-        - --license-filepath
-        - .license-header.txt
-        - --comment-style
-        - /*| *| */
-        - --no-extra-eol
-    # Python
-    -   id: insert-license
+          - --license-filepath
+          - .license-header.txt
+          - --comment-style
+          - /*| *| */
+          - --no-extra-eol
+      # Python
+      - id: insert-license
         files: \.(py|pyx)$
         args:
-        - --license-filepath
-        - .license-header.txt
-        - --comment-style
-        - "#"
-        - --no-extra-eol
+          - --license-filepath
+          - .license-header.txt
+          - --comment-style
+          - "#"
+          - --no-extra-eol
         exclude: ^source/3rdparty
-    # HTML
-    -   id: insert-license
+      # HTML
+      - id: insert-license
         files: \.(html|vue|xml)$
         args:
-        - --license-filepath
-        - .license-header.txt
-        - --comment-style
-        - <!--|  ~|  -->
-        - --no-extra-eol
+          - --license-filepath
+          - .license-header.txt
+          - --comment-style
+          - <!--|  ~|  -->
+          - --no-extra-eol
 ci:
   autoupdate_branch: devel
diff --git a/CITATIONS.bib b/CITATIONS.bib
index ac682b28f7..425c00ac42 100644
--- a/CITATIONS.bib
+++ b/CITATIONS.bib
@@ -105,6 +105,25 @@ @misc{Zhang_2022_DPA1
     doi = {10.48550/arXiv.2208.08236},
 }
 
+@misc{Zhang_2023_DPA2,
+    annote = {DPA-2},
+    author = {Duo Zhang and Xinzijian Liu and Xiangyu Zhang and Chengqian Zhang and
+              Chun Cai and Hangrui Bi and Yiming Du and Xuejian Qin and Jiameng Huang
+              and Bowen Li and Yifan Shan and Jinzhe Zeng and Yuzhi Zhang and Siyuan
+              Liu and Yifan Li and Junhan Chang and Xinyan Wang and Shuo Zhou and
+              Jianchuan Liu and Xiaoshan Luo and Zhenyu Wang and Wanrun Jiang and Jing
+              Wu and Yudi Yang and Jiyuan Yang and Manyi Yang and Fu-Qiang Gong and
+              Linshuang Zhang and Mengchao Shi and Fu-Zhi Dai and Darrin M. York and
+              Shi Liu and Tong Zhu and Zhicheng Zhong and Jian Lv and Jun Cheng and
+              Weile Jia and Mohan Chen and Guolin Ke and Weinan E and Linfeng Zhang
+              and Han Wang},
+    title = {{DPA-2: Towards a universal large atomic model for molecular and material
+              simulation}},
+    publisher = {arXiv},
+    year = {2023},
+    doi = {10.48550/arXiv.2312.15492},
+}
+
 @article{Zhang_PhysPlasmas_2020_v27_p122704,
     annote = {frame-specific parameters (e.g. electronic temperature)},
     author = {Zhang, Yuzhi and Gao, Chang and Liu, Qianrui and Zhang, Linfeng and Wang, Han and Chen, Mohan},
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e43e23beb6..cb08609c2b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -7,6 +7,7 @@ Welcome to [DeePMD-kit](https://github.com/deepmodeling/deepmd-kit)!
 You can either make a code contribution, help improve our document or offer help to other users. Your help is always appreciated. Come and have fun!
 
 ### Code contribution
+
 You can start from any one of the following items to help improve deepmd-kit
 
 - Smash a bug
@@ -18,6 +19,7 @@ See [here](#before-you-contribute) for some before-hand heads-up.
 See [here](#how-to-contribute) to learn how to contribute.
 
 ### Document improvement
+
 You can start from any one of the following items to help improve [DeePMD-kit Docs](https://deepmd.readthedocs.io/en/latest/?badge=latest):
 
 - Fix typos or format (punctuation, space, indentation, code block, etc.)
@@ -26,21 +28,27 @@ You can start from any one of the following items to help improve [DeePMD-kit Do
 - Translate docs changes from English to Chinese
 
 ### Offer help
+
 You can help other users of deepmd-kit in the following way
 
 - Submit, reply to, and resolve [issues](https://github.com/deepmodeling/deepmd-kit/issues)
 - (Advanced) Review Pull Requests created by others
 
 ## Before you contribute
+
 ### Overview of DeePMD-kit
+
 Currently, we maintain two main branch:
+
 - master: stable branch with version tag
-- devel :  branch for developers
+- devel : branch for developers
 
 ### Developer guide
-See [here](doc/development/index.md) for coding conventions, API and other needs-to-know of the code.
+
+See [documentation](https://deepmd.readthedocs.io/) for coding conventions, API and other needs-to-know of the code.
 
 ## How to contribute
+
 Please perform the following steps to create your Pull Request to this repository. If don't like to use commands, you can also use [GitHub Desktop](https://desktop.github.com/), which is easier to get started. Go to [git documentation](https://git-scm.com/doc) if you want to really master git.
 
 ### Step 1: Fork the repository
@@ -51,79 +59,82 @@ Please perform the following steps to create your Pull Request to this repositor
 ### Step 2: Clone the forked repository to local storage and set configurations
 
 1. Clone your own repo, not the public repo (from deepmodeling) ! And change the branch to devel.
-    ```bash
-    git clone https://github.com/$username/deepmd-kit.git
-    # Replace `$username` with your GitHub ID
 
-    git checkout devel
-    ```
+   ```bash
+   git clone https://github.com/$username/deepmd-kit.git
+   # Replace `$username` with your GitHub ID
+
+   git checkout devel
+   ```
 
 2. Add deepmodeling's repo as your remote repo, we can name it "upstream". And fetch upstream's latest codes to your workstation.
-    ```bash
-    git remote add upstream https://github.com/deepmodeling/deepmd-kit.git
-    # After you add a remote repo, your local repo will be automatically named "origin".
 
-    git fetch upstream
+   ```bash
+   git remote add upstream https://github.com/deepmodeling/deepmd-kit.git
+   # After you add a remote repo, your local repo will be automatically named "origin".
 
-    # If your current codes are behind the latest codes, you should merge latest codes first.
-    # Notice you should merge from "devel"!
-    git merge upstream/devel
-    ```
+   git fetch upstream
+
+   # If your current codes are behind the latest codes, you should merge latest codes first.
+   # Notice you should merge from "devel"!
+   git merge upstream/devel
+   ```
 
 3. Modify your codes and design unit tests.
 
 4. Commit your changes
-    ```bash
-    git status # Checks the local status
-    git add <file> ... # Adds the file(s) you want to commit. If you want to commit all changes, you can directly use `git add.`
-    git commit -m "commit-message: update the xx"
-    ```
+
+   ```bash
+   git status # Checks the local status
+   git add <file> ... # Adds the file(s) you want to commit. If you want to commit all changes, you can directly use `git add.`
+   git commit -m "commit-message: update the xx"
+   ```
 
 5. Push the changed codes to your original repo on github.
-    ```bash
-    git push origin devel
-    ```
+   ```bash
+   git push origin devel
+   ```
 
 ### Alternatively: Create a new branch
 
 1. Get your local master up-to-date with upstream/master.
 
-    ```bash
-    cd $working_dir/deepmd-kit
-    git fetch upstream
-    git checkout master
-    git rebase upstream/master
-    ```
+   ```bash
+   cd $working_dir/deepmd-kit
+   git fetch upstream
+   git checkout master
+   git rebase upstream/master
+   ```
 
 2. Create a new branch based on the master branch.
 
-    ```bash
-    git checkout -b new-branch-name
-    ```
+   ```bash
+   git checkout -b new-branch-name
+   ```
 
 3. Modify your codes and design unit tests.
 
 4. Commit your changes
 
-    ```bash
-    git status # Checks the local status
-    git add <file> ... # Adds the file(s) you want to commit. If you want to commit all changes, you can directly use `git add.`
-    git commit -m "commit-message: update the xx"
-    ```
+   ```bash
+   git status # Checks the local status
+   git add <file> ... # Adds the file(s) you want to commit. If you want to commit all changes, you can directly use `git add.`
+   git commit -m "commit-message: update the xx"
+   ```
 
 5. Keep your branch in sync with upstream/master
 
-    ```bash
-    # While on your new branch
-    git fetch upstream
-    git rebase upstream/master
-    ```
+   ```bash
+   # While on your new branch
+   git fetch upstream
+   git rebase upstream/master
+   ```
 
 6. Push your changes to the remote
 
-    ```bash
-    git push -u origin new-branch-name # "-u" is used to track the remote branch from origin
-    ```
+   ```bash
+   git push -u origin new-branch-name # "-u" is used to track the remote branch from origin
+   ```
 
 ### Step 3: Create a pull request
 
@@ -133,4 +144,5 @@ Please perform the following steps to create your Pull Request to this repositor
 Now, your PR is successfully submitted! After this PR is merged, you will automatically become a contributor to DeePMD-kit.
 
 ## Contact us
+
 E-mail: contact@deepmodeling.org
diff --git a/README.md b/README.md
index 81fdead098..3838f2596a 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,9 @@
 [<picture><source media="(prefers-color-scheme: dark)" srcset="./doc/_static/logo-dark.svg"><source media="(prefers-color-scheme: light)" srcset="./doc/_static/logo.svg"><img alt="DeePMD-kit logo" src="./doc/_static/logo.svg"></picture>](./doc/logo.md)
 
---------------------------------------------------------------------------------
+---
+
+# DeePMD-kit
 
-<span style="font-size:larger;">DeePMD-kit Manual</span>
-========
 [![GitHub release](https://img.shields.io/github/release/deepmodeling/deepmd-kit.svg?maxAge=86400)](https://github.com/deepmodeling/deepmd-kit/releases)
 [![offline packages](https://img.shields.io/github/downloads/deepmodeling/deepmd-kit/total?label=offline%20packages)](https://github.com/deepmodeling/deepmd-kit/releases)
 [![conda-forge](https://img.shields.io/conda/dn/conda-forge/deepmd-kit?color=red&label=conda-forge&logo=conda-forge)](https://anaconda.org/conda-forge/deepmd-kit)
@@ -11,168 +11,93 @@
 [![docker pull](https://img.shields.io/docker/pulls/deepmodeling/deepmd-kit)](https://hub.docker.com/r/deepmodeling/deepmd-kit)
 [![Documentation Status](https://readthedocs.org/projects/deepmd/badge/)](https://deepmd.readthedocs.io/)
 
-# Table of contents
-- [About DeePMD-kit](#about-deepmd-kit)
- 	- [Highlights in v2.0](#highlights-in-deepmd-kit-v2.0)
- 	- [Highlighted features](#highlighted-features)
- 	- [License and credits](#license-and-credits)
- 	- [Deep Potential in a nutshell](#deep-potential-in-a-nutshell)
-- [Download and install](#download-and-install)
-- [Use DeePMD-kit](#use-deepmd-kit)
-- [Code structure](#code-structure)
-- [Troubleshooting](#troubleshooting)
-
-# About DeePMD-kit
+## About DeePMD-kit
+
 DeePMD-kit is a package written in Python/C++, designed to minimize the effort required to build deep learning-based model of interatomic potential energy and force field and to perform molecular dynamics (MD). This brings new hopes to addressing the accuracy-versus-efficiency dilemma in molecular simulations. Applications of DeePMD-kit span from finite molecules to extended systems and from metallic systems to chemically bonded systems.
 
 For more information, check the [documentation](https://deepmd.readthedocs.io/).
 
-# Highlights in DeePMD-kit v2.0
-* [Model compression](doc/freeze/compress.md). Accelerate the efficiency of model inference 4-15 times.
-* [New descriptors](doc/model/overall.md). Including [`se_e2_r`](doc/model/train-se-e2-r.md) and [`se_e3`](doc/model/train-se-e3.md).
-* [Hybridization of descriptors](doc/model/train-hybrid.md). Hybrid descriptor constructed from the concatenation of several descriptors.
-* [Atom type embedding](doc/model/train-se-e2-a-tebd.md). Enable atom-type embedding to decline training complexity and refine performance.
-* Training and inference of the dipole (vector) and polarizability (matrix).
-* Split of training and validation dataset.
-* Optimized training on GPUs.
-
-## Highlighted features
-* **interfaced with TensorFlow**, one of the most popular deep learning frameworks, making the training process highly automatic and efficient, in addition, Tensorboard can be used to visualize training procedures.
-* **interfaced with high-performance classical MD and quantum (path-integral) MD packages**, i.e., LAMMPS and i-PI, respectively.
-* **implements the Deep Potential series models**, which have been successfully applied to finite and extended systems including organic molecules, metals, semiconductors, insulators, etc.
-* **implements MPI and GPU supports**, making it highly efficient for high-performance parallel and distributed computing.
-* **highly modularized**, easy to adapt to different descriptors for deep learning-based potential energy models.
-
-## License and credits
+### Highlighted features
+
+- **interfaced with multiple backends**, including TensorFlow and PyTorch, the most popular deep learning frameworks, making the training process highly automatic and efficient.
+- **interfaced with high-performance classical MD and quantum (path-integral) MD packages**, including LAMMPS, i-PI, AMBER, CP2K, GROMACS, OpenMM, and ABUCUS.
+- **implements the Deep Potential series models**, which have been successfully applied to finite and extended systems, including organic molecules, metals, semiconductors, insulators, etc.
+- **implements MPI and GPU supports**, making it highly efficient for high-performance parallel and distributed computing.
+- **highly modularized**, easy to adapt to different descriptors for deep learning-based potential energy models.
+
+### License and credits
+
 The project DeePMD-kit is licensed under [GNU LGPLv3.0](./LICENSE).
 If you use this code in any future publications, please cite the following publications for general purpose:
+
 - Han Wang, Linfeng Zhang, Jiequn Han, and Weinan E. "DeePMD-kit: A deep learning package for many-body potential energy representation and molecular dynamics." Computer Physics Communications 228 (2018): 178-184.
-[![doi:10.1016/j.cpc.2018.03.016](https://img.shields.io/badge/DOI-10.1016%2Fj.cpc.2018.03.016-blue)](https://doi.org/10.1016/j.cpc.2018.03.016)
-[![Citations](https://citations.njzjz.win/10.1016/j.cpc.2018.03.016)](https://badge.dimensions.ai/details/doi/10.1016/j.cpc.2018.03.016)
+  [![doi:10.1016/j.cpc.2018.03.016](https://img.shields.io/badge/DOI-10.1016%2Fj.cpc.2018.03.016-blue)](https://doi.org/10.1016/j.cpc.2018.03.016)
+  [![Citations](https://citations.njzjz.win/10.1016/j.cpc.2018.03.016)](https://badge.dimensions.ai/details/doi/10.1016/j.cpc.2018.03.016)
 - Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang. "DeePMD-kit v2: A software package for deep potential models." J. Chem. Phys. 159 (2023): 054801.
-[![doi:10.1063/5.0155600](https://img.shields.io/badge/DOI-10.1063%2F5.0155600-blue)](https://doi.org/10.1063/5.0155600)
-[![Citations](https://citations.njzjz.win/10.1063/5.0155600)](https://badge.dimensions.ai/details/doi/10.1063/5.0155600)
+  [![doi:10.1063/5.0155600](https://img.shields.io/badge/DOI-10.1063%2F5.0155600-blue)](https://doi.org/10.1063/5.0155600)
+  [![Citations](https://citations.njzjz.win/10.1063/5.0155600)](https://badge.dimensions.ai/details/doi/10.1063/5.0155600)
 
 In addition, please follow [the bib file](CITATIONS.bib) to cite the methods you used.
 
-## Deep Potential in a nutshell
-The goal of Deep Potential is to employ deep learning techniques and realize an inter-atomic potential energy model that is general, accurate, computationally efficient and scalable. The key component is to respect the extensive and symmetry-invariant properties of a potential energy model by assigning a local reference frame and a local environment to each atom. Each environment contains a finite number of atoms, whose local coordinates are arranged in a symmetry-preserving way. These local coordinates are then transformed, through a sub-network, to so-called *atomic energy*. Summing up all the atomic energies gives the potential energy of the system.
+### Highlights in major versions
+
+#### Initial version
 
-The initial proof of concept is in the [Deep Potential][1] paper, which employed an approach that was devised to train the neural network model with the potential energy only. With typical *ab initio* molecular dynamics (AIMD) datasets this is insufficient to reproduce the trajectories. The Deep Potential Molecular Dynamics ([DeePMD][2]) model overcomes this limitation. In addition, the learning process in DeePMD improves significantly over the Deep Potential method thanks to the introduction of a flexible family of loss functions. The NN potential constructed in this way reproduces accurately the AIMD trajectories, both classical and quantum (path integral), in extended and finite systems, at a cost that scales linearly with system size and is always several orders of magnitude lower than that of equivalent AIMD simulations.
+The goal of Deep Potential is to employ deep learning techniques and realize an inter-atomic potential energy model that is general, accurate, computationally efficient and scalable. The key component is to respect the extensive and symmetry-invariant properties of a potential energy model by assigning a local reference frame and a local environment to each atom. Each environment contains a finite number of atoms, whose local coordinates are arranged in a symmetry-preserving way. These local coordinates are then transformed, through a sub-network, to so-called _atomic energy_. Summing up all the atomic energies gives the potential energy of the system.
+
+The initial proof of concept is in the [Deep Potential][1] paper, which employed an approach that was devised to train the neural network model with the potential energy only. With typical _ab initio_ molecular dynamics (AIMD) datasets this is insufficient to reproduce the trajectories. The Deep Potential Molecular Dynamics ([DeePMD][2]) model overcomes this limitation. In addition, the learning process in DeePMD improves significantly over the Deep Potential method thanks to the introduction of a flexible family of loss functions. The NN potential constructed in this way reproduces accurately the AIMD trajectories, both classical and quantum (path integral), in extended and finite systems, at a cost that scales linearly with system size and is always several orders of magnitude lower than that of equivalent AIMD simulations.
 
 Although highly efficient, the original Deep Potential model satisfies the extensive and symmetry-invariant properties of a potential energy model at the price of introducing discontinuities in the model. This has negligible influence on a trajectory from canonical sampling but might not be sufficient for calculations of dynamical and mechanical properties. These points motivated us to develop the Deep Potential-Smooth Edition ([DeepPot-SE][3]) model, which replaces the non-smooth local frame with a smooth and adaptive embedding network. DeepPot-SE shows great ability in modeling many kinds of systems that are of interest in the fields of physics, chemistry, biology, and materials science.
 
 In addition to building up potential energy models, DeePMD-kit can also be used to build up coarse-grained models. In these models, the quantity that we want to parameterize is the free energy, or the coarse-grained potential, of the coarse-grained particles. See the [DeePCG paper][4] for more details.
 
-See [our latest paper](https://doi.org/10.48550/arXiv.2304.09409) for details of all features.
-
-# Download and install
-
-Please follow our [GitHub](https://github.com/deepmodeling/deepmd-kit) webpage to download the [latest released version](https://github.com/deepmodeling/deepmd-kit/tree/master) and [development version](https://github.com/deepmodeling/deepmd-kit/tree/devel).
-
-DeePMD-kit offers multiple installation methods. It is recommended to use easy methods like [offline packages](doc/install/easy-install.md#offline-packages), [conda](doc/install/easy-install.md#with-conda) and [docker](doc/install/easy-install.md#with-docker).
-
-One may manually install DeePMD-kit by following the instructions on [installing the Python interface](doc/install/install-from-source.md#install-the-python-interface) and [installing the C++ interface](doc/install/install-from-source.md#install-the-c-interface). The C++ interface is necessary when using DeePMD-kit with LAMMPS, i-PI or GROMACS.
-
-
-# Use DeePMD-kit
-
-A quick start on using DeePMD-kit can be found [here](doc/getting-started/quick_start.ipynb).
-
-A full [document](doc/train/train-input-auto.rst) on options in the training input script is available.
-
-# Advanced
-
-- [Installation](doc/install/index.md)
-    - [Easy install](doc/install/easy-install.md)
-    - [Install from source code](doc/install/install-from-source.md)
-    - [Install from pre-compiled C library](doc/install/install-from-c-library.md)
-    - [Install LAMMPS](doc/install/install-lammps.md)
-    - [Install i-PI](doc/install/install-ipi.md)
-    - [Install GROMACS](doc/install/install-gromacs.md)
-    - [Building conda packages](doc/install/build-conda.md)
-    - [Install Node.js interface](doc/install/install-nodejs.md)
-    - [Easy install the latest development version](doc/install/easy-install-dev.md)
-- [Data](doc/data/index.md)
-    - [System](doc/data/system.md)
-    - [Formats of a system](doc/data/data-conv.md)
-    - [Prepare data with dpdata](doc/data/dpdata.md)
-- [Model](doc/model/index.md)
-    - [Overall](doc/model/overall.md)
-    - [Descriptor `"se_e2_a"`](doc/model/train-se-e2-a.md)
-    - [Descriptor `"se_e2_r"`](doc/model/train-se-e2-r.md)
-    - [Descriptor `"se_e3"`](doc/model/train-se-e3.md)
-    - [Descriptor `"se_atten"`](doc/model/train-se-atten.md)
-    - [Descriptor `"se_atten_v2"`](doc/model/train-se-atten.md#descriptor-se_atten_v2)
-    - [Descriptor `"hybrid"`](doc/model/train-hybrid.md)
-    - [Descriptor `sel`](doc/model/sel.md)
-    - [Fit energy](doc/model/train-energy.md)
-    - [Fit spin energy](doc/model/train-energy-spin.md)
-    - [Fit `tensor` like `Dipole` and `Polarizability`](doc/model/train-fitting-tensor.md)
-    - [Fit electronic density of states (DOS)](doc/model/train-fitting-dos.md)
-    - [Train a Deep Potential model using `type embedding` approach](doc/model/train-se-e2-a-tebd.md)
-    - [Deep potential long-range](doc/model/dplr.md)
-    - [Deep Potential - Range Correction (DPRc)](doc/model/dprc.md)
-    - [Linear model](doc/model/linear.md)
-    - [Interpolation or combination with a pairwise potential](doc/model/pairtab.md)
-- [Training](doc/train/index.md)
-    - [Training a model](doc/train/training.md)
-    - [Advanced options](doc/train/training-advanced.md)
-    - [Parallel training](doc/train/parallel-training.md)
-    - [Multi-task training](doc/train/multi-task-training.md)
-    - [TensorBoard Usage](doc/train/tensorboard.md)
-    - [Known limitations of using GPUs](doc/train/gpu-limitations.md)
-    - [Training Parameters](doc/train-input-auto.rst)
-- [Freeze and Compress](doc/freeze/index.rst)
-    - [Freeze a model](doc/freeze/freeze.md)
-    - [Compress a model](doc/freeze/compress.md)
-- [Test](doc/test/index.rst)
-    - [Test a model](doc/test/test.md)
-    - [Calculate Model Deviation](doc/test/model-deviation.md)
-- [Inference](doc/inference/index.rst)
-    - [Python interface](doc/inference/python.md)
-    - [C++ interface](doc/inference/cxx.md)
-    - [Node.js interface](doc/inference/nodejs.md)
-- [Integrate with third-party packages](doc/third-party/index.rst)
-    - [Use deep potential with ASE](doc/third-party/ase.md)
-    - [Run MD with LAMMPS](doc/third-party/lammps-command.md)
-    - [Run path-integral MD with i-PI](doc/third-party/ipi.md)
-    - [Run MD with GROMACS](doc/third-party/gromacs.md)
-    - [Interfaces out of DeePMD-kit](doc/third-party/out-of-deepmd-kit.md)
-- [Use NVNMD](doc/nvnmd/index.md)
-
-# Code structure
+#### v1
+
+- Code refactor to make it highly modularized.
+- GPU support for descriptors.
+
+#### v2
+
+- Model compression. Accelerate the efficiency of model inference 4-15 times.
+- New descriptors. Including `se_e2_r`, `se_e3`, and `se_atten` (DPA-1).
+- Hybridization of descriptors. Hybrid descriptor constructed from the concatenation of several descriptors.
+- Atom type embedding. Enable atom-type embedding to decline training complexity and refine performance.
+- Training and inference of the dipole (vector) and polarizability (matrix).
+- Split of training and validation dataset.
+- Optimized training on GPUs, including CUDA and ROCm.
+- Non-von-Neumann.
+- C API to interface with the third-party packages.
+
+See [our latest paper](https://doi.org/10.1063/5.0155600) for details of all features until v2.2.3.
+
+#### v3
+
+- Multiple backends supported. Add a PyTorch backend.
+- The DPA-2 model.
+
+## Install and use DeePMD-kit
+
+Please read the [online documentation](https://deepmd.readthedocs.io/) for how to install and use DeePMD-kit.
+
+## Code structure
 
 The code is organized as follows:
 
-* `data/raw`: tools manipulating the raw data files.
-* `examples`: examples.
-* `deepmd`: DeePMD-kit python modules.
-* `source/api_cc`: source code of DeePMD-kit C++ API.
-* `source/ipi`: source code of i-PI client.
-* `source/lib`: source code of DeePMD-kit library.
-* `source/lmp`: source code of Lammps module.
-* `source/gmx`: source code of Gromacs plugin.
-* `source/op`: TensorFlow op implementation. working with the library.
-
-
-# Troubleshooting
-
-- [Model compatibility](doc/troubleshooting/model_compatability.md)
-- [Installation](doc/troubleshooting/installation.md)
-- [The temperature undulates violently during the early stages of MD](doc/troubleshooting/md_energy_undulation.md)
-- [MD: cannot run LAMMPS after installing a new version of DeePMD-kit](doc/troubleshooting/md_version_compatibility.md)
-- [Do we need to set rcut < half boxsize?](doc/troubleshooting/howtoset_rcut.md)
-- [How to set sel?](doc/troubleshooting/howtoset_sel.md)
-- [How to control the parallelism of a job?](doc/troubleshooting/howtoset_num_nodes.md)
-- [How to tune Fitting/embedding-net size?](doc/troubleshooting/howtoset_netsize.md)
-- [Why does a model have low precision?](doc/troubleshooting/precision.md)
+- `examples`: examples.
+- `deepmd`: DeePMD-kit python modules.
+- `source/lib`: source code of the core library.
+- `source/op`: Operator (OP) implementation.
+- `source/api_cc`: source code of DeePMD-kit C++ API.
+- `source/api_c`: source code of the C API.
+- `source/nodejs`: source code of the Node.js API.
+- `source/ipi`: source code of i-PI client.
+- `source/lmp`: source code of Lammps module.
+- `source/gmx`: source code of Gromacs plugin.
 
 # Contributing
 
 See [DeePMD-kit Contributing Guide](CONTRIBUTING.md) to become a contributor! 🤓
 
-
 [1]: https://arxiv.org/abs/1707.01478
 [2]: https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.120.143001
 [3]: https://arxiv.org/abs/1805.09003
diff --git a/backend/dp_backend.py b/backend/dp_backend.py
index d28afdb239..2ca0ff2f93 100644
--- a/backend/dp_backend.py
+++ b/backend/dp_backend.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 """A PEP-517 backend to find TensorFlow."""
+
 from typing import (
     List,
 )
diff --git a/backend/dynamic_metadata.py b/backend/dynamic_metadata.py
index ab955c3cf8..2a66ff065c 100644
--- a/backend/dynamic_metadata.py
+++ b/backend/dynamic_metadata.py
@@ -1,4 +1,8 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import sys
+from pathlib import (
+    Path,
+)
 from typing import (
     Dict,
     List,
@@ -12,6 +16,11 @@
     get_argument_from_env,
 )
 
+if sys.version_info >= (3, 11):
+    import tomllib
+else:
+    import tomli as tomllib
+
 __all__ = ["dynamic_metadata"]
 
 
@@ -22,70 +31,24 @@ def __dir__() -> List[str]:
 def dynamic_metadata(
     field: str,
     settings: Optional[Dict[str, object]] = None,
-) -> str:
+):
     assert field in ["optional-dependencies", "entry-points", "scripts"]
     _, _, find_libpython_requires, extra_scripts, tf_version = get_argument_from_env()
+    with Path("pyproject.toml").open("rb") as f:
+        pyproject = tomllib.load(f)
+
     if field == "scripts":
         return {
-            "dp": "deepmd_utils.main:main",
+            **pyproject["tool"]["deepmd_build_backend"]["scripts"],
             **extra_scripts,
         }
     elif field == "optional-dependencies":
+        optional_dependencies = pyproject["tool"]["deepmd_build_backend"][
+            "optional-dependencies"
+        ]
+        optional_dependencies["lmp"].extend(find_libpython_requires)
+        optional_dependencies["ipi"].extend(find_libpython_requires)
         return {
-            "test": [
-                "dpdata>=0.1.9",
-                "ase",
-                "pytest",
-                "pytest-cov",
-                "pytest-sugar",
-                "dpgui",
-            ],
-            "docs": [
-                "sphinx>=3.1.1",
-                "sphinx_rtd_theme>=1.0.0rc1",
-                "sphinx_markdown_tables",
-                "myst-nb>=1.0.0rc0",
-                "myst-parser>=0.19.2",
-                "breathe",
-                "exhale",
-                "numpydoc",
-                "ase",
-                "deepmodeling-sphinx>=0.1.0",
-                "dargs>=0.3.4",
-                "sphinx-argparse",
-                "pygments-lammps",
-                "sphinxcontrib-bibtex",
-            ],
-            "lmp": [
-                "lammps~=2023.8.2.2.0",
-                *find_libpython_requires,
-            ],
-            "ipi": [
-                "i-PI",
-                *find_libpython_requires,
-            ],
-            "gui": [
-                "dpgui",
-            ],
+            **optional_dependencies,
             **get_tf_requirement(tf_version),
-            "cu11": [
-                "nvidia-cuda-runtime-cu11",
-                "nvidia-cublas-cu11",
-                "nvidia-cufft-cu11",
-                "nvidia-curand-cu11",
-                "nvidia-cusolver-cu11",
-                "nvidia-cusparse-cu11",
-                "nvidia-cudnn-cu11",
-                "nvidia-cuda-nvcc-cu11",
-            ],
-            "cu12": [
-                "nvidia-cuda-runtime-cu12",
-                "nvidia-cublas-cu12",
-                "nvidia-cufft-cu12",
-                "nvidia-curand-cu12",
-                "nvidia-cusolver-cu12",
-                "nvidia-cusparse-cu12",
-                "nvidia-cudnn-cu12",
-                "nvidia-cuda-nvcc-cu12",
-            ],
         }
diff --git a/backend/find_pytorch.py b/backend/find_pytorch.py
new file mode 100644
index 0000000000..f039b6f289
--- /dev/null
+++ b/backend/find_pytorch.py
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import os
+import site
+from functools import (
+    lru_cache,
+)
+from importlib.machinery import (
+    FileFinder,
+)
+from importlib.util import (
+    find_spec,
+)
+from pathlib import (
+    Path,
+)
+from sysconfig import (
+    get_path,
+)
+from typing import (
+    Optional,
+)
+
+
+@lru_cache
+def find_pytorch() -> Optional[str]:
+    """Find PyTorch library.
+
+    Tries to find PyTorch in the order of:
+
+    1. Environment variable `PYTORCH_ROOT` if set
+    2. The current Python environment.
+    3. user site packages directory if enabled
+    4. system site packages directory (purelib)
+
+    Considering the default PyTorch package still uses old CXX11 ABI, we
+    cannot install it automatically.
+
+    Returns
+    -------
+    str, optional
+        PyTorch library path if found.
+    """
+    if os.environ.get("DP_ENABLE_PYTORCH", "0") == "0":
+        return None
+    pt_spec = None
+
+    if (pt_spec is None or not pt_spec) and os.environ.get("PYTORCH_ROOT") is not None:
+        site_packages = Path(os.environ.get("PYTORCH_ROOT")).parent.absolute()
+        pt_spec = FileFinder(str(site_packages)).find_spec("torch")
+
+    # get pytorch spec
+    # note: isolated build will not work for backend
+    if pt_spec is None or not pt_spec:
+        pt_spec = find_spec("torch")
+
+    if not pt_spec and site.ENABLE_USER_SITE:
+        # first search TF from user site-packages before global site-packages
+        site_packages = site.getusersitepackages()
+        if site_packages:
+            pt_spec = FileFinder(site_packages).find_spec("torch")
+
+    if not pt_spec:
+        # purelib gets site-packages path
+        site_packages = get_path("purelib")
+        if site_packages:
+            pt_spec = FileFinder(site_packages).find_spec("torch")
+
+    # get install dir from spec
+    try:
+        pt_install_dir = pt_spec.submodule_search_locations[0]  # type: ignore
+        # AttributeError if ft_spec is None
+        # TypeError if submodule_search_locations are None
+        # IndexError if submodule_search_locations is an empty list
+    except (AttributeError, TypeError, IndexError):
+        pt_install_dir = None
+    return pt_install_dir
diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py
index 08a73f7252..4d63f3118d 100644
--- a/backend/find_tensorflow.py
+++ b/backend/find_tensorflow.py
@@ -28,7 +28,7 @@
 )
 
 
-@lru_cache()
+@lru_cache
 def find_tensorflow() -> Tuple[Optional[str], List[str]]:
     """Find TensorFlow library.
 
@@ -47,15 +47,11 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]:
     list of str
         TensorFlow requirement if not found. Empty if found.
     """
+    if os.environ.get("DP_ENABLE_TENSORFLOW", "1") == "0":
+        return None, []
     requires = []
 
     tf_spec = None
-    if os.environ.get("CIBUILDWHEEL", "0") == "1" and os.environ.get(
-        "CIBW_BUILD", ""
-    ).endswith("macosx_arm64"):
-        # cibuildwheel cross build
-        site_packages = Path(os.environ.get("RUNNER_TEMP")) / "tensorflow"
-        tf_spec = FileFinder(str(site_packages)).find_spec("tensorflow")
 
     if (tf_spec is None or not tf_spec) and os.environ.get(
         "TENSORFLOW_ROOT"
@@ -87,6 +83,7 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]:
         # TypeError if submodule_search_locations are None
         # IndexError if submodule_search_locations is an empty list
     except (AttributeError, TypeError, IndexError):
+        tf_version = ""
         if os.environ.get("CIBUILDWHEEL", "0") == "1":
             cuda_version = os.environ.get("CUDA_VERSION", "12.2")
             if cuda_version == "" or cuda_version in SpecifierSet(">=12,<13"):
@@ -103,15 +100,16 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]:
                         "tensorflow-cpu>=2.5.0rc0,<2.15; platform_machine=='x86_64' and platform_system == 'Linux'",
                     ]
                 )
+                tf_version = "2.14.1"
             else:
                 raise RuntimeError("Unsupported CUDA version")
-        requires.extend(get_tf_requirement()["cpu"])
+        requires.extend(get_tf_requirement(tf_version)["cpu"])
         # setuptools will re-find tensorflow after installing setup_requires
         tf_install_dir = None
     return tf_install_dir, requires
 
 
-@lru_cache()
+@lru_cache
 def get_tf_requirement(tf_version: str = "") -> dict:
     """Get TensorFlow requirement (CPU) when TF is not installed.
 
@@ -127,6 +125,12 @@ def get_tf_requirement(tf_version: str = "") -> dict:
     dict
         TensorFlow requirement, including cpu and gpu.
     """
+    if tf_version is None:
+        return {
+            "cpu": [],
+            "gpu": [],
+            "mpi": [],
+        }
     if tf_version == "":
         tf_version = os.environ.get("TENSORFLOW_VERSION", "")
 
@@ -134,6 +138,11 @@ def get_tf_requirement(tf_version: str = "") -> dict:
     extra_select = {}
     if not (tf_version == "" or tf_version in SpecifierSet(">=2.12", prereleases=True)):
         extra_requires.append("protobuf<3.20")
+    # keras 3 is not compatible with tf.compat.v1
+    if tf_version == "" or tf_version in SpecifierSet(">=2.15.0rc0", prereleases=True):
+        extra_requires.append("tf-keras; python_version>='3.9'")
+        # only TF>=2.16 is compatible with Python 3.12
+        extra_requires.append("tf-keras>=2.16.0rc0; python_version>='3.12'")
     if tf_version == "" or tf_version in SpecifierSet(">=1.15", prereleases=True):
         extra_select["mpi"] = [
             "horovod",
@@ -189,7 +198,7 @@ def get_tf_requirement(tf_version: str = "") -> dict:
         }
 
 
-@lru_cache()
+@lru_cache
 def get_tf_version(tf_path: Union[str, Path]) -> str:
     """Get TF version from a TF Python library path.
 
diff --git a/backend/read_env.py b/backend/read_env.py
index 079211d4d7..c97c854a13 100644
--- a/backend/read_env.py
+++ b/backend/read_env.py
@@ -13,13 +13,16 @@
     Version,
 )
 
+from .find_pytorch import (
+    find_pytorch,
+)
 from .find_tensorflow import (
     find_tensorflow,
     get_tf_version,
 )
 
 
-@lru_cache()
+@lru_cache
 def get_argument_from_env() -> Tuple[str, list, list, dict, str]:
     """Get the arguments from environment variables.
 
@@ -78,18 +81,41 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]:
         cmake_args.append(f"-DLAMMPS_VERSION={dp_lammps_version}")
     if dp_ipi == "1":
         cmake_args.append("-DENABLE_IPI:BOOL=TRUE")
-        extra_scripts["dp_ipi"] = "deepmd.entrypoints.ipi:dp_ipi"
+        extra_scripts["dp_ipi"] = "deepmd.tf.entrypoints.ipi:dp_ipi"
 
-    tf_install_dir, _ = find_tensorflow()
-    tf_version = get_tf_version(tf_install_dir)
-    if tf_version == "" or Version(tf_version) >= Version("2.12"):
+    if os.environ.get("DP_ENABLE_TENSORFLOW", "1") == "1":
+        tf_install_dir, _ = find_tensorflow()
+        tf_version = get_tf_version(tf_install_dir)
+        if tf_version == "" or Version(tf_version) >= Version("2.12"):
+            find_libpython_requires = []
+        else:
+            find_libpython_requires = ["find_libpython"]
+        cmake_args.extend(
+            [
+                "-DENABLE_TENSORFLOW=ON",
+                f"-DTENSORFLOW_VERSION={tf_version}",
+                f"-DTENSORFLOW_ROOT:PATH={tf_install_dir}",
+            ]
+        )
+    else:
         find_libpython_requires = []
+        cmake_args.append("-DENABLE_TENSORFLOW=OFF")
+        tf_version = None
+
+    if os.environ.get("DP_ENABLE_PYTORCH", "0") == "1":
+        pt_install_dir = find_pytorch()
+        if pt_install_dir is None:
+            raise RuntimeError("Cannot find installed PyTorch.")
+        cmake_args.extend(
+            [
+                "-DENABLE_PYTORCH=ON",
+                f"-DCMAKE_PREFIX_PATH={pt_install_dir}",
+            ]
+        )
     else:
-        find_libpython_requires = ["find_libpython"]
-    cmake_args.append(f"-DTENSORFLOW_VERSION={tf_version}")
+        cmake_args.append("-DENABLE_PYTORCH=OFF")
 
     cmake_args = [
-        f"-DTENSORFLOW_ROOT:PATH={tf_install_dir}",
         "-DBUILD_PY_IF:BOOL=TRUE",
         *cmake_args,
     ]
diff --git a/codecov.yml b/codecov.yml
index 3654859423..8f639ec037 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -20,7 +20,6 @@ component_management:
       name: Python
       paths:
         - deepmd/**
-        - deepmd_utils/**
     - component_id: module_op
       name: OP
       paths:
diff --git a/data/raw/copy_raw.py b/data/raw/copy_raw.py
index 642865db86..69ccdf5c63 100755
--- a/data/raw/copy_raw.py
+++ b/data/raw/copy_raw.py
@@ -85,7 +85,7 @@ def _main():
     )
     args = parser.parse_args()
 
-    print("# copy the system by %s copies" % args.ncopies)
+    print("# copy the system by %s copies" % args.ncopies)  # noqa: T201
     assert np.all(
         np.array(args.ncopies, dtype=int) >= np.array([1, 1, 1], dtype=int)
     ), "number of copies should be larger than or equal to 1"
diff --git a/data/raw/shuffle_raw.py b/data/raw/shuffle_raw.py
index 51bb7466c9..b4fc1457e5 100755
--- a/data/raw/shuffle_raw.py
+++ b/data/raw/shuffle_raw.py
@@ -37,7 +37,7 @@ def _main():
     outpath = args.OUTPUT
 
     if not os.path.isdir(inpath):
-        print("# no input dir " + inpath + ", exit")
+        print("# no input dir " + inpath + ", exit")  # noqa: T201
         return
 
     if not os.path.isdir(outpath):
@@ -47,16 +47,16 @@ def _main():
         raws = detect_raw(inpath)
 
     if len(raws) == 0:
-        print("# no file to shuffle, exit")
+        print("# no file to shuffle, exit")  # noqa: T201
         return
 
     assert "box.raw" in raws
     tmp = np.loadtxt(os.path.join(inpath, "box.raw"))
     tmp = np.reshape(tmp, [-1, 9])
     nframe = tmp.shape[0]
-    print(nframe)
+    print(nframe)  # noqa: T201
 
-    print(
+    print(  # noqa: T201
         "# will shuffle raw files "
         + str(raws)
         + " in dir "
diff --git a/deepmd/__init__.py b/deepmd/__init__.py
index 0190bbc124..1ce4beb723 100644
--- a/deepmd/__init__.py
+++ b/deepmd/__init__.py
@@ -1,61 +1,45 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Root of the deepmd package, exposes all public classes and submodules."""
+"""DeePMD-kit is a package written in Python/C++, designed to
+minimize the effort required to build deep learning-based model
+of interatomic potential energy and force field and to perform
+molecular dynamics (MD).
 
-try:
-    from importlib import (
-        metadata,
-    )
-except ImportError:  # for Python<3.8
-    import importlib_metadata as metadata
-
-import deepmd.utils.network as network
-
-from . import (
-    cluster,
-    descriptor,
-    fit,
-    loss,
-    nvnmd,
-    utils,
-)
-from .env import (
-    set_mkl,
-)
-from .infer import (
-    DeepEval,
-    DeepPotential,
-)
-from .infer.data_modifier import (
-    DipoleChargeModifier,
-)
-
-set_mkl()
+The top module (deepmd.__init__) should not import any third-party
+modules for performance.
+"""
 
 try:
-    from deepmd_utils._version import version as __version__
+    from deepmd._version import version as __version__
 except ImportError:
     from .__about__ import (
         __version__,
     )
 
-# load third-party plugins
-try:
-    eps = metadata.entry_points(group="deepmd")
-except TypeError:
-    eps = metadata.entry_points().get("deepmd", [])
-for ep in eps:
-    ep.load()
+
+def DeepPotential(*args, **kwargs):
+    """Factory function that forwards to DeepEval (for compatbility
+    and performance).
+
+    Parameters
+    ----------
+    *args
+        positional arguments
+    **kwargs
+        keyword arguments
+
+    Returns
+    -------
+    DeepEval
+        potentials
+    """
+    from deepmd.infer import (
+        DeepPotential,
+    )
+
+    return DeepPotential(*args, **kwargs)
+
 
 __all__ = [
     "__version__",
-    "descriptor",
-    "fit",
-    "loss",
-    "utils",
-    "cluster",
-    "network",
-    "DeepEval",
     "DeepPotential",
-    "DipoleChargeModifier",
-    "nvnmd",
 ]
diff --git a/deepmd/__main__.py b/deepmd/__main__.py
index 6026b1c269..a31379b5e3 100644
--- a/deepmd/__main__.py
+++ b/deepmd/__main__.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 """Package dp entry point."""
 
-from .entrypoints.main import (
+from deepmd.main import (
     main,
 )
 
diff --git a/deepmd/backend/__init__.py b/deepmd/backend/__init__.py
new file mode 100644
index 0000000000..2b3f24c5ed
--- /dev/null
+++ b/deepmd/backend/__init__.py
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Backends.
+
+Avoid directly importing third-party libraries in this module for performance.
+"""
+
+# copy from dpdata
+from importlib import (
+    import_module,
+    metadata,
+)
+from pathlib import (
+    Path,
+)
+
+PACKAGE_BASE = "deepmd.backend"
+NOT_LOADABLE = ("__init__.py",)
+
+for module_file in Path(__file__).parent.glob("*.py"):
+    if module_file.name not in NOT_LOADABLE:
+        module_name = f".{module_file.stem}"
+        import_module(module_name, PACKAGE_BASE)
+
+# https://setuptools.readthedocs.io/en/latest/userguide/entry_point.html
+try:
+    eps = metadata.entry_points(group="deepmd.backend")
+except TypeError:
+    eps = metadata.entry_points().get("deepmd.backend", [])
+for ep in eps:
+    plugin = ep.load()
diff --git a/deepmd/backend/backend.py b/deepmd/backend/backend.py
new file mode 100644
index 0000000000..8f7bca319e
--- /dev/null
+++ b/deepmd/backend/backend.py
@@ -0,0 +1,204 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from abc import (
+    abstractmethod,
+)
+from enum import (
+    Flag,
+    auto,
+)
+from typing import (
+    TYPE_CHECKING,
+    Callable,
+    ClassVar,
+    Dict,
+    List,
+    Type,
+)
+
+from deepmd.utils.plugin import (
+    PluginVariant,
+    make_plugin_registry,
+)
+
+if TYPE_CHECKING:
+    from argparse import (
+        Namespace,
+    )
+
+    from deepmd.infer.deep_eval import (
+        DeepEvalBackend,
+    )
+    from deepmd.utils.neighbor_stat import (
+        NeighborStat,
+    )
+
+
+class Backend(PluginVariant, make_plugin_registry("backend")):
+    r"""General backend class.
+
+    Examples
+    --------
+    >>> @Backend.register("tf")
+    >>> @Backend.register("tensorflow")
+    >>> class TensorFlowBackend(Backend):
+    ...     pass
+    """
+
+    @staticmethod
+    def get_backend(key: str) -> Type["Backend"]:
+        """Get the backend by key.
+
+        Parameters
+        ----------
+        key : str
+            the key of a backend
+
+        Returns
+        -------
+        Backend
+            the backend
+        """
+        return Backend.get_class_by_type(key)
+
+    @staticmethod
+    def get_backends() -> Dict[str, Type["Backend"]]:
+        """Get all the registered backend names.
+
+        Returns
+        -------
+        list
+            all the registered backends
+        """
+        return Backend.get_plugins()
+
+    @staticmethod
+    def get_backends_by_feature(
+        feature: "Backend.Feature",
+    ) -> Dict[str, Type["Backend"]]:
+        """Get all the registered backend names with a specific feature.
+
+        Parameters
+        ----------
+        feature : Backend.Feature
+            the feature flag
+
+        Returns
+        -------
+        list
+            all the registered backends with the feature
+        """
+        return {
+            key: backend
+            for key, backend in Backend.get_backends().items()
+            if backend.features & feature
+        }
+
+    @staticmethod
+    def detect_backend_by_model(filename: str) -> Type["Backend"]:
+        """Detect the backend of the given model file.
+
+        Parameters
+        ----------
+        filename : str
+            The model file name
+        """
+        filename = str(filename).lower()
+        for backend in Backend.get_backends().values():
+            for suffix in backend.suffixes:
+                if filename.endswith(suffix):
+                    return backend
+        raise ValueError(f"Cannot detect the backend of the model file {filename}.")
+
+    class Feature(Flag):
+        """Feature flag to indicate whether the backend supports certain features."""
+
+        ENTRY_POINT = auto()
+        """Support entry point hook."""
+        DEEP_EVAL = auto()
+        """Support Deep Eval backend."""
+        NEIGHBOR_STAT = auto()
+        """Support neighbor statistics."""
+        IO = auto()
+        """Support IO hook."""
+
+    name: ClassVar[str] = "Unknown"
+    """The formal name of the backend.
+
+    To be consistent, this name should be also registered in the plugin system."""
+
+    features: ClassVar[Feature] = Feature(0)
+    """The features of the backend."""
+    suffixes: ClassVar[List[str]] = []
+    """The supported suffixes of the saved model.
+
+    The first element is considered as the default suffix."""
+
+    @abstractmethod
+    def is_available(self) -> bool:
+        """Check if the backend is available.
+
+        Returns
+        -------
+        bool
+            Whether the backend is available.
+        """
+
+    @property
+    @abstractmethod
+    def entry_point_hook(self) -> Callable[["Namespace"], None]:
+        """The entry point hook of the backend.
+
+        Returns
+        -------
+        Callable[[Namespace], None]
+            The entry point hook of the backend.
+        """
+        pass
+
+    @property
+    @abstractmethod
+    def deep_eval(self) -> Type["DeepEvalBackend"]:
+        """The Deep Eval backend of the backend.
+
+        Returns
+        -------
+        type[DeepEvalBackend]
+            The Deep Eval backend of the backend.
+        """
+        pass
+
+    @property
+    @abstractmethod
+    def neighbor_stat(self) -> Type["NeighborStat"]:
+        """The neighbor statistics of the backend.
+
+        Returns
+        -------
+        type[NeighborStat]
+            The neighbor statistics of the backend.
+        """
+        pass
+
+    @property
+    @abstractmethod
+    def serialize_hook(self) -> Callable[[str], dict]:
+        """The serialize hook to convert the model file to a dictionary.
+
+        Returns
+        -------
+        Callable[[str], dict]
+            The serialize hook of the backend.
+        """
+        pass
+
+    @property
+    @abstractmethod
+    def deserialize_hook(self) -> Callable[[str, dict], None]:
+        """The deserialize hook to convert the dictionary to a model file.
+
+        Returns
+        -------
+        Callable[[str, dict], None]
+            The deserialize hook of the backend.
+        """
+        pass
diff --git a/deepmd/backend/dpmodel.py b/deepmd/backend/dpmodel.py
new file mode 100644
index 0000000000..64df95586d
--- /dev/null
+++ b/deepmd/backend/dpmodel.py
@@ -0,0 +1,122 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    TYPE_CHECKING,
+    Callable,
+    ClassVar,
+    List,
+    Type,
+)
+
+from deepmd.backend.backend import (
+    Backend,
+)
+
+if TYPE_CHECKING:
+    from argparse import (
+        Namespace,
+    )
+
+    from deepmd.infer.deep_eval import (
+        DeepEvalBackend,
+    )
+    from deepmd.utils.neighbor_stat import (
+        NeighborStat,
+    )
+
+
+@Backend.register("dp")
+@Backend.register("dpmodel")
+@Backend.register("np")
+@Backend.register("numpy")
+class DPModelBackend(Backend):
+    """DPModel backend that uses NumPy as the reference implementation."""
+
+    name = "DPModel"
+    """The formal name of the backend."""
+    features: ClassVar[Backend.Feature] = (
+        Backend.Feature.DEEP_EVAL | Backend.Feature.NEIGHBOR_STAT | Backend.Feature.IO
+    )
+    """The features of the backend."""
+    suffixes: ClassVar[List[str]] = [".dp"]
+    """The suffixes of the backend."""
+
+    def is_available(self) -> bool:
+        """Check if the backend is available.
+
+        Returns
+        -------
+        bool
+            Whether the backend is available.
+        """
+        return True
+
+    @property
+    def entry_point_hook(self) -> Callable[["Namespace"], None]:
+        """The entry point hook of the backend.
+
+        Returns
+        -------
+        Callable[[Namespace], None]
+            The entry point hook of the backend.
+        """
+        raise NotImplementedError(f"Unsupported backend: {self.name}")
+
+    @property
+    def deep_eval(self) -> Type["DeepEvalBackend"]:
+        """The Deep Eval backend of the backend.
+
+        Returns
+        -------
+        type[DeepEvalBackend]
+            The Deep Eval backend of the backend.
+        """
+        from deepmd.dpmodel.infer.deep_eval import (
+            DeepEval,
+        )
+
+        return DeepEval
+
+    @property
+    def neighbor_stat(self) -> Type["NeighborStat"]:
+        """The neighbor statistics of the backend.
+
+        Returns
+        -------
+        type[NeighborStat]
+            The neighbor statistics of the backend.
+        """
+        from deepmd.dpmodel.utils.neighbor_stat import (
+            NeighborStat,
+        )
+
+        return NeighborStat
+
+    @property
+    def serialize_hook(self) -> Callable[[str], dict]:
+        """The serialize hook to convert the model file to a dictionary.
+
+        Returns
+        -------
+        Callable[[str], dict]
+            The serialize hook of the backend.
+        """
+        from deepmd.dpmodel.utils.network import (
+            load_dp_model,
+        )
+
+        return load_dp_model
+
+    @property
+    def deserialize_hook(self) -> Callable[[str, dict], None]:
+        """The deserialize hook to convert the dictionary to a model file.
+
+        Returns
+        -------
+        Callable[[str, dict], None]
+            The deserialize hook of the backend.
+        """
+        from deepmd.dpmodel.utils.network import (
+            save_dp_model,
+        )
+
+        return save_dp_model
diff --git a/deepmd/backend/pytorch.py b/deepmd/backend/pytorch.py
new file mode 100644
index 0000000000..fb7d30e994
--- /dev/null
+++ b/deepmd/backend/pytorch.py
@@ -0,0 +1,126 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from importlib.util import (
+    find_spec,
+)
+from typing import (
+    TYPE_CHECKING,
+    Callable,
+    ClassVar,
+    List,
+    Type,
+)
+
+from deepmd.backend.backend import (
+    Backend,
+)
+
+if TYPE_CHECKING:
+    from argparse import (
+        Namespace,
+    )
+
+    from deepmd.infer.deep_eval import (
+        DeepEvalBackend,
+    )
+    from deepmd.utils.neighbor_stat import (
+        NeighborStat,
+    )
+
+
+@Backend.register("pt")
+@Backend.register("pytorch")
+class PyTorchBackend(Backend):
+    """PyTorch backend."""
+
+    name = "PyTorch"
+    """The formal name of the backend."""
+    features: ClassVar[Backend.Feature] = (
+        Backend.Feature.ENTRY_POINT
+        | Backend.Feature.DEEP_EVAL
+        | Backend.Feature.NEIGHBOR_STAT
+        | Backend.Feature.IO
+    )
+    """The features of the backend."""
+    suffixes: ClassVar[List[str]] = [".pth", ".pt"]
+    """The suffixes of the backend."""
+
+    def is_available(self) -> bool:
+        """Check if the backend is available.
+
+        Returns
+        -------
+        bool
+            Whether the backend is available.
+        """
+        return find_spec("torch") is not None
+
+    @property
+    def entry_point_hook(self) -> Callable[["Namespace"], None]:
+        """The entry point hook of the backend.
+
+        Returns
+        -------
+        Callable[[Namespace], None]
+            The entry point hook of the backend.
+        """
+        from deepmd.pt.entrypoints.main import main as deepmd_main
+
+        return deepmd_main
+
+    @property
+    def deep_eval(self) -> Type["DeepEvalBackend"]:
+        """The Deep Eval backend of the backend.
+
+        Returns
+        -------
+        type[DeepEvalBackend]
+            The Deep Eval backend of the backend.
+        """
+        from deepmd.pt.infer.deep_eval import DeepEval as DeepEvalPT
+
+        return DeepEvalPT
+
+    @property
+    def neighbor_stat(self) -> Type["NeighborStat"]:
+        """The neighbor statistics of the backend.
+
+        Returns
+        -------
+        type[NeighborStat]
+            The neighbor statistics of the backend.
+        """
+        from deepmd.pt.utils.neighbor_stat import (
+            NeighborStat,
+        )
+
+        return NeighborStat
+
+    @property
+    def serialize_hook(self) -> Callable[[str], dict]:
+        """The serialize hook to convert the model file to a dictionary.
+
+        Returns
+        -------
+        Callable[[str], dict]
+            The serialize hook of the backend.
+        """
+        from deepmd.pt.utils.serialization import (
+            serialize_from_file,
+        )
+
+        return serialize_from_file
+
+    @property
+    def deserialize_hook(self) -> Callable[[str, dict], None]:
+        """The deserialize hook to convert the dictionary to a model file.
+
+        Returns
+        -------
+        Callable[[str, dict], None]
+            The deserialize hook of the backend.
+        """
+        from deepmd.pt.utils.serialization import (
+            deserialize_to_file,
+        )
+
+        return deserialize_to_file
diff --git a/deepmd/backend/suffix.py b/deepmd/backend/suffix.py
new file mode 100644
index 0000000000..273fbc0951
--- /dev/null
+++ b/deepmd/backend/suffix.py
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import functools
+import operator
+from pathlib import (
+    Path,
+)
+from typing import (
+    Optional,
+    Type,
+    Union,
+)
+
+from deepmd.backend.backend import (
+    Backend,
+)
+
+
+def format_model_suffix(
+    filename: str,
+    feature: Optional[Backend.Feature] = None,
+    preferred_backend: Optional[Union[str, Type["Backend"]]] = None,
+    strict_prefer: Optional[bool] = None,
+) -> str:
+    """Check and format the suffixes of a filename.
+
+    When preferred_backend is not given, this method checks the suffix of the filename
+    is within the suffixes of the any backends (with the given feature) and doesn't do formating.
+    When preferred_backend is given, strict_prefer must be given.
+    If strict_prefer is True and the suffix is not within the suffixes of the preferred backend,
+    or strict_prefer is False and the suffix is not within the suffixes of the any backend with the given feature,
+    the filename will be formatted with the preferred suffix of the preferred backend.
+
+    Parameters
+    ----------
+    filename : str
+        The filename to be formatted.
+    feature : Backend.Feature, optional
+        The feature of the backend, by default None
+    preferred_backend : str or type of Backend, optional
+        The preferred backend, by default None
+    strict_prefer : bool, optional
+        Whether to strictly prefer the preferred backend, by default None
+
+    Returns
+    -------
+    str
+        The formatted filename with the correct suffix.
+
+    Raises
+    ------
+    ValueError
+        When preferred_backend is not given and the filename is not supported by any backend.
+    """
+    if preferred_backend is not None and strict_prefer is None:
+        raise ValueError("strict_prefer must be given when preferred_backend is given.")
+    if isinstance(preferred_backend, str):
+        preferred_backend = Backend.get_backend(preferred_backend)
+    if preferred_backend is not None and strict_prefer:
+        all_backends = [preferred_backend]
+    elif feature is None:
+        all_backends = list(Backend.get_backends().values())
+    else:
+        all_backends = list(Backend.get_backends_by_feature(feature).values())
+
+    all_suffixes = set(
+        functools.reduce(
+            operator.iconcat, [backend.suffixes for backend in all_backends], []
+        )
+    )
+    pp = Path(filename)
+    current_suffix = pp.suffix
+    if current_suffix not in all_suffixes:
+        if preferred_backend is not None:
+            return str(pp) + preferred_backend.suffixes[0]
+        raise ValueError(f"Unsupported model file format: {filename}")
+    return filename
diff --git a/deepmd/backend/tensorflow.py b/deepmd/backend/tensorflow.py
new file mode 100644
index 0000000000..15b03ee7c8
--- /dev/null
+++ b/deepmd/backend/tensorflow.py
@@ -0,0 +1,135 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from importlib.util import (
+    find_spec,
+)
+from typing import (
+    TYPE_CHECKING,
+    Callable,
+    ClassVar,
+    List,
+    Type,
+)
+
+from deepmd.backend.backend import (
+    Backend,
+)
+
+if TYPE_CHECKING:
+    from argparse import (
+        Namespace,
+    )
+
+    from deepmd.infer.deep_eval import (
+        DeepEvalBackend,
+    )
+    from deepmd.utils.neighbor_stat import (
+        NeighborStat,
+    )
+
+
+@Backend.register("tf")
+@Backend.register("tensorflow")
+class TensorFlowBackend(Backend):
+    """TensorFlow backend."""
+
+    name = "TensorFlow"
+    """The formal name of the backend."""
+    features: ClassVar[Backend.Feature] = (
+        Backend.Feature.ENTRY_POINT
+        | Backend.Feature.DEEP_EVAL
+        | Backend.Feature.NEIGHBOR_STAT
+        | Backend.Feature.IO
+    )
+    """The features of the backend."""
+    suffixes: ClassVar[List[str]] = [".pb"]
+    """The suffixes of the backend."""
+
+    def is_available(self) -> bool:
+        """Check if the backend is available.
+
+        Returns
+        -------
+        bool
+            Whether the backend is available.
+        """
+        # deepmd.env imports expensive numpy
+        # avoid import outside the method
+        from deepmd.env import (
+            GLOBAL_CONFIG,
+        )
+
+        return (
+            find_spec("tensorflow") is not None
+            and GLOBAL_CONFIG["enable_tensorflow"] != "0"
+        )
+
+    @property
+    def entry_point_hook(self) -> Callable[["Namespace"], None]:
+        """The entry point hook of the backend.
+
+        Returns
+        -------
+        Callable[[Namespace], None]
+            The entry point hook of the backend.
+        """
+        from deepmd.tf.entrypoints.main import main as deepmd_main
+
+        return deepmd_main
+
+    @property
+    def deep_eval(self) -> Type["DeepEvalBackend"]:
+        """The Deep Eval backend of the backend.
+
+        Returns
+        -------
+        type[DeepEvalBackend]
+            The Deep Eval backend of the backend.
+        """
+        from deepmd.tf.infer.deep_eval import DeepEval as DeepEvalTF
+
+        return DeepEvalTF
+
+    @property
+    def neighbor_stat(self) -> Type["NeighborStat"]:
+        """The neighbor statistics of the backend.
+
+        Returns
+        -------
+        type[NeighborStat]
+            The neighbor statistics of the backend.
+        """
+        from deepmd.tf.utils.neighbor_stat import (
+            NeighborStat,
+        )
+
+        return NeighborStat
+
+    @property
+    def serialize_hook(self) -> Callable[[str], dict]:
+        """The serialize hook to convert the model file to a dictionary.
+
+        Returns
+        -------
+        Callable[[str], dict]
+            The serialize hook of the backend.
+        """
+        from deepmd.tf.utils.serialization import (
+            serialize_from_file,
+        )
+
+        return serialize_from_file
+
+    @property
+    def deserialize_hook(self) -> Callable[[str, dict], None]:
+        """The deserialize hook to convert the dictionary to a model file.
+
+        Returns
+        -------
+        Callable[[str, dict], None]
+            The deserialize hook of the backend.
+        """
+        from deepmd.tf.utils.serialization import (
+            deserialize_to_file,
+        )
+
+        return deserialize_to_file
diff --git a/deepmd/calculator.py b/deepmd/calculator.py
index b9c0a81006..2d3e7ce831 100644
--- a/deepmd/calculator.py
+++ b/deepmd/calculator.py
@@ -19,8 +19,8 @@
     all_changes,
 )
 
-from deepmd import (
-    DeepPotential,
+from deepmd.infer import (
+    DeepPot,
 )
 
 if TYPE_CHECKING:
@@ -53,7 +53,7 @@ class DP(Calculator):
     Compute potential energy
 
     >>> from ase import Atoms
-    >>> from deepmd.calculator import DP
+    >>> from deepmd.tf.calculator import DP
     >>> water = Atoms('H2O',
     >>>             positions=[(0.7601, 1.9270, 1),
     >>>                        (1.9575, 1, 1),
@@ -89,7 +89,7 @@ def __init__(
         **kwargs,
     ) -> None:
         Calculator.__init__(self, label=label, **kwargs)
-        self.dp = DeepPotential(str(Path(model).resolve()), neighbor_list=neighbor_list)
+        self.dp = DeepPot(str(Path(model).resolve()), neighbor_list=neighbor_list)
         if type_dict:
             self.type_dict = type_dict
         else:
diff --git a/deepmd/cluster/slurm.py b/deepmd/cluster/slurm.py
deleted file mode 100644
index 5264622232..0000000000
--- a/deepmd/cluster/slurm.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-"""MOdule to get resources on SLURM cluster.
-
-References
-----------
-https://github.com/deepsense-ai/tensorflow_on_slurm ####
-"""
-
-import os
-from typing import (
-    List,
-    Optional,
-    Tuple,
-)
-
-import hostlist
-
-from deepmd.cluster import (
-    local,
-)
-
-__all__ = ["get_resource"]
-
-
-def get_resource() -> Tuple[str, List[str], Optional[List[int]]]:
-    """Get SLURM resources: nodename, nodelist, and gpus.
-
-    Returns
-    -------
-    Tuple[str, List[str], Optional[List[int]]]
-        nodename, nodelist, and gpus
-
-    Raises
-    ------
-    RuntimeError
-        if number of nodes could not be retrieved
-    ValueError
-        list of nodes is not of the same length sa number of nodes
-    ValueError
-        if current nodename is not found in node list
-    """
-    nodelist = hostlist.expand_hostlist(os.environ["SLURM_JOB_NODELIST"])
-    nodename = os.environ["SLURMD_NODENAME"]
-    num_nodes_env = os.getenv("SLURM_JOB_NUM_NODES")
-    if num_nodes_env:
-        num_nodes = int(num_nodes_env)
-    else:
-        raise RuntimeError("Could not get SLURM number of nodes")
-
-    if len(nodelist) != num_nodes:
-        raise ValueError(
-            f"Number of slurm nodes {len(nodelist)} not equal to {num_nodes}"
-        )
-    if nodename not in nodelist:
-        raise ValueError(
-            f"Nodename({nodename}) not in nodelist({nodelist}). This should not happen!"
-        )
-    gpus = local.get_gpus()
-    return nodename, nodelist, gpus
diff --git a/deepmd/common.py b/deepmd/common.py
index 54e3d0a6f8..098bb0ed11 100644
--- a/deepmd/common.py
+++ b/deepmd/common.py
@@ -1,46 +1,44 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Collection of functions and classes used throughout the whole package."""
-
+import glob
+import json
+import os
+import platform
+import shutil
 import warnings
-from functools import (
-    wraps,
+from hashlib import (
+    sha1,
+)
+from pathlib import (
+    Path,
 )
 from typing import (
     TYPE_CHECKING,
     Any,
-    Callable,
+    Dict,
+    List,
+    Optional,
+    Set,
+    TypeVar,
     Union,
+    get_args,
 )
 
-import tensorflow
-from tensorflow.python.framework import (
-    tensor_util,
-)
+try:
+    from typing import Literal  # python >=3.8
+except ImportError:
+    from typing_extensions import Literal  # type: ignore
+
+import numpy as np
+import yaml
 
 from deepmd.env import (
-    GLOBAL_TF_FLOAT_PRECISION,
-    op_module,
-    tf,
+    GLOBAL_NP_FLOAT_PRECISION,
 )
-from deepmd_utils.common import (
-    add_data_requirement,
-    data_requirement,
-    expand_sys_str,
-    get_np_precision,
-    j_loader,
-    j_must_have,
-    make_default_mesh,
-    select_idx_map,
+from deepmd.utils.path import (
+    DPPath,
 )
 
-if TYPE_CHECKING:
-    from deepmd_utils.common import (
-        _ACTIVATION,
-        _PRECISION,
-    )
-
 __all__ = [
-    # from deepmd_utils.common
     "data_requirement",
     "add_data_requirement",
     "select_idx_map",
@@ -49,238 +47,311 @@
     "j_loader",
     "expand_sys_str",
     "get_np_precision",
-    # from self
-    "PRECISION_DICT",
+    "VALID_PRECISION",
+    "VALID_ACTIVATION",
+]
+
+_PRECISION = Literal["default", "float16", "float32", "float64"]
+_ACTIVATION = Literal[
+    "relu",
+    "relu6",
+    "softplus",
+    "sigmoid",
+    "tanh",
     "gelu",
     "gelu_tf",
-    "ACTIVATION_FN_DICT",
-    "get_activation_func",
-    "get_precision",
-    "safe_cast_tensor",
-    "cast_precision",
-    "clear_session",
+    "none",
+    "linear",
 ]
+# get_args is new in py38
+VALID_PRECISION: Set[_PRECISION] = set(get_args(_PRECISION))
+VALID_ACTIVATION: Set[_ACTIVATION] = set(get_args(_ACTIVATION))
 
-# define constants
-PRECISION_DICT = {
-    "default": GLOBAL_TF_FLOAT_PRECISION,
-    "float16": tf.float16,
-    "float32": tf.float32,
-    "float64": tf.float64,
-    "bfloat16": tf.bfloat16,
-}
+if TYPE_CHECKING:
+    _DICT_VAL = TypeVar("_DICT_VAL")
+    __all__.extend(
+        [
+            "_DICT_VAL",
+            "_PRECISION",
+            "_ACTIVATION",
+        ]
+    )
 
 
-def gelu(x: tf.Tensor) -> tf.Tensor:
-    """Gaussian Error Linear Unit.
+# TODO: refactor data_requirement to make it not a global variable
+# this is not a good way to do things. This is some global variable to which
+# anyone can write and there is no good way to keep track of the changes
+data_requirement = {}
 
-    This is a smoother version of the RELU, implemented by custom operator.
+
+def add_data_requirement(
+    key: str,
+    ndof: int,
+    atomic: bool = False,
+    must: bool = False,
+    high_prec: bool = False,
+    type_sel: Optional[bool] = None,
+    repeat: int = 1,
+    default: float = 0.0,
+    dtype: Optional[np.dtype] = None,
+    output_natoms_for_type_sel: bool = False,
+):
+    """Specify data requirements for training.
+
+    Parameters
+    ----------
+    key : str
+        type of data stored in corresponding `*.npy` file e.g. `forces` or `energy`
+    ndof : int
+        number of the degrees of freedom, this is tied to `atomic` parameter e.g. forces
+        have `atomic=True` and `ndof=3`
+    atomic : bool, optional
+        specifies whwther the `ndof` keyworrd applies to per atom quantity or not,
+        by default False
+    must : bool, optional
+        specifi if the `*.npy` data file must exist, by default False
+    high_prec : bool, optional
+        if true load data to `np.float64` else `np.float32`, by default False
+    type_sel : bool, optional
+        select only certain type of atoms, by default None
+    repeat : int, optional
+        if specify repaeat data `repeat` times, by default 1
+    default : float, optional, default=0.
+        default value of data
+    dtype : np.dtype, optional
+        the dtype of data, overwrites `high_prec` if provided
+    output_natoms_for_type_sel : bool, optional
+        if True and type_sel is True, the atomic dimension will be natoms instead of nsel
+    """
+    data_requirement[key] = {
+        "ndof": ndof,
+        "atomic": atomic,
+        "must": must,
+        "high_prec": high_prec,
+        "type_sel": type_sel,
+        "repeat": repeat,
+        "default": default,
+        "dtype": dtype,
+        "output_natoms_for_type_sel": output_natoms_for_type_sel,
+    }
+
+
+def select_idx_map(atom_types: np.ndarray, select_types: np.ndarray) -> np.ndarray:
+    """Build map of indices for element supplied element types from all atoms list.
 
     Parameters
     ----------
-    x : tf.Tensor
-        float Tensor to perform activation
+    atom_types : np.ndarray
+        array specifing type for each atoms as integer
+    select_types : np.ndarray
+        types of atoms you want to find indices for
 
     Returns
     -------
-    tf.Tensor
-        `x` with the GELU activation applied
+    np.ndarray
+        indices of types of atoms defined by `select_types` in `atom_types` array
 
-    References
-    ----------
-    Original paper
-    https://arxiv.org/abs/1606.08415
+    Warnings
+    --------
+    `select_types` array will be sorted before finding indices in `atom_types`
     """
-    return op_module.gelu_custom(x)
+    sort_select_types = np.sort(select_types)
+    idx_map = []
+    for ii in sort_select_types:
+        idx_map.append(np.where(atom_types == ii)[0])
+    return np.concatenate(idx_map)
 
 
-def gelu_tf(x: tf.Tensor) -> tf.Tensor:
-    """Gaussian Error Linear Unit.
+def make_default_mesh(pbc: bool, mixed_type: bool) -> np.ndarray:
+    """Make mesh.
 
-    This is a smoother version of the RELU, implemented by TF.
+    Only the size of mesh matters, not the values:
+    * 6 for PBC, no mixed types
+    * 0 for no PBC, no mixed types
+    * 7 for PBC, mixed types
+    * 1 for no PBC, mixed types
 
     Parameters
     ----------
-    x : tf.Tensor
-        float Tensor to perform activation
+    pbc : bool
+        if True, the mesh will be made for periodic boundary conditions
+    mixed_type : bool
+        if True, the mesh will be made for mixed types
 
     Returns
     -------
-    tf.Tensor
-        `x` with the GELU activation applied
-
-    References
-    ----------
-    Original paper
-    https://arxiv.org/abs/1606.08415
+    np.ndarray
+        mesh
     """
+    mesh_size = int(pbc) * 6 + int(mixed_type)
+    default_mesh = np.zeros(mesh_size, dtype=np.int32)
+    return default_mesh
 
-    def gelu_wrapper(x):
-        try:
-            return tensorflow.nn.gelu(x, approximate=True)
-        except AttributeError:
-            warnings.warn(
-                "TensorFlow does not provide an implementation of gelu, please upgrade your TensorFlow version. Fallback to the custom gelu operator."
-            )
-            return op_module.gelu_custom(x)
-
-    return (lambda x: gelu_wrapper(x))(x)
-
-
-ACTIVATION_FN_DICT = {
-    "relu": tf.nn.relu,
-    "relu6": tf.nn.relu6,
-    "softplus": tf.nn.softplus,
-    "sigmoid": tf.sigmoid,
-    "tanh": tf.nn.tanh,
-    "gelu": gelu,
-    "gelu_tf": gelu_tf,
-    "None": None,
-    "none": None,
-}
-
-
-def get_activation_func(
-    activation_fn: Union["_ACTIVATION", None],
-) -> Union[Callable[[tf.Tensor], tf.Tensor], None]:
-    """Get activation function callable based on string name.
 
-    Parameters
-    ----------
-    activation_fn : _ACTIVATION
-        one of the defined activation functions
+# TODO: rename j_must_have to j_deprecated and only warn about deprecated keys
+# maybe rename this to j_deprecated and only warn about deprecated keys,
+# if the deprecated_key argument is left empty function puppose is only custom
+# error since dict[key] already raises KeyError when the key is missing
+def j_must_have(
+    jdata: Dict[str, "_DICT_VAL"], key: str, deprecated_key: List[str] = []
+) -> "_DICT_VAL":
+    """Assert that supplied dictionary conaines specified key.
 
     Returns
     -------
-    Callable[[tf.Tensor], tf.Tensor]
-        correspondingg TF callable
+    _DICT_VAL
+        value that was store unde supplied key
 
     Raises
     ------
     RuntimeError
-        if unknown activation function is specified
+        if the key is not present
     """
-    if activation_fn is None:
-        return None
-    if activation_fn not in ACTIVATION_FN_DICT:
-        raise RuntimeError(f"{activation_fn} is not a valid activation function")
-    return ACTIVATION_FN_DICT[activation_fn]
+    if key not in jdata.keys():
+        for ii in deprecated_key:
+            if ii in jdata.keys():
+                warnings.warn(f"the key {ii} is deprecated, please use {key} instead")
+                return jdata[ii]
+        else:
+            raise RuntimeError(f"json database must provide key {key}")
+    else:
+        return jdata[key]
 
 
-def get_precision(precision: "_PRECISION") -> Any:
-    """Convert str to TF DType constant.
+def j_loader(filename: Union[str, Path]) -> Dict[str, Any]:
+    """Load yaml or json settings file.
 
     Parameters
     ----------
-    precision : _PRECISION
-        one of the allowed precisions
+    filename : Union[str, Path]
+        path to file
 
     Returns
     -------
-    tf.python.framework.dtypes.DType
-        appropriate TF constant
+    Dict[str, Any]
+        loaded dictionary
 
     Raises
     ------
-    RuntimeError
-        if supplied precision string does not have acorresponding TF constant
+    TypeError
+        if the supplied file is of unsupported type
     """
-    if precision not in PRECISION_DICT:
-        raise RuntimeError(f"{precision} is not a valid precision")
-    return PRECISION_DICT[precision]
-
+    filepath = Path(filename)
+    if filepath.suffix.endswith("json"):
+        with filepath.open() as fp:
+            return json.load(fp)
+    elif filepath.suffix.endswith(("yml", "yaml")):
+        with filepath.open() as fp:
+            return yaml.safe_load(fp)
+    else:
+        raise TypeError("config file must be json, or yaml/yml")
 
-def safe_cast_tensor(
-    input: tf.Tensor, from_precision: tf.DType, to_precision: tf.DType
-) -> tf.Tensor:
-    """Convert a Tensor from a precision to another precision.
 
-    If input is not a Tensor or without the specific precision, the method will not
-    cast it.
+# TODO port expand_sys_str completely to pathlib when all callers are ported
+def expand_sys_str(root_dir: Union[str, Path]) -> List[str]:
+    """Recursively iterate over directories taking those that contain `type.raw` file.
 
     Parameters
     ----------
-    input : tf.Tensor
-        input tensor
-    from_precision : tf.DType
-        Tensor data type that is casted from
-    to_precision : tf.DType
-        Tensor data type that casts to
+    root_dir : Union[str, Path]
+        starting directory
 
     Returns
     -------
-    tf.Tensor
-        casted Tensor
+    List[str]
+        list of string pointing to system directories
     """
-    if tensor_util.is_tensor(input) and input.dtype == from_precision:
-        return tf.cast(input, to_precision)
-    return input
+    root_dir = DPPath(root_dir)
+    matches = [str(d) for d in root_dir.rglob("*") if (d / "type.raw").is_file()]
+    if (root_dir / "type.raw").is_file():
+        matches.append(str(root_dir))
+    return matches
 
 
-def cast_precision(func: Callable) -> Callable:
-    """A decorator that casts and casts back the input
-    and output tensor of a method.
+def get_np_precision(precision: "_PRECISION") -> np.dtype:
+    """Get numpy precision constant from string.
 
-    The decorator should be used in a classmethod.
-
-    The decorator will do the following thing:
-    (1) It casts input Tensors from `GLOBAL_TF_FLOAT_PRECISION`
-    to precision defined by property `precision`.
-    (2) It casts output Tensors from `precision` to
-    `GLOBAL_TF_FLOAT_PRECISION`.
-    (3) It checks inputs and outputs and only casts when
-    input or output is a Tensor and its dtype matches
-    `GLOBAL_TF_FLOAT_PRECISION` and `precision`, respectively.
-    If it does not match (e.g. it is an integer), the decorator
-    will do nothing on it.
+    Parameters
+    ----------
+    precision : _PRECISION
+        string name of numpy constant or default
 
     Returns
     -------
-    Callable
-        a decorator that casts and casts back the input and
-        output tensor of a method
+    np.dtype
+        numpy presicion constant
 
-    Examples
-    --------
-    >>> class A:
-    ...   @property
-    ...   def precision(self):
-    ...     return tf.float32
-    ...
-    ...   @cast_precision
-    ...   def f(x: tf.Tensor, y: tf.Tensor) -> tf.Tensor:
-    ...     return x ** 2 + y
+    Raises
+    ------
+    RuntimeError
+        if string is invalid
     """
+    if precision == "default":
+        return GLOBAL_NP_FLOAT_PRECISION
+    elif precision == "float16":
+        return np.float16
+    elif precision == "float32":
+        return np.float32
+    elif precision == "float64":
+        return np.float64
+    else:
+        raise RuntimeError(f"{precision} is not a valid precision")
+
+
+def symlink_prefix_files(old_prefix: str, new_prefix: str):
+    """Create symlinks from old checkpoint prefix to new one.
 
-    @wraps(func)
-    def wrapper(self, *args, **kwargs):
-        # only convert tensors
-        returned_tensor = func(
-            self,
-            *[
-                safe_cast_tensor(vv, GLOBAL_TF_FLOAT_PRECISION, self.precision)
-                for vv in args
-            ],
-            **{
-                kk: safe_cast_tensor(vv, GLOBAL_TF_FLOAT_PRECISION, self.precision)
-                for kk, vv in kwargs.items()
-            },
-        )
-        if isinstance(returned_tensor, tuple):
-            return tuple(
-                safe_cast_tensor(vv, self.precision, GLOBAL_TF_FLOAT_PRECISION)
-                for vv in returned_tensor
-            )
+    On Windows this function will copy files instead of creating symlinks.
+
+    Parameters
+    ----------
+    old_prefix : str
+        old checkpoint prefix, all files with this prefix will be symlinked
+    new_prefix : str
+        new checkpoint prefix
+    """
+    original_files = glob.glob(old_prefix + ".*")
+    for ori_ff in original_files:
+        new_ff = new_prefix + ori_ff[len(old_prefix) :]
+        try:
+            # remove old one
+            os.remove(new_ff)
+        except OSError:
+            pass
+        if platform.system() != "Windows":
+            # by default one does not have access to create symlink on Windows
+            os.symlink(os.path.relpath(ori_ff, os.path.dirname(new_ff)), new_ff)
         else:
-            return safe_cast_tensor(
-                returned_tensor, self.precision, GLOBAL_TF_FLOAT_PRECISION
-            )
+            shutil.copyfile(ori_ff, new_ff)
+
+
+def get_hash(obj) -> str:
+    """Get hash of object.
+
+    Parameters
+    ----------
+    obj
+        object to hash
+    """
+    return sha1(json.dumps(obj).encode("utf-8")).hexdigest()
+
 
-    return wrapper
+def j_get_type(data: dict, class_name: str = "object") -> str:
+    """Get the type from the data.
 
+    Parameters
+    ----------
+    data : dict
+        the data
+    class_name : str, optional
+        the name of the class for error message, by default "object"
 
-def clear_session():
-    """Reset all state generated by DeePMD-kit."""
-    tf.reset_default_graph()
-    # TODO: remove this line when data_requirement is not a global variable
-    data_requirement.clear()
+    Returns
+    -------
+    str
+        the type
+    """
+    try:
+        return data["type"]
+    except KeyError as e:
+        raise KeyError(f"the type of the {class_name} should be set by `type`") from e
diff --git a/deepmd/descriptor/se.py b/deepmd/descriptor/se.py
deleted file mode 100644
index 598f6f9ff8..0000000000
--- a/deepmd/descriptor/se.py
+++ /dev/null
@@ -1,162 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-from typing import (
-    Tuple,
-)
-
-from deepmd.env import (
-    tf,
-)
-from deepmd.utils.graph import (
-    get_embedding_net_variables_from_graph_def,
-    get_tensor_by_name_from_graph,
-)
-
-from .descriptor import (
-    Descriptor,
-)
-
-
-class DescrptSe(Descriptor):
-    """A base class for smooth version of descriptors.
-
-    Notes
-    -----
-    All of these descriptors have an environmental matrix and an
-    embedding network (:meth:`deepmd.utils.network.embedding_net`), so
-    they can share some similiar methods without defining them twice.
-
-    Attributes
-    ----------
-    embedding_net_variables : dict
-        initial embedding network variables
-    descrpt_reshape : tf.Tensor
-        the reshaped descriptor
-    descrpt_deriv : tf.Tensor
-        the descriptor derivative
-    rij : tf.Tensor
-        distances between two atoms
-    nlist : tf.Tensor
-        the neighbor list
-
-    """
-
-    def _identity_tensors(self, suffix: str = "") -> None:
-        """Identify tensors which are expected to be stored and restored.
-
-        Notes
-        -----
-        These tensors will be indentitied:
-            self.descrpt_reshape : o_rmat
-            self.descrpt_deriv : o_rmat_deriv
-            self.rij : o_rij
-            self.nlist : o_nlist
-        Thus, this method should be called during building the descriptor and
-        after these tensors are initialized.
-
-        Parameters
-        ----------
-        suffix : str
-            The suffix of the scope
-        """
-        self.descrpt_reshape = tf.identity(self.descrpt_reshape, name="o_rmat" + suffix)
-        self.descrpt_deriv = tf.identity(
-            self.descrpt_deriv, name="o_rmat_deriv" + suffix
-        )
-        self.rij = tf.identity(self.rij, name="o_rij" + suffix)
-        self.nlist = tf.identity(self.nlist, name="o_nlist" + suffix)
-
-    def get_tensor_names(self, suffix: str = "") -> Tuple[str]:
-        """Get names of tensors.
-
-        Parameters
-        ----------
-        suffix : str
-            The suffix of the scope
-
-        Returns
-        -------
-        Tuple[str]
-            Names of tensors
-        """
-        return (
-            f"o_rmat{suffix}:0",
-            f"o_rmat_deriv{suffix}:0",
-            f"o_rij{suffix}:0",
-            f"o_nlist{suffix}:0",
-        )
-
-    def pass_tensors_from_frz_model(
-        self,
-        descrpt_reshape: tf.Tensor,
-        descrpt_deriv: tf.Tensor,
-        rij: tf.Tensor,
-        nlist: tf.Tensor,
-    ):
-        """Pass the descrpt_reshape tensor as well as descrpt_deriv tensor from the frz graph_def.
-
-        Parameters
-        ----------
-        descrpt_reshape
-            The passed descrpt_reshape tensor
-        descrpt_deriv
-            The passed descrpt_deriv tensor
-        rij
-            The passed rij tensor
-        nlist
-            The passed nlist tensor
-        """
-        self.rij = rij
-        self.nlist = nlist
-        self.descrpt_deriv = descrpt_deriv
-        self.descrpt_reshape = descrpt_reshape
-
-    def init_variables(
-        self,
-        graph: tf.Graph,
-        graph_def: tf.GraphDef,
-        suffix: str = "",
-    ) -> None:
-        """Init the embedding net variables with the given dict.
-
-        Parameters
-        ----------
-        graph : tf.Graph
-            The input frozen model graph
-        graph_def : tf.GraphDef
-            The input frozen model graph_def
-        suffix : str, optional
-            The suffix of the scope
-        """
-        self.embedding_net_variables = get_embedding_net_variables_from_graph_def(
-            graph_def, suffix=suffix
-        )
-        self.davg = get_tensor_by_name_from_graph(
-            graph, "descrpt_attr%s/t_avg" % suffix
-        )
-        self.dstd = get_tensor_by_name_from_graph(
-            graph, "descrpt_attr%s/t_std" % suffix
-        )
-
-    @property
-    def precision(self) -> tf.DType:
-        """Precision of filter network."""
-        return self.filter_precision
-
-    @classmethod
-    def update_sel(cls, global_jdata: dict, local_jdata: dict):
-        """Update the selection and perform neighbor statistics.
-
-        Parameters
-        ----------
-        global_jdata : dict
-            The global data, containing the training section
-        local_jdata : dict
-            The local data refer to the current class
-        """
-        from deepmd.entrypoints.train import (
-            update_one_sel,
-        )
-
-        # default behavior is to update sel which is a list
-        local_jdata_cpy = local_jdata.copy()
-        return update_one_sel(global_jdata, local_jdata_cpy, False)
diff --git a/deepmd/dpmodel/__init__.py b/deepmd/dpmodel/__init__.py
new file mode 100644
index 0000000000..6a7bdb3585
--- /dev/null
+++ b/deepmd/dpmodel/__init__.py
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from .common import (
+    DEFAULT_PRECISION,
+    PRECISION_DICT,
+    NativeOP,
+)
+from .model import (
+    DPModel,
+)
+from .output_def import (
+    FittingOutputDef,
+    ModelOutputDef,
+    OutputVariableDef,
+    fitting_check_output,
+    get_deriv_name,
+    get_hessian_name,
+    get_reduce_name,
+    model_check_output,
+)
+
+__all__ = [
+    "DPModel",
+    "PRECISION_DICT",
+    "DEFAULT_PRECISION",
+    "NativeOP",
+    "ModelOutputDef",
+    "FittingOutputDef",
+    "OutputVariableDef",
+    "model_check_output",
+    "fitting_check_output",
+    "get_reduce_name",
+    "get_deriv_name",
+    "get_hessian_name",
+]
diff --git a/deepmd/dpmodel/atomic_model/__init__.py b/deepmd/dpmodel/atomic_model/__init__.py
new file mode 100644
index 0000000000..37f6b8bf28
--- /dev/null
+++ b/deepmd/dpmodel/atomic_model/__init__.py
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""The atomic model provides the prediction of some property on each
+atom.  All the atomic models are not supposed to be directly accessed
+by users, but it provides a convenient interface for the
+implementation of models.
+
+Taking the energy models for example, the developeres only needs to
+implement the atomic energy prediction via an atomic model, and the
+model can be automatically made by the `deepmd.dpmodel.make_model`
+method. The `DPModel` is made by
+```
+DPModel = make_model(DPAtomicModel)
+```
+
+"""
+
+from .base_atomic_model import (
+    BaseAtomicModel,
+)
+from .dp_atomic_model import (
+    DPAtomicModel,
+)
+from .linear_atomic_model import (
+    DPZBLLinearEnergyAtomicModel,
+    LinearEnergyAtomicModel,
+)
+from .make_base_atomic_model import (
+    make_base_atomic_model,
+)
+from .pairtab_atomic_model import (
+    PairTabAtomicModel,
+)
+
+__all__ = [
+    "make_base_atomic_model",
+    "BaseAtomicModel",
+    "DPAtomicModel",
+    "PairTabAtomicModel",
+    "LinearEnergyAtomicModel",
+    "DPZBLLinearEnergyAtomicModel",
+]
diff --git a/deepmd/dpmodel/atomic_model/base_atomic_model.py b/deepmd/dpmodel/atomic_model/base_atomic_model.py
new file mode 100644
index 0000000000..42d1e67138
--- /dev/null
+++ b/deepmd/dpmodel/atomic_model/base_atomic_model.py
@@ -0,0 +1,149 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Dict,
+    List,
+    Optional,
+    Tuple,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+    OutputVariableDef,
+)
+from deepmd.dpmodel.utils import (
+    AtomExcludeMask,
+    PairExcludeMask,
+)
+
+from .make_base_atomic_model import (
+    make_base_atomic_model,
+)
+
+BaseAtomicModel_ = make_base_atomic_model(np.ndarray)
+
+
+class BaseAtomicModel(BaseAtomicModel_):
+    def __init__(
+        self,
+        atom_exclude_types: List[int] = [],
+        pair_exclude_types: List[Tuple[int, int]] = [],
+    ):
+        super().__init__()
+        self.reinit_atom_exclude(atom_exclude_types)
+        self.reinit_pair_exclude(pair_exclude_types)
+
+    def reinit_atom_exclude(
+        self,
+        exclude_types: List[int] = [],
+    ):
+        self.atom_exclude_types = exclude_types
+        if exclude_types == []:
+            self.atom_excl = None
+        else:
+            self.atom_excl = AtomExcludeMask(self.get_ntypes(), self.atom_exclude_types)
+
+    def reinit_pair_exclude(
+        self,
+        exclude_types: List[Tuple[int, int]] = [],
+    ):
+        self.pair_exclude_types = exclude_types
+        if exclude_types == []:
+            self.pair_excl = None
+        else:
+            self.pair_excl = PairExcludeMask(self.get_ntypes(), self.pair_exclude_types)
+
+    def atomic_output_def(self) -> FittingOutputDef:
+        old_def = self.fitting_output_def()
+        old_list = list(old_def.get_data().values())
+        return FittingOutputDef(
+            old_list  # noqa:RUF005
+            + [
+                OutputVariableDef(
+                    name="mask",
+                    shape=[1],
+                    reduciable=False,
+                    r_differentiable=False,
+                    c_differentiable=False,
+                )
+            ]
+        )
+
+    def forward_common_atomic(
+        self,
+        extended_coord: np.ndarray,
+        extended_atype: np.ndarray,
+        nlist: np.ndarray,
+        mapping: Optional[np.ndarray] = None,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+    ) -> Dict[str, np.ndarray]:
+        """Common interface for atomic inference.
+
+        This method accept extended coordinates, extended atom typs, neighbor list,
+        and predict the atomic contribution of the fit property.
+
+        Parameters
+        ----------
+        extended_coord
+            extended coodinates, shape: nf x (nall x 3)
+        extended_atype
+            extended atom typs, shape: nf x nall
+            for a type < 0 indicating the atomic is virtual.
+        nlist
+            neighbor list, shape: nf x nloc x nsel
+        mapping
+            extended to local index mapping, shape: nf x nall
+        fparam
+            frame parameters, shape: nf x dim_fparam
+        aparam
+            atomic parameter, shape: nf x nloc x dim_aparam
+
+        Returns
+        -------
+        ret_dict
+            dict of output atomic properties.
+            should implement the definition of `fitting_output_def`.
+            ret_dict["mask"] of shape nf x nloc will be provided.
+            ret_dict["mask"][ff,ii] == 1 indicating the ii-th atom of the ff-th frame is real.
+            ret_dict["mask"][ff,ii] == 0 indicating the ii-th atom of the ff-th frame is virtual.
+
+        """
+        _, nloc, _ = nlist.shape
+        atype = extended_atype[:, :nloc]
+        if self.pair_excl is not None:
+            pair_mask = self.pair_excl.build_type_exclude_mask(nlist, extended_atype)
+            # exclude neighbors in the nlist
+            nlist = np.where(pair_mask == 1, nlist, -1)
+
+        ext_atom_mask = self.make_atom_mask(extended_atype)
+        ret_dict = self.forward_atomic(
+            extended_coord,
+            np.where(ext_atom_mask, extended_atype, 0),
+            nlist,
+            mapping=mapping,
+            fparam=fparam,
+            aparam=aparam,
+        )
+
+        # nf x nloc
+        atom_mask = ext_atom_mask[:, :nloc].astype(np.int32)
+        if self.atom_excl is not None:
+            atom_mask *= self.atom_excl.build_type_exclude_mask(atype)
+
+        for kk in ret_dict.keys():
+            out_shape = ret_dict[kk].shape
+            ret_dict[kk] = (
+                ret_dict[kk].reshape([out_shape[0], out_shape[1], -1])
+                * atom_mask[:, :, None]
+            ).reshape(out_shape)
+        ret_dict["mask"] = atom_mask
+
+        return ret_dict
+
+    def serialize(self) -> dict:
+        return {
+            "atom_exclude_types": self.atom_exclude_types,
+            "pair_exclude_types": self.pair_exclude_types,
+        }
diff --git a/deepmd/dpmodel/atomic_model/dp_atomic_model.py b/deepmd/dpmodel/atomic_model/dp_atomic_model.py
new file mode 100644
index 0000000000..8a40f8d238
--- /dev/null
+++ b/deepmd/dpmodel/atomic_model/dp_atomic_model.py
@@ -0,0 +1,206 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+from typing import (
+    Dict,
+    List,
+    Optional,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.descriptor.base_descriptor import (
+    BaseDescriptor,
+)
+from deepmd.dpmodel.fitting.base_fitting import (
+    BaseFitting,
+)
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+from .base_atomic_model import (
+    BaseAtomicModel,
+)
+
+
+@BaseAtomicModel.register("standard")
+class DPAtomicModel(BaseAtomicModel):
+    """Model give atomic prediction of some physical property.
+
+    Parameters
+    ----------
+    descriptor
+            Descriptor
+    fitting_net
+            Fitting net
+    type_map
+            Mapping atom type to the name (str) of the type.
+            For example `type_map[1]` gives the name of the type 1.
+
+    """
+
+    def __init__(
+        self,
+        descriptor,
+        fitting,
+        type_map: List[str],
+        **kwargs,
+    ):
+        self.type_map = type_map
+        self.descriptor = descriptor
+        self.fitting = fitting
+        self.type_map = type_map
+        super().__init__(**kwargs)
+
+    def fitting_output_def(self) -> FittingOutputDef:
+        """Get the output def of the fitting net."""
+        return self.fitting.output_def()
+
+    def get_rcut(self) -> float:
+        """Get the cut-off radius."""
+        return self.descriptor.get_rcut()
+
+    def get_sel(self) -> List[int]:
+        """Get the neighbor selection."""
+        return self.descriptor.get_sel()
+
+    def get_type_map(self) -> List[str]:
+        """Get the type map."""
+        return self.type_map
+
+    def mixed_types(self) -> bool:
+        """If true, the model
+        1. assumes total number of atoms aligned across frames;
+        2. uses a neighbor list that does not distinguish different atomic types.
+
+        If false, the model
+        1. assumes total number of atoms of each atom type aligned across frames;
+        2. uses a neighbor list that distinguishes different atomic types.
+
+        """
+        return self.descriptor.mixed_types()
+
+    def set_out_bias(self, out_bias: np.ndarray, add=False) -> None:
+        """
+        Modify the output bias for the atomic model.
+
+        Parameters
+        ----------
+        out_bias : np.ndarray
+            The new bias to be applied.
+        add : bool, optional
+            Whether to add the new bias to the existing one.
+            If False, the output bias will be directly replaced by the new bias.
+            If True, the new bias will be added to the existing one.
+        """
+        self.fitting["bias_atom_e"] = (
+            out_bias + self.fitting["bias_atom_e"] if add else out_bias
+        )
+
+    def get_out_bias(self) -> np.ndarray:
+        """Return the output bias of the atomic model."""
+        return self.fitting["bias_atom_e"]
+
+    def forward_atomic(
+        self,
+        extended_coord: np.ndarray,
+        extended_atype: np.ndarray,
+        nlist: np.ndarray,
+        mapping: Optional[np.ndarray] = None,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+    ) -> Dict[str, np.ndarray]:
+        """Models' atomic predictions.
+
+        Parameters
+        ----------
+        extended_coord
+            coodinates in extended region
+        extended_atype
+            atomic type in extended region
+        nlist
+            neighbor list. nf x nloc x nsel
+        mapping
+            mapps the extended indices to local indices. nf x nall
+        fparam
+            frame parameter. nf x ndf
+        aparam
+            atomic parameter. nf x nloc x nda
+
+        Returns
+        -------
+        result_dict
+            the result dict, defined by the `FittingOutputDef`.
+
+        """
+        nframes, nloc, nnei = nlist.shape
+        atype = extended_atype[:, :nloc]
+        descriptor, rot_mat, g2, h2, sw = self.descriptor(
+            extended_coord,
+            extended_atype,
+            nlist,
+            mapping=mapping,
+        )
+        ret = self.fitting(
+            descriptor,
+            atype,
+            gr=rot_mat,
+            g2=g2,
+            h2=h2,
+            fparam=fparam,
+            aparam=aparam,
+        )
+        return ret
+
+    def serialize(self) -> dict:
+        dd = super().serialize()
+        dd.update(
+            {
+                "@class": "Model",
+                "type": "standard",
+                "@version": 1,
+                "type_map": self.type_map,
+                "descriptor": self.descriptor.serialize(),
+                "fitting": self.fitting.serialize(),
+            }
+        )
+        return dd
+
+    @classmethod
+    def deserialize(cls, data) -> "DPAtomicModel":
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data.pop("@class")
+        data.pop("type")
+        descriptor_obj = BaseDescriptor.deserialize(data.pop("descriptor"))
+        fitting_obj = BaseFitting.deserialize(data.pop("fitting"))
+        type_map = data.pop("type_map")
+        obj = cls(descriptor_obj, fitting_obj, type_map=type_map, **data)
+        return obj
+
+    def get_dim_fparam(self) -> int:
+        """Get the number (dimension) of frame parameters of this atomic model."""
+        return self.fitting.get_dim_fparam()
+
+    def get_dim_aparam(self) -> int:
+        """Get the number (dimension) of atomic parameters of this atomic model."""
+        return self.fitting.get_dim_aparam()
+
+    def get_sel_type(self) -> List[int]:
+        """Get the selected atom types of this model.
+
+        Only atoms with selected atom types have atomic contribution
+        to the result of the model.
+        If returning an empty list, all atom types are selected.
+        """
+        return self.fitting.get_sel_type()
+
+    def is_aparam_nall(self) -> bool:
+        """Check whether the shape of atomic parameters is (nframes, nall, ndim).
+
+        If False, the shape is (nframes, nloc, ndim).
+        """
+        return False
diff --git a/deepmd/dpmodel/atomic_model/linear_atomic_model.py b/deepmd/dpmodel/atomic_model/linear_atomic_model.py
new file mode 100644
index 0000000000..93a885f3ab
--- /dev/null
+++ b/deepmd/dpmodel/atomic_model/linear_atomic_model.py
@@ -0,0 +1,441 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+from typing import (
+    Dict,
+    List,
+    Optional,
+    Tuple,
+    Union,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.utils.nlist import (
+    build_multiple_neighbor_list,
+    get_multiple_nlist_key,
+    nlist_distinguish_types,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+from ..output_def import (
+    FittingOutputDef,
+    OutputVariableDef,
+)
+from .base_atomic_model import (
+    BaseAtomicModel,
+)
+from .dp_atomic_model import (
+    DPAtomicModel,
+)
+from .pairtab_atomic_model import (
+    PairTabAtomicModel,
+)
+
+
+class LinearEnergyAtomicModel(BaseAtomicModel):
+    """Linear model make linear combinations of several existing models.
+
+    Parameters
+    ----------
+    models : list[DPAtomicModel or PairTabAtomicModel]
+        A list of models to be combined. PairTabAtomicModel must be used together with a DPAtomicModel.
+    type_map : list[str]
+        Mapping atom type to the name (str) of the type.
+        For example `type_map[1]` gives the name of the type 1.
+    """
+
+    def __init__(
+        self,
+        models: List[BaseAtomicModel],
+        type_map: List[str],
+        **kwargs,
+    ):
+        self.models = models
+        sub_model_type_maps = [md.get_type_map() for md in models]
+        err_msg = []
+        self.mapping_list = []
+        common_type_map = set(type_map)
+        self.type_map = type_map
+        for tpmp in sub_model_type_maps:
+            if not common_type_map.issubset(set(tpmp)):
+                err_msg.append(
+                    f"type_map {tpmp} is not a subset of type_map {type_map}"
+                )
+            self.mapping_list.append(self.remap_atype(tpmp, self.type_map))
+        assert len(err_msg) == 0, "\n".join(err_msg)
+        self.mixed_types_list = [model.mixed_types() for model in self.models]
+        super().__init__(**kwargs)
+
+    def mixed_types(self) -> bool:
+        """If true, the model
+        1. assumes total number of atoms aligned across frames;
+        2. uses a neighbor list that does not distinguish different atomic types.
+
+        If false, the model
+        1. assumes total number of atoms of each atom type aligned across frames;
+        2. uses a neighbor list that distinguishes different atomic types.
+
+        """
+        return True
+
+    def get_rcut(self) -> float:
+        """Get the cut-off radius."""
+        return max(self.get_model_rcuts())
+
+    def get_type_map(self) -> List[str]:
+        """Get the type map."""
+        raise self.type_map
+
+    def get_model_rcuts(self) -> List[float]:
+        """Get the cut-off radius for each individual models."""
+        return [model.get_rcut() for model in self.models]
+
+    def get_sel(self) -> List[int]:
+        return [max([model.get_nsel() for model in self.models])]
+
+    def get_model_nsels(self) -> List[int]:
+        """Get the processed sels for each individual models. Not distinguishing types."""
+        return [model.get_nsel() for model in self.models]
+
+    def get_model_sels(self) -> List[Union[int, List[int]]]:
+        """Get the sels for each individual models."""
+        return [model.get_sel() for model in self.models]
+
+    def _sort_rcuts_sels(self) -> Tuple[List[float], List[int]]:
+        # sort the pair of rcut and sels in ascending order, first based on sel, then on rcut.
+        zipped = sorted(
+            zip(self.get_model_rcuts(), self.get_model_nsels()),
+            key=lambda x: (x[1], x[0]),
+        )
+        return [p[0] for p in zipped], [p[1] for p in zipped]
+
+    def forward_atomic(
+        self,
+        extended_coord,
+        extended_atype,
+        nlist,
+        mapping: Optional[np.ndarray] = None,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+    ) -> Dict[str, np.ndarray]:
+        """Return atomic prediction.
+
+        Parameters
+        ----------
+        extended_coord
+            coodinates in extended region, (nframes, nall * 3)
+        extended_atype
+            atomic type in extended region, (nframes, nall)
+        nlist
+            neighbor list, (nframes, nloc, nsel).
+        mapping
+            mapps the extended indices to local indices.
+        fparam
+            frame parameter. (nframes, ndf)
+        aparam
+            atomic parameter. (nframes, nloc, nda)
+
+        Returns
+        -------
+        result_dict
+            the result dict, defined by the fitting net output def.
+        """
+        nframes, nloc, nnei = nlist.shape
+        extended_coord = extended_coord.reshape(nframes, -1, 3)
+        sorted_rcuts, sorted_sels = self._sort_rcuts_sels()
+        nlists = build_multiple_neighbor_list(
+            extended_coord,
+            nlist,
+            sorted_rcuts,
+            sorted_sels,
+        )
+        raw_nlists = [
+            nlists[get_multiple_nlist_key(rcut, sel)]
+            for rcut, sel in zip(self.get_model_rcuts(), self.get_model_nsels())
+        ]
+        nlists_ = [
+            nl if mt else nlist_distinguish_types(nl, extended_atype, sel)
+            for mt, nl, sel in zip(
+                self.mixed_types_list, raw_nlists, self.get_model_sels()
+            )
+        ]
+        ener_list = []
+
+        for i, model in enumerate(self.models):
+            mapping = self.mapping_list[i]
+            ener_list.append(
+                model.forward_atomic(
+                    extended_coord,
+                    mapping[extended_atype],
+                    nlists_[i],
+                    mapping,
+                    fparam,
+                    aparam,
+                )["energy"]
+            )
+        self.weights = self._compute_weight(extended_coord, extended_atype, nlists_)
+        self.atomic_bias = None
+        if self.atomic_bias is not None:
+            raise NotImplementedError("Need to add bias in a future PR.")
+        else:
+            fit_ret = {
+                "energy": np.sum(np.stack(ener_list) * np.stack(self.weights), axis=0),
+            }  # (nframes, nloc, 1)
+        return fit_ret
+
+    @staticmethod
+    def remap_atype(ori_map: List[str], new_map: List[str]) -> np.ndarray:
+        """
+        This method is used to map the atype from the common type_map to the original type_map of
+        indivial AtomicModels.
+
+        Parameters
+        ----------
+        ori_map : List[str]
+            The original type map of an AtomicModel.
+        new_map : List[str]
+            The common type map of the DPZBLLinearEnergyAtomicModel, created by the `get_type_map` method,
+            must be a subset of the ori_map.
+
+        Returns
+        -------
+        np.ndarray
+        """
+        type_2_idx = {atp: idx for idx, atp in enumerate(ori_map)}
+        # this maps the atype in the new map to the original map
+        mapping = np.array([type_2_idx[new_map[idx]] for idx in range(len(new_map))])
+        return mapping
+
+    def fitting_output_def(self) -> FittingOutputDef:
+        return FittingOutputDef(
+            [
+                OutputVariableDef(
+                    name="energy",
+                    shape=[1],
+                    reduciable=True,
+                    r_differentiable=True,
+                    c_differentiable=True,
+                )
+            ]
+        )
+
+    def serialize(self) -> dict:
+        return {
+            "@class": "Model",
+            "type": "linear",
+            "@version": 1,
+            "models": [model.serialize() for model in self.models],
+            "type_map": self.type_map,
+        }
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "LinearEnergyAtomicModel":
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data.pop("@class")
+        data.pop("type")
+        type_map = data.pop("type_map")
+        models = [
+            BaseAtomicModel.get_class_by_type(model["type"]).deserialize(model)
+            for model in data["models"]
+        ]
+        data.pop("models")
+        return cls(models, type_map, **data)
+
+    def _compute_weight(
+        self,
+        extended_coord: np.ndarray,
+        extended_atype: np.ndarray,
+        nlists_: List[np.ndarray],
+    ) -> List[np.ndarray]:
+        """This should be a list of user defined weights that matches the number of models to be combined."""
+        nmodels = len(self.models)
+        return [np.ones(1) / nmodels for _ in range(nmodels)]
+
+    def get_dim_fparam(self) -> int:
+        """Get the number (dimension) of frame parameters of this atomic model."""
+        # tricky...
+        return max([model.get_dim_fparam() for model in self.models])
+
+    def get_dim_aparam(self) -> int:
+        """Get the number (dimension) of atomic parameters of this atomic model."""
+        return max([model.get_dim_aparam() for model in self.models])
+
+    def get_sel_type(self) -> List[int]:
+        """Get the selected atom types of this model.
+
+        Only atoms with selected atom types have atomic contribution
+        to the result of the model.
+        If returning an empty list, all atom types are selected.
+        """
+        if any(model.get_sel_type() == [] for model in self.models):
+            return []
+        # join all the selected types
+        return list(set().union(*[model.get_sel_type() for model in self.models]))
+
+    def set_out_bias(self, out_bias: np.ndarray, add=False) -> None:
+        """
+        Modify the output bias for all the models in the linear atomic model.
+
+        Parameters
+        ----------
+        out_bias : torch.Tensor
+            The new bias to be applied.
+        add : bool, optional
+            Whether to add the new bias to the existing one.
+            If False, the output bias will be directly replaced by the new bias.
+            If True, the new bias will be added to the existing one.
+        """
+        for model in self.models:
+            model.set_out_bias(out_bias, add=add)
+
+    def get_out_bias(self) -> np.ndarray:
+        """Return the weighted output bias of the linear atomic model."""
+        # TODO add get_out_bias for linear atomic model
+        raise NotImplementedError
+
+    def is_aparam_nall(self) -> bool:
+        """Check whether the shape of atomic parameters is (nframes, nall, ndim).
+
+        If False, the shape is (nframes, nloc, ndim).
+        """
+        return False
+
+
+class DPZBLLinearEnergyAtomicModel(LinearEnergyAtomicModel):
+    """Model linearly combine a list of AtomicModels.
+
+    Parameters
+    ----------
+    dp_model
+        The DPAtomicModel being combined.
+    zbl_model
+        The PairTable model being combined.
+    sw_rmin
+        The lower boundary of the interpolation between short-range tabulated interaction and DP.
+    sw_rmax
+        The upper boundary of the interpolation between short-range tabulated interaction and DP.
+    type_map
+        Mapping atom type to the name (str) of the type.
+        For example `type_map[1]` gives the name of the type 1.
+    smin_alpha
+        The short-range tabulated interaction will be swithed according to the distance of the nearest neighbor.
+        This distance is calculated by softmin.
+    """
+
+    def __init__(
+        self,
+        dp_model: DPAtomicModel,
+        zbl_model: PairTabAtomicModel,
+        sw_rmin: float,
+        sw_rmax: float,
+        type_map: List[str],
+        smin_alpha: Optional[float] = 0.1,
+        **kwargs,
+    ):
+        models = [dp_model, zbl_model]
+        super().__init__(models, type_map, **kwargs)
+        self.dp_model = dp_model
+        self.zbl_model = zbl_model
+
+        self.sw_rmin = sw_rmin
+        self.sw_rmax = sw_rmax
+        self.smin_alpha = smin_alpha
+
+    def serialize(self) -> dict:
+        dd = BaseAtomicModel.serialize(self)
+        dd.update(
+            {
+                "@class": "Model",
+                "type": "zbl",
+                "@version": 2,
+                "models": LinearEnergyAtomicModel(
+                    models=[self.models[0], self.models[1]], type_map=self.type_map
+                ).serialize(),
+                "sw_rmin": self.sw_rmin,
+                "sw_rmax": self.sw_rmax,
+                "smin_alpha": self.smin_alpha,
+            }
+        )
+        return dd
+
+    @classmethod
+    def deserialize(cls, data) -> "DPZBLLinearEnergyAtomicModel":
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 2, 1)
+        data.pop("@class")
+        data.pop("type")
+        sw_rmin = data.pop("sw_rmin")
+        sw_rmax = data.pop("sw_rmax")
+        smin_alpha = data.pop("smin_alpha")
+        linear_model = LinearEnergyAtomicModel.deserialize(data.pop("models"))
+        dp_model, zbl_model = linear_model.models
+        type_map = linear_model.type_map
+
+        return cls(
+            dp_model=dp_model,
+            zbl_model=zbl_model,
+            sw_rmin=sw_rmin,
+            sw_rmax=sw_rmax,
+            type_map=type_map,
+            smin_alpha=smin_alpha,
+            **data,
+        )
+
+    def _compute_weight(
+        self,
+        extended_coord: np.ndarray,
+        extended_atype: np.ndarray,
+        nlists_: List[np.ndarray],
+    ) -> List[np.ndarray]:
+        """ZBL weight.
+
+        Returns
+        -------
+        List[np.ndarray]
+            the atomic ZBL weight for interpolation. (nframes, nloc, 1)
+        """
+        assert (
+            self.sw_rmax > self.sw_rmin
+        ), "The upper boundary `sw_rmax` must be greater than the lower boundary `sw_rmin`."
+
+        dp_nlist = nlists_[0]
+        zbl_nlist = nlists_[1]
+
+        zbl_nnei = zbl_nlist.shape[-1]
+        dp_nnei = dp_nlist.shape[-1]
+
+        # use the larger rr based on nlist
+        nlist_larger = zbl_nlist if zbl_nnei >= dp_nnei else dp_nlist
+        masked_nlist = np.clip(nlist_larger, 0, None)
+        pairwise_rr = PairTabAtomicModel._get_pairwise_dist(
+            extended_coord, masked_nlist
+        )
+
+        numerator = np.sum(
+            pairwise_rr * np.exp(-pairwise_rr / self.smin_alpha), axis=-1
+        )  # masked nnei will be zero, no need to handle
+        denominator = np.sum(
+            np.where(
+                nlist_larger != -1,
+                np.exp(-pairwise_rr / self.smin_alpha),
+                np.zeros_like(nlist_larger),
+            ),
+            axis=-1,
+        )  # handle masked nnei.
+        with np.errstate(divide="ignore", invalid="ignore"):
+            sigma = numerator / denominator
+        u = (sigma - self.sw_rmin) / (self.sw_rmax - self.sw_rmin)
+        coef = np.zeros_like(u)
+        left_mask = sigma < self.sw_rmin
+        mid_mask = (self.sw_rmin <= sigma) & (sigma < self.sw_rmax)
+        right_mask = sigma >= self.sw_rmax
+        coef[left_mask] = 1
+        with np.errstate(invalid="ignore"):
+            smooth = -6 * u**5 + 15 * u**4 - 10 * u**3 + 1
+        coef[mid_mask] = smooth[mid_mask]
+        coef[right_mask] = 0
+        self.zbl_weight = coef
+        return [1 - np.expand_dims(coef, -1), np.expand_dims(coef, -1)]
diff --git a/deepmd/dpmodel/atomic_model/make_base_atomic_model.py b/deepmd/dpmodel/atomic_model/make_base_atomic_model.py
new file mode 100644
index 0000000000..3e02a5d076
--- /dev/null
+++ b/deepmd/dpmodel/atomic_model/make_base_atomic_model.py
@@ -0,0 +1,225 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from abc import (
+    ABC,
+    abstractmethod,
+)
+from typing import (
+    Dict,
+    List,
+    Optional,
+)
+
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+)
+from deepmd.utils.plugin import (
+    PluginVariant,
+    make_plugin_registry,
+)
+
+
+def make_base_atomic_model(
+    t_tensor,
+    fwd_method_name: str = "forward_atomic",
+):
+    """Make the base class for the atomic model.
+
+    Parameters
+    ----------
+    t_tensor
+        The type of the tensor. used in the type hint.
+    fwd_method_name
+        Name of the forward method. For dpmodels, it should be "call".
+        For torch models, it should be "forward".
+
+    """
+
+    class BAM(ABC, PluginVariant, make_plugin_registry("atomic model")):
+        """Base Atomic Model provides the interfaces of an atomic model."""
+
+        @abstractmethod
+        def fitting_output_def(self) -> FittingOutputDef:
+            """Get the output def of developer implemented atomic models."""
+            pass
+
+        def atomic_output_def(self) -> FittingOutputDef:
+            """Get the output def of the atomic model.
+
+            By default it is the same as FittingOutputDef, but it
+            allows model level wrapper of the output defined by the developer.
+
+            """
+            return self.fitting_output_def()
+
+        @abstractmethod
+        def get_rcut(self) -> float:
+            """Get the cut-off radius."""
+            pass
+
+        @abstractmethod
+        def get_type_map(self) -> List[str]:
+            """Get the type map."""
+            pass
+
+        def get_ntypes(self) -> int:
+            """Get the number of atom types."""
+            return len(self.get_type_map())
+
+        @abstractmethod
+        def get_sel(self) -> List[int]:
+            """Returns the number of selected atoms for each type."""
+            pass
+
+        def get_nsel(self) -> int:
+            """Returns the total number of selected neighboring atoms in the cut-off radius."""
+            return sum(self.get_sel())
+
+        def get_nnei(self) -> int:
+            """Returns the total number of selected neighboring atoms in the cut-off radius."""
+            return self.get_nsel()
+
+        @abstractmethod
+        def get_dim_fparam(self) -> int:
+            """Get the number (dimension) of frame parameters of this atomic model."""
+
+        @abstractmethod
+        def get_dim_aparam(self) -> int:
+            """Get the number (dimension) of atomic parameters of this atomic model."""
+
+        @abstractmethod
+        def get_sel_type(self) -> List[int]:
+            """Get the selected atom types of this model.
+
+            Only atoms with selected atom types have atomic contribution
+            to the result of the model.
+            If returning an empty list, all atom types are selected.
+            """
+
+        @abstractmethod
+        def set_out_bias(self, out_bias: t_tensor, add=False) -> None:
+            """
+            Modify the output bias for the atomic model.
+
+            Parameters
+            ----------
+            out_bias : t_tensor
+                The new bias to be applied.
+            add : bool, optional
+                Whether to add the new bias to the existing one.
+                If False, the output bias will be directly replaced by the new bias.
+                If True, the new bias will be added to the existing one.
+            """
+
+        @abstractmethod
+        def get_out_bias(self) -> t_tensor:
+            """Return the output bias of the atomic model."""
+
+        @abstractmethod
+        def is_aparam_nall(self) -> bool:
+            """Check whether the shape of atomic parameters is (nframes, nall, ndim).
+
+            If False, the shape is (nframes, nloc, ndim).
+            """
+
+        @abstractmethod
+        def mixed_types(self) -> bool:
+            """If true, the model
+            1. assumes total number of atoms aligned across frames;
+            2. uses a neighbor list that does not distinguish different atomic types.
+
+            If false, the model
+            1. assumes total number of atoms of each atom type aligned across frames;
+            2. uses a neighbor list that distinguishes different atomic types.
+
+            """
+            pass
+
+        @abstractmethod
+        def fwd(
+            self,
+            extended_coord: t_tensor,
+            extended_atype: t_tensor,
+            nlist: t_tensor,
+            mapping: Optional[t_tensor] = None,
+            fparam: Optional[t_tensor] = None,
+            aparam: Optional[t_tensor] = None,
+        ) -> Dict[str, t_tensor]:
+            pass
+
+        @abstractmethod
+        def serialize(self) -> dict:
+            pass
+
+        @classmethod
+        @abstractmethod
+        def deserialize(cls, data: dict):
+            pass
+
+        def make_atom_mask(
+            self,
+            atype: t_tensor,
+        ) -> t_tensor:
+            """The atoms with type < 0 are treated as virutal atoms,
+            which serves as place-holders for multi-frame calculations
+            with different number of atoms in different frames.
+
+            Parameters
+            ----------
+            atype
+                Atom types. >= 0 for real atoms <0 for virtual atoms.
+
+            Returns
+            -------
+            mask
+                True for real atoms and False for virutal atoms.
+
+            """
+            # supposed to be supported by all backends
+            return atype >= 0
+
+        def do_grad_r(
+            self,
+            var_name: Optional[str] = None,
+        ) -> bool:
+            """Tell if the output variable `var_name` is r_differentiable.
+            if var_name is None, returns if any of the variable is r_differentiable.
+
+            """
+            odef = self.fitting_output_def()
+            if var_name is None:
+                require: List[bool] = []
+                for vv in odef.keys():
+                    require.append(self.do_grad_(vv, "r"))
+                return any(require)
+            else:
+                return self.do_grad_(var_name, "r")
+
+        def do_grad_c(
+            self,
+            var_name: Optional[str] = None,
+        ) -> bool:
+            """Tell if the output variable `var_name` is c_differentiable.
+            if var_name is None, returns if any of the variable is c_differentiable.
+
+            """
+            odef = self.fitting_output_def()
+            if var_name is None:
+                require: List[bool] = []
+                for vv in odef.keys():
+                    require.append(self.do_grad_(vv, "c"))
+                return any(require)
+            else:
+                return self.do_grad_(var_name, "c")
+
+        def do_grad_(self, var_name: str, base: str) -> bool:
+            """Tell if the output variable `var_name` is differentiable."""
+            assert var_name is not None
+            assert base in ["c", "r"]
+            if base == "c":
+                return self.fitting_output_def()[var_name].c_differentiable
+            return self.fitting_output_def()[var_name].r_differentiable
+
+    setattr(BAM, fwd_method_name, BAM.fwd)
+    delattr(BAM, "fwd")
+
+    return BAM
diff --git a/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py b/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py
new file mode 100644
index 0000000000..30ab58928b
--- /dev/null
+++ b/deepmd/dpmodel/atomic_model/pairtab_atomic_model.py
@@ -0,0 +1,406 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+from typing import (
+    Dict,
+    List,
+    Optional,
+    Union,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+    OutputVariableDef,
+)
+from deepmd.utils.pair_tab import (
+    PairTab,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+from .base_atomic_model import (
+    BaseAtomicModel,
+)
+
+
+@BaseAtomicModel.register("pairtab")
+class PairTabAtomicModel(BaseAtomicModel):
+    """Pairwise tabulation energy model.
+
+    This model can be used to tabulate the pairwise energy between atoms for either
+    short-range or long-range interactions, such as D3, LJ, ZBL, etc. It should not
+    be used alone, but rather as one submodel of a linear (sum) model, such as
+    DP+D3.
+
+    Do not put the model on the first model of a linear model, since the linear
+    model fetches the type map from the first model.
+
+    At this moment, the model does not smooth the energy at the cutoff radius, so
+    one needs to make sure the energy has been smoothed to zero.
+
+    Parameters
+    ----------
+    tab_file : str
+        The path to the tabulation file.
+    rcut : float
+        The cutoff radius.
+    sel : int or list[int]
+        The maxmum number of atoms in the cut-off radius.
+    type_map : list[str]
+        Mapping atom type to the name (str) of the type.
+        For example `type_map[1]` gives the name of the type 1.
+    """
+
+    def __init__(
+        self,
+        tab_file: str,
+        rcut: float,
+        sel: Union[int, List[int]],
+        type_map: List[str],
+        **kwargs,
+    ):
+        super().__init__()
+        self.tab_file = tab_file
+        self.rcut = rcut
+        self.type_map = type_map
+
+        self.tab = PairTab(self.tab_file, rcut=rcut)
+        self.type_map = type_map
+        self.ntypes = len(type_map)
+
+        if self.tab_file is not None:
+            self.tab_info, self.tab_data = self.tab.get()
+            nspline, ntypes_tab = self.tab_info[-2:].astype(int)
+            self.tab_data = self.tab_data.reshape(ntypes_tab, ntypes_tab, nspline, 4)
+            if self.ntypes != ntypes_tab:
+                raise ValueError(
+                    "The `type_map` provided does not match the number of columns in the table."
+                )
+        else:
+            self.tab_info, self.tab_data = None, None
+
+        if isinstance(sel, int):
+            self.sel = sel
+        elif isinstance(sel, list):
+            self.sel = sum(sel)
+        else:
+            raise TypeError("sel must be int or list[int]")
+
+    def fitting_output_def(self) -> FittingOutputDef:
+        return FittingOutputDef(
+            [
+                OutputVariableDef(
+                    name="energy",
+                    shape=[1],
+                    reduciable=True,
+                    r_differentiable=True,
+                    c_differentiable=True,
+                )
+            ]
+        )
+
+    def get_rcut(self) -> float:
+        return self.rcut
+
+    def get_type_map(self) -> List[str]:
+        return self.type_map
+
+    def get_sel(self) -> List[int]:
+        return [self.sel]
+
+    def get_nsel(self) -> int:
+        return self.sel
+
+    def mixed_types(self) -> bool:
+        """If true, the model
+        1. assumes total number of atoms aligned across frames;
+        2. uses a neighbor list that does not distinguish different atomic types.
+
+        If false, the model
+        1. assumes total number of atoms of each atom type aligned across frames;
+        2. uses a neighbor list that distinguishes different atomic types.
+
+        """
+        # to match DPA1 and DPA2.
+        return True
+
+    def set_out_bias(self, out_bias: np.ndarray, add=False) -> None:
+        """
+        Modify the output bias for the atomic model.
+
+        Parameters
+        ----------
+        out_bias : torch.Tensor
+            The new bias to be applied.
+        add : bool, optional
+            Whether to add the new bias to the existing one.
+            If False, the output bias will be directly replaced by the new bias.
+            If True, the new bias will be added to the existing one.
+        """
+        self.bias_atom_e = out_bias + self.bias_atom_e if add else out_bias
+
+    def get_out_bias(self) -> np.ndarray:
+        """Return the output bias of the atomic model."""
+        return self.bias_atom_e
+
+    def serialize(self) -> dict:
+        dd = BaseAtomicModel.serialize(self)
+        dd.update(
+            {
+                "@class": "Model",
+                "type": "pairtab",
+                "@version": 1,
+                "tab": self.tab.serialize(),
+                "rcut": self.rcut,
+                "sel": self.sel,
+                "type_map": self.type_map,
+            }
+        )
+        return dd
+
+    @classmethod
+    def deserialize(cls, data) -> "PairTabAtomicModel":
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data.pop("@class")
+        data.pop("type")
+        rcut = data.pop("rcut")
+        sel = data.pop("sel")
+        type_map = data.pop("type_map")
+        tab = PairTab.deserialize(data.pop("tab"))
+        tab_model = cls(None, rcut, sel, type_map, **data)
+        tab_model.tab = tab
+        tab_model.tab_info = tab_model.tab.tab_info
+        nspline, ntypes = tab_model.tab_info[-2:].astype(int)
+        tab_model.tab_data = tab_model.tab.tab_data.reshape(ntypes, ntypes, nspline, 4)
+        return tab_model
+
+    def forward_atomic(
+        self,
+        extended_coord,
+        extended_atype,
+        nlist,
+        mapping: Optional[np.ndarray] = None,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+    ) -> Dict[str, np.ndarray]:
+        nframes, nloc, nnei = nlist.shape
+        extended_coord = extended_coord.reshape(nframes, -1, 3)
+
+        # this will mask all -1 in the nlist
+        mask = nlist >= 0
+        masked_nlist = nlist * mask
+
+        atype = extended_atype[:, :nloc]  # (nframes, nloc)
+        pairwise_rr = self._get_pairwise_dist(
+            extended_coord, masked_nlist
+        )  # (nframes, nloc, nnei)
+        self.tab_data = self.tab_data.reshape(
+            self.tab.ntypes, self.tab.ntypes, self.tab.nspline, 4
+        )
+
+        # (nframes, nloc, nnei)
+        j_type = extended_atype[
+            np.arange(extended_atype.shape[0])[:, None, None], masked_nlist
+        ]
+
+        raw_atomic_energy = self._pair_tabulated_inter(
+            nlist, atype, j_type, pairwise_rr
+        )
+        atomic_energy = 0.5 * np.sum(
+            np.where(nlist != -1, raw_atomic_energy, np.zeros_like(raw_atomic_energy)),
+            axis=-1,
+        ).reshape(nframes, nloc, 1)
+
+        return {"energy": atomic_energy}
+
+    def _pair_tabulated_inter(
+        self,
+        nlist: np.ndarray,
+        i_type: np.ndarray,
+        j_type: np.ndarray,
+        rr: np.ndarray,
+    ) -> np.ndarray:
+        """Pairwise tabulated energy.
+
+        Parameters
+        ----------
+        nlist : np.ndarray
+            The unmasked neighbour list. (nframes, nloc)
+        i_type : np.ndarray
+            The integer representation of atom type for all local atoms for all frames. (nframes, nloc)
+        j_type : np.ndarray
+            The integer representation of atom type for all neighbour atoms of all local atoms for all frames. (nframes, nloc, nnei)
+        rr : np.ndarray
+            The salar distance vector between two atoms. (nframes, nloc, nnei)
+
+        Returns
+        -------
+        np.ndarray
+            The masked atomic energy for all local atoms for all frames. (nframes, nloc, nnei)
+
+        Raises
+        ------
+        Exception
+            If the distance is beyond the table.
+
+        Notes
+        -----
+        This function is used to calculate the pairwise energy between two atoms.
+        It uses a table containing cubic spline coefficients calculated in PairTab.
+        """
+        nframes, nloc, nnei = nlist.shape
+        rmin = self.tab_info[0]
+        hh = self.tab_info[1]
+        hi = 1.0 / hh
+
+        nspline = int(self.tab_info[2] + 0.1)
+
+        uu = (rr - rmin) * hi  # this is broadcasted to (nframes,nloc,nnei)
+
+        # if nnei of atom 0 has -1 in the nlist, uu would be 0.
+        # this is to handle the nlist where the mask is set to 0, so that we don't raise exception for those atoms.
+        uu = np.where(nlist != -1, uu, nspline + 1)
+
+        if np.any(uu < 0):
+            raise Exception("coord go beyond table lower boundary")
+
+        idx = uu.astype(int)
+
+        uu -= idx
+        table_coef = self._extract_spline_coefficient(
+            i_type, j_type, idx, self.tab_data, nspline
+        )
+        table_coef = table_coef.reshape(nframes, nloc, nnei, 4)
+        ener = self._calculate_ener(table_coef, uu)
+        # here we need to overwrite energy to zero at rcut and beyond.
+        mask_beyond_rcut = rr >= self.rcut
+        # also overwrite values beyond extrapolation to zero
+        extrapolation_mask = rr >= self.tab.rmin + nspline * self.tab.hh
+        ener[mask_beyond_rcut] = 0
+        ener[extrapolation_mask] = 0
+
+        return ener
+
+    @staticmethod
+    def _get_pairwise_dist(coords: np.ndarray, nlist: np.ndarray) -> np.ndarray:
+        """Get pairwise distance `dr`.
+
+        Parameters
+        ----------
+        coords : np.ndarray
+            The coordinate of the atoms, shape of (nframes, nall, 3).
+        nlist
+            The masked nlist, shape of (nframes, nloc, nnei).
+
+        Returns
+        -------
+        np.ndarray
+            The pairwise distance between the atoms (nframes, nloc, nnei).
+        """
+        batch_indices = np.arange(nlist.shape[0])[:, None, None]
+        neighbor_atoms = coords[batch_indices, nlist]
+        loc_atoms = coords[:, : nlist.shape[1], :]
+        pairwise_dr = loc_atoms[:, :, None, :] - neighbor_atoms
+        pairwise_rr = np.sqrt(np.sum(np.power(pairwise_dr, 2), axis=-1))
+
+        return pairwise_rr
+
+    @staticmethod
+    def _extract_spline_coefficient(
+        i_type: np.ndarray,
+        j_type: np.ndarray,
+        idx: np.ndarray,
+        tab_data: np.ndarray,
+        nspline: int,
+    ) -> np.ndarray:
+        """Extract the spline coefficient from the table.
+
+        Parameters
+        ----------
+        i_type : np.ndarray
+            The integer representation of atom type for all local atoms for all frames. (nframes, nloc)
+        j_type : np.ndarray
+            The integer representation of atom type for all neighbour atoms of all local atoms for all frames. (nframes, nloc, nnei)
+        idx : np.ndarray
+            The index of the spline coefficient. (nframes, nloc, nnei)
+        tab_data : np.ndarray
+            The table storing all the spline coefficient. (ntype, ntype, nspline, 4)
+        nspline : int
+            The number of splines in the table.
+
+        Returns
+        -------
+        np.ndarray
+            The spline coefficient. (nframes, nloc, nnei, 4), shape may be squeezed.
+        """
+        # (nframes, nloc, nnei)
+        expanded_i_type = np.broadcast_to(
+            i_type[:, :, np.newaxis],
+            (i_type.shape[0], i_type.shape[1], j_type.shape[-1]),
+        )
+
+        # (nframes, nloc, nnei, nspline, 4)
+        expanded_tab_data = tab_data[expanded_i_type, j_type]
+
+        # (nframes, nloc, nnei, 1, 4)
+        expanded_idx = np.broadcast_to(
+            idx[..., np.newaxis, np.newaxis], (*idx.shape, 1, 4)
+        )
+        clipped_indices = np.clip(expanded_idx, 0, nspline - 1).astype(int)
+
+        # (nframes, nloc, nnei, 4)
+        final_coef = np.squeeze(
+            np.take_along_axis(expanded_tab_data, clipped_indices, 3)
+        )
+
+        # when the spline idx is beyond the table, all spline coefficients are set to `0`, and the resulting ener corresponding to the idx is also `0`.
+        final_coef[expanded_idx.squeeze() > nspline] = 0
+        return final_coef
+
+    @staticmethod
+    def _calculate_ener(coef: np.ndarray, uu: np.ndarray) -> np.ndarray:
+        """Calculate energy using spline coeeficients.
+
+        Parameters
+        ----------
+        coef : np.ndarray
+            The spline coefficients. (nframes, nloc, nnei, 4)
+        uu : np.ndarray
+            The atom displancemnt used in interpolation and extrapolation (nframes, nloc, nnei)
+
+        Returns
+        -------
+        np.ndarray
+            The atomic energy for all local atoms for all frames. (nframes, nloc, nnei)
+        """
+        a3, a2, a1, a0 = coef[..., 0], coef[..., 1], coef[..., 2], coef[..., 3]
+        etmp = (a3 * uu + a2) * uu + a1  # this should be elementwise operations.
+        ener = etmp * uu + a0  # this energy has the extrapolated value when rcut > rmax
+        return ener
+
+    def get_dim_fparam(self) -> int:
+        """Get the number (dimension) of frame parameters of this atomic model."""
+        return 0
+
+    def get_dim_aparam(self) -> int:
+        """Get the number (dimension) of atomic parameters of this atomic model."""
+        return 0
+
+    def get_sel_type(self) -> List[int]:
+        """Get the selected atom types of this model.
+
+        Only atoms with selected atom types have atomic contribution
+        to the result of the model.
+        If returning an empty list, all atom types are selected.
+        """
+        return []
+
+    def is_aparam_nall(self) -> bool:
+        """Check whether the shape of atomic parameters is (nframes, nall, ndim).
+
+        If False, the shape is (nframes, nloc, ndim).
+        """
+        return False
diff --git a/deepmd/dpmodel/common.py b/deepmd/dpmodel/common.py
new file mode 100644
index 0000000000..8030432385
--- /dev/null
+++ b/deepmd/dpmodel/common.py
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from abc import (
+    ABC,
+    abstractmethod,
+)
+
+import ml_dtypes
+import numpy as np
+
+from deepmd.common import (
+    VALID_PRECISION,
+)
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+
+PRECISION_DICT = {
+    "float16": np.float16,
+    "float32": np.float32,
+    "float64": np.float64,
+    "half": np.float16,
+    "single": np.float32,
+    "double": np.float64,
+    "int32": np.int32,
+    "int64": np.int64,
+    "default": GLOBAL_NP_FLOAT_PRECISION,
+    # NumPy doesn't have bfloat16 (and does't plan to add)
+    # ml_dtypes is a solution, but it seems not supporting np.save/np.load
+    # hdf5 hasn't supported bfloat16 as well (see https://forum.hdfgroup.org/t/11975)
+    "bfloat16": ml_dtypes.bfloat16,
+}
+assert VALID_PRECISION.issubset(PRECISION_DICT.keys())
+
+RESERVED_PRECISON_DICT = {
+    np.float16: "float16",
+    np.float32: "float32",
+    np.float64: "float64",
+    np.int32: "int32",
+    np.int64: "int64",
+    ml_dtypes.bfloat16: "bfloat16",
+}
+assert set(RESERVED_PRECISON_DICT.keys()) == set(PRECISION_DICT.values())
+DEFAULT_PRECISION = "float64"
+
+
+class NativeOP(ABC):
+    """The unit operation of a native model."""
+
+    @abstractmethod
+    def call(self, *args, **kwargs):
+        """Forward pass in NumPy implementation."""
+        pass
+
+    def __call__(self, *args, **kwargs):
+        """Forward pass in NumPy implementation."""
+        return self.call(*args, **kwargs)
+
+
+__all__ = [
+    "GLOBAL_NP_FLOAT_PRECISION",
+    "GLOBAL_ENER_FLOAT_PRECISION",
+    "PRECISION_DICT",
+    "RESERVED_PRECISON_DICT",
+    "DEFAULT_PRECISION",
+    "NativeOP",
+]
diff --git a/deepmd/dpmodel/descriptor/__init__.py b/deepmd/dpmodel/descriptor/__init__.py
new file mode 100644
index 0000000000..a19a2aa034
--- /dev/null
+++ b/deepmd/dpmodel/descriptor/__init__.py
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from .hybrid import (
+    DescrptHybrid,
+)
+from .make_base_descriptor import (
+    make_base_descriptor,
+)
+from .se_e2_a import (
+    DescrptSeA,
+)
+from .se_r import (
+    DescrptSeR,
+)
+
+__all__ = [
+    "DescrptSeA",
+    "DescrptSeR",
+    "DescrptHybrid",
+    "make_base_descriptor",
+]
diff --git a/deepmd/dpmodel/descriptor/base_descriptor.py b/deepmd/dpmodel/descriptor/base_descriptor.py
new file mode 100644
index 0000000000..7429d3f213
--- /dev/null
+++ b/deepmd/dpmodel/descriptor/base_descriptor.py
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+
+import numpy as np
+
+from .make_base_descriptor import (
+    make_base_descriptor,
+)
+
+BaseDescriptor = make_base_descriptor(np.ndarray, "call")
diff --git a/deepmd/dpmodel/descriptor/hybrid.py b/deepmd/dpmodel/descriptor/hybrid.py
new file mode 100644
index 0000000000..96640d75c8
--- /dev/null
+++ b/deepmd/dpmodel/descriptor/hybrid.py
@@ -0,0 +1,244 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Any,
+    Dict,
+    List,
+    Optional,
+    Union,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.common import (
+    NativeOP,
+)
+from deepmd.dpmodel.descriptor.base_descriptor import (
+    BaseDescriptor,
+)
+from deepmd.dpmodel.utils.nlist import (
+    nlist_distinguish_types,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+
+@BaseDescriptor.register("hybrid")
+class DescrptHybrid(BaseDescriptor, NativeOP):
+    """Concate a list of descriptors to form a new descriptor.
+
+    Parameters
+    ----------
+    list : list : List[Union[BaseDescriptor, Dict[str, Any]]]
+        Build a descriptor from the concatenation of the list of descriptors.
+        The descriptor can be either an object or a dictionary.
+    """
+
+    def __init__(
+        self,
+        list: List[Union[BaseDescriptor, Dict[str, Any]]],
+    ) -> None:
+        super().__init__()
+        # warning: list is conflict with built-in list
+        descrpt_list = list
+        if descrpt_list == [] or descrpt_list is None:
+            raise RuntimeError(
+                "cannot build descriptor from an empty list of descriptors."
+            )
+        formatted_descript_list = []
+        for ii in descrpt_list:
+            if isinstance(ii, BaseDescriptor):
+                formatted_descript_list.append(ii)
+            elif isinstance(ii, dict):
+                formatted_descript_list.append(BaseDescriptor(**ii))
+            else:
+                raise NotImplementedError
+        self.descrpt_list = formatted_descript_list
+        self.numb_descrpt = len(self.descrpt_list)
+        for ii in range(1, self.numb_descrpt):
+            assert (
+                self.descrpt_list[ii].get_ntypes() == self.descrpt_list[0].get_ntypes()
+            ), f"number of atom types in {ii}th descrptor {self.descrpt_list[0].__class__.__name__} does not match others"
+        # if hybrid sel is larger than sub sel, the nlist needs to be cut for each type
+        hybrid_sel = self.get_sel()
+        self.nlist_cut_idx: List[np.ndarray] = []
+        if self.mixed_types() and not all(
+            descrpt.mixed_types() for descrpt in self.descrpt_list
+        ):
+            self.sel_no_mixed_types = np.max(
+                [
+                    descrpt.get_sel()
+                    for descrpt in self.descrpt_list
+                    if not descrpt.mixed_types()
+                ],
+                axis=0,
+            ).tolist()
+        else:
+            self.sel_no_mixed_types = None
+        for ii in range(self.numb_descrpt):
+            if self.mixed_types() == self.descrpt_list[ii].mixed_types():
+                hybrid_sel = self.get_sel()
+            else:
+                assert self.sel_no_mixed_types is not None
+                hybrid_sel = self.sel_no_mixed_types
+            sub_sel = self.descrpt_list[ii].get_sel()
+            start_idx = np.cumsum(np.pad(hybrid_sel, (1, 0), "constant"))[:-1]
+            end_idx = start_idx + np.array(sub_sel)
+            cut_idx = np.concatenate(
+                [range(ss, ee) for ss, ee in zip(start_idx, end_idx)]
+            )
+            self.nlist_cut_idx.append(cut_idx)
+
+    def get_rcut(self) -> float:
+        """Returns the cut-off radius."""
+        return np.max([descrpt.get_rcut() for descrpt in self.descrpt_list]).item()
+
+    def get_sel(self) -> List[int]:
+        """Returns the number of selected atoms for each type."""
+        if self.mixed_types():
+            return [
+                np.max(
+                    [descrpt.get_nsel() for descrpt in self.descrpt_list], axis=0
+                ).item()
+            ]
+        else:
+            return np.max(
+                [descrpt.get_sel() for descrpt in self.descrpt_list], axis=0
+            ).tolist()
+
+    def get_ntypes(self) -> int:
+        """Returns the number of element types."""
+        return self.descrpt_list[0].get_ntypes()
+
+    def get_dim_out(self) -> int:
+        """Returns the output dimension."""
+        return np.sum([descrpt.get_dim_out() for descrpt in self.descrpt_list]).item()
+
+    def get_dim_emb(self) -> int:
+        """Returns the output dimension."""
+        return np.sum([descrpt.get_dim_emb() for descrpt in self.descrpt_list]).item()
+
+    def mixed_types(self):
+        """Returns if the descriptor requires a neighbor list that distinguish different
+        atomic types or not.
+        """
+        return any(descrpt.mixed_types() for descrpt in self.descrpt_list)
+
+    def share_params(self, base_class, shared_level, resume=False):
+        """
+        Share the parameters of self to the base_class with shared_level during multitask training.
+        If not start from checkpoint (resume is False),
+        some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes.
+        """
+        raise NotImplementedError
+
+    def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None):
+        """Update mean and stddev for descriptor elements."""
+        for descrpt in self.descrpt_list:
+            descrpt.compute_input_stats(merged, path)
+
+    def call(
+        self,
+        coord_ext,
+        atype_ext,
+        nlist,
+        mapping: Optional[np.ndarray] = None,
+    ):
+        """Compute the descriptor.
+
+        Parameters
+        ----------
+        coord_ext
+            The extended coordinates of atoms. shape: nf x (nallx3)
+        atype_ext
+            The extended aotm types. shape: nf x nall
+        nlist
+            The neighbor list. shape: nf x nloc x nnei
+        mapping
+            The index mapping, not required by this descriptor.
+
+        Returns
+        -------
+        descriptor
+            The descriptor. shape: nf x nloc x (ng x axis_neuron)
+        gr
+            The rotationally equivariant and permutationally invariant single particle
+            representation. shape: nf x nloc x ng x 3.
+        g2
+            The rotationally invariant pair-partical representation.
+        h2
+            The rotationally equivariant pair-partical representation.
+        sw
+            The smooth switch function.
+        """
+        out_descriptor = []
+        out_gr = []
+        out_g2 = None
+        out_h2 = None
+        out_sw = None
+        if self.sel_no_mixed_types is not None:
+            nl_distinguish_types = nlist_distinguish_types(
+                nlist,
+                atype_ext,
+                self.sel_no_mixed_types,
+            )
+        else:
+            nl_distinguish_types = None
+        for descrpt, nci in zip(self.descrpt_list, self.nlist_cut_idx):
+            # cut the nlist to the correct length
+            if self.mixed_types() == descrpt.mixed_types():
+                nl = nlist[:, :, nci]
+            else:
+                # mixed_types is True, but descrpt.mixed_types is False
+                assert nl_distinguish_types is not None
+                nl = nl_distinguish_types[:, :, nci]
+            odescriptor, gr, g2, h2, sw = descrpt(coord_ext, atype_ext, nl, mapping)
+            out_descriptor.append(odescriptor)
+            if gr is not None:
+                out_gr.append(gr)
+
+        out_descriptor = np.concatenate(out_descriptor, axis=-1)
+        out_gr = np.concatenate(out_gr, axis=-2) if out_gr else None
+        return out_descriptor, out_gr, out_g2, out_h2, out_sw
+
+    @classmethod
+    def update_sel(cls, global_jdata: dict, local_jdata: dict) -> dict:
+        """Update the selection and perform neighbor statistics.
+
+        Parameters
+        ----------
+        global_jdata : dict
+            The global data, containing the training section
+        local_jdata : dict
+            The local data refer to the current class
+        """
+        local_jdata_cpy = local_jdata.copy()
+        local_jdata_cpy["list"] = [
+            BaseDescriptor.update_sel(global_jdata, sub_jdata)
+            for sub_jdata in local_jdata["list"]
+        ]
+        return local_jdata_cpy
+
+    def serialize(self) -> dict:
+        return {
+            "@class": "Descriptor",
+            "type": "hybrid",
+            "@version": 1,
+            "list": [descrpt.serialize() for descrpt in self.descrpt_list],
+        }
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "DescrptHybrid":
+        data = data.copy()
+        class_name = data.pop("@class")
+        assert class_name == "Descriptor"
+        class_type = data.pop("type")
+        assert class_type == "hybrid"
+        check_version_compatibility(data.pop("@version"), 1, 1)
+        obj = cls(
+            list=[BaseDescriptor.deserialize(ii) for ii in data["list"]],
+        )
+        return obj
diff --git a/deepmd/dpmodel/descriptor/make_base_descriptor.py b/deepmd/dpmodel/descriptor/make_base_descriptor.py
new file mode 100644
index 0000000000..940bd0cd27
--- /dev/null
+++ b/deepmd/dpmodel/descriptor/make_base_descriptor.py
@@ -0,0 +1,159 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from abc import (
+    ABC,
+    abstractmethod,
+)
+from typing import (
+    Callable,
+    List,
+    Optional,
+    Union,
+)
+
+from deepmd.common import (
+    j_get_type,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.plugin import (
+    PluginVariant,
+    make_plugin_registry,
+)
+
+
+def make_base_descriptor(
+    t_tensor,
+    fwd_method_name: str = "forward",
+):
+    """Make the base class for the descriptor.
+
+    Parameters
+    ----------
+    t_tensor
+        The type of the tensor. used in the type hint.
+    fwd_method_name
+        Name of the forward method. For dpmodels, it should be "call".
+        For torch models, it should be "forward".
+
+    """
+
+    class BD(ABC, PluginVariant, make_plugin_registry("descriptor")):
+        """Base descriptor provides the interfaces of descriptor."""
+
+        def __new__(cls, *args, **kwargs):
+            if cls is BD:
+                cls = cls.get_class_by_type(j_get_type(kwargs, cls.__name__))
+            return super().__new__(cls)
+
+        @abstractmethod
+        def get_rcut(self) -> float:
+            """Returns the cut-off radius."""
+            pass
+
+        @abstractmethod
+        def get_sel(self) -> List[int]:
+            """Returns the number of selected neighboring atoms for each type."""
+            pass
+
+        def get_nsel(self) -> int:
+            """Returns the total number of selected neighboring atoms in the cut-off radius."""
+            return sum(self.get_sel())
+
+        def get_nnei(self) -> int:
+            """Returns the total number of selected neighboring atoms in the cut-off radius."""
+            return self.get_nsel()
+
+        @abstractmethod
+        def get_ntypes(self) -> int:
+            """Returns the number of element types."""
+            pass
+
+        @abstractmethod
+        def get_dim_out(self) -> int:
+            """Returns the output descriptor dimension."""
+            pass
+
+        @abstractmethod
+        def get_dim_emb(self) -> int:
+            """Returns the embedding dimension of g2."""
+            pass
+
+        @abstractmethod
+        def mixed_types(self) -> bool:
+            """Returns if the descriptor requires a neighbor list that distinguish different
+            atomic types or not.
+            """
+            pass
+
+        @abstractmethod
+        def share_params(self, base_class, shared_level, resume=False):
+            """
+            Share the parameters of self to the base_class with shared_level during multitask training.
+            If not start from checkpoint (resume is False),
+            some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes.
+            """
+            pass
+
+        def compute_input_stats(
+            self,
+            merged: Union[Callable[[], List[dict]], List[dict]],
+            path: Optional[DPPath] = None,
+        ):
+            """Update mean and stddev for descriptor elements."""
+            raise NotImplementedError
+
+        @abstractmethod
+        def fwd(
+            self,
+            extended_coord,
+            extended_atype,
+            nlist,
+            mapping: Optional[t_tensor] = None,
+        ):
+            """Calculate descriptor."""
+            pass
+
+        @abstractmethod
+        def serialize(self) -> dict:
+            """Serialize the obj to dict."""
+            pass
+
+        @classmethod
+        def deserialize(cls, data: dict) -> "BD":
+            """Deserialize the model.
+
+            Parameters
+            ----------
+            data : dict
+                The serialized data
+
+            Returns
+            -------
+            BD
+                The deserialized descriptor
+            """
+            if cls is BD:
+                return BD.get_class_by_type(data["type"]).deserialize(data)
+            raise NotImplementedError("Not implemented in class %s" % cls.__name__)
+
+        @classmethod
+        @abstractmethod
+        def update_sel(cls, global_jdata: dict, local_jdata: dict):
+            """Update the selection and perform neighbor statistics.
+
+            Parameters
+            ----------
+            global_jdata : dict
+                The global data, containing the training section
+            local_jdata : dict
+                The local data refer to the current class
+            """
+            # call subprocess
+            cls = cls.get_class_by_type(j_get_type(local_jdata, cls.__name__))
+            return cls.update_sel(global_jdata, local_jdata)
+
+    setattr(BD, fwd_method_name, BD.fwd)
+    delattr(BD, "fwd")
+
+    return BD
diff --git a/deepmd_utils/model_format/se_e2_a.py b/deepmd/dpmodel/descriptor/se_e2_a.py
similarity index 55%
rename from deepmd_utils/model_format/se_e2_a.py
rename to deepmd/dpmodel/descriptor/se_e2_a.py
index b9143ee360..8d926034dd 100644
--- a/deepmd_utils/model_format/se_e2_a.py
+++ b/deepmd/dpmodel/descriptor/se_e2_a.py
@@ -1,8 +1,23 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import itertools
+
 import numpy as np
 
+from deepmd.dpmodel.utils.update_sel import (
+    UpdateSel,
+)
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
 try:
-    from deepmd_utils._version import version as __version__
+    from deepmd._version import version as __version__
 except ImportError:
     __version__ = "unknown"
 
@@ -11,22 +26,29 @@
     Any,
     List,
     Optional,
+    Tuple,
 )
 
-from .common import (
+from deepmd.dpmodel import (
     DEFAULT_PRECISION,
+    PRECISION_DICT,
     NativeOP,
 )
-from .env_mat import (
-    EnvMat,
-)
-from .network import (
+from deepmd.dpmodel.utils import (
     EmbeddingNet,
+    EnvMat,
     NetworkCollection,
+    PairExcludeMask,
+)
+
+from .base_descriptor import (
+    BaseDescriptor,
 )
 
 
-class DescrptSeA(NativeOP):
+@BaseDescriptor.register("se_e2_a")
+@BaseDescriptor.register("se_a")
+class DescrptSeA(NativeOP, BaseDescriptor):
     r"""DeepPot-SE constructed from all information (both angular and radial) of
     atomic configurations. The embedding takes the distance between atoms as input.
 
@@ -65,7 +87,7 @@ class DescrptSeA(NativeOP):
 
     :math:`\mathcal{G}^i_< \in \mathbb{R}^{N \times M_2}` takes first :math:`M_2` columns of
     :math:`\mathcal{G}^i`. The equation of embedding network :math:`\mathcal{N}` can be found at
-    :meth:`deepmd.utils.network.embedding_net`.
+    :meth:`deepmd.tf.utils.network.embedding_net`.
 
     Parameters
     ----------
@@ -73,7 +95,7 @@ class DescrptSeA(NativeOP):
             The cut-off radius :math:`r_c`
     rcut_smth
             From where the environment matrix should be smoothed :math:`r_s`
-    sel : list[str]
+    sel : list[int]
             sel[i] specifies the maxmum number of type i atoms in the cut-off radius
     neuron : list[int]
             Number of neurons in each hidden layers of the embedding net :math:`\mathcal{N}`
@@ -89,6 +111,8 @@ class DescrptSeA(NativeOP):
     exclude_types : List[List[int]]
             The excluded pairs of types which have no interaction with each other.
             For example, `[[0, 1]]` means no interaction between type 0 and type 1.
+    env_protection: float
+            Protection parameter to prevent division by zero errors during environment matrix calculations.
     set_davg_zero
             Set the shift of embedding net input to zero.
     activation_function
@@ -120,23 +144,22 @@ def __init__(
         self,
         rcut: float,
         rcut_smth: float,
-        sel: List[str],
+        sel: List[int],
         neuron: List[int] = [24, 48, 96],
         axis_neuron: int = 8,
         resnet_dt: bool = False,
         trainable: bool = True,
         type_one_side: bool = True,
         exclude_types: List[List[int]] = [],
+        env_protection: float = 0.0,
         set_davg_zero: bool = False,
         activation_function: str = "tanh",
         precision: str = DEFAULT_PRECISION,
         spin: Optional[Any] = None,
+        # consistent with argcheck, not used though
+        seed: Optional[int] = None,
     ) -> None:
         ## seed, uniform_seed, multi_task, not included.
-        if not type_one_side:
-            raise NotImplementedError("type_one_side == False not implemented")
-        if exclude_types != []:
-            raise NotImplementedError("exclude_types is not implemented")
         if spin is not None:
             raise NotImplementedError("spin is not implemented")
 
@@ -149,11 +172,13 @@ def __init__(
         self.resnet_dt = resnet_dt
         self.trainable = trainable
         self.type_one_side = type_one_side
-        self.exclude_types = exclude_types
+        self.env_protection = env_protection
         self.set_davg_zero = set_davg_zero
         self.activation_function = activation_function
         self.precision = precision
         self.spin = spin
+        # order matters, placed after the assignment of self.ntypes
+        self.reinit_exclude(exclude_types)
 
         in_dim = 1  # not considiering type embedding
         self.embeddings = NetworkCollection(
@@ -161,19 +186,24 @@ def __init__(
             ndim=(1 if self.type_one_side else 2),
             network_type="embedding_network",
         )
-        for ii in range(self.ntypes):
-            self.embeddings[(ii,)] = EmbeddingNet(
+        for embedding_idx in itertools.product(
+            range(self.ntypes), repeat=self.embeddings.ndim
+        ):
+            self.embeddings[embedding_idx] = EmbeddingNet(
                 in_dim,
                 self.neuron,
                 self.activation_function,
                 self.resnet_dt,
                 self.precision,
             )
-        self.env_mat = EnvMat(self.rcut, self.rcut_smth)
+        self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection)
         self.nnei = np.sum(self.sel)
-        self.nneix4 = self.nnei * 4
-        self.davg = np.zeros([self.ntypes, self.nneix4])
-        self.dstd = np.ones([self.ntypes, self.nneix4])
+        self.davg = np.zeros(
+            [self.ntypes, self.nnei, 4], dtype=PRECISION_DICT[self.precision]
+        )
+        self.dstd = np.ones(
+            [self.ntypes, self.nnei, 4], dtype=PRECISION_DICT[self.precision]
+        )
         self.orig_sel = self.sel
 
     def __setitem__(self, key, value):
@@ -192,22 +222,73 @@ def __getitem__(self, key):
         else:
             raise KeyError(key)
 
+    @property
+    def dim_out(self):
+        """Returns the output dimension of this descriptor."""
+        return self.get_dim_out()
+
+    def get_dim_out(self):
+        """Returns the output dimension of this descriptor."""
+        return self.neuron[-1] * self.axis_neuron
+
+    def get_dim_emb(self):
+        """Returns the embedding (g2) dimension of this descriptor."""
+        return self.neuron[-1]
+
+    def get_rcut(self):
+        """Returns cutoff radius."""
+        return self.rcut
+
+    def get_sel(self):
+        """Returns cutoff radius."""
+        return self.sel
+
+    def mixed_types(self):
+        """Returns if the descriptor requires a neighbor list that distinguish different
+        atomic types or not.
+        """
+        return False
+
+    def share_params(self, base_class, shared_level, resume=False):
+        """
+        Share the parameters of self to the base_class with shared_level during multitask training.
+        If not start from checkpoint (resume is False),
+        some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes.
+        """
+        raise NotImplementedError
+
+    def get_ntypes(self) -> int:
+        """Returns the number of element types."""
+        return self.ntypes
+
+    def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None):
+        """Update mean and stddev for descriptor elements."""
+        raise NotImplementedError
+
     def cal_g(
         self,
         ss,
-        ll,
+        embedding_idx,
     ):
-        nf, nloc, nnei = ss.shape[0:3]
-        ss = ss.reshape(nf, nloc, nnei, 1)
-        # nf x nloc x nnei x ng
-        gg = self.embeddings[(ll,)].call(ss)
+        nf_times_nloc, nnei = ss.shape[0:2]
+        ss = ss.reshape(nf_times_nloc, nnei, 1)
+        # (nf x nloc) x nnei x ng
+        gg = self.embeddings[embedding_idx].call(ss)
         return gg
 
+    def reinit_exclude(
+        self,
+        exclude_types: List[Tuple[int, int]] = [],
+    ):
+        self.exclude_types = exclude_types
+        self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types)
+
     def call(
         self,
         coord_ext,
         atype_ext,
         nlist,
+        mapping: Optional[np.ndarray] = None,
     ):
         """Compute the descriptor.
 
@@ -219,35 +300,79 @@ def call(
             The extended aotm types. shape: nf x nall
         nlist
             The neighbor list. shape: nf x nloc x nnei
+        mapping
+            The index mapping from extended to lcoal region. not used by this descriptor.
 
         Returns
         -------
         descriptor
-            The descriptor. shape: nf x nloc x ng x axis_neuron
+            The descriptor. shape: nf x nloc x (ng x axis_neuron)
+        gr
+            The rotationally equivariant and permutationally invariant single particle
+            representation. shape: nf x nloc x ng x 3
+        g2
+            The rotationally invariant pair-partical representation.
+            this descriptor returns None
+        h2
+            The rotationally equivariant pair-partical representation.
+            this descriptor returns None
+        sw
+            The smooth switch function.
         """
+        del mapping
         # nf x nloc x nnei x 4
         rr, ww = self.env_mat.call(coord_ext, atype_ext, nlist, self.davg, self.dstd)
         nf, nloc, nnei, _ = rr.shape
         sec = np.append([0], np.cumsum(self.sel))
 
         ng = self.neuron[-1]
-        gr = np.zeros([nf, nloc, ng, 4])
-        for tt in range(self.ntypes):
-            tr = rr[:, :, sec[tt] : sec[tt + 1], :]
+        gr = np.zeros([nf * nloc, ng, 4], dtype=PRECISION_DICT[self.precision])
+        exclude_mask = self.emask.build_type_exclude_mask(nlist, atype_ext)
+        # merge nf and nloc axis, so for type_one_side == False,
+        # we don't require atype is the same in all frames
+        exclude_mask = exclude_mask.reshape(nf * nloc, nnei)
+        rr = rr.reshape(nf * nloc, nnei, 4)
+
+        for embedding_idx in itertools.product(
+            range(self.ntypes), repeat=self.embeddings.ndim
+        ):
+            if self.type_one_side:
+                (tt,) = embedding_idx
+                ti_mask = np.s_[:]
+            else:
+                ti, tt = embedding_idx
+                ti_mask = atype_ext[:, :nloc].ravel() == ti
+            mm = exclude_mask[ti_mask, sec[tt] : sec[tt + 1]]
+            tr = rr[ti_mask, sec[tt] : sec[tt + 1], :]
+            tr = tr * mm[:, :, None]
             ss = tr[..., 0:1]
-            gg = self.cal_g(ss, tt)
-            # nf x nloc x ng x 4
-            gr += np.einsum("flni,flnj->flij", gg, tr)
+            gg = self.cal_g(ss, embedding_idx)
+            gr_tmp = np.einsum("lni,lnj->lij", gg, tr)
+            gr[ti_mask] += gr_tmp
+        gr = gr.reshape(nf, nloc, ng, 4)
+        # nf x nloc x ng x 4
         gr /= self.nnei
         gr1 = gr[:, :, : self.axis_neuron, :]
         # nf x nloc x ng x ng1
         grrg = np.einsum("flid,fljd->flij", gr, gr1)
         # nf x nloc x (ng x ng1)
-        grrg = grrg.reshape(nf, nloc, ng * self.axis_neuron)
-        return grrg
+        grrg = grrg.reshape(nf, nloc, ng * self.axis_neuron).astype(
+            GLOBAL_NP_FLOAT_PRECISION
+        )
+        return grrg, gr[..., 1:], None, None, ww
 
     def serialize(self) -> dict:
+        """Serialize the descriptor to dict."""
+        if not self.type_one_side and self.exclude_types:
+            for embedding_idx in itertools.product(range(self.ntypes), repeat=2):
+                # not actually used; to match serilization data from TF to pass the test
+                if embedding_idx in self.emask:
+                    self.embeddings[embedding_idx].clear()
+
         return {
+            "@class": "Descriptor",
+            "type": "se_e2_a",
+            "@version": 1,
             "rcut": self.rcut,
             "rcut_smth": self.rcut_smth,
             "sel": self.sel,
@@ -257,9 +382,11 @@ def serialize(self) -> dict:
             "trainable": self.trainable,
             "type_one_side": self.type_one_side,
             "exclude_types": self.exclude_types,
+            "env_protection": self.env_protection,
             "set_davg_zero": self.set_davg_zero,
             "activation_function": self.activation_function,
-            "precision": self.precision,
+            # make deterministic
+            "precision": np.dtype(PRECISION_DICT[self.precision]).name,
             "spin": self.spin,
             "env_mat": self.env_mat.serialize(),
             "embeddings": self.embeddings.serialize(),
@@ -271,7 +398,11 @@ def serialize(self) -> dict:
 
     @classmethod
     def deserialize(cls, data: dict) -> "DescrptSeA":
+        """Deserialize from dict."""
         data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data.pop("@class", None)
+        data.pop("type", None)
         variables = data.pop("@variables")
         embeddings = data.pop("embeddings")
         env_mat = data.pop("env_mat")
@@ -280,5 +411,18 @@ def deserialize(cls, data: dict) -> "DescrptSeA":
         obj["davg"] = variables["davg"]
         obj["dstd"] = variables["dstd"]
         obj.embeddings = NetworkCollection.deserialize(embeddings)
-        obj.env_mat = EnvMat.deserialize(env_mat)
         return obj
+
+    @classmethod
+    def update_sel(cls, global_jdata: dict, local_jdata: dict):
+        """Update the selection and perform neighbor statistics.
+
+        Parameters
+        ----------
+        global_jdata : dict
+            The global data, containing the training section
+        local_jdata : dict
+            The local data refer to the current class
+        """
+        local_jdata_cpy = local_jdata.copy()
+        return UpdateSel().update_one_sel(global_jdata, local_jdata_cpy, False)
diff --git a/deepmd/dpmodel/descriptor/se_r.py b/deepmd/dpmodel/descriptor/se_r.py
new file mode 100644
index 0000000000..9c9b4e096e
--- /dev/null
+++ b/deepmd/dpmodel/descriptor/se_r.py
@@ -0,0 +1,353 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import numpy as np
+
+from deepmd.dpmodel.utils.update_sel import (
+    UpdateSel,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+try:
+    from deepmd._version import version as __version__
+except ImportError:
+    __version__ = "unknown"
+
+import copy
+from typing import (
+    Any,
+    List,
+    Optional,
+)
+
+from deepmd.dpmodel import (
+    DEFAULT_PRECISION,
+    PRECISION_DICT,
+    NativeOP,
+)
+from deepmd.dpmodel.utils import (
+    EmbeddingNet,
+    EnvMat,
+    NetworkCollection,
+    PairExcludeMask,
+)
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+
+from .base_descriptor import (
+    BaseDescriptor,
+)
+
+
+@BaseDescriptor.register("se_e2_r")
+@BaseDescriptor.register("se_r")
+class DescrptSeR(NativeOP, BaseDescriptor):
+    r"""DeepPot-SE_R constructed from only the radial imformation of atomic configurations.
+
+
+    Parameters
+    ----------
+    rcut
+            The cut-off radius :math:`r_c`
+    rcut_smth
+            From where the environment matrix should be smoothed :math:`r_s`
+    sel : list[int]
+            sel[i] specifies the maxmum number of type i atoms in the cut-off radius
+    neuron : list[int]
+            Number of neurons in each hidden layers of the embedding net :math:`\mathcal{N}`
+    resnet_dt
+            Time-step `dt` in the resnet construction:
+            y = x + dt * \phi (Wx + b)
+    trainable
+            If the weights of embedding net are trainable.
+    type_one_side
+            Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets
+    exclude_types : List[List[int]]
+            The excluded pairs of types which have no interaction with each other.
+            For example, `[[0, 1]]` means no interaction between type 0 and type 1.
+    set_davg_zero
+            Set the shift of embedding net input to zero.
+    activation_function
+            The activation function in the embedding net. Supported options are |ACTIVATION_FN|
+    precision
+            The precision of the embedding net parameters. Supported options are |PRECISION|
+    multi_task
+            If the model has multi fitting nets to train.
+    spin
+            The deepspin object.
+
+    Limitations
+    -----------
+    The currently implementation does not support the following features
+
+    1. type_one_side == False
+    2. exclude_types != []
+    3. spin is not None
+
+    References
+    ----------
+    .. [1] Linfeng Zhang, Jiequn Han, Han Wang, Wissam A. Saidi, Roberto Car, and E. Weinan. 2018.
+       End-to-end symmetry preserving inter-atomic potential energy model for finite and extended
+       systems. In Proceedings of the 32nd International Conference on Neural Information Processing
+       Systems (NIPS'18). Curran Associates Inc., Red Hook, NY, USA, 4441-4451.
+    """
+
+    def __init__(
+        self,
+        rcut: float,
+        rcut_smth: float,
+        sel: List[int],
+        neuron: List[int] = [24, 48, 96],
+        resnet_dt: bool = False,
+        trainable: bool = True,
+        type_one_side: bool = True,
+        exclude_types: List[List[int]] = [],
+        env_protection: float = 0.0,
+        set_davg_zero: bool = False,
+        activation_function: str = "tanh",
+        precision: str = DEFAULT_PRECISION,
+        spin: Optional[Any] = None,
+        # consistent with argcheck, not used though
+        seed: Optional[int] = None,
+    ) -> None:
+        ## seed, uniform_seed, multi_task, not included.
+        if not type_one_side:
+            raise NotImplementedError("type_one_side == False not implemented")
+        if spin is not None:
+            raise NotImplementedError("spin is not implemented")
+
+        self.rcut = rcut
+        self.rcut_smth = rcut_smth
+        self.sel = sel
+        self.ntypes = len(self.sel)
+        self.neuron = neuron
+        self.resnet_dt = resnet_dt
+        self.trainable = trainable
+        self.type_one_side = type_one_side
+        self.exclude_types = exclude_types
+        self.set_davg_zero = set_davg_zero
+        self.activation_function = activation_function
+        self.precision = precision
+        self.spin = spin
+        self.emask = PairExcludeMask(self.ntypes, self.exclude_types)
+        self.env_protection = env_protection
+
+        in_dim = 1  # not considiering type embedding
+        self.embeddings = NetworkCollection(
+            ntypes=self.ntypes,
+            ndim=(1 if self.type_one_side else 2),
+            network_type="embedding_network",
+        )
+        if not self.type_one_side:
+            raise NotImplementedError("type_one_side == False not implemented")
+        for ii in range(self.ntypes):
+            self.embeddings[(ii,)] = EmbeddingNet(
+                in_dim,
+                self.neuron,
+                self.activation_function,
+                self.resnet_dt,
+                self.precision,
+            )
+        self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection)
+        self.nnei = np.sum(self.sel)
+        self.davg = np.zeros(
+            [self.ntypes, self.nnei, 1], dtype=PRECISION_DICT[self.precision]
+        )
+        self.dstd = np.ones(
+            [self.ntypes, self.nnei, 1], dtype=PRECISION_DICT[self.precision]
+        )
+        self.orig_sel = self.sel
+
+    def __setitem__(self, key, value):
+        if key in ("avg", "data_avg", "davg"):
+            self.davg = value
+        elif key in ("std", "data_std", "dstd"):
+            self.dstd = value
+        else:
+            raise KeyError(key)
+
+    def __getitem__(self, key):
+        if key in ("avg", "data_avg", "davg"):
+            return self.davg
+        elif key in ("std", "data_std", "dstd"):
+            return self.dstd
+        else:
+            raise KeyError(key)
+
+    @property
+    def dim_out(self):
+        """Returns the output dimension of this descriptor."""
+        return self.get_dim_out()
+
+    def get_dim_out(self):
+        """Returns the output dimension of this descriptor."""
+        return self.neuron[-1]
+
+    def get_dim_emb(self):
+        """Returns the embedding (g2) dimension of this descriptor."""
+        raise NotImplementedError
+
+    def get_rcut(self):
+        """Returns cutoff radius."""
+        return self.rcut
+
+    def get_sel(self):
+        """Returns cutoff radius."""
+        return self.sel
+
+    def mixed_types(self):
+        """Returns if the descriptor requires a neighbor list that distinguish different
+        atomic types or not.
+        """
+        return False
+
+    def share_params(self, base_class, shared_level, resume=False):
+        """
+        Share the parameters of self to the base_class with shared_level during multitask training.
+        If not start from checkpoint (resume is False),
+        some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes.
+        """
+        raise NotImplementedError
+
+    def get_ntypes(self) -> int:
+        """Returns the number of element types."""
+        return self.ntypes
+
+    def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None):
+        """Update mean and stddev for descriptor elements."""
+        raise NotImplementedError
+
+    def cal_g(
+        self,
+        ss,
+        ll,
+    ):
+        nf, nloc, nnei = ss.shape[0:3]
+        ss = ss.reshape(nf, nloc, nnei, 1)
+        # nf x nloc x nnei x ng
+        gg = self.embeddings[(ll,)].call(ss)
+        return gg
+
+    def call(
+        self,
+        coord_ext,
+        atype_ext,
+        nlist,
+        mapping: Optional[np.ndarray] = None,
+    ):
+        """Compute the descriptor.
+
+        Parameters
+        ----------
+        coord_ext
+            The extended coordinates of atoms. shape: nf x (nallx3)
+        atype_ext
+            The extended aotm types. shape: nf x nall
+        nlist
+            The neighbor list. shape: nf x nloc x nnei
+        mapping
+            The index mapping from extended to lcoal region. not used by this descriptor.
+
+        Returns
+        -------
+        descriptor
+            The descriptor. shape: nf x nloc x (ng x axis_neuron)
+        gr
+            The rotationally equivariant and permutationally invariant single particle
+            representation. shape: nf x nloc x ng x 3
+        g2
+            The rotationally invariant pair-partical representation.
+            this descriptor returns None
+        h2
+            The rotationally equivariant pair-partical representation.
+            this descriptor returns None
+        sw
+            The smooth switch function.
+        """
+        del mapping
+        # nf x nloc x nnei x 1
+        rr, ww = self.env_mat.call(
+            coord_ext, atype_ext, nlist, self.davg, self.dstd, True
+        )
+        nf, nloc, nnei, _ = rr.shape
+        sec = np.append([0], np.cumsum(self.sel))
+
+        ng = self.neuron[-1]
+        xyz_scatter = np.zeros([nf, nloc, ng], dtype=PRECISION_DICT[self.precision])
+        exclude_mask = self.emask.build_type_exclude_mask(nlist, atype_ext)
+        for tt in range(self.ntypes):
+            mm = exclude_mask[:, :, sec[tt] : sec[tt + 1]]
+            tr = rr[:, :, sec[tt] : sec[tt + 1], :]
+            tr = tr * mm[:, :, :, None]
+            gg = self.cal_g(tr, tt)
+            gg = np.mean(gg, axis=2)
+            # nf x nloc x ng x 1
+            xyz_scatter += gg * (self.sel[tt] / self.nnei)
+
+        res_rescale = 1.0 / 5.0
+        res = xyz_scatter * res_rescale
+        res = res.reshape(nf, nloc, -1).astype(GLOBAL_NP_FLOAT_PRECISION)
+        return res, None, None, None, ww
+
+    def serialize(self) -> dict:
+        """Serialize the descriptor to dict."""
+        return {
+            "@class": "Descriptor",
+            "type": "se_r",
+            "@version": 1,
+            "rcut": self.rcut,
+            "rcut_smth": self.rcut_smth,
+            "sel": self.sel,
+            "neuron": self.neuron,
+            "resnet_dt": self.resnet_dt,
+            "trainable": self.trainable,
+            "type_one_side": self.type_one_side,
+            "exclude_types": self.exclude_types,
+            "env_protection": self.env_protection,
+            "set_davg_zero": self.set_davg_zero,
+            "activation_function": self.activation_function,
+            # make deterministic
+            "precision": np.dtype(PRECISION_DICT[self.precision]).name,
+            "spin": self.spin,
+            "env_mat": self.env_mat.serialize(),
+            "embeddings": self.embeddings.serialize(),
+            "@variables": {
+                "davg": self.davg,
+                "dstd": self.dstd,
+            },
+        }
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "DescrptSeR":
+        """Deserialize from dict."""
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data.pop("@class", None)
+        data.pop("type", None)
+        variables = data.pop("@variables")
+        embeddings = data.pop("embeddings")
+        env_mat = data.pop("env_mat")
+        obj = cls(**data)
+
+        obj["davg"] = variables["davg"]
+        obj["dstd"] = variables["dstd"]
+        obj.embeddings = NetworkCollection.deserialize(embeddings)
+        return obj
+
+    @classmethod
+    def update_sel(cls, global_jdata: dict, local_jdata: dict):
+        """Update the selection and perform neighbor statistics.
+
+        Parameters
+        ----------
+        global_jdata : dict
+            The global data, containing the training section
+        local_jdata : dict
+            The local data refer to the current class
+        """
+        local_jdata_cpy = local_jdata.copy()
+        return UpdateSel().update_one_sel(global_jdata, local_jdata_cpy, False)
diff --git a/deepmd/dpmodel/fitting/__init__.py b/deepmd/dpmodel/fitting/__init__.py
new file mode 100644
index 0000000000..866a710a3b
--- /dev/null
+++ b/deepmd/dpmodel/fitting/__init__.py
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from .dipole_fitting import (
+    DipoleFitting,
+)
+from .dos_fitting import (
+    DOSFittingNet,
+)
+from .ener_fitting import (
+    EnergyFittingNet,
+)
+from .invar_fitting import (
+    InvarFitting,
+)
+from .make_base_fitting import (
+    make_base_fitting,
+)
+from .polarizability_fitting import (
+    PolarFitting,
+)
+
+__all__ = [
+    "InvarFitting",
+    "make_base_fitting",
+    "DipoleFitting",
+    "EnergyFittingNet",
+    "PolarFitting",
+    "DOSFittingNet",
+]
diff --git a/deepmd/dpmodel/fitting/base_fitting.py b/deepmd/dpmodel/fitting/base_fitting.py
new file mode 100644
index 0000000000..bb1853a4a0
--- /dev/null
+++ b/deepmd/dpmodel/fitting/base_fitting.py
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import numpy as np
+
+from .make_base_fitting import (
+    make_base_fitting,
+)
+
+BaseFitting = make_base_fitting(np.ndarray, fwd_method_name="call")
diff --git a/deepmd/dpmodel/fitting/dipole_fitting.py b/deepmd/dpmodel/fitting/dipole_fitting.py
new file mode 100644
index 0000000000..6d6324770c
--- /dev/null
+++ b/deepmd/dpmodel/fitting/dipole_fitting.py
@@ -0,0 +1,224 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+from typing import (
+    Any,
+    Dict,
+    List,
+    Optional,
+)
+
+import numpy as np
+
+from deepmd.dpmodel import (
+    DEFAULT_PRECISION,
+)
+from deepmd.dpmodel.fitting.base_fitting import (
+    BaseFitting,
+)
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+    OutputVariableDef,
+    fitting_check_output,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+from .general_fitting import (
+    GeneralFitting,
+)
+
+
+@BaseFitting.register("dipole")
+@fitting_check_output
+class DipoleFitting(GeneralFitting):
+    r"""Fitting rotationally equivariant diploe of the system.
+
+    Parameters
+    ----------
+    var_name
+            The name of the output variable.
+    ntypes
+            The number of atom types.
+    dim_descrpt
+            The dimension of the input descriptor.
+    embedding_width : int
+        The dimension of rotation matrix, m1.
+    neuron
+            Number of neurons :math:`N` in each hidden layer of the fitting net
+    resnet_dt
+            Time-step `dt` in the resnet construction:
+            :math:`y = x + dt * \phi (Wx + b)`
+    numb_fparam
+            Number of frame parameter
+    numb_aparam
+            Number of atomic parameter
+    rcond
+            The condition number for the regression of atomic energy.
+    tot_ener_zero
+            Force the total energy to zero. Useful for the charge fitting.
+    trainable
+            If the weights of fitting net are trainable.
+            Suppose that we have :math:`N_l` hidden layers in the fitting net,
+            this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable.
+    activation_function
+            The activation function :math:`\boldsymbol{\phi}` in the embedding net. Supported options are |ACTIVATION_FN|
+    precision
+            The precision of the embedding net parameters. Supported options are |PRECISION|
+    layer_name : list[Optional[str]], optional
+            The name of the each layer. If two layers, either in the same fitting or different fittings,
+            have the same name, they will share the same neural network parameters.
+    use_aparam_as_mask: bool, optional
+            If True, the atomic parameters will be used as a mask that determines the atom is real/virtual.
+            And the aparam will not be used as the atomic parameters for embedding.
+    mixed_types
+            If true, use a uniform fitting net for all atom types, otherwise use
+            different fitting nets for different atom types.
+    exclude_types
+            Atomic contributions of the excluded atom types are set zero.
+    r_differentiable
+            If the variable is differentiated with respect to coordinates of atoms.
+            Only reduciable variable are differentiable.
+    c_differentiable
+            If the variable is differentiated with respect to the cell tensor (pbc case).
+            Only reduciable variable are differentiable.
+    """
+
+    def __init__(
+        self,
+        var_name: str,
+        ntypes: int,
+        dim_descrpt: int,
+        embedding_width: int,
+        neuron: List[int] = [120, 120, 120],
+        resnet_dt: bool = True,
+        numb_fparam: int = 0,
+        numb_aparam: int = 0,
+        rcond: Optional[float] = None,
+        tot_ener_zero: bool = False,
+        trainable: Optional[List[bool]] = None,
+        activation_function: str = "tanh",
+        precision: str = DEFAULT_PRECISION,
+        layer_name: Optional[List[Optional[str]]] = None,
+        use_aparam_as_mask: bool = False,
+        spin: Any = None,
+        mixed_types: bool = False,
+        exclude_types: List[int] = [],
+        r_differentiable: bool = True,
+        c_differentiable: bool = True,
+        old_impl=False,
+        # not used
+        seed: Optional[int] = None,
+    ):
+        # seed, uniform_seed are not included
+        if tot_ener_zero:
+            raise NotImplementedError("tot_ener_zero is not implemented")
+        if spin is not None:
+            raise NotImplementedError("spin is not implemented")
+        if use_aparam_as_mask:
+            raise NotImplementedError("use_aparam_as_mask is not implemented")
+        if layer_name is not None:
+            raise NotImplementedError("layer_name is not implemented")
+
+        self.embedding_width = embedding_width
+        self.r_differentiable = r_differentiable
+        self.c_differentiable = c_differentiable
+        super().__init__(
+            var_name=var_name,
+            ntypes=ntypes,
+            dim_descrpt=dim_descrpt,
+            neuron=neuron,
+            resnet_dt=resnet_dt,
+            numb_fparam=numb_fparam,
+            numb_aparam=numb_aparam,
+            rcond=rcond,
+            tot_ener_zero=tot_ener_zero,
+            trainable=trainable,
+            activation_function=activation_function,
+            precision=precision,
+            layer_name=layer_name,
+            use_aparam_as_mask=use_aparam_as_mask,
+            spin=spin,
+            mixed_types=mixed_types,
+            exclude_types=exclude_types,
+        )
+        self.old_impl = False
+
+    def _net_out_dim(self):
+        """Set the FittingNet output dim."""
+        return self.embedding_width
+
+    def serialize(self) -> dict:
+        data = super().serialize()
+        data["type"] = "dipole"
+        data["embedding_width"] = self.embedding_width
+        data["old_impl"] = self.old_impl
+        data["r_differentiable"] = self.r_differentiable
+        data["c_differentiable"] = self.c_differentiable
+        return data
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "GeneralFitting":
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        return super().deserialize(data)
+
+    def output_def(self):
+        return FittingOutputDef(
+            [
+                OutputVariableDef(
+                    self.var_name,
+                    [3],
+                    reduciable=True,
+                    r_differentiable=self.r_differentiable,
+                    c_differentiable=self.c_differentiable,
+                ),
+            ]
+        )
+
+    def call(
+        self,
+        descriptor: np.ndarray,
+        atype: np.ndarray,
+        gr: Optional[np.ndarray] = None,
+        g2: Optional[np.ndarray] = None,
+        h2: Optional[np.ndarray] = None,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+    ) -> Dict[str, np.ndarray]:
+        """Calculate the fitting.
+
+        Parameters
+        ----------
+        descriptor
+            input descriptor. shape: nf x nloc x nd
+        atype
+            the atom type. shape: nf x nloc
+        gr
+            The rotationally equivariant and permutationally invariant single particle
+            representation. shape: nf x nloc x ng x 3
+        g2
+            The rotationally invariant pair-partical representation.
+            shape: nf x nloc x nnei x ng
+        h2
+            The rotationally equivariant pair-partical representation.
+            shape: nf x nloc x nnei x 3
+        fparam
+            The frame parameter. shape: nf x nfp. nfp being `numb_fparam`
+        aparam
+            The atomic parameter. shape: nf x nloc x nap. nap being `numb_aparam`
+
+        """
+        nframes, nloc, _ = descriptor.shape
+        assert gr is not None, "Must provide the rotation matrix for dipole fitting."
+        # (nframes, nloc, m1)
+        out = self._call_common(descriptor, atype, gr, g2, h2, fparam, aparam)[
+            self.var_name
+        ]
+        # (nframes * nloc, 1, m1)
+        out = out.reshape(-1, 1, self.embedding_width)
+        # (nframes * nloc, m1, 3)
+        gr = gr.reshape(nframes * nloc, -1, 3)
+        # (nframes, nloc, 3)
+        out = np.einsum("bim,bmj->bij", out, gr).squeeze(-2).reshape(nframes, nloc, 3)
+        return {self.var_name: out}
diff --git a/deepmd/dpmodel/fitting/dos_fitting.py b/deepmd/dpmodel/fitting/dos_fitting.py
new file mode 100644
index 0000000000..7c86d392b0
--- /dev/null
+++ b/deepmd/dpmodel/fitting/dos_fitting.py
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+from typing import (
+    TYPE_CHECKING,
+    List,
+    Optional,
+    Union,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.common import (
+    DEFAULT_PRECISION,
+)
+from deepmd.dpmodel.fitting.invar_fitting import (
+    InvarFitting,
+)
+
+if TYPE_CHECKING:
+    from deepmd.dpmodel.fitting.general_fitting import (
+        GeneralFitting,
+    )
+
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+
+@InvarFitting.register("dos")
+class DOSFittingNet(InvarFitting):
+    def __init__(
+        self,
+        ntypes: int,
+        dim_descrpt: int,
+        numb_dos: int = 300,
+        neuron: List[int] = [120, 120, 120],
+        resnet_dt: bool = True,
+        numb_fparam: int = 0,
+        numb_aparam: int = 0,
+        bias_dos: Optional[np.ndarray] = None,
+        rcond: Optional[float] = None,
+        trainable: Union[bool, List[bool]] = True,
+        activation_function: str = "tanh",
+        precision: str = DEFAULT_PRECISION,
+        mixed_types: bool = False,
+        exclude_types: List[int] = [],
+        # not used
+        seed: Optional[int] = None,
+    ):
+        if bias_dos is not None:
+            self.bias_dos = bias_dos
+        else:
+            self.bias_dos = np.zeros((ntypes, numb_dos), dtype=DEFAULT_PRECISION)
+        super().__init__(
+            var_name="dos",
+            ntypes=ntypes,
+            dim_descrpt=dim_descrpt,
+            dim_out=numb_dos,
+            neuron=neuron,
+            resnet_dt=resnet_dt,
+            bias_atom=bias_dos,
+            numb_fparam=numb_fparam,
+            numb_aparam=numb_aparam,
+            rcond=rcond,
+            trainable=trainable,
+            activation_function=activation_function,
+            precision=precision,
+            mixed_types=mixed_types,
+            exclude_types=exclude_types,
+        )
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "GeneralFitting":
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data["numb_dos"] = data.pop("dim_out")
+        data.pop("tot_ener_zero", None)
+        data.pop("var_name", None)
+        data.pop("layer_name", None)
+        data.pop("use_aparam_as_mask", None)
+        data.pop("spin", None)
+        data.pop("atom_ener", None)
+        return super().deserialize(data)
+
+    def serialize(self) -> dict:
+        """Serialize the fitting to dict."""
+        dd = {
+            **super().serialize(),
+            "type": "dos",
+        }
+        dd["@variables"]["bias_atom_e"] = self.bias_atom_e
+
+        return dd
diff --git a/deepmd/dpmodel/fitting/ener_fitting.py b/deepmd/dpmodel/fitting/ener_fitting.py
new file mode 100644
index 0000000000..7f83f1e886
--- /dev/null
+++ b/deepmd/dpmodel/fitting/ener_fitting.py
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    List,
+    Optional,
+)
+
+from deepmd.dpmodel.common import (
+    DEFAULT_PRECISION,
+)
+from deepmd.dpmodel.fitting.invar_fitting import (
+    InvarFitting,
+)
+
+if TYPE_CHECKING:
+    from deepmd.dpmodel.fitting.general_fitting import (
+        GeneralFitting,
+    )
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+
+@InvarFitting.register("ener")
+class EnergyFittingNet(InvarFitting):
+    def __init__(
+        self,
+        ntypes: int,
+        dim_descrpt: int,
+        neuron: List[int] = [120, 120, 120],
+        resnet_dt: bool = True,
+        numb_fparam: int = 0,
+        numb_aparam: int = 0,
+        rcond: Optional[float] = None,
+        tot_ener_zero: bool = False,
+        trainable: Optional[List[bool]] = None,
+        atom_ener: Optional[List[float]] = None,
+        activation_function: str = "tanh",
+        precision: str = DEFAULT_PRECISION,
+        layer_name: Optional[List[Optional[str]]] = None,
+        use_aparam_as_mask: bool = False,
+        spin: Any = None,
+        mixed_types: bool = False,
+        exclude_types: List[int] = [],
+        # not used
+        seed: Optional[int] = None,
+    ):
+        super().__init__(
+            var_name="energy",
+            ntypes=ntypes,
+            dim_descrpt=dim_descrpt,
+            dim_out=1,
+            neuron=neuron,
+            resnet_dt=resnet_dt,
+            numb_fparam=numb_fparam,
+            numb_aparam=numb_aparam,
+            rcond=rcond,
+            tot_ener_zero=tot_ener_zero,
+            trainable=trainable,
+            atom_ener=atom_ener,
+            activation_function=activation_function,
+            precision=precision,
+            layer_name=layer_name,
+            use_aparam_as_mask=use_aparam_as_mask,
+            spin=spin,
+            mixed_types=mixed_types,
+            exclude_types=exclude_types,
+        )
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "GeneralFitting":
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data.pop("var_name")
+        data.pop("dim_out")
+        return super().deserialize(data)
+
+    def serialize(self) -> dict:
+        """Serialize the fitting to dict."""
+        return {
+            **super().serialize(),
+            "type": "ener",
+        }
diff --git a/deepmd/dpmodel/fitting/general_fitting.py b/deepmd/dpmodel/fitting/general_fitting.py
new file mode 100644
index 0000000000..5681f5bf0c
--- /dev/null
+++ b/deepmd/dpmodel/fitting/general_fitting.py
@@ -0,0 +1,388 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+from abc import (
+    abstractmethod,
+)
+from typing import (
+    Any,
+    Dict,
+    List,
+    Optional,
+)
+
+import numpy as np
+
+from deepmd.dpmodel import (
+    DEFAULT_PRECISION,
+    NativeOP,
+)
+from deepmd.dpmodel.utils import (
+    AtomExcludeMask,
+    FittingNet,
+    NetworkCollection,
+)
+
+from .base_fitting import (
+    BaseFitting,
+)
+
+
+class GeneralFitting(NativeOP, BaseFitting):
+    r"""General fitting class.
+
+    Parameters
+    ----------
+    var_name
+            The name of the output variable.
+    ntypes
+            The number of atom types.
+    dim_descrpt
+            The dimension of the input descriptor.
+    neuron
+            Number of neurons :math:`N` in each hidden layer of the fitting net
+    bias_atom_e
+            Average enery per atom for each element.
+    resnet_dt
+            Time-step `dt` in the resnet construction:
+            :math:`y = x + dt * \phi (Wx + b)`
+    numb_fparam
+            Number of frame parameter
+    numb_aparam
+            Number of atomic parameter
+    rcond
+            The condition number for the regression of atomic energy.
+    tot_ener_zero
+            Force the total energy to zero. Useful for the charge fitting.
+    trainable
+            If the weights of fitting net are trainable.
+            Suppose that we have :math:`N_l` hidden layers in the fitting net,
+            this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable.
+    activation_function
+            The activation function :math:`\boldsymbol{\phi}` in the embedding net. Supported options are |ACTIVATION_FN|
+    precision
+            The precision of the embedding net parameters. Supported options are |PRECISION|
+    layer_name : list[Optional[str]], optional
+            The name of the each layer. If two layers, either in the same fitting or different fittings,
+            have the same name, they will share the same neural network parameters.
+    use_aparam_as_mask: bool, optional
+            If True, the atomic parameters will be used as a mask that determines the atom is real/virtual.
+            And the aparam will not be used as the atomic parameters for embedding.
+    mixed_types
+            If true, use a uniform fitting net for all atom types, otherwise use
+            different fitting nets for different atom types.
+    exclude_types: List[int]
+            Atomic contributions of the excluded atom types are set zero.
+    remove_vaccum_contribution: List[bool], optional
+        Remove vaccum contribution before the bias is added. The list assigned each
+        type. For `mixed_types` provide `[True]`, otherwise it should be a list of the same
+        length as `ntypes` signaling if or not removing the vaccum contribution for the atom types in the list.
+    """
+
+    def __init__(
+        self,
+        var_name: str,
+        ntypes: int,
+        dim_descrpt: int,
+        neuron: List[int] = [120, 120, 120],
+        resnet_dt: bool = True,
+        numb_fparam: int = 0,
+        numb_aparam: int = 0,
+        bias_atom_e: Optional[np.ndarray] = None,
+        rcond: Optional[float] = None,
+        tot_ener_zero: bool = False,
+        trainable: Optional[List[bool]] = None,
+        activation_function: str = "tanh",
+        precision: str = DEFAULT_PRECISION,
+        layer_name: Optional[List[Optional[str]]] = None,
+        use_aparam_as_mask: bool = False,
+        spin: Any = None,
+        mixed_types: bool = True,
+        exclude_types: List[int] = [],
+        remove_vaccum_contribution: Optional[List[bool]] = None,
+    ):
+        self.var_name = var_name
+        self.ntypes = ntypes
+        self.dim_descrpt = dim_descrpt
+        self.neuron = neuron
+        self.resnet_dt = resnet_dt
+        self.numb_fparam = numb_fparam
+        self.numb_aparam = numb_aparam
+        self.rcond = rcond
+        self.tot_ener_zero = tot_ener_zero
+        self.trainable = trainable
+        if self.trainable is None:
+            self.trainable = [True for ii in range(len(self.neuron) + 1)]
+        if isinstance(self.trainable, bool):
+            self.trainable = [self.trainable] * (len(self.neuron) + 1)
+        self.activation_function = activation_function
+        self.precision = precision
+        self.layer_name = layer_name
+        self.use_aparam_as_mask = use_aparam_as_mask
+        self.spin = spin
+        self.mixed_types = mixed_types
+        # order matters, should be place after the assignment of ntypes
+        self.reinit_exclude(exclude_types)
+        if self.spin is not None:
+            raise NotImplementedError("spin is not supported")
+        self.remove_vaccum_contribution = remove_vaccum_contribution
+
+        net_dim_out = self._net_out_dim()
+        # init constants
+        if bias_atom_e is None:
+            self.bias_atom_e = np.zeros([self.ntypes, net_dim_out])
+        else:
+            assert bias_atom_e.shape == (self.ntypes, net_dim_out)
+            self.bias_atom_e = bias_atom_e
+        if self.numb_fparam > 0:
+            self.fparam_avg = np.zeros(self.numb_fparam)
+            self.fparam_inv_std = np.ones(self.numb_fparam)
+        else:
+            self.fparam_avg, self.fparam_inv_std = None, None
+        if self.numb_aparam > 0:
+            self.aparam_avg = np.zeros(self.numb_aparam)
+            self.aparam_inv_std = np.ones(self.numb_aparam)
+        else:
+            self.aparam_avg, self.aparam_inv_std = None, None
+        # init networks
+        in_dim = self.dim_descrpt + self.numb_fparam + self.numb_aparam
+        self.nets = NetworkCollection(
+            1 if not self.mixed_types else 0,
+            self.ntypes,
+            network_type="fitting_network",
+            networks=[
+                FittingNet(
+                    in_dim,
+                    net_dim_out,
+                    self.neuron,
+                    self.activation_function,
+                    self.resnet_dt,
+                    self.precision,
+                    bias_out=True,
+                )
+                for ii in range(self.ntypes if not self.mixed_types else 1)
+            ],
+        )
+
+    @abstractmethod
+    def _net_out_dim(self):
+        """Set the FittingNet output dim."""
+        pass
+
+    def get_dim_fparam(self) -> int:
+        """Get the number (dimension) of frame parameters of this atomic model."""
+        return self.numb_fparam
+
+    def get_dim_aparam(self) -> int:
+        """Get the number (dimension) of atomic parameters of this atomic model."""
+        return self.numb_aparam
+
+    def get_sel_type(self) -> List[int]:
+        """Get the selected atom types of this model.
+
+        Only atoms with selected atom types have atomic contribution
+        to the result of the model.
+        If returning an empty list, all atom types are selected.
+        """
+        return [ii for ii in range(self.ntypes) if ii not in self.exclude_types]
+
+    def __setitem__(self, key, value):
+        if key in ["bias_atom_e"]:
+            self.bias_atom_e = value
+        elif key in ["fparam_avg"]:
+            self.fparam_avg = value
+        elif key in ["fparam_inv_std"]:
+            self.fparam_inv_std = value
+        elif key in ["aparam_avg"]:
+            self.aparam_avg = value
+        elif key in ["aparam_inv_std"]:
+            self.aparam_inv_std = value
+        elif key in ["scale"]:
+            self.scale = value
+        else:
+            raise KeyError(key)
+
+    def __getitem__(self, key):
+        if key in ["bias_atom_e"]:
+            return self.bias_atom_e
+        elif key in ["fparam_avg"]:
+            return self.fparam_avg
+        elif key in ["fparam_inv_std"]:
+            return self.fparam_inv_std
+        elif key in ["aparam_avg"]:
+            return self.aparam_avg
+        elif key in ["aparam_inv_std"]:
+            return self.aparam_inv_std
+        elif key in ["scale"]:
+            return self.scale
+        else:
+            raise KeyError(key)
+
+    def reinit_exclude(
+        self,
+        exclude_types: List[int] = [],
+    ):
+        self.exclude_types = exclude_types
+        self.emask = AtomExcludeMask(self.ntypes, self.exclude_types)
+
+    def serialize(self) -> dict:
+        """Serialize the fitting to dict."""
+        return {
+            "@class": "Fitting",
+            "@version": 1,
+            "var_name": self.var_name,
+            "ntypes": self.ntypes,
+            "dim_descrpt": self.dim_descrpt,
+            "neuron": self.neuron,
+            "resnet_dt": self.resnet_dt,
+            "numb_fparam": self.numb_fparam,
+            "numb_aparam": self.numb_aparam,
+            "rcond": self.rcond,
+            "activation_function": self.activation_function,
+            "precision": self.precision,
+            "mixed_types": self.mixed_types,
+            "exclude_types": self.exclude_types,
+            "nets": self.nets.serialize(),
+            "@variables": {
+                "bias_atom_e": self.bias_atom_e,
+                "fparam_avg": self.fparam_avg,
+                "fparam_inv_std": self.fparam_inv_std,
+                "aparam_avg": self.aparam_avg,
+                "aparam_inv_std": self.aparam_inv_std,
+            },
+            # not supported
+            "tot_ener_zero": self.tot_ener_zero,
+            "trainable": self.trainable,
+            "layer_name": self.layer_name,
+            "use_aparam_as_mask": self.use_aparam_as_mask,
+            "spin": self.spin,
+        }
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "GeneralFitting":
+        data = copy.deepcopy(data)
+        data.pop("@class")
+        data.pop("type")
+        variables = data.pop("@variables")
+        nets = data.pop("nets")
+        obj = cls(**data)
+        for kk in variables.keys():
+            obj[kk] = variables[kk]
+        obj.nets = NetworkCollection.deserialize(nets)
+        return obj
+
+    def _call_common(
+        self,
+        descriptor: np.ndarray,
+        atype: np.ndarray,
+        gr: Optional[np.ndarray] = None,
+        g2: Optional[np.ndarray] = None,
+        h2: Optional[np.ndarray] = None,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+    ) -> Dict[str, np.ndarray]:
+        """Calculate the fitting.
+
+        Parameters
+        ----------
+        descriptor
+            input descriptor. shape: nf x nloc x nd
+        atype
+            the atom type. shape: nf x nloc
+        gr
+            The rotationally equivariant and permutationally invariant single particle
+            representation. shape: nf x nloc x ng x 3
+        g2
+            The rotationally invariant pair-partical representation.
+            shape: nf x nloc x nnei x ng
+        h2
+            The rotationally equivariant pair-partical representation.
+            shape: nf x nloc x nnei x 3
+        fparam
+            The frame parameter. shape: nf x nfp. nfp being `numb_fparam`
+        aparam
+            The atomic parameter. shape: nf x nloc x nap. nap being `numb_aparam`
+
+        """
+        nf, nloc, nd = descriptor.shape
+        net_dim_out = self._net_out_dim()
+        # check input dim
+        if nd != self.dim_descrpt:
+            raise ValueError(
+                "get an input descriptor of dim {nd},"
+                "which is not consistent with {self.dim_descrpt}."
+            )
+        xx = descriptor
+        if self.remove_vaccum_contribution is not None:
+            # TODO: comput the input for vaccum when setting remove_vaccum_contribution
+            # Idealy, the input for vaccum should be computed;
+            # we consider it as always zero for convenience.
+            # Needs a compute_input_stats for vaccum passed from the
+            # descriptor.
+            xx_zeros = np.zeros_like(xx)
+        else:
+            xx_zeros = None
+        # check fparam dim, concate to input descriptor
+        if self.numb_fparam > 0:
+            assert fparam is not None, "fparam should not be None"
+            if fparam.shape[-1] != self.numb_fparam:
+                raise ValueError(
+                    "get an input fparam of dim {fparam.shape[-1]}, ",
+                    "which is not consistent with {self.numb_fparam}.",
+                )
+            fparam = (fparam - self.fparam_avg) * self.fparam_inv_std
+            fparam = np.tile(fparam.reshape([nf, 1, self.numb_fparam]), [1, nloc, 1])
+            xx = np.concatenate(
+                [xx, fparam],
+                axis=-1,
+            )
+            if xx_zeros is not None:
+                xx_zeros = np.concatenate(
+                    [xx_zeros, fparam],
+                    axis=-1,
+                )
+        # check aparam dim, concate to input descriptor
+        if self.numb_aparam > 0:
+            assert aparam is not None, "aparam should not be None"
+            if aparam.shape[-1] != self.numb_aparam:
+                raise ValueError(
+                    "get an input aparam of dim {aparam.shape[-1]}, ",
+                    "which is not consistent with {self.numb_aparam}.",
+                )
+            aparam = aparam.reshape([nf, nloc, self.numb_aparam])
+            aparam = (aparam - self.aparam_avg) * self.aparam_inv_std
+            xx = np.concatenate(
+                [xx, aparam],
+                axis=-1,
+            )
+            if xx_zeros is not None:
+                xx_zeros = np.concatenate(
+                    [xx_zeros, aparam],
+                    axis=-1,
+                )
+
+        # calcualte the prediction
+        if not self.mixed_types:
+            outs = np.zeros([nf, nloc, net_dim_out])
+            for type_i in range(self.ntypes):
+                mask = np.tile(
+                    (atype == type_i).reshape([nf, nloc, 1]), [1, 1, net_dim_out]
+                )
+                atom_property = self.nets[(type_i,)](xx)
+                if self.remove_vaccum_contribution is not None and not (
+                    len(self.remove_vaccum_contribution) > type_i
+                    and not self.remove_vaccum_contribution[type_i]
+                ):
+                    assert xx_zeros is not None
+                    atom_property -= self.nets[(type_i,)](xx_zeros)
+                atom_property = atom_property + self.bias_atom_e[type_i]
+                atom_property = atom_property * mask
+                outs = outs + atom_property  # Shape is [nframes, natoms[0], 1]
+        else:
+            outs = self.nets[()](xx) + self.bias_atom_e[atype]
+            if xx_zeros is not None:
+                outs -= self.nets[()](xx_zeros)
+        # nf x nloc
+        exclude_mask = self.emask.build_type_exclude_mask(atype)
+        # nf x nloc x nod
+        outs = outs * exclude_mask[:, :, None]
+        return {self.var_name: outs}
diff --git a/deepmd/dpmodel/fitting/invar_fitting.py b/deepmd/dpmodel/fitting/invar_fitting.py
new file mode 100644
index 0000000000..9bf1731830
--- /dev/null
+++ b/deepmd/dpmodel/fitting/invar_fitting.py
@@ -0,0 +1,240 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+from typing import (
+    Any,
+    Dict,
+    List,
+    Optional,
+)
+
+import numpy as np
+
+from deepmd.dpmodel import (
+    DEFAULT_PRECISION,
+)
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+    OutputVariableDef,
+    fitting_check_output,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+from .general_fitting import (
+    GeneralFitting,
+)
+
+
+@GeneralFitting.register("invar")
+@fitting_check_output
+class InvarFitting(GeneralFitting):
+    r"""Fitting the energy (or a rotationally invariant porperty of `dim_out`) of the system. The force and the virial can also be trained.
+
+    Lets take the energy fitting task as an example.
+    The potential energy :math:`E` is a fitting network function of the descriptor :math:`\mathcal{D}`:
+
+    .. math::
+        E(\mathcal{D}) = \mathcal{L}^{(n)} \circ \mathcal{L}^{(n-1)}
+        \circ \cdots \circ \mathcal{L}^{(1)} \circ \mathcal{L}^{(0)}
+
+    The first :math:`n` hidden layers :math:`\mathcal{L}^{(0)}, \cdots, \mathcal{L}^{(n-1)}` are given by
+
+    .. math::
+        \mathbf{y}=\mathcal{L}(\mathbf{x};\mathbf{w},\mathbf{b})=
+            \boldsymbol{\phi}(\mathbf{x}^T\mathbf{w}+\mathbf{b})
+
+    where :math:`\mathbf{x} \in \mathbb{R}^{N_1}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}^{N_2}`
+    is the output vector. :math:`\mathbf{w} \in \mathbb{R}^{N_1 \times N_2}` and
+    :math:`\mathbf{b} \in \mathbb{R}^{N_2}` are weights and biases, respectively,
+    both of which are trainable if `trainable[i]` is `True`. :math:`\boldsymbol{\phi}`
+    is the activation function.
+
+    The output layer :math:`\mathcal{L}^{(n)}` is given by
+
+    .. math::
+        \mathbf{y}=\mathcal{L}^{(n)}(\mathbf{x};\mathbf{w},\mathbf{b})=
+            \mathbf{x}^T\mathbf{w}+\mathbf{b}
+
+    where :math:`\mathbf{x} \in \mathbb{R}^{N_{n-1}}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}`
+    is the output scalar. :math:`\mathbf{w} \in \mathbb{R}^{N_{n-1}}` and
+    :math:`\mathbf{b} \in \mathbb{R}` are weights and bias, respectively,
+    both of which are trainable if `trainable[n]` is `True`.
+
+    Parameters
+    ----------
+    var_name
+            The name of the output variable.
+    ntypes
+            The number of atom types.
+    dim_descrpt
+            The dimension of the input descriptor.
+    dim_out
+            The dimension of the output fit property.
+    neuron
+            Number of neurons :math:`N` in each hidden layer of the fitting net
+    resnet_dt
+            Time-step `dt` in the resnet construction:
+            :math:`y = x + dt * \phi (Wx + b)`
+    numb_fparam
+            Number of frame parameter
+    numb_aparam
+            Number of atomic parameter
+    rcond
+            The condition number for the regression of atomic energy.
+    bias_atom
+            Bias for each element.
+    tot_ener_zero
+            Force the total energy to zero. Useful for the charge fitting.
+    trainable
+            If the weights of fitting net are trainable.
+            Suppose that we have :math:`N_l` hidden layers in the fitting net,
+            this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable.
+    atom_ener
+            Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descrptor should be set.
+    activation_function
+            The activation function :math:`\boldsymbol{\phi}` in the embedding net. Supported options are |ACTIVATION_FN|
+    precision
+            The precision of the embedding net parameters. Supported options are |PRECISION|
+    layer_name : list[Optional[str]], optional
+            The name of the each layer. If two layers, either in the same fitting or different fittings,
+            have the same name, they will share the same neural network parameters.
+    use_aparam_as_mask: bool, optional
+            If True, the atomic parameters will be used as a mask that determines the atom is real/virtual.
+            And the aparam will not be used as the atomic parameters for embedding.
+    mixed_types
+            If false, different atomic types uses different fitting net, otherwise different atom types share the same fitting net.
+    exclude_types: List[int]
+            Atomic contributions of the excluded atom types are set zero.
+
+    """
+
+    def __init__(
+        self,
+        var_name: str,
+        ntypes: int,
+        dim_descrpt: int,
+        dim_out: int,
+        neuron: List[int] = [120, 120, 120],
+        resnet_dt: bool = True,
+        numb_fparam: int = 0,
+        numb_aparam: int = 0,
+        bias_atom: Optional[np.ndarray] = None,
+        rcond: Optional[float] = None,
+        tot_ener_zero: bool = False,
+        trainable: Optional[List[bool]] = None,
+        atom_ener: Optional[List[float]] = None,
+        activation_function: str = "tanh",
+        precision: str = DEFAULT_PRECISION,
+        layer_name: Optional[List[Optional[str]]] = None,
+        use_aparam_as_mask: bool = False,
+        spin: Any = None,
+        mixed_types: bool = True,
+        exclude_types: List[int] = [],
+    ):
+        # seed, uniform_seed are not included
+        if tot_ener_zero:
+            raise NotImplementedError("tot_ener_zero is not implemented")
+        if spin is not None:
+            raise NotImplementedError("spin is not implemented")
+        if use_aparam_as_mask:
+            raise NotImplementedError("use_aparam_as_mask is not implemented")
+        if use_aparam_as_mask:
+            raise NotImplementedError("use_aparam_as_mask is not implemented")
+        if layer_name is not None:
+            raise NotImplementedError("layer_name is not implemented")
+
+        self.dim_out = dim_out
+        self.atom_ener = atom_ener
+        super().__init__(
+            var_name=var_name,
+            ntypes=ntypes,
+            dim_descrpt=dim_descrpt,
+            neuron=neuron,
+            resnet_dt=resnet_dt,
+            numb_fparam=numb_fparam,
+            numb_aparam=numb_aparam,
+            rcond=rcond,
+            bias_atom_e=bias_atom,
+            tot_ener_zero=tot_ener_zero,
+            trainable=trainable,
+            activation_function=activation_function,
+            precision=precision,
+            layer_name=layer_name,
+            use_aparam_as_mask=use_aparam_as_mask,
+            spin=spin,
+            mixed_types=mixed_types,
+            exclude_types=exclude_types,
+            remove_vaccum_contribution=None
+            if atom_ener is None or len([x for x in atom_ener if x is not None]) == 0
+            else [x is not None for x in atom_ener],
+        )
+
+    def serialize(self) -> dict:
+        data = super().serialize()
+        data["type"] = "invar"
+        data["dim_out"] = self.dim_out
+        data["atom_ener"] = self.atom_ener
+        return data
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "GeneralFitting":
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        return super().deserialize(data)
+
+    def _net_out_dim(self):
+        """Set the FittingNet output dim."""
+        return self.dim_out
+
+    def compute_output_stats(self, merged):
+        """Update the output bias for fitting net."""
+        raise NotImplementedError
+
+    def output_def(self):
+        return FittingOutputDef(
+            [
+                OutputVariableDef(
+                    self.var_name,
+                    [self.dim_out],
+                    reduciable=True,
+                    r_differentiable=True,
+                    c_differentiable=True,
+                ),
+            ]
+        )
+
+    def call(
+        self,
+        descriptor: np.ndarray,
+        atype: np.ndarray,
+        gr: Optional[np.ndarray] = None,
+        g2: Optional[np.ndarray] = None,
+        h2: Optional[np.ndarray] = None,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+    ) -> Dict[str, np.ndarray]:
+        """Calculate the fitting.
+
+        Parameters
+        ----------
+        descriptor
+            input descriptor. shape: nf x nloc x nd
+        atype
+            the atom type. shape: nf x nloc
+        gr
+            The rotationally equivariant and permutationally invariant single particle
+            representation. shape: nf x nloc x ng x 3
+        g2
+            The rotationally invariant pair-partical representation.
+            shape: nf x nloc x nnei x ng
+        h2
+            The rotationally equivariant pair-partical representation.
+            shape: nf x nloc x nnei x 3
+        fparam
+            The frame parameter. shape: nf x nfp. nfp being `numb_fparam`
+        aparam
+            The atomic parameter. shape: nf x nloc x nap. nap being `numb_aparam`
+
+        """
+        return self._call_common(descriptor, atype, gr, g2, h2, fparam, aparam)
diff --git a/deepmd/dpmodel/fitting/make_base_fitting.py b/deepmd/dpmodel/fitting/make_base_fitting.py
new file mode 100644
index 0000000000..c7341798c3
--- /dev/null
+++ b/deepmd/dpmodel/fitting/make_base_fitting.py
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from abc import (
+    ABC,
+    abstractmethod,
+)
+from typing import (
+    Dict,
+    Optional,
+)
+
+from deepmd.common import (
+    j_get_type,
+)
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+)
+from deepmd.utils.plugin import (
+    PluginVariant,
+    make_plugin_registry,
+)
+
+
+def make_base_fitting(
+    t_tensor,
+    fwd_method_name: str = "forward",
+):
+    """Make the base class for the fitting.
+
+    Parameters
+    ----------
+    t_tensor
+        The type of the tensor. used in the type hint.
+    fwd_method_name
+        Name of the forward method. For dpmodels, it should be "call".
+        For torch models, it should be "forward".
+
+    """
+
+    class BF(ABC, PluginVariant, make_plugin_registry("fitting")):
+        """Base fitting provides the interfaces of fitting net."""
+
+        def __new__(cls, *args, **kwargs):
+            if cls is BF:
+                cls = cls.get_class_by_type(j_get_type(kwargs, cls.__name__))
+            return super().__new__(cls)
+
+        @abstractmethod
+        def output_def(self) -> FittingOutputDef:
+            """Returns the output def of the fitting net."""
+            pass
+
+        @abstractmethod
+        def fwd(
+            self,
+            descriptor: t_tensor,
+            atype: t_tensor,
+            gr: Optional[t_tensor] = None,
+            g2: Optional[t_tensor] = None,
+            h2: Optional[t_tensor] = None,
+            fparam: Optional[t_tensor] = None,
+            aparam: Optional[t_tensor] = None,
+        ) -> Dict[str, t_tensor]:
+            """Calculate fitting."""
+            pass
+
+        def compute_output_stats(self, merged):
+            """Update the output bias for fitting net."""
+            raise NotImplementedError
+
+        @abstractmethod
+        def serialize(self) -> dict:
+            """Serialize the obj to dict."""
+            pass
+
+        @classmethod
+        def deserialize(cls, data: dict) -> "BF":
+            """Deserialize the fitting.
+
+            Parameters
+            ----------
+            data : dict
+                The serialized data
+
+            Returns
+            -------
+            BF
+                The deserialized fitting
+            """
+            if cls is BF:
+                return BF.get_class_by_type(data["type"]).deserialize(data)
+            raise NotImplementedError("Not implemented in class %s" % cls.__name__)
+
+    setattr(BF, fwd_method_name, BF.fwd)
+    delattr(BF, "fwd")
+
+    return BF
diff --git a/deepmd/dpmodel/fitting/polarizability_fitting.py b/deepmd/dpmodel/fitting/polarizability_fitting.py
new file mode 100644
index 0000000000..5d75037137
--- /dev/null
+++ b/deepmd/dpmodel/fitting/polarizability_fitting.py
@@ -0,0 +1,284 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+from typing import (
+    Any,
+    Dict,
+    List,
+    Optional,
+)
+
+import numpy as np
+
+from deepmd.common import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+from deepmd.dpmodel import (
+    DEFAULT_PRECISION,
+)
+from deepmd.dpmodel.fitting.base_fitting import (
+    BaseFitting,
+)
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+    OutputVariableDef,
+    fitting_check_output,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+from .general_fitting import (
+    GeneralFitting,
+)
+
+
+@BaseFitting.register("polar")
+@fitting_check_output
+class PolarFitting(GeneralFitting):
+    r"""Fitting rotationally equivariant polarizability of the system.
+
+    Parameters
+    ----------
+    var_name
+            The name of the output variable.
+    ntypes
+            The number of atom types.
+    dim_descrpt
+            The dimension of the input descriptor.
+    embedding_width : int
+        The dimension of rotation matrix, m1.
+    neuron
+            Number of neurons :math:`N` in each hidden layer of the fitting net
+    resnet_dt
+            Time-step `dt` in the resnet construction:
+            :math:`y = x + dt * \phi (Wx + b)`
+    numb_fparam
+            Number of frame parameter
+    numb_aparam
+            Number of atomic parameter
+    rcond
+            The condition number for the regression of atomic energy.
+    tot_ener_zero
+            Force the total energy to zero. Useful for the charge fitting.
+    trainable
+            If the weights of fitting net are trainable.
+            Suppose that we have :math:`N_l` hidden layers in the fitting net,
+            this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable.
+    activation_function
+            The activation function :math:`\boldsymbol{\phi}` in the embedding net. Supported options are |ACTIVATION_FN|
+    precision
+            The precision of the embedding net parameters. Supported options are |PRECISION|
+    layer_name : list[Optional[str]], optional
+            The name of the each layer. If two layers, either in the same fitting or different fittings,
+            have the same name, they will share the same neural network parameters.
+    use_aparam_as_mask: bool, optional
+            If True, the atomic parameters will be used as a mask that determines the atom is real/virtual.
+            And the aparam will not be used as the atomic parameters for embedding.
+    mixed_types
+            If true, use a uniform fitting net for all atom types, otherwise use
+            different fitting nets for different atom types.
+    fit_diag : bool
+            Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to
+            normal polarizability matrix by contracting with the rotation matrix.
+    scale : List[float]
+            The output of the fitting net (polarizability matrix) for type i atom will be scaled by scale[i]
+    shift_diag : bool
+            Whether to shift the diagonal part of the polarizability matrix. The shift operation is carried out after scale.
+    """
+
+    def __init__(
+        self,
+        var_name: str,
+        ntypes: int,
+        dim_descrpt: int,
+        embedding_width: int,
+        neuron: List[int] = [120, 120, 120],
+        resnet_dt: bool = True,
+        numb_fparam: int = 0,
+        numb_aparam: int = 0,
+        rcond: Optional[float] = None,
+        tot_ener_zero: bool = False,
+        trainable: Optional[List[bool]] = None,
+        activation_function: str = "tanh",
+        precision: str = DEFAULT_PRECISION,
+        layer_name: Optional[List[Optional[str]]] = None,
+        use_aparam_as_mask: bool = False,
+        spin: Any = None,
+        mixed_types: bool = False,
+        exclude_types: List[int] = [],
+        old_impl: bool = False,
+        fit_diag: bool = True,
+        scale: Optional[List[float]] = None,
+        shift_diag: bool = True,
+        # not used
+        seed: Optional[int] = None,
+    ):
+        # seed, uniform_seed are not included
+        if tot_ener_zero:
+            raise NotImplementedError("tot_ener_zero is not implemented")
+        if spin is not None:
+            raise NotImplementedError("spin is not implemented")
+        if use_aparam_as_mask:
+            raise NotImplementedError("use_aparam_as_mask is not implemented")
+        if layer_name is not None:
+            raise NotImplementedError("layer_name is not implemented")
+
+        self.embedding_width = embedding_width
+        self.fit_diag = fit_diag
+        self.scale = scale
+        if self.scale is None:
+            self.scale = [1.0 for _ in range(ntypes)]
+        else:
+            if isinstance(self.scale, list):
+                assert (
+                    len(self.scale) == ntypes
+                ), "Scale should be a list of length ntypes."
+            elif isinstance(self.scale, float):
+                self.scale = [self.scale for _ in range(ntypes)]
+            else:
+                raise ValueError(
+                    "Scale must be a list of float of length ntypes or a float."
+                )
+        self.scale = np.array(self.scale, dtype=GLOBAL_NP_FLOAT_PRECISION).reshape(
+            ntypes, 1
+        )
+        self.shift_diag = shift_diag
+        self.constant_matrix = np.zeros(ntypes, dtype=GLOBAL_NP_FLOAT_PRECISION)
+        super().__init__(
+            var_name=var_name,
+            ntypes=ntypes,
+            dim_descrpt=dim_descrpt,
+            neuron=neuron,
+            resnet_dt=resnet_dt,
+            numb_fparam=numb_fparam,
+            numb_aparam=numb_aparam,
+            rcond=rcond,
+            tot_ener_zero=tot_ener_zero,
+            trainable=trainable,
+            activation_function=activation_function,
+            precision=precision,
+            layer_name=layer_name,
+            use_aparam_as_mask=use_aparam_as_mask,
+            spin=spin,
+            mixed_types=mixed_types,
+            exclude_types=exclude_types,
+        )
+        self.old_impl = False
+
+    def _net_out_dim(self):
+        """Set the FittingNet output dim."""
+        return (
+            self.embedding_width
+            if self.fit_diag
+            else self.embedding_width * self.embedding_width
+        )
+
+    def __setitem__(self, key, value):
+        if key in ["constant_matrix"]:
+            self.constant_matrix = value
+        else:
+            super().__setitem__(key, value)
+
+    def __getitem__(self, key):
+        if key in ["constant_matrix"]:
+            return self.constant_matrix
+        else:
+            return super().__getitem__(key)
+
+    def serialize(self) -> dict:
+        data = super().serialize()
+        data["type"] = "polar"
+        data["@version"] = 2
+        data["embedding_width"] = self.embedding_width
+        data["old_impl"] = self.old_impl
+        data["fit_diag"] = self.fit_diag
+        data["shift_diag"] = self.shift_diag
+        data["@variables"]["scale"] = self.scale
+        data["@variables"]["constant_matrix"] = self.constant_matrix
+        return data
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "GeneralFitting":
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 2, 1)
+        return super().deserialize(data)
+
+    def output_def(self):
+        return FittingOutputDef(
+            [
+                OutputVariableDef(
+                    self.var_name,
+                    [3, 3],
+                    reduciable=True,
+                    r_differentiable=False,
+                    c_differentiable=False,
+                ),
+            ]
+        )
+
+    def call(
+        self,
+        descriptor: np.ndarray,
+        atype: np.ndarray,
+        gr: Optional[np.ndarray] = None,
+        g2: Optional[np.ndarray] = None,
+        h2: Optional[np.ndarray] = None,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+    ) -> Dict[str, np.ndarray]:
+        """Calculate the fitting.
+
+        Parameters
+        ----------
+        descriptor
+            input descriptor. shape: nf x nloc x nd
+        atype
+            the atom type. shape: nf x nloc
+        gr
+            The rotationally equivariant and permutationally invariant single particle
+            representation. shape: nf x nloc x ng x 3
+        g2
+            The rotationally invariant pair-partical representation.
+            shape: nf x nloc x nnei x ng
+        h2
+            The rotationally equivariant pair-partical representation.
+            shape: nf x nloc x nnei x 3
+        fparam
+            The frame parameter. shape: nf x nfp. nfp being `numb_fparam`
+        aparam
+            The atomic parameter. shape: nf x nloc x nap. nap being `numb_aparam`
+
+        """
+        nframes, nloc, _ = descriptor.shape
+        assert (
+            gr is not None
+        ), "Must provide the rotation matrix for polarizability fitting."
+        # (nframes, nloc, _net_out_dim)
+        out = self._call_common(descriptor, atype, gr, g2, h2, fparam, aparam)[
+            self.var_name
+        ]
+        out = out * self.scale[atype]
+        # (nframes * nloc, m1, 3)
+        gr = gr.reshape(nframes * nloc, -1, 3)
+
+        if self.fit_diag:
+            out = out.reshape(-1, self.embedding_width)
+            out = np.einsum("ij,ijk->ijk", out, gr)
+        else:
+            out = out.reshape(-1, self.embedding_width, self.embedding_width)
+            out = (out + np.transpose(out, axes=(0, 2, 1))) / 2
+            out = np.einsum("bim,bmj->bij", out, gr)  # (nframes * nloc, m1, 3)
+        out = np.einsum(
+            "bim,bmj->bij", np.transpose(gr, axes=(0, 2, 1)), out
+        )  # (nframes * nloc, 3, 3)
+        out = out.reshape(nframes, nloc, 3, 3)
+        if self.shift_diag:
+            bias = self.constant_matrix[atype]
+            # (nframes, nloc, 1)
+            bias = np.expand_dims(bias, axis=-1) * self.scale[atype]
+            eye = np.eye(3)
+            eye = np.tile(eye, (nframes, nloc, 1, 1))
+            # (nframes, nloc, 3, 3)
+            bias = np.expand_dims(bias, axis=-1) * eye
+            out = out + bias
+        return {self.var_name: out}
diff --git a/deepmd/train/__init__.py b/deepmd/dpmodel/infer/__init__.py
similarity index 100%
rename from deepmd/train/__init__.py
rename to deepmd/dpmodel/infer/__init__.py
diff --git a/deepmd/dpmodel/infer/deep_eval.py b/deepmd/dpmodel/infer/deep_eval.py
new file mode 100644
index 0000000000..22267c895a
--- /dev/null
+++ b/deepmd/dpmodel/infer/deep_eval.py
@@ -0,0 +1,372 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Tuple,
+    Type,
+    Union,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.model.base_model import (
+    BaseModel,
+)
+from deepmd.dpmodel.output_def import (
+    ModelOutputDef,
+    OutputVariableCategory,
+    OutputVariableDef,
+)
+from deepmd.dpmodel.utils.batch_size import (
+    AutoBatchSize,
+)
+from deepmd.dpmodel.utils.network import (
+    load_dp_model,
+)
+from deepmd.infer.deep_dipole import (
+    DeepDipole,
+)
+from deepmd.infer.deep_dos import (
+    DeepDOS,
+)
+from deepmd.infer.deep_eval import DeepEval as DeepEvalWrapper
+from deepmd.infer.deep_eval import (
+    DeepEvalBackend,
+)
+from deepmd.infer.deep_polar import (
+    DeepPolar,
+)
+from deepmd.infer.deep_pot import (
+    DeepPot,
+)
+from deepmd.infer.deep_wfc import (
+    DeepWFC,
+)
+
+if TYPE_CHECKING:
+    import ase.neighborlist
+
+
+class DeepEval(DeepEvalBackend):
+    """NumPy backend implementaion of DeepEval.
+
+    Parameters
+    ----------
+    model_file : Path
+        The name of the frozen model file.
+    output_def : ModelOutputDef
+        The output definition of the model.
+    *args : list
+        Positional arguments.
+    auto_batch_size : bool or int or AutomaticBatchSize, default: False
+        If True, automatic batch size will be used. If int, it will be used
+        as the initial batch size.
+    neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional
+        The ASE neighbor list class to produce the neighbor list. If None, the
+        neighbor list will be built natively in the model.
+    **kwargs : dict
+        Keyword arguments.
+    """
+
+    def __init__(
+        self,
+        model_file: str,
+        output_def: ModelOutputDef,
+        *args: List[Any],
+        auto_batch_size: Union[bool, int, AutoBatchSize] = True,
+        neighbor_list: Optional["ase.neighborlist.NewPrimitiveNeighborList"] = None,
+        **kwargs: Dict[str, Any],
+    ):
+        self.output_def = output_def
+        self.model_path = model_file
+
+        model_data = load_dp_model(model_file)
+        self.dp = BaseModel.deserialize(model_data["model"])
+        self.rcut = self.dp.get_rcut()
+        self.type_map = self.dp.get_type_map()
+        if isinstance(auto_batch_size, bool):
+            if auto_batch_size:
+                self.auto_batch_size = AutoBatchSize()
+            else:
+                self.auto_batch_size = None
+        elif isinstance(auto_batch_size, int):
+            self.auto_batch_size = AutoBatchSize(auto_batch_size)
+        elif isinstance(auto_batch_size, AutoBatchSize):
+            self.auto_batch_size = auto_batch_size
+        else:
+            raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize")
+
+    def get_rcut(self) -> float:
+        """Get the cutoff radius of this model."""
+        return self.rcut
+
+    def get_ntypes(self) -> int:
+        """Get the number of atom types of this model."""
+        return len(self.type_map)
+
+    def get_type_map(self) -> List[str]:
+        """Get the type map (element name of the atom types) of this model."""
+        return self.type_map
+
+    def get_dim_fparam(self) -> int:
+        """Get the number (dimension) of frame parameters of this DP."""
+        return self.dp.get_dim_fparam()
+
+    def get_dim_aparam(self) -> int:
+        """Get the number (dimension) of atomic parameters of this DP."""
+        return self.dp.get_dim_aparam()
+
+    @property
+    def model_type(self) -> Type["DeepEvalWrapper"]:
+        """The the evaluator of the model type."""
+        model_output_type = self.dp.model_output_type()
+        if "energy" in model_output_type:
+            return DeepPot
+        elif "dos" in model_output_type:
+            return DeepDOS
+        elif "dipole" in model_output_type:
+            return DeepDipole
+        elif "polar" in model_output_type:
+            return DeepPolar
+        elif "wfc" in model_output_type:
+            return DeepWFC
+        else:
+            raise RuntimeError("Unknown model type")
+
+    def get_sel_type(self) -> List[int]:
+        """Get the selected atom types of this model.
+
+        Only atoms with selected atom types have atomic contribution
+        to the result of the model.
+        If returning an empty list, all atom types are selected.
+        """
+        return self.dp.get_sel_type()
+
+    def get_numb_dos(self) -> int:
+        """Get the number of DOS."""
+        return 0
+
+    def get_has_efield(self):
+        """Check if the model has efield."""
+        return False
+
+    def get_ntypes_spin(self):
+        """Get the number of spin atom types of this model."""
+        return 0
+
+    def eval(
+        self,
+        coords: np.ndarray,
+        cells: np.ndarray,
+        atom_types: np.ndarray,
+        atomic: bool = False,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+        **kwargs: Dict[str, Any],
+    ) -> Dict[str, np.ndarray]:
+        """Evaluate the energy, force and virial by using this DP.
+
+        Parameters
+        ----------
+        coords
+            The coordinates of atoms.
+            The array should be of size nframes x natoms x 3
+        cells
+            The cell of the region.
+            If None then non-PBC is assumed, otherwise using PBC.
+            The array should be of size nframes x 9
+        atom_types
+            The atom types
+            The list should contain natoms ints
+        atomic
+            Calculate the atomic energy and virial
+        fparam
+            The frame parameter.
+            The array can be of size :
+            - nframes x dim_fparam.
+            - dim_fparam. Then all frames are assumed to be provided with the same fparam.
+        aparam
+            The atomic parameter
+            The array can be of size :
+            - nframes x natoms x dim_aparam.
+            - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
+            - dim_aparam. Then all frames and atoms are provided with the same aparam.
+        **kwargs
+            Other parameters
+
+        Returns
+        -------
+        output_dict : dict
+            The output of the evaluation. The keys are the names of the output
+            variables, and the values are the corresponding output arrays.
+        """
+        if fparam is not None or aparam is not None:
+            raise NotImplementedError
+        # convert all of the input to numpy array
+        atom_types = np.array(atom_types, dtype=np.int32)
+        coords = np.array(coords)
+        if cells is not None:
+            cells = np.array(cells)
+        natoms, numb_test = self._get_natoms_and_nframes(
+            coords, atom_types, len(atom_types.shape) > 1
+        )
+        request_defs = self._get_request_defs(atomic)
+        out = self._eval_func(self._eval_model, numb_test, natoms)(
+            coords, cells, atom_types, request_defs
+        )
+        return dict(
+            zip(
+                [x.name for x in request_defs],
+                out,
+            )
+        )
+
+    def _get_request_defs(self, atomic: bool) -> List[OutputVariableDef]:
+        """Get the requested output definitions.
+
+        When atomic is True, all output_def are requested.
+        When atomic is False, only energy (tensor), force, and virial
+        are requested.
+
+        Parameters
+        ----------
+        atomic : bool
+            Whether to request the atomic output.
+
+        Returns
+        -------
+        list[OutputVariableDef]
+            The requested output definitions.
+        """
+        if atomic:
+            return list(self.output_def.var_defs.values())
+        else:
+            return [
+                x
+                for x in self.output_def.var_defs.values()
+                if x.category
+                in (
+                    OutputVariableCategory.REDU,
+                    OutputVariableCategory.DERV_R,
+                    OutputVariableCategory.DERV_C_REDU,
+                )
+            ]
+
+    def _eval_func(self, inner_func: Callable, numb_test: int, natoms: int) -> Callable:
+        """Wrapper method with auto batch size.
+
+        Parameters
+        ----------
+        inner_func : Callable
+            the method to be wrapped
+        numb_test : int
+            number of tests
+        natoms : int
+            number of atoms
+
+        Returns
+        -------
+        Callable
+            the wrapper
+        """
+        if self.auto_batch_size is not None:
+
+            def eval_func(*args, **kwargs):
+                return self.auto_batch_size.execute_all(
+                    inner_func, numb_test, natoms, *args, **kwargs
+                )
+
+        else:
+            eval_func = inner_func
+        return eval_func
+
+    def _get_natoms_and_nframes(
+        self,
+        coords: np.ndarray,
+        atom_types: np.ndarray,
+        mixed_type: bool = False,
+    ) -> Tuple[int, int]:
+        if mixed_type:
+            natoms = len(atom_types[0])
+        else:
+            natoms = len(atom_types)
+        if natoms == 0:
+            assert coords.size == 0
+        else:
+            coords = np.reshape(np.array(coords), [-1, natoms * 3])
+        nframes = coords.shape[0]
+        return natoms, nframes
+
+    def _eval_model(
+        self,
+        coords: np.ndarray,
+        cells: Optional[np.ndarray],
+        atom_types: np.ndarray,
+        request_defs: List[OutputVariableDef],
+    ):
+        model = self.dp
+
+        nframes = coords.shape[0]
+        if len(atom_types.shape) == 1:
+            natoms = len(atom_types)
+            atom_types = np.tile(atom_types, nframes).reshape(nframes, -1)
+        else:
+            natoms = len(atom_types[0])
+
+        coord_input = coords.reshape([-1, natoms, 3])
+        type_input = atom_types
+        if cells is not None:
+            box_input = cells.reshape([-1, 3, 3])
+        else:
+            box_input = None
+
+        do_atomic_virial = any(
+            x.category == OutputVariableCategory.DERV_C_REDU for x in request_defs
+        )
+        batch_output = model(
+            coord_input, type_input, box=box_input, do_atomic_virial=do_atomic_virial
+        )
+        if isinstance(batch_output, tuple):
+            batch_output = batch_output[0]
+
+        results = []
+        for odef in request_defs:
+            # it seems not doing conversion
+            # dp_name = self._OUTDEF_DP2BACKEND[odef.name]
+            dp_name = odef.name
+            if dp_name in batch_output:
+                shape = self._get_output_shape(odef, nframes, natoms)
+                if batch_output[dp_name] is not None:
+                    out = batch_output[dp_name].reshape(shape)
+                else:
+                    out = np.full(shape, np.nan)
+                results.append(out)
+            else:
+                shape = self._get_output_shape(odef, nframes, natoms)
+                results.append(np.full(np.abs(shape), np.nan))  # this is kinda hacky
+        return tuple(results)
+
+    def _get_output_shape(self, odef, nframes, natoms):
+        if odef.category == OutputVariableCategory.DERV_C_REDU:
+            # virial
+            return [nframes, *odef.shape[:-1], 9]
+        elif odef.category == OutputVariableCategory.REDU:
+            # energy
+            return [nframes, *odef.shape, 1]
+        elif odef.category == OutputVariableCategory.DERV_C:
+            # atom_virial
+            return [nframes, *odef.shape[:-1], natoms, 9]
+        elif odef.category == OutputVariableCategory.DERV_R:
+            # force
+            return [nframes, *odef.shape[:-1], natoms, 3]
+        elif odef.category == OutputVariableCategory.OUT:
+            # atom_energy, atom_tensor
+            # Something wrong here?
+            # return [nframes, *shape, natoms, 1]
+            return [nframes, natoms, *odef.shape, 1]
+        else:
+            raise RuntimeError("unknown category")
diff --git a/deepmd/dpmodel/model/__init__.py b/deepmd/dpmodel/model/__init__.py
new file mode 100644
index 0000000000..c1ff15ab0d
--- /dev/null
+++ b/deepmd/dpmodel/model/__init__.py
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""The model that takes the coordinates, cell and atom types as input
+and predicts some property. The models are automatically generated from
+atomic models by the `deepmd.dpmodel.make_model` method.
+
+The `make_model` method does the reduction, auto-differentiation
+(dummy for dpmodels) and communication of the atomic properties
+according to output variable definition
+`deepmd.dpmodel.OutputVariableDef`.
+
+All models should be inherited from :class:`deepmd.dpmodel.model.base_model.BaseModel`.
+Models generated by `make_model` have already done it.
+"""
+
+from .dp_model import (
+    DPModel,
+)
+from .make_model import (
+    make_model,
+)
+from .spin_model import (
+    SpinModel,
+)
+
+__all__ = [
+    "DPModel",
+    "SpinModel",
+    "make_model",
+]
diff --git a/deepmd/dpmodel/model/base_model.py b/deepmd/dpmodel/model/base_model.py
new file mode 100644
index 0000000000..5169d1b5fe
--- /dev/null
+++ b/deepmd/dpmodel/model/base_model.py
@@ -0,0 +1,181 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import inspect
+from abc import (
+    ABC,
+    abstractmethod,
+)
+from typing import (
+    Any,
+    List,
+    Type,
+)
+
+from deepmd.utils.plugin import (
+    PluginVariant,
+    make_plugin_registry,
+)
+
+
+def make_base_model() -> Type[object]:
+    class BaseBaseModel(ABC, PluginVariant, make_plugin_registry("model")):
+        """Base class for final exported model that will be directly used for inference.
+
+        The class defines some abstractmethods that will be directly called by the
+        inference interface. If the final model class inherits some of those methods
+        from other classes, `BaseModel` should be inherited as the last class to ensure
+        the correct method resolution order.
+
+        This class is backend-indepedent.
+
+        See Also
+        --------
+        deepmd.dpmodel.model.base_model.BaseModel
+            BaseModel class for DPModel backend.
+        """
+
+        def __new__(cls, *args, **kwargs):
+            if inspect.isabstract(cls):
+                cls = cls.get_class_by_type(kwargs.get("type", "standard"))
+            return super().__new__(cls)
+
+        @abstractmethod
+        def __call__(self, *args: Any, **kwds: Any) -> Any:
+            """Inference method.
+
+            Parameters
+            ----------
+            *args : Any
+                The input data for inference.
+            **kwds : Any
+                The input data for inference.
+
+            Returns
+            -------
+            Any
+                The output of the inference.
+            """
+            pass
+
+        @abstractmethod
+        def get_type_map(self) -> List[str]:
+            """Get the type map."""
+
+        @abstractmethod
+        def get_rcut(self):
+            """Get the cut-off radius."""
+
+        @abstractmethod
+        def get_dim_fparam(self):
+            """Get the number (dimension) of frame parameters of this atomic model."""
+
+        @abstractmethod
+        def get_dim_aparam(self):
+            """Get the number (dimension) of atomic parameters of this atomic model."""
+
+        @abstractmethod
+        def get_sel_type(self) -> List[int]:
+            """Get the selected atom types of this model.
+
+            Only atoms with selected atom types have atomic contribution
+            to the result of the model.
+            If returning an empty list, all atom types are selected.
+            """
+
+        @abstractmethod
+        def is_aparam_nall(self) -> bool:
+            """Check whether the shape of atomic parameters is (nframes, nall, ndim).
+
+            If False, the shape is (nframes, nloc, ndim).
+            """
+
+        @abstractmethod
+        def model_output_type(self) -> List[str]:
+            """Get the output type for the model."""
+
+        @abstractmethod
+        def serialize(self) -> dict:
+            """Serialize the model.
+
+            Returns
+            -------
+            dict
+                The serialized data
+            """
+            pass
+
+        @classmethod
+        def deserialize(cls, data: dict) -> "BaseBaseModel":
+            """Deserialize the model.
+
+            Parameters
+            ----------
+            data : dict
+                The serialized data
+
+            Returns
+            -------
+            BaseModel
+                The deserialized model
+            """
+            if inspect.isabstract(cls):
+                return cls.get_class_by_type(data["type"]).deserialize(data)
+            raise NotImplementedError("Not implemented in class %s" % cls.__name__)
+
+        model_def_script: str
+
+        @abstractmethod
+        def get_model_def_script(self) -> str:
+            """Get the model definition script."""
+            pass
+
+        @abstractmethod
+        def get_nnei(self) -> int:
+            """Returns the total number of selected neighboring atoms in the cut-off radius."""
+            # for C++ interface
+            pass
+
+        @abstractmethod
+        def get_nsel(self) -> int:
+            """Returns the total number of selected neighboring atoms in the cut-off radius."""
+            pass
+
+        @classmethod
+        @abstractmethod
+        def update_sel(cls, global_jdata: dict, local_jdata: dict):
+            """Update the selection and perform neighbor statistics.
+
+            Parameters
+            ----------
+            global_jdata : dict
+                The global data, containing the training section
+            local_jdata : dict
+                The local data refer to the current class
+            """
+            cls = cls.get_class_by_type(local_jdata.get("type", "standard"))
+            return cls.update_sel(global_jdata, local_jdata)
+
+    return BaseBaseModel
+
+
+class BaseModel(make_base_model()):
+    """Base class for final exported model that will be directly used for inference.
+
+    The class defines some abstractmethods that will be directly called by the
+    inference interface. If the final model class inherbits some of those methods
+    from other classes, `BaseModel` should be inherited as the last class to ensure
+    the correct method resolution order.
+
+    This class is for the DPModel backend.
+
+    See Also
+    --------
+    deepmd.dpmodel.model.base_model.BaseBaseModel
+        Backend-independent BaseModel class.
+    """
+
+    def __init__(self) -> None:
+        self.model_def_script = ""
+
+    def get_model_def_script(self) -> str:
+        """Get the model definition script."""
+        return self.model_def_script
diff --git a/deepmd/dpmodel/model/dp_model.py b/deepmd/dpmodel/model/dp_model.py
new file mode 100644
index 0000000000..8d84c435b4
--- /dev/null
+++ b/deepmd/dpmodel/model/dp_model.py
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+
+
+from deepmd.dpmodel.atomic_model import (
+    DPAtomicModel,
+)
+from deepmd.dpmodel.descriptor.base_descriptor import (
+    BaseDescriptor,
+)
+from deepmd.dpmodel.model.base_model import (
+    BaseModel,
+)
+
+from .make_model import (
+    make_model,
+)
+
+
+# use "class" to resolve "Variable not allowed in type expression"
+@BaseModel.register("standard")
+class DPModel(make_model(DPAtomicModel)):
+    @classmethod
+    def update_sel(cls, global_jdata: dict, local_jdata: dict):
+        """Update the selection and perform neighbor statistics.
+
+        Parameters
+        ----------
+        global_jdata : dict
+            The global data, containing the training section
+        local_jdata : dict
+            The local data refer to the current class
+        """
+        local_jdata_cpy = local_jdata.copy()
+        local_jdata_cpy["descriptor"] = BaseDescriptor.update_sel(
+            global_jdata, local_jdata["descriptor"]
+        )
+        return local_jdata_cpy
diff --git a/deepmd/dpmodel/model/make_model.py b/deepmd/dpmodel/model/make_model.py
new file mode 100644
index 0000000000..68889ad331
--- /dev/null
+++ b/deepmd/dpmodel/model/make_model.py
@@ -0,0 +1,476 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Dict,
+    List,
+    Optional,
+    Tuple,
+    Type,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.atomic_model.base_atomic_model import (
+    BaseAtomicModel,
+)
+from deepmd.dpmodel.common import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    PRECISION_DICT,
+    RESERVED_PRECISON_DICT,
+    NativeOP,
+)
+from deepmd.dpmodel.model.base_model import (
+    BaseModel,
+)
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+    ModelOutputDef,
+    OutputVariableCategory,
+    OutputVariableOperation,
+    check_operation_applied,
+)
+from deepmd.dpmodel.utils import (
+    build_neighbor_list,
+    extend_coord_with_ghosts,
+    nlist_distinguish_types,
+    normalize_coord,
+)
+
+from .transform_output import (
+    communicate_extended_output,
+    fit_output_to_model_output,
+)
+
+
+def make_model(T_AtomicModel: Type[BaseAtomicModel]):
+    """Make a model as a derived class of an atomic model.
+
+    The model provide two interfaces.
+
+    1. the `call_lower`, that takes extended coordinates, atyps and neighbor list,
+    and outputs the atomic and property and derivatives (if required) on the extended region.
+
+    2. the `call`, that takes coordinates, atypes and cell and predicts
+    the atomic and reduced property, and derivatives (if required) on the local region.
+
+    Parameters
+    ----------
+    T_AtomicModel
+        The atomic model.
+
+    Returns
+    -------
+    CM
+        The model.
+
+    """
+
+    class CM(NativeOP, BaseModel):
+        def __init__(
+            self,
+            *args,
+            # underscore to prevent conflict with normal inputs
+            atomic_model_: Optional[T_AtomicModel] = None,
+            **kwargs,
+        ):
+            BaseModel.__init__(self)
+            if atomic_model_ is not None:
+                self.atomic_model: T_AtomicModel = atomic_model_
+            else:
+                self.atomic_model: T_AtomicModel = T_AtomicModel(*args, **kwargs)
+            self.precision_dict = PRECISION_DICT
+            self.reverse_precision_dict = RESERVED_PRECISON_DICT
+            self.global_np_float_precision = GLOBAL_NP_FLOAT_PRECISION
+            self.global_ener_float_precision = GLOBAL_ENER_FLOAT_PRECISION
+
+        def model_output_def(self):
+            """Get the output def for the model."""
+            return ModelOutputDef(self.atomic_output_def())
+
+        def model_output_type(self) -> List[str]:
+            """Get the output type for the model."""
+            output_def = self.model_output_def()
+            var_defs = output_def.var_defs
+            vars = [
+                kk
+                for kk, vv in var_defs.items()
+                if vv.category == OutputVariableCategory.OUT
+            ]
+            return vars
+
+        def call(
+            self,
+            coord,
+            atype,
+            box: Optional[np.ndarray] = None,
+            fparam: Optional[np.ndarray] = None,
+            aparam: Optional[np.ndarray] = None,
+            do_atomic_virial: bool = False,
+        ) -> Dict[str, np.ndarray]:
+            """Return model prediction.
+
+            Parameters
+            ----------
+            coord
+                The coordinates of the atoms.
+                shape: nf x (nloc x 3)
+            atype
+                The type of atoms. shape: nf x nloc
+            box
+                The simulation box. shape: nf x 9
+            fparam
+                frame parameter. nf x ndf
+            aparam
+                atomic parameter. nf x nloc x nda
+            do_atomic_virial
+                If calculate the atomic virial.
+
+            Returns
+            -------
+            ret_dict
+                The result dict of type Dict[str,np.ndarray].
+                The keys are defined by the `ModelOutputDef`.
+
+            """
+            nframes, nloc = atype.shape[:2]
+            cc, bb, fp, ap, input_prec = self.input_type_cast(
+                coord, box=box, fparam=fparam, aparam=aparam
+            )
+            del coord, box, fparam, aparam
+            if bb is not None:
+                coord_normalized = normalize_coord(
+                    cc.reshape(nframes, nloc, 3),
+                    bb.reshape(nframes, 3, 3),
+                )
+            else:
+                coord_normalized = cc.copy()
+            extended_coord, extended_atype, mapping = extend_coord_with_ghosts(
+                coord_normalized, atype, bb, self.get_rcut()
+            )
+            nlist = build_neighbor_list(
+                extended_coord,
+                extended_atype,
+                nloc,
+                self.get_rcut(),
+                self.get_sel(),
+                distinguish_types=not self.mixed_types(),
+            )
+            extended_coord = extended_coord.reshape(nframes, -1, 3)
+            model_predict_lower = self.call_lower(
+                extended_coord,
+                extended_atype,
+                nlist,
+                mapping,
+                fparam=fp,
+                aparam=ap,
+                do_atomic_virial=do_atomic_virial,
+            )
+            model_predict = communicate_extended_output(
+                model_predict_lower,
+                self.model_output_def(),
+                mapping,
+                do_atomic_virial=do_atomic_virial,
+            )
+            model_predict = self.output_type_cast(model_predict, input_prec)
+            return model_predict
+
+        def call_lower(
+            self,
+            extended_coord: np.ndarray,
+            extended_atype: np.ndarray,
+            nlist: np.ndarray,
+            mapping: Optional[np.ndarray] = None,
+            fparam: Optional[np.ndarray] = None,
+            aparam: Optional[np.ndarray] = None,
+            do_atomic_virial: bool = False,
+        ):
+            """Return model prediction. Lower interface that takes
+            extended atomic coordinates and types, nlist, and mapping
+            as input, and returns the predictions on the extended region.
+            The predictions are not reduced.
+
+            Parameters
+            ----------
+            extended_coord
+                coodinates in extended region. nf x (nall x 3).
+            extended_atype
+                atomic type in extended region. nf x nall.
+            nlist
+                neighbor list. nf x nloc x nsel.
+            mapping
+                mapps the extended indices to local indices. nf x nall.
+            fparam
+                frame parameter. nf x ndf
+            aparam
+                atomic parameter. nf x nloc x nda
+            do_atomic_virial
+                whether calculate atomic virial
+
+            Returns
+            -------
+            result_dict
+                the result dict, defined by the `FittingOutputDef`.
+
+            """
+            nframes, nall = extended_atype.shape[:2]
+            extended_coord = extended_coord.reshape(nframes, -1, 3)
+            nlist = self.format_nlist(extended_coord, extended_atype, nlist)
+            cc_ext, _, fp, ap, input_prec = self.input_type_cast(
+                extended_coord, fparam=fparam, aparam=aparam
+            )
+            del extended_coord, fparam, aparam
+            atomic_ret = self.atomic_model.forward_common_atomic(
+                cc_ext,
+                extended_atype,
+                nlist,
+                mapping=mapping,
+                fparam=fp,
+                aparam=ap,
+            )
+            model_predict = fit_output_to_model_output(
+                atomic_ret,
+                self.atomic_output_def(),
+                cc_ext,
+                do_atomic_virial=do_atomic_virial,
+            )
+            model_predict = self.output_type_cast(model_predict, input_prec)
+            return model_predict
+
+        def input_type_cast(
+            self,
+            coord: np.ndarray,
+            box: Optional[np.ndarray] = None,
+            fparam: Optional[np.ndarray] = None,
+            aparam: Optional[np.ndarray] = None,
+        ) -> Tuple[
+            np.ndarray,
+            Optional[np.ndarray],
+            Optional[np.ndarray],
+            Optional[np.ndarray],
+            str,
+        ]:
+            """Cast the input data to global float type."""
+            input_prec = self.reverse_precision_dict[
+                self.precision_dict[coord.dtype.name]
+            ]
+            ###
+            ### type checking would not pass jit, convert to coord prec anyway
+            ###
+            _lst: List[Optional[np.ndarray]] = [
+                vv.astype(coord.dtype) if vv is not None else None
+                for vv in [box, fparam, aparam]
+            ]
+            box, fparam, aparam = _lst
+            if (
+                input_prec
+                == self.reverse_precision_dict[self.global_np_float_precision]
+            ):
+                return coord, box, fparam, aparam, input_prec
+            else:
+                pp = self.global_np_float_precision
+                return (
+                    coord.astype(pp),
+                    box.astype(pp) if box is not None else None,
+                    fparam.astype(pp) if fparam is not None else None,
+                    aparam.astype(pp) if aparam is not None else None,
+                    input_prec,
+                )
+
+        def output_type_cast(
+            self,
+            model_ret: Dict[str, np.ndarray],
+            input_prec: str,
+        ) -> Dict[str, np.ndarray]:
+            """Convert the model output to the input prec."""
+            do_cast = (
+                input_prec
+                != self.reverse_precision_dict[self.global_np_float_precision]
+            )
+            pp = self.precision_dict[input_prec]
+            odef = self.model_output_def()
+            for kk in odef.keys():
+                if kk not in model_ret.keys():
+                    # do not return energy_derv_c if not do_atomic_virial
+                    continue
+                if check_operation_applied(odef[kk], OutputVariableOperation.REDU):
+                    model_ret[kk] = (
+                        model_ret[kk].astype(self.global_ener_float_precision)
+                        if model_ret[kk] is not None
+                        else None
+                    )
+                elif do_cast:
+                    model_ret[kk] = (
+                        model_ret[kk].astype(pp) if model_ret[kk] is not None else None
+                    )
+            return model_ret
+
+        def format_nlist(
+            self,
+            extended_coord: np.ndarray,
+            extended_atype: np.ndarray,
+            nlist: np.ndarray,
+        ):
+            """Format the neighbor list.
+
+            1. If the number of neighbors in the `nlist` is equal to sum(self.sel),
+            it does nothong
+
+            2. If the number of neighbors in the `nlist` is smaller than sum(self.sel),
+            the `nlist` is pad with -1.
+
+            3. If the number of neighbors in the `nlist` is larger than sum(self.sel),
+            the nearest sum(sel) neighbors will be preseved.
+
+            Known limitations:
+
+            In the case of not self.mixed_types, the nlist is always formatted.
+            May have side effact on the efficiency.
+
+            Parameters
+            ----------
+            extended_coord
+                coodinates in extended region. nf x nall x 3
+            extended_atype
+                atomic type in extended region. nf x nall
+            nlist
+                neighbor list. nf x nloc x nsel
+
+            Returns
+            -------
+            formated_nlist
+                the formated nlist.
+
+            """
+            n_nf, n_nloc, n_nnei = nlist.shape
+            mixed_types = self.mixed_types()
+            ret = self._format_nlist(extended_coord, nlist, sum(self.get_sel()))
+            if not mixed_types:
+                ret = nlist_distinguish_types(ret, extended_atype, self.get_sel())
+            return ret
+
+        def _format_nlist(
+            self,
+            extended_coord: np.ndarray,
+            nlist: np.ndarray,
+            nnei: int,
+        ):
+            n_nf, n_nloc, n_nnei = nlist.shape
+            extended_coord = extended_coord.reshape([n_nf, -1, 3])
+            nall = extended_coord.shape[1]
+            rcut = self.get_rcut()
+
+            if n_nnei < nnei:
+                # make a copy before revise
+                ret = np.concatenate(
+                    [
+                        nlist,
+                        -1 * np.ones([n_nf, n_nloc, nnei - n_nnei], dtype=nlist.dtype),
+                    ],
+                    axis=-1,
+                )
+            elif n_nnei > nnei:
+                # make a copy before revise
+                m_real_nei = nlist >= 0
+                ret = np.where(m_real_nei, nlist, 0)
+                coord0 = extended_coord[:, :n_nloc, :]
+                index = ret.reshape(n_nf, n_nloc * n_nnei, 1).repeat(3, axis=2)
+                coord1 = np.take_along_axis(extended_coord, index, axis=1)
+                coord1 = coord1.reshape(n_nf, n_nloc, n_nnei, 3)
+                rr = np.linalg.norm(coord0[:, :, None, :] - coord1, axis=-1)
+                rr = np.where(m_real_nei, rr, float("inf"))
+                rr, ret_mapping = np.sort(rr, axis=-1), np.argsort(rr, axis=-1)
+                ret = np.take_along_axis(ret, ret_mapping, axis=2)
+                ret = np.where(rr > rcut, -1, ret)
+                ret = ret[..., :nnei]
+            else:  # n_nnei == nnei:
+                # copy anyway...
+                ret = nlist
+            assert ret.shape[-1] == nnei
+            return ret
+
+        def do_grad_r(
+            self,
+            var_name: Optional[str] = None,
+        ) -> bool:
+            """Tell if the output variable `var_name` is r_differentiable.
+            if var_name is None, returns if any of the variable is r_differentiable.
+            """
+            return self.atomic_model.do_grad_r(var_name)
+
+        def do_grad_c(
+            self,
+            var_name: Optional[str] = None,
+        ) -> bool:
+            """Tell if the output variable `var_name` is c_differentiable.
+            if var_name is None, returns if any of the variable is c_differentiable.
+            """
+            return self.atomic_model.do_grad_c(var_name)
+
+        def serialize(self) -> dict:
+            return self.atomic_model.serialize()
+
+        @classmethod
+        def deserialize(cls, data) -> "CM":
+            return cls(atomic_model_=T_AtomicModel.deserialize(data))
+
+        def get_dim_fparam(self) -> int:
+            """Get the number (dimension) of frame parameters of this atomic model."""
+            return self.atomic_model.get_dim_fparam()
+
+        def get_dim_aparam(self) -> int:
+            """Get the number (dimension) of atomic parameters of this atomic model."""
+            return self.atomic_model.get_dim_aparam()
+
+        def get_sel_type(self) -> List[int]:
+            """Get the selected atom types of this model.
+
+            Only atoms with selected atom types have atomic contribution
+            to the result of the model.
+            If returning an empty list, all atom types are selected.
+            """
+            return self.atomic_model.get_sel_type()
+
+        def is_aparam_nall(self) -> bool:
+            """Check whether the shape of atomic parameters is (nframes, nall, ndim).
+
+            If False, the shape is (nframes, nloc, ndim).
+            """
+            return self.atomic_model.is_aparam_nall()
+
+        def get_rcut(self) -> float:
+            """Get the cut-off radius."""
+            return self.atomic_model.get_rcut()
+
+        def get_type_map(self) -> List[str]:
+            """Get the type map."""
+            return self.atomic_model.get_type_map()
+
+        def get_nsel(self) -> int:
+            """Returns the total number of selected neighboring atoms in the cut-off radius."""
+            return self.atomic_model.get_nsel()
+
+        def get_nnei(self) -> int:
+            """Returns the total number of selected neighboring atoms in the cut-off radius."""
+            return self.atomic_model.get_nnei()
+
+        def get_sel(self) -> List[int]:
+            """Returns the number of selected atoms for each type."""
+            return self.atomic_model.get_sel()
+
+        def mixed_types(self) -> bool:
+            """If true, the model
+            1. assumes total number of atoms aligned across frames;
+            2. uses a neighbor list that does not distinguish different atomic types.
+
+            If false, the model
+            1. assumes total number of atoms of each atom type aligned across frames;
+            2. uses a neighbor list that distinguishes different atomic types.
+
+            """
+            return self.atomic_model.mixed_types()
+
+        def atomic_output_def(self) -> FittingOutputDef:
+            """Get the output def of the atomic model."""
+            return self.atomic_model.atomic_output_def()
+
+    return CM
diff --git a/deepmd/dpmodel/model/model.py b/deepmd/dpmodel/model/model.py
new file mode 100644
index 0000000000..3fdf5b802b
--- /dev/null
+++ b/deepmd/dpmodel/model/model.py
@@ -0,0 +1,97 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.dpmodel.descriptor.se_e2_a import (
+    DescrptSeA,
+)
+from deepmd.dpmodel.fitting.ener_fitting import (
+    EnergyFittingNet,
+)
+from deepmd.dpmodel.model.dp_model import (
+    DPModel,
+)
+from deepmd.dpmodel.model.spin_model import (
+    SpinModel,
+)
+from deepmd.utils.spin import (
+    Spin,
+)
+
+
+def get_standard_model(data: dict) -> DPModel:
+    """Get a standard DPModel from a dictionary.
+
+    Parameters
+    ----------
+    data : dict
+        The data to construct the model.
+    """
+    descriptor_type = data["descriptor"].pop("type")
+    fitting_type = data["fitting_net"].pop("type")
+    if descriptor_type == "se_e2_a":
+        descriptor = DescrptSeA(
+            **data["descriptor"],
+        )
+    else:
+        raise ValueError(f"Unknown descriptor type {descriptor_type}")
+    if fitting_type == "ener":
+        fitting = EnergyFittingNet(
+            ntypes=descriptor.get_ntypes(),
+            dim_descrpt=descriptor.get_dim_out(),
+            mixed_types=descriptor.mixed_types(),
+            **data["fitting_net"],
+        )
+    else:
+        raise ValueError(f"Unknown fitting type {fitting_type}")
+    return DPModel(
+        descriptor=descriptor,
+        fitting=fitting,
+        type_map=data["type_map"],
+        atom_exclude_types=data.get("atom_exclude_types", []),
+        pair_exclude_types=data.get("pair_exclude_types", []),
+    )
+
+
+def get_spin_model(data: dict) -> SpinModel:
+    """Get a spin model from a dictionary.
+
+    Parameters
+    ----------
+    data : dict
+        The data to construct the model.
+    """
+    # include virtual spin and placeholder types
+    data["type_map"] += [item + "_spin" for item in data["type_map"]]
+    spin = Spin(
+        use_spin=data["spin"]["use_spin"],
+        virtual_scale=data["spin"]["virtual_scale"],
+    )
+    pair_exclude_types = spin.get_pair_exclude_types(
+        exclude_types=data.get("pair_exclude_types", None)
+    )
+    data["pair_exclude_types"] = pair_exclude_types
+    # for descriptor data stat
+    data["descriptor"]["exclude_types"] = pair_exclude_types
+    atom_exclude_types = spin.get_atom_exclude_types(
+        exclude_types=data.get("atom_exclude_types", None)
+    )
+    data["atom_exclude_types"] = atom_exclude_types
+    if "env_protection" not in data["descriptor"]:
+        data["descriptor"]["env_protection"] = 1e-6
+    if data["descriptor"]["type"] in ["se_e2_a"]:
+        # only expand sel for se_e2_a
+        data["descriptor"]["sel"] += data["descriptor"]["sel"]
+    backbone_model = get_standard_model(data)
+    return SpinModel(backbone_model=backbone_model, spin=spin)
+
+
+def get_model(data: dict):
+    """Get a model from a dictionary.
+
+    Parameters
+    ----------
+    data : dict
+        The data to construct the model.
+    """
+    if "spin" in data:
+        return get_spin_model(data)
+    else:
+        return get_standard_model(data)
diff --git a/deepmd/dpmodel/model/spin_model.py b/deepmd/dpmodel/model/spin_model.py
new file mode 100644
index 0000000000..5b31b64fdf
--- /dev/null
+++ b/deepmd/dpmodel/model/spin_model.py
@@ -0,0 +1,394 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Dict,
+    List,
+    Optional,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.model.dp_model import (
+    DPModel,
+)
+from deepmd.utils.spin import (
+    Spin,
+)
+
+
+class SpinModel:
+    """A spin model wrapper, with spin input preprocess and output split."""
+
+    def __init__(
+        self,
+        backbone_model,
+        spin: Spin,
+    ):
+        super().__init__()
+        self.backbone_model = backbone_model
+        self.spin = spin
+        self.ntypes_real = self.spin.ntypes_real
+        self.virtual_scale_mask = self.spin.get_virtual_scale_mask()
+        self.spin_mask = self.spin.get_spin_mask()
+
+    def process_spin_input(self, coord, atype, spin):
+        """Generate virtual coordinates and types, concat into the input."""
+        nframes, nloc = coord.shape[:-1]
+        atype_spin = np.concatenate([atype, atype + self.ntypes_real], axis=-1)
+        virtual_coord = coord + spin * self.virtual_scale_mask[atype].reshape(
+            [nframes, nloc, 1]
+        )
+        coord_spin = np.concatenate([coord, virtual_coord], axis=-2)
+        return coord_spin, atype_spin
+
+    def process_spin_input_lower(
+        self,
+        extended_coord: np.ndarray,
+        extended_atype: np.ndarray,
+        extended_spin: np.ndarray,
+        nlist: np.ndarray,
+        mapping: Optional[np.ndarray] = None,
+    ):
+        """
+        Add `extended_spin` into `extended_coord` to generate virtual atoms, and extend `nlist` and `mapping`.
+        Note that the final `extended_coord_updated` with shape [nframes, nall + nall, 3] has the following order:
+        - [:, :nloc]: original nloc real atoms.
+        - [:, nloc: nloc + nloc]: virtual atoms corresponding to nloc real atoms.
+        - [:, nloc + nloc: nloc + nall]: ghost real atoms.
+        - [:, nloc + nall: nall + nall]: virtual atoms corresponding to ghost real atoms.
+        """
+        nframes, nall = extended_coord.shape[:2]
+        nloc = nlist.shape[1]
+        virtual_extended_coord = (
+            extended_coord
+            + extended_spin
+            * self.virtual_scale_mask[extended_atype].reshape([nframes, nall, 1])
+        )
+        virtual_extended_atype = extended_atype + self.ntypes_real
+        extended_coord_updated = self.concat_switch_virtual(
+            extended_coord, virtual_extended_coord, nloc
+        )
+        extended_atype_updated = self.concat_switch_virtual(
+            extended_atype, virtual_extended_atype, nloc
+        )
+        if mapping is not None:
+            virtual_mapping = mapping + nloc
+            mapping_updated = self.concat_switch_virtual(mapping, virtual_mapping, nloc)
+        else:
+            mapping_updated = None
+        # extend the nlist
+        nlist_updated = self.extend_nlist(extended_atype, nlist)
+        return (
+            extended_coord_updated,
+            extended_atype_updated,
+            nlist_updated,
+            mapping_updated,
+        )
+
+    def process_spin_output(
+        self, atype, out_tensor, add_mag: bool = True, virtual_scale: bool = True
+    ):
+        """Split the output both real and virtual atoms, and scale the latter."""
+        nframes, nloc_double = out_tensor.shape[:2]
+        nloc = nloc_double // 2
+        if virtual_scale:
+            virtual_scale_mask = self.virtual_scale_mask
+        else:
+            virtual_scale_mask = self.spin_mask
+        atomic_mask = virtual_scale_mask[atype].reshape([nframes, nloc, 1])
+        out_real, out_mag = np.split(out_tensor, [nloc], axis=1)
+        if add_mag:
+            out_real = out_real + out_mag
+        out_mag = (out_mag.reshape([nframes, nloc, -1]) * atomic_mask).reshape(
+            out_mag.shape
+        )
+        return out_real, out_mag, atomic_mask > 0.0
+
+    def process_spin_output_lower(
+        self,
+        extended_atype,
+        extended_out_tensor,
+        nloc: int,
+        add_mag: bool = True,
+        virtual_scale: bool = True,
+    ):
+        """Split the extended output of both real and virtual atoms with switch, and scale the latter."""
+        nframes, nall_double = extended_out_tensor.shape[:2]
+        nall = nall_double // 2
+        if virtual_scale:
+            virtual_scale_mask = self.virtual_scale_mask
+        else:
+            virtual_scale_mask = self.spin_mask
+        atomic_mask = virtual_scale_mask[extended_atype].reshape([nframes, nall, 1])
+        extended_out_real = np.concatenate(
+            [
+                extended_out_tensor[:, :nloc],
+                extended_out_tensor[:, nloc + nloc : nloc + nall],
+            ],
+            axis=1,
+        )
+        extended_out_mag = np.concatenate(
+            [
+                extended_out_tensor[:, nloc : nloc + nloc],
+                extended_out_tensor[:, nloc + nall :],
+            ],
+            axis=1,
+        )
+        if add_mag:
+            extended_out_real = extended_out_real + extended_out_mag
+        extended_out_mag = (
+            extended_out_mag.reshape([nframes, nall, -1]) * atomic_mask
+        ).reshape(extended_out_mag.shape)
+        return extended_out_real, extended_out_mag, atomic_mask > 0.0
+
+    @staticmethod
+    def extend_nlist(extended_atype, nlist):
+        nframes, nloc, nnei = nlist.shape
+        nall = extended_atype.shape[1]
+        nlist_mask = nlist != -1
+        nlist[nlist == -1] = 0
+        nlist_shift = nlist + nall
+        nlist[~nlist_mask] = -1
+        nlist_shift[~nlist_mask] = -1
+        self_spin = np.arange(0, nloc, dtype=nlist.dtype) + nall
+        self_spin = self_spin.reshape(1, -1, 1).repeat(nframes, axis=0)
+        # self spin + real neighbor + virtual neighbor
+        # nf x nloc x (1 + nnei + nnei)
+        extended_nlist = np.concatenate([self_spin, nlist, nlist_shift], axis=-1)
+        # nf x (nloc + nloc) x (1 + nnei + nnei)
+        extended_nlist = np.concatenate(
+            [extended_nlist, -1 * np.ones_like(extended_nlist)], axis=-2
+        )
+        # update the index for switch
+        first_part_index = (nloc <= extended_nlist) & (extended_nlist < nall)
+        second_part_index = (nall <= extended_nlist) & (extended_nlist < (nall + nloc))
+        extended_nlist[first_part_index] += nloc
+        extended_nlist[second_part_index] -= nall - nloc
+        return extended_nlist
+
+    @staticmethod
+    def concat_switch_virtual(extended_tensor, extended_tensor_virtual, nloc: int):
+        nframes, nall = extended_tensor.shape[:2]
+        out_shape = list(extended_tensor.shape)
+        out_shape[1] *= 2
+        extended_tensor_updated = np.zeros(
+            out_shape,
+            dtype=extended_tensor.dtype,
+        )
+        extended_tensor_updated[:, :nloc] = extended_tensor[:, :nloc]
+        extended_tensor_updated[:, nloc : nloc + nloc] = extended_tensor_virtual[
+            :, :nloc
+        ]
+        extended_tensor_updated[:, nloc + nloc : nloc + nall] = extended_tensor[
+            :, nloc:
+        ]
+        extended_tensor_updated[:, nloc + nall :] = extended_tensor_virtual[:, nloc:]
+        return extended_tensor_updated.reshape(out_shape)
+
+    def get_type_map(self) -> List[str]:
+        """Get the type map."""
+        tmap = self.backbone_model.get_type_map()
+        ntypes = len(tmap) // 2  # ignore the virtual type
+        return tmap[:ntypes]
+
+    def get_rcut(self):
+        """Get the cut-off radius."""
+        return self.backbone_model.get_rcut()
+
+    def get_dim_fparam(self):
+        """Get the number (dimension) of frame parameters of this atomic model."""
+        return self.backbone_model.get_dim_fparam()
+
+    def get_dim_aparam(self):
+        """Get the number (dimension) of atomic parameters of this atomic model."""
+        return self.backbone_model.get_dim_aparam()
+
+    def get_sel_type(self) -> List[int]:
+        """Get the selected atom types of this model.
+        Only atoms with selected atom types have atomic contribution
+        to the result of the model.
+        If returning an empty list, all atom types are selected.
+        """
+        return self.backbone_model.get_sel_type()
+
+    def is_aparam_nall(self) -> bool:
+        """Check whether the shape of atomic parameters is (nframes, nall, ndim).
+        If False, the shape is (nframes, nloc, ndim).
+        """
+        return self.backbone_model.is_aparam_nall()
+
+    def model_output_type(self) -> List[str]:
+        """Get the output type for the model."""
+        return self.backbone_model.model_output_type()
+
+    def get_model_def_script(self) -> str:
+        """Get the model definition script."""
+        return self.backbone_model.get_model_def_script()
+
+    def get_nnei(self) -> int:
+        """Returns the total number of selected neighboring atoms in the cut-off radius."""
+        # for C++ interface
+        if not self.backbone_model.mixed_types():
+            return self.backbone_model.get_nnei() // 2  # ignore the virtual selected
+        else:
+            return self.backbone_model.get_nnei()
+
+    def get_nsel(self) -> int:
+        """Returns the total number of selected neighboring atoms in the cut-off radius."""
+        if not self.backbone_model.mixed_types():
+            return self.backbone_model.get_nsel() // 2  # ignore the virtual selected
+        else:
+            return self.backbone_model.get_nsel()
+
+    @staticmethod
+    def has_spin() -> bool:
+        """Returns whether it has spin input and output."""
+        return True
+
+    def __getattr__(self, name):
+        """Get attribute from the wrapped model."""
+        if name in self.__dict__:
+            return self.__dict__[name]
+        else:
+            return getattr(self.backbone_model, name)
+
+    def serialize(self) -> dict:
+        return {
+            "backbone_model": self.backbone_model.serialize(),
+            "spin": self.spin.serialize(),
+        }
+
+    @classmethod
+    def deserialize(cls, data) -> "SpinModel":
+        backbone_model_obj = DPModel.deserialize(data["backbone_model"])
+        spin = Spin.deserialize(data["spin"])
+        return cls(
+            backbone_model=backbone_model_obj,
+            spin=spin,
+        )
+
+    def call(
+        self,
+        coord,
+        atype,
+        spin,
+        box: Optional[np.ndarray] = None,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+        do_atomic_virial: bool = False,
+    ) -> Dict[str, np.ndarray]:
+        """Return model prediction.
+
+        Parameters
+        ----------
+        coord
+            The coordinates of the atoms.
+            shape: nf x (nloc x 3)
+        atype
+            The type of atoms. shape: nf x nloc
+        spin
+            The spins of the atoms.
+            shape: nf x (nloc x 3)
+        box
+            The simulation box. shape: nf x 9
+        fparam
+            frame parameter. nf x ndf
+        aparam
+            atomic parameter. nf x nloc x nda
+        do_atomic_virial
+            If calculate the atomic virial.
+
+        Returns
+        -------
+        ret_dict
+            The result dict of type Dict[str,np.ndarray].
+            The keys are defined by the `ModelOutputDef`.
+
+        """
+        nframes, nloc = coord.shape[:2]
+        coord_updated, atype_updated = self.process_spin_input(coord, atype, spin)
+        model_predict = self.backbone_model.call(
+            coord_updated,
+            atype_updated,
+            box,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+        )
+        model_output_type = self.backbone_model.model_output_type()
+        if "mask" in model_output_type:
+            model_output_type.pop(model_output_type.index("mask"))
+        var_name = model_output_type[0]
+        model_predict[f"{var_name}"] = np.split(
+            model_predict[f"{var_name}"], [nloc], axis=1
+        )[0]
+        # for now omit the grad output
+        return model_predict
+
+    def call_lower(
+        self,
+        extended_coord: np.ndarray,
+        extended_atype: np.ndarray,
+        extended_spin: np.ndarray,
+        nlist: np.ndarray,
+        mapping: Optional[np.ndarray] = None,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+        do_atomic_virial: bool = False,
+    ):
+        """Return model prediction. Lower interface that takes
+        extended atomic coordinates, types and spins, nlist, and mapping
+        as input, and returns the predictions on the extended region.
+        The predictions are not reduced.
+
+        Parameters
+        ----------
+        extended_coord
+            coordinates in extended region. nf x (nall x 3).
+        extended_atype
+            atomic type in extended region. nf x nall.
+        extended_spin
+            spins in extended region. nf x (nall x 3).
+        nlist
+            neighbor list. nf x nloc x nsel.
+        mapping
+            maps the extended indices to local indices. nf x nall.
+        fparam
+            frame parameter. nf x ndf
+        aparam
+            atomic parameter. nf x nloc x nda
+        do_atomic_virial
+            whether calculate atomic virial
+
+        Returns
+        -------
+        result_dict
+            the result dict, defined by the `FittingOutputDef`.
+
+        """
+        nframes, nloc = nlist.shape[:2]
+        (
+            extended_coord_updated,
+            extended_atype_updated,
+            nlist_updated,
+            mapping_updated,
+        ) = self.process_spin_input_lower(
+            extended_coord, extended_atype, extended_spin, nlist, mapping=mapping
+        )
+        model_predict = self.backbone_model.call_lower(
+            extended_coord_updated,
+            extended_atype_updated,
+            nlist_updated,
+            mapping=mapping_updated,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+        )
+        model_output_type = self.backbone_model.model_output_type()
+        if "mask" in model_output_type:
+            model_output_type.pop(model_output_type.index("mask"))
+        var_name = model_output_type[0]
+        model_predict[f"{var_name}"] = np.split(
+            model_predict[f"{var_name}"], [nloc], axis=1
+        )[0]
+        # for now omit the grad output
+        return model_predict
diff --git a/deepmd/dpmodel/model/transform_output.py b/deepmd/dpmodel/model/transform_output.py
new file mode 100644
index 0000000000..c87c79f7d4
--- /dev/null
+++ b/deepmd/dpmodel/model/transform_output.py
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Dict,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.common import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+)
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+    ModelOutputDef,
+    get_deriv_name,
+    get_reduce_name,
+)
+
+
+def fit_output_to_model_output(
+    fit_ret: Dict[str, np.ndarray],
+    fit_output_def: FittingOutputDef,
+    coord_ext: np.ndarray,
+    do_atomic_virial: bool = False,
+) -> Dict[str, np.ndarray]:
+    """Transform the output of the fitting network to
+    the model output.
+
+    """
+    model_ret = dict(fit_ret.items())
+    for kk, vv in fit_ret.items():
+        vdef = fit_output_def[kk]
+        shap = vdef.shape
+        atom_axis = -(len(shap) + 1)
+        if vdef.reduciable:
+            kk_redu = get_reduce_name(kk)
+            # cast to energy prec brefore reduction
+            model_ret[kk_redu] = np.sum(
+                vv.astype(GLOBAL_ENER_FLOAT_PRECISION), axis=atom_axis
+            )
+            if vdef.r_differentiable:
+                kk_derv_r, kk_derv_c = get_deriv_name(kk)
+                # name-holders
+                model_ret[kk_derv_r] = None
+            if vdef.c_differentiable:
+                assert vdef.r_differentiable
+                kk_derv_r, kk_derv_c = get_deriv_name(kk)
+                model_ret[kk_derv_c] = None
+    return model_ret
+
+
+def communicate_extended_output(
+    model_ret: Dict[str, np.ndarray],
+    model_output_def: ModelOutputDef,
+    mapping: np.ndarray,  # nf x nloc
+    do_atomic_virial: bool = False,
+) -> Dict[str, np.ndarray]:
+    """Transform the output of the model network defined on
+    local and ghost (extended) atoms to local atoms.
+
+    """
+    new_ret = {}
+    for kk in model_output_def.keys_outp():
+        vv = model_ret[kk]
+        vdef = model_output_def[kk]
+        new_ret[kk] = vv
+        if vdef.reduciable:
+            kk_redu = get_reduce_name(kk)
+            new_ret[kk_redu] = model_ret[kk_redu]
+            if vdef.r_differentiable:
+                kk_derv_r, kk_derv_c = get_deriv_name(kk)
+                # name holders
+                new_ret[kk_derv_r] = None
+            if vdef.c_differentiable:
+                assert vdef.r_differentiable
+                kk_derv_r, kk_derv_c = get_deriv_name(kk)
+                new_ret[kk_derv_c] = None
+                new_ret[kk_derv_c + "_redu"] = None
+                if not do_atomic_virial:
+                    # pop atomic virial, because it is not correctly calculated.
+                    new_ret.pop(kk_derv_c)
+    return new_ret
diff --git a/deepmd/dpmodel/output_def.py b/deepmd/dpmodel/output_def.py
new file mode 100644
index 0000000000..cbebb4908a
--- /dev/null
+++ b/deepmd/dpmodel/output_def.py
@@ -0,0 +1,501 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import functools
+from enum import (
+    IntEnum,
+)
+from typing import (
+    Dict,
+    List,
+    Tuple,
+)
+
+
+def check_shape(
+    shape: List[int],
+    def_shape: List[int],
+):
+    """Check if the shape satisfies the defined shape."""
+    assert len(shape) == len(def_shape)
+    if def_shape[-1] == -1:
+        if list(shape[:-1]) != def_shape[:-1]:
+            raise ValueError(f"{shape[:-1]} shape not matching def {def_shape[:-1]}")
+    else:
+        if list(shape) != def_shape:
+            raise ValueError(f"{shape} shape not matching def {def_shape}")
+
+
+def check_var(var, var_def):
+    if var_def.atomic:
+        # var.shape == [nf, nloc, *var_def.shape]
+        if len(var.shape) != len(var_def.shape) + 2:
+            raise ValueError(f"{var.shape[2:]} length not matching def {var_def.shape}")
+        check_shape(list(var.shape[2:]), var_def.shape)
+    else:
+        # var.shape == [nf, *var_def.shape]
+        if len(var.shape) != len(var_def.shape) + 1:
+            raise ValueError(f"{var.shape[1:]} length not matching def {var_def.shape}")
+        check_shape(list(var.shape[1:]), var_def.shape)
+
+
+def model_check_output(cls):
+    """Check if the output of the Model is consistent with the definition.
+
+    Two methods are assumed to be provided by the Model:
+    1. Model.output_def that gives the output definition.
+    2. Model.__call__ that defines the forward path of the model.
+
+    """
+
+    @functools.wraps(cls, updated=())
+    class wrapper(cls):
+        def __init__(
+            self,
+            *args,
+            **kwargs,
+        ):
+            super().__init__(*args, **kwargs)
+            self.md = self.output_def()
+
+        def __call__(
+            self,
+            *args,
+            **kwargs,
+        ):
+            ret = cls.__call__(self, *args, **kwargs)
+            for kk in self.md.keys_outp():
+                dd = self.md[kk]
+                check_var(ret[kk], dd)
+                if dd.reduciable:
+                    rk = get_reduce_name(kk)
+                    check_var(ret[rk], self.md[rk])
+                if dd.r_differentiable:
+                    dnr, dnc = get_deriv_name(kk)
+                    check_var(ret[dnr], self.md[dnr])
+                if dd.c_differentiable:
+                    assert dd.r_differentiable
+                    check_var(ret[dnc], self.md[dnc])
+            return ret
+
+    return wrapper
+
+
+def fitting_check_output(cls):
+    """Check if the output of the Fitting is consistent with the definition.
+
+    Two methods are assumed to be provided by the Fitting:
+    1. Fitting.output_def that gives the output definition.
+    2. Fitting.__call__ defines the forward path of the fitting.
+
+    """
+
+    @functools.wraps(cls, updated=())
+    class wrapper(cls):
+        def __init__(
+            self,
+            *args,
+            **kwargs,
+        ):
+            super().__init__(*args, **kwargs)
+            self.md = self.output_def()
+
+        def __call__(
+            self,
+            *args,
+            **kwargs,
+        ):
+            ret = cls.__call__(self, *args, **kwargs)
+            for kk in self.md.keys():
+                dd = self.md[kk]
+                check_var(ret[kk], dd)
+            return ret
+
+    return wrapper
+
+
+class OutputVariableOperation(IntEnum):
+    """Defines the operation of the output variable."""
+
+    _NONE = 0
+    """No operation."""
+    REDU = 1
+    """Reduce the output variable."""
+    DERV_R = 2
+    """Derivative w.r.t. coordinates."""
+    DERV_C = 4
+    """Derivative w.r.t. cell."""
+    _SEC_DERV_R = 8
+    """Second derivative w.r.t. coordinates."""
+    MAG = 16
+    """Magnetic output."""
+
+
+class OutputVariableCategory(IntEnum):
+    """Defines the category of the output variable."""
+
+    OUT = OutputVariableOperation._NONE
+    """Output variable. (e.g. atom energy)"""
+    REDU = OutputVariableOperation.REDU
+    """Reduced output variable. (e.g. system energy)"""
+    DERV_R = OutputVariableOperation.DERV_R
+    """Negative derivative w.r.t. coordinates. (e.g. force)"""
+    DERV_C = OutputVariableOperation.DERV_C
+    """Atomic component of the virial, see PRB 104, 224202 (2021)  """
+    DERV_C_REDU = OutputVariableOperation.DERV_C | OutputVariableOperation.REDU
+    """Virial, the transposed negative gradient with cell tensor times cell tensor, see eq 40 JCP 159, 054801 (2023). """
+    DERV_R_DERV_R = OutputVariableOperation.DERV_R | OutputVariableOperation._SEC_DERV_R
+    """Hession matrix, the second derivative w.r.t. coordinates."""
+    DERV_R_MAG = OutputVariableOperation.DERV_R | OutputVariableOperation.MAG
+    """Magnetic part of negative derivative w.r.t. coordinates. (e.g. magnetic force)"""
+    DERV_C_MAG = OutputVariableOperation.DERV_C | OutputVariableOperation.MAG
+    """Magnetic part of atomic component of the virial."""
+
+
+class OutputVariableDef:
+    """Defines the shape and other properties of the one output variable.
+
+    It is assume that the fitting network output variables for each
+    local atom. This class defines one output variable, including its
+    name, shape, reducibility and differentiability.
+
+    Parameters
+    ----------
+    name
+          Name of the output variable. Notice that the xxxx_redu,
+          xxxx_derv_c, xxxx_derv_r are reserved names that should
+          not be used to define variables.
+    shape
+          The shape of the variable. e.g. energy should be [1],
+          dipole should be [3], polarizabilty should be [3,3].
+    reduciable
+          If the variable is reduced.
+    r_differentiable
+          If the variable is differentiated with respect to coordinates
+          of atoms. Only reduciable variable are differentiable.
+          Negative derivative w.r.t. coordinates will be calcualted. (e.g. force)
+    c_differentiable
+          If the variable is differentiated with respect to the
+          cell tensor (pbc case). Only reduciable variable
+          are differentiable.
+          Virial, the transposed negative gradient with cell tensor times
+          cell tensor, will be calculated, see eq 40 JCP 159, 054801 (2023).
+    atomic : bool
+          If the variable is defined for each atom.
+    category : int
+          The category of the output variable.
+    r_hessian : bool
+          If hessian is requred
+    magnetic : bool
+          If the derivatives of variable have magnetic parts.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        shape: List[int],
+        reduciable: bool = False,
+        r_differentiable: bool = False,
+        c_differentiable: bool = False,
+        atomic: bool = True,
+        category: int = OutputVariableCategory.OUT.value,
+        r_hessian: bool = False,
+        magnetic: bool = False,
+    ):
+        self.name = name
+        self.shape = list(shape)
+        # jit doesn't support math.prod(self.shape)
+        self.output_size = 1
+        len_shape = len(self.shape)
+        for i in range(len_shape):
+            self.output_size *= self.shape[i]
+        self.atomic = atomic
+        self.reduciable = reduciable
+        self.r_differentiable = r_differentiable
+        self.c_differentiable = c_differentiable
+        if self.c_differentiable and not self.r_differentiable:
+            raise ValueError("c differentiable requires r_differentiable")
+        if self.reduciable and not self.atomic:
+            raise ValueError("a reduciable variable should be atomic")
+        self.category = category
+        self.r_hessian = r_hessian
+        self.magnetic = magnetic
+        if self.r_hessian:
+            if not self.reduciable:
+                raise ValueError("only reduciable variable can calculate hessian")
+            if not self.r_differentiable:
+                raise ValueError("only r_differentiable variable can calculate hessian")
+
+
+class FittingOutputDef:
+    """Defines the shapes and other properties of the fitting network outputs.
+
+    It is assume that the fitting network output variables for each
+    local atom. This class defines all the outputs.
+
+    Parameters
+    ----------
+    var_defs
+          List of output variable definitions.
+
+    """
+
+    def __init__(
+        self,
+        var_defs: List[OutputVariableDef],
+    ):
+        self.var_defs = {vv.name: vv for vv in var_defs}
+
+    def __getitem__(
+        self,
+        key: str,
+    ) -> OutputVariableDef:
+        return self.var_defs[key]
+
+    def get_data(self) -> Dict[str, OutputVariableDef]:
+        return self.var_defs
+
+    def keys(self):
+        return self.var_defs.keys()
+
+
+class ModelOutputDef:
+    """Defines the shapes and other properties of the model outputs.
+
+    The model reduce and differentiate fitting outputs if applicable.
+    If a variable is named by foo, then the reduced variable is called
+    foo_redu, the derivative w.r.t. coordinates is called foo_derv_r
+    and the derivative w.r.t. cell is called foo_derv_c.
+
+    Parameters
+    ----------
+    fit_defs
+          Definition for the fitting net output
+
+    """
+
+    def __init__(
+        self,
+        fit_defs: FittingOutputDef,
+    ):
+        self.def_outp = fit_defs
+        self.def_redu = do_reduce(self.def_outp.get_data())
+        self.def_derv_r, self.def_derv_c = do_derivative(self.def_outp.get_data())
+        self.def_hess_r, _ = do_derivative(self.def_derv_r)
+        self.def_derv_c_redu = do_reduce(self.def_derv_c)
+        self.def_mask = do_mask(self.def_outp.get_data())
+        self.var_defs: Dict[str, OutputVariableDef] = {}
+        for ii in [
+            self.def_outp.get_data(),
+            self.def_redu,
+            self.def_derv_c,
+            self.def_derv_r,
+            self.def_derv_c_redu,
+            self.def_hess_r,
+            self.def_mask,
+        ]:
+            self.var_defs.update(ii)
+
+    def __getitem__(
+        self,
+        key: str,
+    ) -> OutputVariableDef:
+        return self.var_defs[key]
+
+    def get_data(
+        self,
+        key: str,
+    ) -> Dict[str, OutputVariableDef]:
+        return self.var_defs
+
+    def keys(self):
+        return self.var_defs.keys()
+
+    def keys_outp(self):
+        return self.def_outp.keys()
+
+    def keys_redu(self):
+        return self.def_redu.keys()
+
+    def keys_derv_r(self):
+        return self.def_derv_r.keys()
+
+    def keys_hess_r(self):
+        return self.def_hess_r.keys()
+
+    def keys_derv_c(self):
+        return self.def_derv_c.keys()
+
+    def keys_derv_c_redu(self):
+        return self.def_derv_c_redu.keys()
+
+
+def get_reduce_name(name: str) -> str:
+    return name + "_redu"
+
+
+def get_deriv_name(name: str) -> Tuple[str, str]:
+    return name + "_derv_r", name + "_derv_c"
+
+
+def get_deriv_name_mag(name: str) -> Tuple[str, str]:
+    return name + "_derv_r_mag", name + "_derv_c_mag"
+
+
+def get_hessian_name(name: str) -> str:
+    return name + "_derv_r_derv_r"
+
+
+def apply_operation(var_def: OutputVariableDef, op: OutputVariableOperation) -> int:
+    """Apply an operation to the category of a variable definition.
+
+    Parameters
+    ----------
+    var_def : OutputVariableDef
+        The variable definition.
+    op : OutputVariableOperation
+        The operation to be applied.
+
+    Returns
+    -------
+    int
+        The new category of the variable definition.
+
+    Raises
+    ------
+    ValueError
+        If the operation has been applied to the variable definition,
+        and exceed the maximum limitation.
+    """
+    if op == OutputVariableOperation.REDU or op == OutputVariableOperation.DERV_C:
+        if check_operation_applied(var_def, op):
+            raise ValueError(f"operation {op} has been applied")
+    elif op == OutputVariableOperation.DERV_R:
+        if check_operation_applied(var_def, OutputVariableOperation.DERV_R):
+            op = OutputVariableOperation._SEC_DERV_R
+            if check_operation_applied(var_def, OutputVariableOperation._SEC_DERV_R):
+                raise ValueError(f"operation {op} has been applied twice")
+    else:
+        raise ValueError(f"operation {op} not supported")
+    return var_def.category | op.value
+
+
+def check_operation_applied(
+    var_def: OutputVariableDef, op: OutputVariableOperation
+) -> bool:
+    """Check if a operation has been applied to a variable definition.
+
+    Parameters
+    ----------
+    var_def : OutputVariableDef
+        The variable definition.
+    op : OutputVariableOperation
+        The operation to be checked.
+
+    Returns
+    -------
+    bool
+        True if the operation has been applied, False otherwise.
+    """
+    return var_def.category & op.value == op.value
+
+
+def do_reduce(
+    def_outp_data: Dict[str, OutputVariableDef],
+) -> Dict[str, OutputVariableDef]:
+    def_redu: Dict[str, OutputVariableDef] = {}
+    for kk, vv in def_outp_data.items():
+        if vv.reduciable:
+            rk = get_reduce_name(kk)
+            def_redu[rk] = OutputVariableDef(
+                rk,
+                vv.shape,
+                reduciable=False,
+                r_differentiable=False,
+                c_differentiable=False,
+                atomic=False,
+                category=apply_operation(vv, OutputVariableOperation.REDU),
+            )
+    return def_redu
+
+
+def do_mask(
+    def_outp_data: Dict[str, OutputVariableDef],
+) -> Dict[str, OutputVariableDef]:
+    def_mask: Dict[str, OutputVariableDef] = {}
+    # for deep eval when has atomic mask
+    def_mask["mask"] = OutputVariableDef(
+        name="mask",
+        shape=[1],
+        reduciable=False,
+        r_differentiable=False,
+        c_differentiable=False,
+    )
+    for kk, vv in def_outp_data.items():
+        if vv.magnetic:
+            # for deep eval when has atomic mask for magnetic atoms
+            def_mask["mask_mag"] = OutputVariableDef(
+                name="mask_mag",
+                shape=[1],
+                reduciable=False,
+                r_differentiable=False,
+                c_differentiable=False,
+            )
+    return def_mask
+
+
+def do_derivative(
+    def_outp_data: Dict[str, OutputVariableDef],
+) -> Tuple[Dict[str, OutputVariableDef], Dict[str, OutputVariableDef]]:
+    def_derv_r: Dict[str, OutputVariableDef] = {}
+    def_derv_c: Dict[str, OutputVariableDef] = {}
+    for kk, vv in def_outp_data.items():
+        rkr, rkc = get_deriv_name(kk)
+        rkrm, rkcm = get_deriv_name_mag(kk)
+        if vv.r_differentiable:
+            def_derv_r[rkr] = OutputVariableDef(
+                rkr,
+                vv.shape + [3],  # noqa: RUF005
+                reduciable=False,
+                r_differentiable=(
+                    vv.r_hessian and vv.category == OutputVariableCategory.OUT.value
+                ),
+                c_differentiable=False,
+                atomic=True,
+                category=apply_operation(vv, OutputVariableOperation.DERV_R),
+            )
+            if vv.magnetic:
+                def_derv_r[rkrm] = OutputVariableDef(
+                    rkrm,
+                    vv.shape + [3],  # noqa: RUF005
+                    reduciable=False,
+                    r_differentiable=(
+                        vv.r_hessian and vv.category == OutputVariableCategory.OUT.value
+                    ),
+                    c_differentiable=False,
+                    atomic=True,
+                    category=apply_operation(vv, OutputVariableOperation.DERV_R),
+                    magnetic=True,
+                )
+
+        if vv.c_differentiable:
+            assert vv.r_differentiable
+            def_derv_c[rkc] = OutputVariableDef(
+                rkc,
+                vv.shape + [9],  # noqa: RUF005
+                reduciable=True,
+                r_differentiable=False,
+                c_differentiable=False,
+                atomic=True,
+                category=apply_operation(vv, OutputVariableOperation.DERV_C),
+            )
+            if vv.magnetic:
+                def_derv_r[rkcm] = OutputVariableDef(
+                    rkcm,
+                    vv.shape + [9],  # noqa: RUF005
+                    reduciable=True,
+                    r_differentiable=False,
+                    c_differentiable=False,
+                    atomic=True,
+                    category=apply_operation(vv, OutputVariableOperation.DERV_C),
+                    magnetic=True,
+                )
+    return def_derv_r, def_derv_c
diff --git a/deepmd_utils/model_format/__init__.py b/deepmd/dpmodel/utils/__init__.py
similarity index 52%
rename from deepmd_utils/model_format/__init__.py
rename to deepmd/dpmodel/utils/__init__.py
index 253bca3507..60a4486d52 100644
--- a/deepmd_utils/model_format/__init__.py
+++ b/deepmd/dpmodel/utils/__init__.py
@@ -1,12 +1,11 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-from .common import (
-    DEFAULT_PRECISION,
-    PRECISION_DICT,
-    NativeOP,
-)
 from .env_mat import (
     EnvMat,
 )
+from .exclude_mask import (
+    AtomExcludeMask,
+    PairExcludeMask,
+)
 from .network import (
     EmbeddingNet,
     FittingNet,
@@ -20,21 +19,21 @@
     save_dp_model,
     traverse_model_dict,
 )
-from .output_def import (
-    FittingOutputDef,
-    ModelOutputDef,
-    OutputVariableDef,
-    fitting_check_output,
-    get_deriv_name,
-    get_reduce_name,
-    model_check_output,
+from .nlist import (
+    build_multiple_neighbor_list,
+    build_neighbor_list,
+    extend_coord_with_ghosts,
+    get_multiple_nlist_key,
+    nlist_distinguish_types,
 )
-from .se_e2_a import (
-    DescrptSeA,
+from .region import (
+    inter2phys,
+    normalize_coord,
+    phys2inter,
+    to_face_distance,
 )
 
 __all__ = [
-    "DescrptSeA",
     "EnvMat",
     "make_multilayer_network",
     "make_embedding_network",
@@ -44,17 +43,20 @@
     "NativeLayer",
     "NativeNet",
     "NetworkCollection",
-    "NativeOP",
     "load_dp_model",
     "save_dp_model",
     "traverse_model_dict",
     "PRECISION_DICT",
     "DEFAULT_PRECISION",
-    "ModelOutputDef",
-    "FittingOutputDef",
-    "OutputVariableDef",
-    "model_check_output",
-    "fitting_check_output",
-    "get_reduce_name",
-    "get_deriv_name",
+    "build_neighbor_list",
+    "nlist_distinguish_types",
+    "get_multiple_nlist_key",
+    "build_multiple_neighbor_list",
+    "extend_coord_with_ghosts",
+    "normalize_coord",
+    "inter2phys",
+    "phys2inter",
+    "to_face_distance",
+    "AtomExcludeMask",
+    "PairExcludeMask",
 ]
diff --git a/deepmd/dpmodel/utils/batch_size.py b/deepmd/dpmodel/utils/batch_size.py
new file mode 100644
index 0000000000..ec9503f3b1
--- /dev/null
+++ b/deepmd/dpmodel/utils/batch_size.py
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.utils.batch_size import AutoBatchSize as AutoBatchSizeBase
+
+
+class AutoBatchSize(AutoBatchSizeBase):
+    """Automatic batch size for NumPy."""
+
+    def is_gpu_available(self) -> bool:
+        """Check if GPU is available.
+
+        Returns
+        -------
+        bool
+            True if GPU is available
+        """
+        return False
+
+    def is_oom_error(self, e: Exception) -> bool:
+        """Check if the exception is an OOM error.
+
+        Parameters
+        ----------
+        e : Exception
+            Exception
+        """
+        # NumPy never export numpy.core._exceptions.MemoryError
+        return False
diff --git a/deepmd_utils/model_format/env_mat.py b/deepmd/dpmodel/utils/env_mat.py
similarity index 66%
rename from deepmd_utils/model_format/env_mat.py
rename to deepmd/dpmodel/utils/env_mat.py
index 7822bd7d0c..0c2ca43c40 100644
--- a/deepmd_utils/model_format/env_mat.py
+++ b/deepmd/dpmodel/utils/env_mat.py
@@ -6,7 +6,7 @@
 
 import numpy as np
 
-from .common import (
+from deepmd.dpmodel import (
     NativeOP,
 )
 
@@ -17,6 +17,8 @@ def compute_smooth_weight(
     rmax: float,
 ):
     """Compute smooth weight for descriptor elements."""
+    if rmin >= rmax:
+        raise ValueError("rmin should be less than rmax.")
     min_mask = distance <= rmin
     max_mask = distance >= rmax
     mid_mask = np.logical_not(np.logical_or(min_mask, max_mask))
@@ -30,6 +32,8 @@ def _make_env_mat(
     coord,
     rcut: float,
     ruct_smth: float,
+    radial_only: bool = False,
+    protection: float = 0.0,
 ):
     """Make smooth environment matrix."""
     nf, nloc, nnei = nlist.shape
@@ -50,11 +54,15 @@ def _make_env_mat(
     length = np.linalg.norm(diff, axis=-1, keepdims=True)
     # for index 0 nloc atom
     length = length + ~np.expand_dims(mask, -1)
-    t0 = 1 / length
-    t1 = diff / length**2
+    t0 = 1 / (length + protection)
+    t1 = diff / (length + protection) ** 2
     weight = compute_smooth_weight(length, ruct_smth, rcut)
-    env_mat_se_a = np.concatenate([t0, t1], axis=-1) * weight * np.expand_dims(mask, -1)
-    return env_mat_se_a, diff * np.expand_dims(mask, -1), weight
+    weight = weight * np.expand_dims(mask, -1)
+    if radial_only:
+        env_mat = t0 * weight
+    else:
+        env_mat = np.concatenate([t0, t1], axis=-1) * weight
+    return env_mat, diff * np.expand_dims(mask, -1), weight
 
 
 class EnvMat(NativeOP):
@@ -62,9 +70,11 @@ def __init__(
         self,
         rcut,
         rcut_smth,
+        protection: float = 0.0,
     ):
         self.rcut = rcut
         self.rcut_smth = rcut_smth
+        self.protection = protection
 
     def call(
         self,
@@ -73,6 +83,7 @@ def call(
         nlist: np.ndarray,
         davg: Optional[np.ndarray] = None,
         dstd: Optional[np.ndarray] = None,
+        radial_only: bool = False,
     ) -> Union[np.ndarray, np.ndarray]:
         """Compute the environment matrix.
 
@@ -85,18 +96,23 @@ def call(
         atype_ext
             The extended aotm types. shape: nf x nall
         davg
-            The data avg. shape: nt x nnei x 4
+            The data avg. shape: nt x nnei x (4 or 1)
         dstd
-            The inverse of data std. shape: nt x nnei x 4
+            The inverse of data std. shape: nt x nnei x (4 or 1)
+        radial_only
+            Whether to only compute radial part of the environment matrix.
+            If True, the output will be of shape nf x nloc x nnei x 1.
+            Otherwise, the output will be of shape nf x nloc x nnei x 4.
+            Default: False.
 
         Returns
         -------
         env_mat
-            The environment matrix. shape: nf x nloc x nnei x 4
+            The environment matrix. shape: nf x nloc x nnei x (4 or 1)
         switch
             The value of switch function. shape: nf x nloc x nnei
         """
-        em, sw = self._call(nlist, coord_ext)
+        em, sw = self._call(nlist, coord_ext, radial_only)
         nf, nloc, nnei = nlist.shape
         atype = atype_ext[:, :nloc]
         if davg is not None:
@@ -105,12 +121,15 @@ def call(
             em /= dstd[atype]
         return em, sw
 
-    def _call(
-        self,
-        nlist,
-        coord_ext,
-    ):
-        em, diff, ww = _make_env_mat(nlist, coord_ext, self.rcut, self.rcut_smth)
+    def _call(self, nlist, coord_ext, radial_only):
+        em, diff, ww = _make_env_mat(
+            nlist,
+            coord_ext,
+            self.rcut,
+            self.rcut_smth,
+            radial_only=radial_only,
+            protection=self.protection,
+        )
         return em, ww
 
     def serialize(
diff --git a/deepmd/dpmodel/utils/exclude_mask.py b/deepmd/dpmodel/utils/exclude_mask.py
new file mode 100644
index 0000000000..ff668b8153
--- /dev/null
+++ b/deepmd/dpmodel/utils/exclude_mask.py
@@ -0,0 +1,129 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    List,
+    Tuple,
+)
+
+import numpy as np
+
+
+class AtomExcludeMask:
+    """Computes the type exclusion mask for atoms."""
+
+    def __init__(
+        self,
+        ntypes: int,
+        exclude_types: List[int] = [],
+    ):
+        self.ntypes = ntypes
+        self.exclude_types = exclude_types
+        self.type_mask = np.array(
+            [1 if tt_i not in self.exclude_types else 0 for tt_i in range(ntypes)],
+            dtype=np.int32,
+        )
+        # (ntypes)
+        self.type_mask = self.type_mask.reshape([-1])
+
+    def get_exclude_types(self):
+        return self.exclude_types
+
+    def get_type_mask(self):
+        return self.type_mask
+
+    def build_type_exclude_mask(
+        self,
+        atype: np.ndarray,
+    ):
+        """Compute type exclusion mask for atoms.
+
+        Parameters
+        ----------
+        atype
+            The extended aotm types. shape: nf x natom
+
+        Returns
+        -------
+        mask
+            The type exclusion mask for atoms. shape: nf x natom
+            Element [ff,ii] being 0 if type(ii) is excluded,
+            otherwise being 1.
+
+        """
+        nf, natom = atype.shape
+        return self.type_mask[atype].reshape(nf, natom)
+
+
+class PairExcludeMask:
+    """Computes the type exclusion mask for atom pairs."""
+
+    def __init__(
+        self,
+        ntypes: int,
+        exclude_types: List[Tuple[int, int]] = [],
+    ):
+        self.ntypes = ntypes
+        self.exclude_types = set()
+        for tt in exclude_types:
+            assert len(tt) == 2
+            self.exclude_types.add((tt[0], tt[1]))
+            self.exclude_types.add((tt[1], tt[0]))
+        # ntypes + 1 for nlist masks
+        self.type_mask = np.array(
+            [
+                [
+                    1 if (tt_i, tt_j) not in self.exclude_types else 0
+                    for tt_i in range(ntypes + 1)
+                ]
+                for tt_j in range(ntypes + 1)
+            ],
+            dtype=np.int32,
+        )
+        # (ntypes+1 x ntypes+1)
+        self.type_mask = self.type_mask.reshape([-1])
+
+    def get_exclude_types(self):
+        return self.exclude_types
+
+    def build_type_exclude_mask(
+        self,
+        nlist: np.ndarray,
+        atype_ext: np.ndarray,
+    ):
+        """Compute type exclusion mask for atom pairs.
+
+        Parameters
+        ----------
+        nlist
+            The neighbor list. shape: nf x nloc x nnei
+        atype_ext
+            The extended aotm types. shape: nf x nall
+
+        Returns
+        -------
+        mask
+            The type exclusion mask for pair atoms of shape: nf x nloc x nnei.
+            Element [ff,ii,jj] being 0 if type(ii), type(nlist[ff,ii,jj]) is excluded,
+            otherwise being 1.
+
+        """
+        if len(self.exclude_types) == 0:
+            # safely return 1 if nothing is excluded.
+            return np.ones_like(nlist, dtype=np.int32)
+        nf, nloc, nnei = nlist.shape
+        nall = atype_ext.shape[1]
+        # add virtual atom of type ntypes. nf x nall+1
+        ae = np.concatenate(
+            [atype_ext, self.ntypes * np.ones([nf, 1], dtype=atype_ext.dtype)], axis=-1
+        )
+        type_i = atype_ext[:, :nloc].reshape(nf, nloc) * (self.ntypes + 1)
+        # nf x nloc x nnei
+        index = np.where(nlist == -1, nall, nlist).reshape(nf, nloc * nnei)
+        type_j = np.take_along_axis(ae, index, axis=1).reshape(nf, nloc, nnei)
+        type_ij = type_i[:, :, None] + type_j
+        # nf x (nloc x nnei)
+        type_ij = type_ij.reshape(nf, nloc * nnei)
+        mask = self.type_mask[type_ij].reshape(nf, nloc, nnei)
+        return mask
+
+    def __contains__(self, item):
+        return item in self.exclude_types
diff --git a/deepmd/dpmodel/utils/neighbor_stat.py b/deepmd/dpmodel/utils/neighbor_stat.py
new file mode 100644
index 0000000000..96b39d20ad
--- /dev/null
+++ b/deepmd/dpmodel/utils/neighbor_stat.py
@@ -0,0 +1,154 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Iterator,
+    Optional,
+    Tuple,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.common import (
+    NativeOP,
+)
+from deepmd.dpmodel.utils.nlist import (
+    extend_coord_with_ghosts,
+)
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+)
+from deepmd.utils.neighbor_stat import NeighborStat as BaseNeighborStat
+
+
+class NeighborStatOP(NativeOP):
+    """Class for getting neighbor statics data information.
+
+    Parameters
+    ----------
+    ntypes
+        The num of atom types
+    rcut
+        The cut-off radius
+    mixed_types : bool, optional
+        If True, treat all types as a single type.
+    """
+
+    def __init__(
+        self,
+        ntypes: int,
+        rcut: float,
+        mixed_types: bool,
+    ) -> None:
+        self.rcut = rcut
+        self.ntypes = ntypes
+        self.mixed_types = mixed_types
+
+    def call(
+        self,
+        coord: np.ndarray,
+        atype: np.ndarray,
+        cell: Optional[np.ndarray],
+    ) -> Tuple[float, np.ndarray]:
+        """Calculate the neareest neighbor distance between atoms, maximum nbor size of
+        atoms and the output data range of the environment matrix.
+
+        Parameters
+        ----------
+        coord
+            The coordinates of atoms.
+        atype
+            The atom types.
+        cell
+            The cell.
+
+        Returns
+        -------
+        float
+            The minimal squared distance between two atoms
+        np.ndarray
+            The maximal number of neighbors
+        """
+        nframes = coord.shape[0]
+        coord = coord.reshape(nframes, -1, 3)
+        nloc = coord.shape[1]
+        coord = coord.reshape(nframes, nloc * 3)
+        extend_coord, extend_atype, _ = extend_coord_with_ghosts(
+            coord, atype, cell, self.rcut
+        )
+
+        coord1 = extend_coord.reshape(nframes, -1)
+        nall = coord1.shape[1] // 3
+        coord0 = coord1[:, : nloc * 3]
+        diff = (
+            coord1.reshape([nframes, -1, 3])[:, None, :, :]
+            - coord0.reshape([nframes, -1, 3])[:, :, None, :]
+        )
+        assert list(diff.shape) == [nframes, nloc, nall, 3]
+        # remove the diagonal elements
+        mask = np.eye(nloc, nall, dtype=bool)
+        diff[:, mask] = np.inf
+        rr2 = np.sum(np.square(diff), axis=-1)
+        min_rr2 = np.min(rr2, axis=-1)
+        # count the number of neighbors
+        if not self.mixed_types:
+            mask = rr2 < self.rcut**2
+            nnei = np.zeros((nframes, nloc, self.ntypes), dtype=int)
+            for ii in range(self.ntypes):
+                nnei[:, :, ii] = np.sum(
+                    mask & (extend_atype == ii)[:, None, :], axis=-1
+                )
+        else:
+            mask = rr2 < self.rcut**2
+            # virtual type (<0) are not counted
+            nnei = np.sum(mask & (extend_atype >= 0)[:, None, :], axis=-1).reshape(
+                nframes, nloc, 1
+            )
+        max_nnei = np.max(nnei, axis=1)
+        return min_rr2, max_nnei
+
+
+class NeighborStat(BaseNeighborStat):
+    """Neighbor statistics using pure NumPy.
+
+    Parameters
+    ----------
+    ntypes : int
+        The num of atom types
+    rcut : float
+        The cut-off radius
+    mixed_type : bool, optional, default=False
+        Treat all types as a single type.
+    """
+
+    def __init__(
+        self,
+        ntypes: int,
+        rcut: float,
+        mixed_type: bool = False,
+    ) -> None:
+        super().__init__(ntypes, rcut, mixed_type)
+        self.op = NeighborStatOP(ntypes, rcut, mixed_type)
+
+    def iterator(
+        self, data: DeepmdDataSystem
+    ) -> Iterator[Tuple[np.ndarray, float, str]]:
+        """Abstract method for producing data.
+
+        Yields
+        ------
+        np.ndarray
+            The maximal number of neighbors
+        float
+            The squared minimal distance between two atoms
+        str
+            The directory of the data system
+        """
+        for ii in range(len(data.system_dirs)):
+            for jj in data.data_systems[ii].dirs:
+                data_set = data.data_systems[ii]
+                data_set_data = data_set._load_set(jj)
+                minrr2, max_nnei = self.op(
+                    data_set_data["coord"],
+                    data_set_data["type"],
+                    data_set_data["box"] if data_set.pbc else None,
+                )
+                yield np.max(max_nnei, axis=0), np.min(minrr2), jj
diff --git a/deepmd_utils/model_format/network.py b/deepmd/dpmodel/utils/network.py
similarity index 83%
rename from deepmd_utils/model_format/network.py
rename to deepmd/dpmodel/utils/network.py
index 71ed659787..661358ed70 100644
--- a/deepmd_utils/model_format/network.py
+++ b/deepmd/dpmodel/utils/network.py
@@ -3,10 +3,15 @@
 
 See issue #2982 for more information.
 """
+
 import copy
 import itertools
 import json
+from datetime import (
+    datetime,
+)
 from typing import (
+    Callable,
     ClassVar,
     Dict,
     List,
@@ -17,12 +22,16 @@
 import h5py
 import numpy as np
 
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
 try:
-    from deepmd_utils._version import version as __version__
+    from deepmd._version import version as __version__
 except ImportError:
     __version__ = "unknown"
 
-from .common import (
+from deepmd.dpmodel import (
     DEFAULT_PRECISION,
     PRECISION_DICT,
     NativeOP,
@@ -54,6 +63,8 @@ def traverse_model_dict(model_obj, callback: callable, is_variable: bool = False
     elif isinstance(model_obj, list):
         for ii, vv in enumerate(model_obj):
             model_obj[ii] = traverse_model_dict(vv, callback, is_variable=is_variable)
+    elif model_obj is None:
+        return model_obj
     elif is_variable:
         model_obj = callback(model_obj)
     return model_obj
@@ -79,7 +90,9 @@ def __call__(self):
         return self.count
 
 
-def save_dp_model(filename: str, model_dict: dict, extra_info: Optional[dict] = None):
+# TODO: move save_dp_model and load_dp_model to a seperated module
+# should be moved to otherwhere...
+def save_dp_model(filename: str, model_dict: dict) -> None:
     """Save a DP model to a file in the native format.
 
     Parameters
@@ -88,15 +101,9 @@ def save_dp_model(filename: str, model_dict: dict, extra_info: Optional[dict] =
         The filename to save to.
     model_dict : dict
         The model dict to save.
-    extra_info : dict, optional
-        Extra meta information to save.
     """
     model_dict = model_dict.copy()
     variable_counter = Counter()
-    if extra_info is not None:
-        extra_info = extra_info.copy()
-    else:
-        extra_info = {}
     with h5py.File(filename, "w") as f:
         model_dict = traverse_model_dict(
             model_dict,
@@ -105,10 +112,11 @@ def save_dp_model(filename: str, model_dict: dict, extra_info: Optional[dict] =
             ).name,
         )
         save_dict = {
-            "model": model_dict,
             "software": "deepmd-kit",
             "version": __version__,
-            **extra_info,
+            # use UTC+0 time
+            "time": str(datetime.utcnow()),
+            **model_dict,
         }
         f.attrs["json"] = json.dumps(save_dict, separators=(",", ":"))
 
@@ -161,6 +169,8 @@ def __init__(
     ) -> None:
         prec = PRECISION_DICT[precision.lower()]
         self.precision = precision
+        # only use_timestep when skip connection is established.
+        use_timestep = use_timestep and (num_out == num_in or num_out == num_in * 2)
         rng = np.random.default_rng()
         self.w = rng.normal(size=(num_in, num_out)).astype(prec)
         self.b = rng.normal(size=(num_out,)).astype(prec) if bias else None
@@ -186,11 +196,14 @@ def serialize(self) -> dict:
             "idt": self.idt,
         }
         return {
+            "@class": "Layer",
+            "@version": 1,
             "bias": self.b is not None,
             "use_timestep": self.idt is not None,
             "activation_function": self.activation_function,
             "resnet": self.resnet,
-            "precision": self.precision,
+            # make deterministic
+            "precision": np.dtype(PRECISION_DICT[self.precision]).name,
             "@variables": data,
         }
 
@@ -204,6 +217,8 @@ def deserialize(cls, data: dict) -> "NativeLayer":
             The dict to deserialize from.
         """
         data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data.pop("@class", None)
         variables = data.pop("@variables")
         assert variables["w"] is not None and len(variables["w"].shape) == 2
         num_in, num_out = variables["w"].shape
@@ -217,6 +232,10 @@ def deserialize(cls, data: dict) -> "NativeLayer":
             variables.get("b", None),
             variables.get("idt", None),
         )
+        if obj.b is not None:
+            obj.b = obj.b.ravel()
+        if obj.idt is not None:
+            obj.idt = obj.idt.ravel()
         obj.check_shape_consistency()
         return obj
 
@@ -297,14 +316,7 @@ def call(self, x: np.ndarray) -> np.ndarray:
         """
         if self.w is None or self.activation_function is None:
             raise ValueError("w, b, and activation_function must be set")
-        if self.activation_function == "tanh":
-            fn = np.tanh
-        elif self.activation_function.lower() == "none":
-
-            def fn(x):
-                return x
-        else:
-            raise NotImplementedError(self.activation_function)
+        fn = get_activation_fn(self.activation_function)
         y = (
             np.matmul(x, self.w) + self.b
             if self.b is not None
@@ -320,6 +332,55 @@ def fn(x):
         return y
 
 
+def get_activation_fn(activation_function: str) -> Callable[[np.ndarray], np.ndarray]:
+    activation_function = activation_function.lower()
+    if activation_function == "tanh":
+        return np.tanh
+    elif activation_function == "relu":
+
+        def fn(x):
+            # https://stackoverflow.com/a/47936476/9567349
+            return x * (x > 0)
+
+        return fn
+    elif activation_function in ("gelu", "gelu_tf"):
+
+        def fn(x):
+            # generated by GitHub Copilot
+            return 0.5 * x * (1 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x**3)))
+
+        return fn
+    elif activation_function == "relu6":
+
+        def fn(x):
+            # generated by GitHub Copilot
+            return np.minimum(np.maximum(x, 0), 6)
+
+        return fn
+    elif activation_function == "softplus":
+
+        def fn(x):
+            # generated by GitHub Copilot
+            return np.log(1 + np.exp(x))
+
+        return fn
+    elif activation_function == "sigmoid":
+
+        def fn(x):
+            # generated by GitHub Copilot
+            return 1 / (1 + np.exp(-x))
+
+        return fn
+    elif activation_function.lower() in ("none", "linear"):
+
+        def fn(x):
+            return x
+
+        return fn
+    else:
+        raise NotImplementedError(activation_function)
+
+
 def make_multilayer_network(T_NetworkLayer, ModuleBase):
     class NN(ModuleBase):
         """Native representation of a neural network.
@@ -345,7 +406,11 @@ def serialize(self) -> dict:
             dict
                 The serialized network.
             """
-            return {"layers": [layer.serialize() for layer in self.layers]}
+            return {
+                "@class": "NN",
+                "@version": 1,
+                "layers": [layer.serialize() for layer in self.layers],
+            }
 
         @classmethod
         def deserialize(cls, data: dict) -> "NN":
@@ -356,6 +421,9 @@ def deserialize(cls, data: dict) -> "NN":
             data : dict
                 The dict to deserialize from.
             """
+            data = data.copy()
+            check_version_compatibility(data.pop("@version", 1), 1, 1)
+            data.pop("@class", None)
             return cls(data["layers"])
 
         def __getitem__(self, key):
@@ -392,6 +460,15 @@ def call(self, x):
                 x = layer(x)
             return x
 
+        def clear(self):
+            """Clear the network parameters to zero."""
+            for layer in self.layers:
+                layer.w.fill(0.0)
+                if layer.b is not None:
+                    layer.b.fill(0.0)
+                if layer.idt is not None:
+                    layer.idt.fill(0.0)
+
     return NN
 
 
@@ -458,11 +535,14 @@ def serialize(self) -> dict:
                 The serialized network.
             """
             return {
+                "@class": "EmbeddingNetwork",
+                "@version": 1,
                 "in_dim": self.in_dim,
                 "neuron": self.neuron.copy(),
                 "activation_function": self.activation_function,
                 "resnet_dt": self.resnet_dt,
-                "precision": self.precision,
+                # make deterministic
+                "precision": np.dtype(PRECISION_DICT[self.precision]).name,
                 "layers": [layer.serialize() for layer in self.layers],
             }
 
@@ -476,6 +556,8 @@ def deserialize(cls, data: dict) -> "EmbeddingNet":
                 The dict to deserialize from.
             """
             data = copy.deepcopy(data)
+            check_version_compatibility(data.pop("@version", 1), 1, 1)
+            data.pop("@class", None)
             layers = data.pop("layers")
             obj = cls(**data)
             super(EN, obj).__init__(layers)
@@ -528,7 +610,8 @@ def __init__(
                 resnet_dt=resnet_dt,
                 precision=precision,
             )
-            i_in, i_ot = neuron[-1], out_dim
+            i_in = neuron[-1] if len(neuron) > 0 else in_dim
+            i_ot = out_dim
             self.layers.append(
                 T_NetworkLayer(
                     i_in,
@@ -552,6 +635,8 @@ def serialize(self) -> dict:
                 The serialized network.
             """
             return {
+                "@class": "FittingNetwork",
+                "@version": 1,
                 "in_dim": self.in_dim,
                 "out_dim": self.out_dim,
                 "neuron": self.neuron.copy(),
@@ -572,6 +657,8 @@ def deserialize(cls, data: dict) -> "FittingNet":
                 The dict to deserialize from.
             """
             data = copy.deepcopy(data)
+            check_version_compatibility(data.pop("@version", 1), 1, 1)
+            data.pop("@class", None)
             layers = data.pop("layers")
             obj = cls(**data)
             T_Network.__init__(obj, layers)
@@ -674,6 +761,8 @@ def serialize(self) -> dict:
         network_type_map_inv = {v: k for k, v in self.NETWORK_TYPE_MAP.items()}
         network_type_name = network_type_map_inv[self.network_type]
         return {
+            "@class": "NetworkCollection",
+            "@version": 1,
             "ndim": self.ndim,
             "ntypes": self.ntypes,
             "network_type": network_type_name,
@@ -689,4 +778,7 @@ def deserialize(cls, data: dict) -> "NetworkCollection":
         data : dict
             The dict to deserialize from.
         """
+        data = data.copy()
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data.pop("@class", None)
         return cls(**data)
diff --git a/deepmd/dpmodel/utils/nlist.py b/deepmd/dpmodel/utils/nlist.py
new file mode 100644
index 0000000000..ca8b18023b
--- /dev/null
+++ b/deepmd/dpmodel/utils/nlist.py
@@ -0,0 +1,264 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Dict,
+    List,
+    Optional,
+    Union,
+)
+
+import numpy as np
+
+from .region import (
+    to_face_distance,
+)
+
+
+## translated from torch implemantation by chatgpt
+def build_neighbor_list(
+    coord: np.ndarray,
+    atype: np.ndarray,
+    nloc: int,
+    rcut: float,
+    sel: Union[int, List[int]],
+    distinguish_types: bool = True,
+) -> np.ndarray:
+    """Build neightbor list for a single frame. keeps nsel neighbors.
+
+    Parameters
+    ----------
+    coord : np.ndarray
+        exptended coordinates of shape [batch_size, nall x 3]
+    atype : np.ndarray
+        extended atomic types of shape [batch_size, nall]
+        type < 0 the atom is treat as virtual atoms.
+    nloc : int
+        number of local atoms.
+    rcut : float
+        cut-off radius
+    sel : int or List[int]
+        maximal number of neighbors (of each type).
+        if distinguish_types==True, nsel should be list and
+        the length of nsel should be equal to number of
+        types.
+    distinguish_types : bool
+        distinguish different types.
+
+    Returns
+    -------
+    neighbor_list : np.ndarray
+        Neighbor list of shape [batch_size, nloc, nsel], the neighbors
+        are stored in an ascending order. If the number of
+        neighbors is less than nsel, the positions are masked
+        with -1. The neighbor list of an atom looks like
+        |------ nsel ------|
+        xx xx xx xx -1 -1 -1
+        if distinguish_types==True and we have two types
+        |---- nsel[0] -----| |---- nsel[1] -----|
+        xx xx xx xx -1 -1 -1 xx xx xx -1 -1 -1 -1
+        For virtual atoms all neighboring positions are filled with -1.
+
+    """
+    batch_size = coord.shape[0]
+    coord = coord.reshape(batch_size, -1)
+    nall = coord.shape[1] // 3
+    # fill virtual atoms with large coords so they are not neighbors of any
+    # real atom.
+    xmax = np.max(coord) + 2.0 * rcut
+    # nf x nall
+    is_vir = atype < 0
+    coord1 = np.where(is_vir[:, :, None], xmax, coord.reshape(-1, nall, 3)).reshape(
+        -1, nall * 3
+    )
+    if isinstance(sel, int):
+        sel = [sel]
+    nsel = sum(sel)
+    coord0 = coord1[:, : nloc * 3]
+    diff = (
+        coord1.reshape([batch_size, -1, 3])[:, None, :, :]
+        - coord0.reshape([batch_size, -1, 3])[:, :, None, :]
+    )
+    assert list(diff.shape) == [batch_size, nloc, nall, 3]
+    rr = np.linalg.norm(diff, axis=-1)
+    # if central atom has two zero distances, sorting sometimes can not exclude itself
+    rr -= np.eye(nloc, nall, dtype=diff.dtype)[np.newaxis, :, :]
+    nlist = np.argsort(rr, axis=-1)
+    rr = np.sort(rr, axis=-1)
+    rr = rr[:, :, 1:]
+    nlist = nlist[:, :, 1:]
+    nnei = rr.shape[2]
+    if nsel <= nnei:
+        rr = rr[:, :, :nsel]
+        nlist = nlist[:, :, :nsel]
+    else:
+        rr = np.concatenate(
+            [rr, np.ones([batch_size, nloc, nsel - nnei]) + rcut], axis=-1
+        )
+        nlist = np.concatenate(
+            [nlist, np.ones([batch_size, nloc, nsel - nnei], dtype=nlist.dtype)],
+            axis=-1,
+        )
+    assert list(nlist.shape) == [batch_size, nloc, nsel]
+    nlist = np.where(np.logical_or((rr > rcut), is_vir[:, :nloc, None]), -1, nlist)
+
+    if distinguish_types:
+        return nlist_distinguish_types(nlist, atype, sel)
+    else:
+        return nlist
+
+
+def nlist_distinguish_types(
+    nlist: np.ndarray,
+    atype: np.ndarray,
+    sel: List[int],
+):
+    """Given a nlist that does not distinguish atom types, return a nlist that
+    distinguish atom types.
+
+    """
+    nf, nloc, _ = nlist.shape
+    ret_nlist = []
+    tmp_atype = np.tile(atype[:, None], [1, nloc, 1])
+    mask = nlist == -1
+    tnlist_0 = nlist.copy()
+    tnlist_0[mask] = 0
+    tnlist = np.take_along_axis(tmp_atype, tnlist_0, axis=2).squeeze()
+    tnlist = np.where(mask, -1, tnlist)
+    snsel = tnlist.shape[2]
+    for ii, ss in enumerate(sel):
+        pick_mask = (tnlist == ii).astype(np.int32)
+        sorted_indices = np.argsort(-pick_mask, kind="stable", axis=-1)
+        pick_mask_sorted = -np.sort(-pick_mask, axis=-1)
+        inlist = np.take_along_axis(nlist, sorted_indices, axis=2)
+        inlist = np.where(~pick_mask_sorted.astype(bool), -1, inlist)
+        ret_nlist.append(np.split(inlist, [ss, snsel - ss], axis=-1)[0])
+    ret = np.concatenate(ret_nlist, axis=-1)
+    return ret
+
+
+def get_multiple_nlist_key(rcut: float, nsel: int) -> str:
+    return str(rcut) + "_" + str(nsel)
+
+
+## translated from torch implemantation by chatgpt
+def build_multiple_neighbor_list(
+    coord: np.ndarray,
+    nlist: np.ndarray,
+    rcuts: List[float],
+    nsels: List[int],
+) -> Dict[str, np.ndarray]:
+    """Input one neighbor list, and produce multiple neighbor lists with
+    different cutoff radius and numbers of selection out of it.  The
+    required rcuts and nsels should be smaller or equal to the input nlist.
+
+    Parameters
+    ----------
+    coord : np.ndarray
+        exptended coordinates of shape [batch_size, nall x 3]
+    nlist : np.ndarray
+        Neighbor list of shape [batch_size, nloc, nsel], the neighbors
+        should be stored in an ascending order.
+    rcuts : List[float]
+        list of cut-off radius in ascending order.
+    nsels : List[int]
+        maximal number of neighbors in ascending order.
+
+    Returns
+    -------
+    nlist_dict : Dict[str, np.ndarray]
+        A dict of nlists, key given by get_multiple_nlist_key(rc, nsel)
+        value being the corresponding nlist.
+
+    """
+    assert len(rcuts) == len(nsels)
+    if len(rcuts) == 0:
+        return {}
+    nb, nloc, nsel = nlist.shape
+    if nsel < nsels[-1]:
+        pad = -1 * np.ones((nb, nloc, nsels[-1] - nsel), dtype=nlist.dtype)
+        nlist = np.concatenate([nlist, pad], axis=-1)
+        nsel = nsels[-1]
+    coord1 = coord.reshape(nb, -1, 3)
+    nall = coord1.shape[1]
+    coord0 = coord1[:, :nloc, :]
+    nlist_mask = nlist == -1
+    tnlist_0 = nlist.copy()
+    tnlist_0[nlist_mask] = 0
+    index = np.tile(tnlist_0.reshape(nb, nloc * nsel, 1), [1, 1, 3])
+    coord2 = np.take_along_axis(coord1, index, axis=1).reshape(nb, nloc, nsel, 3)
+    diff = coord2 - coord0[:, :, None, :]
+    rr = np.linalg.norm(diff, axis=-1)
+    rr = np.where(nlist_mask, float("inf"), rr)
+    nlist0 = nlist
+    ret = {}
+    for rc, ns in zip(rcuts[::-1], nsels[::-1]):
+        tnlist_1 = np.copy(nlist0[:, :, :ns])
+        tnlist_1[rr[:, :, :ns] > rc] = -1
+        ret[get_multiple_nlist_key(rc, ns)] = tnlist_1
+    return ret
+
+
+## translated from torch implemantation by chatgpt
+def extend_coord_with_ghosts(
+    coord: np.ndarray,
+    atype: np.ndarray,
+    cell: Optional[np.ndarray],
+    rcut: float,
+):
+    """Extend the coordinates of the atoms by appending peridoc images.
+    The number of images is large enough to ensure all the neighbors
+    within rcut are appended.
+
+    Parameters
+    ----------
+    coord : np.ndarray
+        original coordinates of shape [-1, nloc*3].
+    atype : np.ndarray
+        atom type of shape [-1, nloc].
+    cell : np.ndarray
+        simulation cell tensor of shape [-1, 9].
+    rcut : float
+        the cutoff radius
+
+    Returns
+    -------
+    extended_coord: np.ndarray
+        extended coordinates of shape [-1, nall*3].
+    extended_atype: np.ndarray
+        extended atom type of shape [-1, nall].
+    index_mapping: np.ndarray
+        maping extended index to the local index
+
+    """
+    nf, nloc = atype.shape
+    aidx = np.tile(np.arange(nloc)[np.newaxis, :], (nf, 1))
+    if cell is None:
+        nall = nloc
+        extend_coord = coord.copy()
+        extend_atype = atype.copy()
+        extend_aidx = aidx.copy()
+    else:
+        coord = coord.reshape((nf, nloc, 3))
+        cell = cell.reshape((nf, 3, 3))
+        to_face = to_face_distance(cell)
+        nbuff = np.ceil(rcut / to_face).astype(int)
+        nbuff = np.max(nbuff, axis=0)
+        xi = np.arange(-nbuff[0], nbuff[0] + 1, 1)
+        yi = np.arange(-nbuff[1], nbuff[1] + 1, 1)
+        zi = np.arange(-nbuff[2], nbuff[2] + 1, 1)
+        xyz = np.outer(xi, np.array([1, 0, 0]))[:, np.newaxis, np.newaxis, :]
+        xyz = xyz + np.outer(yi, np.array([0, 1, 0]))[np.newaxis, :, np.newaxis, :]
+        xyz = xyz + np.outer(zi, np.array([0, 0, 1]))[np.newaxis, np.newaxis, :, :]
+        xyz = xyz.reshape(-1, 3)
+        shift_idx = xyz[np.argsort(np.linalg.norm(xyz, axis=1))]
+        ns, _ = shift_idx.shape
+        nall = ns * nloc
+        shift_vec = np.einsum("sd,fdk->fsk", shift_idx, cell)
+        extend_coord = coord[:, None, :, :] + shift_vec[:, :, None, :]
+        extend_atype = np.tile(atype[:, :, np.newaxis], (1, ns, 1))
+        extend_aidx = np.tile(aidx[:, :, np.newaxis], (1, ns, 1))
+
+    return (
+        extend_coord.reshape((nf, nall * 3)),
+        extend_atype.reshape((nf, nall)),
+        extend_aidx.reshape((nf, nall)),
+    )
diff --git a/deepmd/dpmodel/utils/region.py b/deepmd/dpmodel/utils/region.py
new file mode 100644
index 0000000000..ddbc4b29b8
--- /dev/null
+++ b/deepmd/dpmodel/utils/region.py
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import numpy as np
+
+
+def phys2inter(
+    coord: np.ndarray,
+    cell: np.ndarray,
+) -> np.ndarray:
+    """Convert physical coordinates to internal(direct) coordinates.
+
+    Parameters
+    ----------
+    coord : np.ndarray
+        physical coordinates of shape [*, na, 3].
+    cell : np.ndarray
+        simulation cell tensor of shape [*, 3, 3].
+
+    Returns
+    -------
+    inter_coord: np.ndarray
+        the internal coordinates
+
+    """
+    rec_cell = np.linalg.inv(cell)
+    return np.matmul(coord, rec_cell)
+
+
+def inter2phys(
+    coord: np.ndarray,
+    cell: np.ndarray,
+) -> np.ndarray:
+    """Convert internal(direct) coordinates to physical coordinates.
+
+    Parameters
+    ----------
+    coord : np.ndarray
+        internal coordinates of shape [*, na, 3].
+    cell : np.ndarray
+        simulation cell tensor of shape [*, 3, 3].
+
+    Returns
+    -------
+    phys_coord: np.ndarray
+        the physical coordinates
+
+    """
+    return np.matmul(coord, cell)
+
+
+def normalize_coord(
+    coord: np.ndarray,
+    cell: np.ndarray,
+) -> np.ndarray:
+    """Apply PBC according to the atomic coordinates.
+
+    Parameters
+    ----------
+    coord : np.ndarray
+        orignal coordinates of shape [*, na, 3].
+    cell : np.ndarray
+        simulation cell shape [*, 3, 3].
+
+    Returns
+    -------
+    wrapped_coord: np.ndarray
+        wrapped coordinates of shape [*, na, 3].
+
+    """
+    icoord = phys2inter(coord, cell)
+    icoord = np.remainder(icoord, 1.0)
+    return inter2phys(icoord, cell)
+
+
+def to_face_distance(
+    cell: np.ndarray,
+) -> np.ndarray:
+    """Compute the to-face-distance of the simulation cell.
+
+    Parameters
+    ----------
+    cell : np.ndarray
+        simulation cell tensor of shape [*, 3, 3].
+
+    Returns
+    -------
+    dist: np.ndarray
+        the to face distances of shape [*, 3]
+
+    """
+    cshape = cell.shape
+    dist = b_to_face_distance(cell.reshape([-1, 3, 3]))
+    return dist.reshape(list(cshape[:-2]) + [3])  # noqa:RUF005
+
+
+def b_to_face_distance(cell):
+    volume = np.linalg.det(cell)
+    c_yz = np.cross(cell[:, 1], cell[:, 2], axis=-1)
+    _h2yz = volume / np.linalg.norm(c_yz, axis=-1)
+    c_zx = np.cross(cell[:, 2], cell[:, 0], axis=-1)
+    _h2zx = volume / np.linalg.norm(c_zx, axis=-1)
+    c_xy = np.cross(cell[:, 0], cell[:, 1], axis=-1)
+    _h2xy = volume / np.linalg.norm(c_xy, axis=-1)
+    return np.stack([_h2yz, _h2zx, _h2xy], axis=1)
diff --git a/deepmd/dpmodel/utils/type_embed.py b/deepmd/dpmodel/utils/type_embed.py
new file mode 100644
index 0000000000..7527c122f3
--- /dev/null
+++ b/deepmd/dpmodel/utils/type_embed.py
@@ -0,0 +1,124 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    List,
+    Optional,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.common import (
+    PRECISION_DICT,
+    NativeOP,
+)
+from deepmd.dpmodel.utils.network import (
+    EmbeddingNet,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+
+class TypeEmbedNet(NativeOP):
+    r"""Type embedding network.
+
+    Parameters
+    ----------
+    ntypes : int
+        Number of atom types
+    neuron : list[int]
+        Number of neurons in each hidden layers of the embedding net
+    resnet_dt
+        Time-step `dt` in the resnet construction: y = x + dt * \phi (Wx + b)
+    activation_function
+        The activation function in the embedding net. Supported options are |ACTIVATION_FN|
+    precision
+        The precision of the embedding net parameters. Supported options are |PRECISION|
+    trainable
+        If the weights of embedding net are trainable.
+    seed
+        Random seed for initializing the network parameters.
+    padding
+        Concat the zero padding to the output, as the default embedding of empty type.
+    """
+
+    def __init__(
+        self,
+        *,
+        ntypes: int,
+        neuron: List[int],
+        resnet_dt: bool = False,
+        activation_function: str = "tanh",
+        precision: str = "default",
+        trainable: bool = True,
+        seed: Optional[int] = None,
+        padding: bool = False,
+    ) -> None:
+        self.ntypes = ntypes
+        self.neuron = neuron
+        self.seed = seed
+        self.resnet_dt = resnet_dt
+        self.precision = precision
+        self.activation_function = str(activation_function)
+        self.trainable = trainable
+        self.padding = padding
+        self.embedding_net = EmbeddingNet(
+            ntypes,
+            self.neuron,
+            self.activation_function,
+            self.resnet_dt,
+            self.precision,
+        )
+
+    def call(self) -> np.ndarray:
+        """Compute the type embedding network."""
+        embed = self.embedding_net(
+            np.eye(self.ntypes, dtype=PRECISION_DICT[self.precision])
+        )
+        if self.padding:
+            embed = np.pad(embed, ((0, 1), (0, 0)), mode="constant")
+        return embed
+
+    @classmethod
+    def deserialize(cls, data: dict):
+        """Deserialize the model.
+
+        Parameters
+        ----------
+        data : dict
+            The serialized data
+
+        Returns
+        -------
+        Model
+            The deserialized model
+        """
+        data = data.copy()
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data_cls = data.pop("@class")
+        assert data_cls == "TypeEmbedNet", f"Invalid class {data_cls}"
+
+        embedding_net = EmbeddingNet.deserialize(data.pop("embedding"))
+        type_embedding_net = cls(**data)
+        type_embedding_net.embedding_net = embedding_net
+        return type_embedding_net
+
+    def serialize(self) -> dict:
+        """Serialize the model.
+
+        Returns
+        -------
+        dict
+            The serialized data
+        """
+        return {
+            "@class": "TypeEmbedNet",
+            "@version": 1,
+            "ntypes": self.ntypes,
+            "neuron": self.neuron,
+            "resnet_dt": self.resnet_dt,
+            "precision": self.precision,
+            "activation_function": self.activation_function,
+            "trainable": self.trainable,
+            "padding": self.padding,
+            "embedding": self.embedding_net.serialize(),
+        }
diff --git a/deepmd/dpmodel/utils/update_sel.py b/deepmd/dpmodel/utils/update_sel.py
new file mode 100644
index 0000000000..48463b5743
--- /dev/null
+++ b/deepmd/dpmodel/utils/update_sel.py
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Type,
+)
+
+from deepmd.dpmodel.utils.neighbor_stat import (
+    NeighborStat,
+)
+from deepmd.utils.update_sel import (
+    BaseUpdateSel,
+)
+
+
+class UpdateSel(BaseUpdateSel):
+    @property
+    def neighbor_stat(self) -> Type[NeighborStat]:
+        return NeighborStat
+
+    def hook(self, min_nbor_dist, max_nbor_size):
+        # TODO: save to the model in UpdateSel.hook
+        pass
diff --git a/deepmd/driver.py b/deepmd/driver.py
new file mode 100644
index 0000000000..0b48f2ac84
--- /dev/null
+++ b/deepmd/driver.py
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""dpdata driver."""
+
+# Derived from https://github.com/deepmodeling/dpdata/blob/18a0ed5ebced8b1f6887038883d46f31ae9990a4/dpdata/plugins/deepmd.py#L361-L443
+# under LGPL-3.0-or-later license.
+# The original deepmd driver maintained in the dpdata package will be overriden.
+# The class in the dpdata package needs to handle different situations for v1 and v2 interface,
+# which is too complex with the development of deepmd-kit.
+# So, it will be a good idea to ship it with DeePMD-kit itself.
+import dpdata
+from dpdata.utils import (
+    sort_atom_names,
+)
+
+
+@dpdata.driver.Driver.register("dp")
+@dpdata.driver.Driver.register("deepmd")
+@dpdata.driver.Driver.register("deepmd-kit")
+class DPDriver(dpdata.driver.Driver):
+    """DeePMD-kit driver.
+
+    Parameters
+    ----------
+    dp : deepmd.DeepPot or str
+        The deepmd-kit potential class or the filename of the model.
+
+    Examples
+    --------
+    >>> DPDriver("frozen_model.pb")
+    """
+
+    def __init__(self, dp: str) -> None:
+        from deepmd.infer.deep_pot import (
+            DeepPot,
+        )
+
+        if not isinstance(dp, DeepPot):
+            self.dp = DeepPot(dp, auto_batch_size=True)
+        else:
+            self.dp = dp
+
+    def label(self, data: dict) -> dict:
+        """Label a system data by deepmd-kit. Returns new data with energy, forces, and virials.
+
+        Parameters
+        ----------
+        data : dict
+            data with coordinates and atom types
+
+        Returns
+        -------
+        dict
+            labeled data with energies and forces
+        """
+        nframes = data["coords"].shape[0]
+        natoms = data["coords"].shape[1]
+        type_map = self.dp.get_type_map()
+        # important: dpdata type_map may not be the same as the model type_map
+        # note: while we want to change the type_map when feeding to DeepPot,
+        # we don't want to change the type_map in the returned data
+        sorted_data = sort_atom_names(data.copy(), type_map=type_map)
+        atype = sorted_data["atom_types"]
+
+        coord = data["coords"].reshape((nframes, natoms * 3))
+        if "nopbc" not in data:
+            cell = data["cells"].reshape((nframes, 9))
+        else:
+            cell = None
+        e, f, v = self.dp.eval(coord, cell, atype)
+        data = data.copy()
+        data["energies"] = e.reshape((nframes,))
+        data["forces"] = f.reshape((nframes, natoms, 3))
+        data["virials"] = v.reshape((nframes, 3, 3))
+        return data
diff --git a/deepmd/entrypoints/__init__.py b/deepmd/entrypoints/__init__.py
index 9c3a8b31e1..6ceb116d85 100644
--- a/deepmd/entrypoints/__init__.py
+++ b/deepmd/entrypoints/__init__.py
@@ -1,48 +1 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Submodule that contains all the DeePMD-Kit entry point scripts."""
-
-from ..infer.model_devi import (
-    make_model_devi,
-)
-from .compress import (
-    compress,
-)
-from .convert import (
-    convert,
-)
-from .doc import (
-    doc_train_input,
-)
-from .freeze import (
-    freeze,
-)
-from .gui import (
-    start_dpgui,
-)
-from .neighbor_stat import (
-    neighbor_stat,
-)
-from .test import (
-    test,
-)
-
-# import `train` as `train_dp` to avoid the conflict of the
-# module name `train` and the function name `train`
-from .train import train as train_dp
-from .transfer import (
-    transfer,
-)
-
-__all__ = [
-    "doc_train_input",
-    "freeze",
-    "test",
-    "train_dp",
-    "transfer",
-    "compress",
-    "doc_train_input",
-    "make_model_devi",
-    "convert",
-    "neighbor_stat",
-    "start_dpgui",
-]
diff --git a/deepmd/entrypoints/convert_backend.py b/deepmd/entrypoints/convert_backend.py
new file mode 100644
index 0000000000..39967d565c
--- /dev/null
+++ b/deepmd/entrypoints/convert_backend.py
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.backend.backend import (
+    Backend,
+)
+
+
+def convert_backend(
+    *,  # Enforce keyword-only arguments
+    INPUT: str,
+    OUTPUT: str,
+    **kwargs,
+) -> None:
+    """Convert a model file from one backend to another.
+
+    Parameters
+    ----------
+    INPUT : str
+        The input model file.
+    INPUT : str
+        The output model file.
+    """
+    inp_backend: Backend = Backend.detect_backend_by_model(INPUT)()
+    out_backend: Backend = Backend.detect_backend_by_model(OUTPUT)()
+    inp_hook = inp_backend.serialize_hook
+    out_hook = out_backend.deserialize_hook
+    data = inp_hook(INPUT)
+    out_hook(OUTPUT, data)
diff --git a/deepmd/entrypoints/doc.py b/deepmd/entrypoints/doc.py
index cc28e52930..e55e84f9d3 100644
--- a/deepmd/entrypoints/doc.py
+++ b/deepmd/entrypoints/doc.py
@@ -1,6 +1,20 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-from deepmd_utils.entrypoints.doc import (
-    doc_train_input,
+"""Module that prints train input arguments docstrings."""
+
+from deepmd.utils.argcheck import (
+    gen_doc,
+    gen_json,
 )
 
 __all__ = ["doc_train_input"]
+
+
+def doc_train_input(*, out_type: str = "rst", **kwargs):
+    """Print out trining input arguments to console."""
+    if out_type == "rst":
+        doc_str = gen_doc(make_anchor=True)
+    elif out_type == "json":
+        doc_str = gen_json()
+    else:
+        raise RuntimeError("Unsupported out type %s" % out_type)
+    print(doc_str)  # noqa: T201
diff --git a/deepmd/entrypoints/gui.py b/deepmd/entrypoints/gui.py
index 72de65f1c2..8b6b9e0a09 100644
--- a/deepmd/entrypoints/gui.py
+++ b/deepmd/entrypoints/gui.py
@@ -1,6 +1,31 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-from deepmd_utils.entrypoints.gui import (
-    start_dpgui,
-)
+"""DP-GUI entrypoint."""
 
-__all__ = ["start_dpgui"]
+
+def start_dpgui(*, port: int, bind_all: bool, **kwargs):
+    """Host DP-GUI server.
+
+    Parameters
+    ----------
+    port : int
+        The port to serve DP-GUI on.
+    bind_all : bool
+        Serve on all public interfaces. This will expose your DP-GUI instance
+        to the network on both IPv4 and IPv6 (where available).
+    **kwargs
+        additional arguments
+
+    Raises
+    ------
+    ModuleNotFoundError
+        The dpgui package is not installed
+    """
+    try:
+        from dpgui import (
+            start_dpgui,
+        )
+    except ModuleNotFoundError as e:
+        raise ModuleNotFoundError(
+            "To use DP-GUI, please install the dpgui package:\npip install dpgui"
+        ) from e
+    start_dpgui(port=port, bind_all=bind_all)
diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py
index 2c6ac26a7f..9f05b9a530 100644
--- a/deepmd/entrypoints/main.py
+++ b/deepmd/entrypoints/main.py
@@ -1,47 +1,41 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""DeePMD-Kit entry point module."""
+"""Common entrypoints."""
 
 import argparse
 from pathlib import (
     Path,
 )
-from typing import (
-    List,
-    Optional,
-    Union,
-)
 
-from deepmd.common import (
-    clear_session,
+from deepmd.backend.backend import (
+    Backend,
+)
+from deepmd.backend.suffix import (
+    format_model_suffix,
+)
+from deepmd.entrypoints.convert_backend import (
+    convert_backend,
 )
-from deepmd.entrypoints import (
-    compress,
-    convert,
+from deepmd.entrypoints.doc import (
     doc_train_input,
-    freeze,
-    make_model_devi,
-    neighbor_stat,
+)
+from deepmd.entrypoints.gui import (
     start_dpgui,
-    test,
-    train_dp,
-    transfer,
 )
-from deepmd.loggers import (
-    set_log_handles,
+from deepmd.entrypoints.neighbor_stat import (
+    neighbor_stat,
 )
-from deepmd.nvnmd.entrypoints.train import (
-    train_nvnmd,
+from deepmd.entrypoints.test import (
+    test,
 )
-from deepmd_utils.main import (
-    get_ll,
-    main_parser,
-    parse_args,
+from deepmd.infer.model_devi import (
+    make_model_devi,
+)
+from deepmd.loggers.loggers import (
+    set_log_handles,
 )
-
-__all__ = ["main", "parse_args", "get_ll", "main_parser"]
 
 
-def main(args: Optional[Union[List[str], argparse.Namespace]] = None):
+def main(args: argparse.Namespace):
     """DeePMD-Kit entry point.
 
     Parameters
@@ -56,46 +50,36 @@ def main(args: Optional[Union[List[str], argparse.Namespace]] = None):
     RuntimeError
         if no command was input
     """
-    if args is not None:
-        clear_session()
-
-    if not isinstance(args, argparse.Namespace):
-        args = parse_args(args=args)
-
-    # do not set log handles for None, it is useless
-    # log handles for train will be set separatelly
-    # when the use of MPI will be determined in `RunOptions`
-    if args.command not in (None, "train"):
-        set_log_handles(args.log_level, Path(args.log_path) if args.log_path else None)
+    set_log_handles(args.log_level, Path(args.log_path) if args.log_path else None)
 
     dict_args = vars(args)
 
-    if args.command == "train":
-        train_dp(**dict_args)
-    elif args.command == "freeze":
-        freeze(**dict_args)
-    elif args.command == "test":
+    if args.command == "test":
+        dict_args["model"] = format_model_suffix(
+            dict_args["model"],
+            feature=Backend.Feature.DEEP_EVAL,
+            preferred_backend=args.backend,
+            strict_prefer=False,
+        )
         test(**dict_args)
-    elif args.command == "transfer":
-        transfer(**dict_args)
-    elif args.command == "compress":
-        compress(**dict_args)
     elif args.command == "doc-train-input":
         doc_train_input(**dict_args)
     elif args.command == "model-devi":
+        dict_args["models"] = [
+            format_model_suffix(
+                mm,
+                feature=Backend.Feature.DEEP_EVAL,
+                preferred_backend=args.backend,
+                strict_prefer=False,
+            )
+            for mm in dict_args["models"]
+        ]
         make_model_devi(**dict_args)
-    elif args.command == "convert-from":
-        convert(**dict_args)
     elif args.command == "neighbor-stat":
         neighbor_stat(**dict_args)
-    elif args.command == "train-nvnmd":  # nvnmd
-        train_nvnmd(**dict_args)
     elif args.command == "gui":
         start_dpgui(**dict_args)
-    elif args.command is None:
-        pass
+    elif args.command == "convert-backend":
+        convert_backend(**dict_args)
     else:
-        raise RuntimeError(f"unknown command {args.command}")
-
-    if args is not None:
-        clear_session()
+        raise ValueError(f"Unknown command: {args.command}")
diff --git a/deepmd/entrypoints/neighbor_stat.py b/deepmd/entrypoints/neighbor_stat.py
index 28cab00ad2..a68a3fd3bb 100644
--- a/deepmd/entrypoints/neighbor_stat.py
+++ b/deepmd/entrypoints/neighbor_stat.py
@@ -4,15 +4,15 @@
     List,
 )
 
+from deepmd.backend.backend import (
+    Backend,
+)
 from deepmd.common import (
     expand_sys_str,
 )
 from deepmd.utils.data_system import (
     DeepmdDataSystem,
 )
-from deepmd.utils.neighbor_stat import (
-    NeighborStat,
-)
 
 log = logging.getLogger(__name__)
 
@@ -22,7 +22,8 @@ def neighbor_stat(
     system: str,
     rcut: float,
     type_map: List[str],
-    one_type: bool = False,
+    mixed_type: bool = False,
+    backend: str = "tensorflow",
     **kwargs,
 ):
     """Calculate neighbor statistics.
@@ -35,17 +36,48 @@ def neighbor_stat(
         cutoff radius
     type_map : list[str]
         type map
-    one_type : bool, optional, default=False
+    mixed_type : bool, optional, default=False
         treat all types as a single type
+    backend : str, optional, default="tensorflow"
+        backend to use
     **kwargs
         additional arguments
 
     Examples
     --------
-    >>> neighbor_stat(system='.', rcut=6., type_map=["C", "H", "O", "N", "P", "S", "Mg", "Na", "HW", "OW", "mNa", "mCl", "mC", "mH", "mMg", "mN", "mO", "mP"])
+    >>> neighbor_stat(
+    ...     system=".",
+    ...     rcut=6.0,
+    ...     type_map=[
+    ...         "C",
+    ...         "H",
+    ...         "O",
+    ...         "N",
+    ...         "P",
+    ...         "S",
+    ...         "Mg",
+    ...         "Na",
+    ...         "HW",
+    ...         "OW",
+    ...         "mNa",
+    ...         "mCl",
+    ...         "mC",
+    ...         "mH",
+    ...         "mMg",
+    ...         "mN",
+    ...         "mO",
+    ...         "mP",
+    ...     ],
+    ... )
     min_nbor_dist: 0.6599510670195264
     max_nbor_size: [23, 26, 19, 16, 2, 2, 1, 1, 72, 37, 5, 0, 31, 29, 1, 21, 20, 5]
     """
+    backends = Backend.get_backends_by_feature(Backend.Feature.NEIGHBOR_STAT)
+    try:
+        backend_obj = backends[backend]()
+    except KeyError:
+        raise ValueError(f"Invalid backend {backend}")
+    NeighborStat = backend_obj.neighbor_stat
     all_sys = expand_sys_str(system)
     if not len(all_sys):
         raise RuntimeError("Did not find valid system")
@@ -57,7 +89,7 @@ def neighbor_stat(
         type_map=type_map,
     )
     data.get_batch()
-    nei = NeighborStat(data.get_ntypes(), rcut, one_type=one_type)
+    nei = NeighborStat(data.get_ntypes(), rcut, mixed_type=mixed_type)
     min_nbor_dist, max_nbor_size = nei.get_stat(data)
     log.info("min_nbor_dist: %f" % min_nbor_dist)
     log.info("max_nbor_size: %s" % str(max_nbor_size))
diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py
index 4658b16e7c..cad6e12d2b 100644
--- a/deepmd/entrypoints/test.py
+++ b/deepmd/entrypoints/test.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 """Test trained DeePMD model."""
+
 import logging
 from pathlib import (
     Path,
@@ -14,12 +15,28 @@
 
 import numpy as np
 
-from deepmd import (
-    DeepPotential,
-)
 from deepmd.common import (
     expand_sys_str,
 )
+from deepmd.infer.deep_dipole import (
+    DeepDipole,
+)
+from deepmd.infer.deep_dos import (
+    DeepDOS,
+)
+from deepmd.infer.deep_eval import (
+    DeepEval,
+)
+from deepmd.infer.deep_polar import (
+    DeepGlobalPolar,
+    DeepPolar,
+)
+from deepmd.infer.deep_pot import (
+    DeepPot,
+)
+from deepmd.infer.deep_wfc import (
+    DeepWFC,
+)
 from deepmd.utils import random as dp_random
 from deepmd.utils.data import (
     DeepmdData,
@@ -29,14 +46,14 @@
 )
 
 if TYPE_CHECKING:
-    from deepmd.infer import (
+    from deepmd.tf.infer import (
         DeepDipole,
         DeepDOS,
         DeepPolar,
         DeepPot,
         DeepWFC,
     )
-    from deepmd.infer.deep_tensor import (
+    from deepmd.tf.infer.deep_tensor import (
         DeepTensor,
     )
 
@@ -56,6 +73,7 @@ def test(
     shuffle_test: bool,
     detail_file: str,
     atomic: bool,
+    head: Optional[str] = None,
     **kwargs,
 ):
     """Test model predictions.
@@ -80,6 +98,8 @@ def test(
         file where test details will be output
     atomic : bool
         whether per atom quantities should be computed
+    head : Optional[str], optional
+        (Supported backend: PyTorch) Task head to test if in multi-task mode.
     **kwargs
         additional arguments
 
@@ -92,9 +112,8 @@ def test(
         # only float has inf, but should work for min
         numb_test = float("inf")
     if datafile is not None:
-        datalist = open(datafile)
-        all_sys = datalist.read().splitlines()
-        datalist.close()
+        with open(datafile) as datalist:
+            all_sys = datalist.read().splitlines()
     else:
         all_sys = expand_sys_str(system)
 
@@ -108,14 +127,14 @@ def test(
         dp_random.seed(rand_seed % (2**32))
 
     # init model
-    dp = DeepPotential(model)
+    dp = DeepEval(model, head=head)
 
     for cc, system in enumerate(all_sys):
         log.info("# ---------------output of dp test--------------- ")
         log.info(f"# testing system : {system}")
 
         # create data class
-        tmap = dp.get_type_map() if dp.model_type == "ener" else None
+        tmap = dp.get_type_map() if isinstance(dp, DeepPot) else None
         data = DeepmdData(
             system,
             set_prefix,
@@ -124,7 +143,7 @@ def test(
             sort_atoms=False,
         )
 
-        if dp.model_type == "ener":
+        if isinstance(dp, DeepPot):
             err = test_ener(
                 dp,
                 data,
@@ -134,7 +153,7 @@ def test(
                 atomic,
                 append_detail=(cc != 0),
             )
-        elif dp.model_type == "dos":
+        elif isinstance(dp, DeepDOS):
             err = test_dos(
                 dp,
                 data,
@@ -144,11 +163,11 @@ def test(
                 atomic,
                 append_detail=(cc != 0),
             )
-        elif dp.model_type == "dipole":
+        elif isinstance(dp, DeepDipole):
             err = test_dipole(dp, data, numb_test, detail_file, atomic)
-        elif dp.model_type == "polar":
+        elif isinstance(dp, DeepPolar):
             err = test_polar(dp, data, numb_test, detail_file, atomic=atomic)
-        elif dp.model_type == "global_polar":  # should not appear in this new version
+        elif isinstance(dp, DeepGlobalPolar):  # should not appear in this new version
             log.warning(
                 "Global polar model is not currently supported. Please directly use the polar mode and change loss parameters."
             )
@@ -166,17 +185,17 @@ def test(
     if len(all_sys) > 1:
         log.info("# ----------weighted average of errors----------- ")
         log.info(f"# number of systems : {len(all_sys)}")
-        if dp.model_type == "ener":
+        if isinstance(dp, DeepPot):
             print_ener_sys_avg(avg_err)
-        elif dp.model_type == "dos":
+        elif isinstance(dp, DeepDOS):
             print_dos_sys_avg(avg_err)
-        elif dp.model_type == "dipole":
+        elif isinstance(dp, DeepDipole):
             print_dipole_sys_avg(avg_err)
-        elif dp.model_type == "polar":
+        elif isinstance(dp, DeepPolar):
             print_polar_sys_avg(avg_err)
-        elif dp.model_type == "global_polar":
+        elif isinstance(dp, DeepGlobalPolar):
             print_polar_sys_avg(avg_err)
-        elif dp.model_type == "wfc":
+        elif isinstance(dp, DeepGlobalPolar):
             print_wfc_sys_avg(avg_err)
         log.info("# ----------------------------------------------- ")
 
@@ -280,6 +299,9 @@ def test_ener(
         )
     if dp.get_dim_aparam() > 0:
         data.add("aparam", dp.get_dim_aparam(), atomic=True, must=True, high_prec=False)
+    if dp.has_spin:
+        data.add("spin", 3, atomic=True, must=True, high_prec=False)
+        data.add("force_mag", 3, atomic=True, must=False, high_prec=False)
 
     test_data = data.get_test()
     mixed_type = data.mixed_type
@@ -293,6 +315,10 @@ def test_ener(
         efield = test_data["efield"][:numb_test].reshape([numb_test, -1])
     else:
         efield = None
+    if dp.has_spin:
+        spin = test_data["spin"][:numb_test].reshape([numb_test, -1])
+    else:
+        spin = None
     if not data.pbc:
         box = None
     if mixed_type:
@@ -317,6 +343,7 @@ def test_ener(
         atomic=has_atom_ener,
         efield=efield,
         mixed_type=mixed_type,
+        spin=spin,
     )
     energy = ret[0]
     force = ret[1]
@@ -329,26 +356,50 @@ def test_ener(
         av = ret[4]
         ae = ae.reshape([numb_test, -1])
         av = av.reshape([numb_test, -1])
-    if dp.get_ntypes_spin() != 0:
-        ntypes_real = dp.get_ntypes() - dp.get_ntypes_spin()
-        nloc = natoms
-        nloc_real = sum([np.count_nonzero(atype == ii) for ii in range(ntypes_real)])
-        force_r = np.split(
-            force, indices_or_sections=[nloc_real * 3, nloc * 3], axis=1
-        )[0]
-        force_m = np.split(
-            force, indices_or_sections=[nloc_real * 3, nloc * 3], axis=1
-        )[1]
-        test_force_r = np.split(
-            test_data["force"][:numb_test],
-            indices_or_sections=[nloc_real * 3, nloc * 3],
-            axis=1,
-        )[0]
-        test_force_m = np.split(
-            test_data["force"][:numb_test],
-            indices_or_sections=[nloc_real * 3, nloc * 3],
-            axis=1,
-        )[1]
+        if dp.has_spin:
+            force_m = ret[5]
+            force_m = force_m.reshape([numb_test, -1])
+            mask_mag = ret[6]
+            mask_mag = mask_mag.reshape([numb_test, -1])
+    else:
+        if dp.has_spin:
+            force_m = ret[3]
+            force_m = force_m.reshape([numb_test, -1])
+            mask_mag = ret[4]
+            mask_mag = mask_mag.reshape([numb_test, -1])
+    out_put_spin = dp.get_ntypes_spin() != 0 or dp.has_spin
+    if out_put_spin:
+        if dp.get_ntypes_spin() != 0:  # old tf support for spin
+            ntypes_real = dp.get_ntypes() - dp.get_ntypes_spin()
+            nloc = natoms
+            nloc_real = sum(
+                [np.count_nonzero(atype == ii) for ii in range(ntypes_real)]
+            )
+            force_r = np.split(
+                force, indices_or_sections=[nloc_real * 3, nloc * 3], axis=1
+            )[0]
+            force_m = np.split(
+                force, indices_or_sections=[nloc_real * 3, nloc * 3], axis=1
+            )[1]
+            test_force_r = np.split(
+                test_data["force"][:numb_test],
+                indices_or_sections=[nloc_real * 3, nloc * 3],
+                axis=1,
+            )[0]
+            test_force_m = np.split(
+                test_data["force"][:numb_test],
+                indices_or_sections=[nloc_real * 3, nloc * 3],
+                axis=1,
+            )[1]
+        else:  # pt support for spin
+            force_r = force
+            test_force_r = test_data["force"][:numb_test]
+            # The shape of force_m and test_force_m are [-1, 3],
+            # which is designed for mixed_type cases
+            force_m = force_m.reshape(-1, 3)[mask_mag.reshape(-1)]
+            test_force_m = test_data["force_mag"][:numb_test].reshape(-1, 3)[
+                mask_mag.reshape(-1)
+            ]
 
     diff_e = energy - test_data["energy"][:numb_test].reshape([-1, 1])
     mae_e = mae(diff_e)
@@ -367,7 +418,7 @@ def test_ener(
         diff_ae = test_data["atom_ener"][:numb_test].reshape([-1]) - ae.reshape([-1])
         mae_ae = mae(diff_ae)
         rmse_ae = rmse(diff_ae)
-    if dp.get_ntypes_spin() != 0:
+    if out_put_spin:
         mae_fr = mae(force_r - test_force_r)
         mae_fm = mae(force_m - test_force_m)
         rmse_fr = rmse(force_r - test_force_r)
@@ -378,16 +429,16 @@ def test_ener(
     log.info(f"Energy RMSE        : {rmse_e:e} eV")
     log.info(f"Energy MAE/Natoms  : {mae_ea:e} eV")
     log.info(f"Energy RMSE/Natoms : {rmse_ea:e} eV")
-    if dp.get_ntypes_spin() == 0:
+    if not out_put_spin:
         log.info(f"Force  MAE         : {mae_f:e} eV/A")
         log.info(f"Force  RMSE        : {rmse_f:e} eV/A")
     else:
         log.info(f"Force atom MAE      : {mae_fr:e} eV/A")
-        log.info(f"Force spin MAE      : {mae_fm:e} eV/uB")
         log.info(f"Force atom RMSE     : {rmse_fr:e} eV/A")
+        log.info(f"Force spin MAE      : {mae_fm:e} eV/uB")
         log.info(f"Force spin RMSE     : {rmse_fm:e} eV/uB")
 
-    if data.pbc:
+    if data.pbc and not out_put_spin:
         log.info(f"Virial MAE         : {mae_v:e} eV")
         log.info(f"Virial RMSE        : {rmse_v:e} eV")
         log.info(f"Virial MAE/Natoms  : {mae_va:e} eV")
@@ -419,7 +470,7 @@ def test_ener(
             header="%s: data_e pred_e" % system,
             append=append_detail,
         )
-        if dp.get_ntypes_spin() == 0:
+        if not out_put_spin:
             pf = np.concatenate(
                 (
                     np.reshape(test_data["force"][:numb_test], [-1, 3]),
@@ -479,7 +530,7 @@ def test_ener(
             "pred_vyy pred_vyz pred_vzx pred_vzy pred_vzz",
             append=append_detail,
         )
-    if dp.get_ntypes_spin() == 0:
+    if not out_put_spin:
         return {
             "mae_e": (mae_e, energy.size),
             "mae_ea": (mae_ea, energy.size),
@@ -842,6 +893,10 @@ def test_polar(
         rmse_fs = rmse_f / np.sqrt(sel_natoms)
         rmse_fa = rmse_f / sel_natoms
     else:
+        sel_mask = np.isin(atype, sel_type)
+        polar = polar.reshape((polar.shape[0], -1, 9))[:, sel_mask, :].reshape(
+            (polar.shape[0], -1)
+        )
         rmse_f = rmse(polar - test_data["atomic_polarizability"][:numb_test])
 
     log.info(f"# number of test data : {numb_test:d} ")
@@ -978,6 +1033,10 @@ def test_dipole(
         rmse_fs = rmse_f / np.sqrt(sel_natoms)
         rmse_fa = rmse_f / sel_natoms
     else:
+        sel_mask = np.isin(atype, sel_type)
+        dipole = dipole.reshape((dipole.shape[0], -1, 3))[:, sel_mask, :].reshape(
+            (dipole.shape[0], -1)
+        )
         rmse_f = rmse(dipole - test_data["atomic_dipole"][:numb_test])
 
     log.info(f"# number of test data : {numb_test:d}")
diff --git a/deepmd/env.py b/deepmd/env.py
index f290dc0a90..8215de39ac 100644
--- a/deepmd/env.py
+++ b/deepmd/env.py
@@ -1,219 +1,55 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Module that sets tensorflow working environment and exports inportant constants."""
-
-import ctypes
 import logging
 import os
-import platform
 from configparser import (
     ConfigParser,
 )
-from importlib import (
-    import_module,
-    reload,
-)
 from pathlib import (
     Path,
 )
 from typing import (
-    TYPE_CHECKING,
-    Any,
     Dict,
     Tuple,
 )
 
 import numpy as np
-from packaging.version import (
-    Version,
-)
 
 import deepmd.lib
-from deepmd_utils.env import (
-    GLOBAL_ENER_FLOAT_PRECISION,
-    GLOBAL_NP_FLOAT_PRECISION,
-    global_float_prec,
-)
-
-if TYPE_CHECKING:
-    from types import (
-        ModuleType,
-    )
-
-
-def dlopen_library(module: str, filename: str):
-    """Dlopen a library from a module.
-
-    Parameters
-    ----------
-    module : str
-        The module name.
-    filename : str
-        The library filename pattern.
-    """
-    try:
-        m = import_module(module)
-    except ModuleNotFoundError:
-        pass
-    else:
-        libs = sorted(Path(m.__path__[0]).glob(filename))
-        # hope that there is only one version installed...
-        if len(libs):
-            ctypes.CDLL(str(libs[0].absolute()))
-
-
-# dlopen pip cuda library before tensorflow
-if platform.system() == "Linux":
-    dlopen_library("nvidia.cuda_runtime.lib", "libcudart.so*")
-    dlopen_library("nvidia.cublas.lib", "libcublasLt.so*")
-    dlopen_library("nvidia.cublas.lib", "libcublas.so*")
-    dlopen_library("nvidia.cufft.lib", "libcufft.so*")
-    dlopen_library("nvidia.curand.lib", "libcurand.so*")
-    dlopen_library("nvidia.cusolver.lib", "libcusolver.so*")
-    dlopen_library("nvidia.cusparse.lib", "libcusparse.so*")
-    dlopen_library("nvidia.cudnn.lib", "libcudnn.so*")
-
-
-# import tensorflow v1 compatability
-try:
-    import tensorflow.compat.v1 as tf
-
-    tf.disable_v2_behavior()
-except ImportError:
-    import tensorflow as tf
-try:
-    import tensorflow.compat.v2 as tfv2
-except ImportError:
-    tfv2 = None
 
 __all__ = [
-    "GLOBAL_CONFIG",
-    "GLOBAL_TF_FLOAT_PRECISION",
     "GLOBAL_NP_FLOAT_PRECISION",
     "GLOBAL_ENER_FLOAT_PRECISION",
     "global_float_prec",
-    "global_cvt_2_tf_float",
-    "global_cvt_2_ener_float",
-    "MODEL_VERSION",
-    "SHARED_LIB_DIR",
+    "GLOBAL_CONFIG",
     "SHARED_LIB_MODULE",
-    "default_tf_session_config",
-    "reset_default_tf_session_config",
-    "op_module",
-    "op_grads_module",
-    "TRANSFER_PATTERN",
-    "FITTING_NET_PATTERN",
-    "EMBEDDING_NET_PATTERN",
-    "TYPE_EMBEDDING_PATTERN",
-    "ATTENTION_LAYER_PATTERN",
-    "REMOVE_SUFFIX_DICT",
-    "TF_VERSION",
+    "SHARED_LIB_DIR",
 ]
 
+log = logging.getLogger(__name__)
+
+
 SHARED_LIB_MODULE = "lib"
 SHARED_LIB_DIR = Path(deepmd.lib.__path__[0])
 CONFIG_FILE = SHARED_LIB_DIR / "run_config.ini"
 
-# Python library version
-try:
-    tf_py_version = tf.version.VERSION
-except AttributeError:
-    tf_py_version = tf.__version__
-
-EMBEDDING_NET_PATTERN = str(
-    r"filter_type_\d+/matrix_\d+_\d+|"
-    r"filter_type_\d+/bias_\d+_\d+|"
-    r"filter_type_\d+/idt_\d+_\d+|"
-    r"filter_type_all/matrix_\d+|"
-    r"filter_type_all/matrix_\d+_\d+|"
-    r"filter_type_all/matrix_\d+_\d+_\d+|"
-    r"filter_type_all/bias_\d+|"
-    r"filter_type_all/bias_\d+_\d+|"
-    r"filter_type_all/bias_\d+_\d+_\d+|"
-    r"filter_type_all/idt_\d+|"
-    r"filter_type_all/idt_\d+_\d+|"
-)
 
-FITTING_NET_PATTERN = str(
-    r"layer_\d+/matrix|"
-    r"layer_\d+_type_\d+/matrix|"
-    r"layer_\d+/bias|"
-    r"layer_\d+_type_\d+/bias|"
-    r"layer_\d+/idt|"
-    r"layer_\d+_type_\d+/idt|"
-    r"final_layer/matrix|"
-    r"final_layer_type_\d+/matrix|"
-    r"final_layer/bias|"
-    r"final_layer_type_\d+/bias|"
-    # layer_name
-    r"share_.+_type_\d/matrix|"
-    r"share_.+_type_\d/bias|"
-    r"share_.+_type_\d/idt|"
-    r"share_.+/matrix|"
-    r"share_.+/bias|"
-    r"share_.+/idt|"
-)
-
-TYPE_EMBEDDING_PATTERN = str(
-    r"type_embed_net+/matrix_\d+|"
-    r"type_embed_net+/bias_\d+|"
-    r"type_embed_net+/idt_\d+|"
-)
-
-ATTENTION_LAYER_PATTERN = str(
-    r"attention_layer_\d+/c_query/matrix|"
-    r"attention_layer_\d+/c_query/bias|"
-    r"attention_layer_\d+/c_key/matrix|"
-    r"attention_layer_\d+/c_key/bias|"
-    r"attention_layer_\d+/c_value/matrix|"
-    r"attention_layer_\d+/c_value/bias|"
-    r"attention_layer_\d+/c_out/matrix|"
-    r"attention_layer_\d+/c_out/bias|"
-    r"attention_layer_\d+/layer_normalization/beta|"
-    r"attention_layer_\d+/layer_normalization/gamma|"
-    r"attention_layer_\d+/layer_normalization_\d+/beta|"
-    r"attention_layer_\d+/layer_normalization_\d+/gamma|"
-)
-
-TRANSFER_PATTERN = (
-    EMBEDDING_NET_PATTERN
-    + FITTING_NET_PATTERN
-    + TYPE_EMBEDDING_PATTERN
-    + str(
-        r"descrpt_attr/t_avg|"
-        r"descrpt_attr/t_std|"
-        r"fitting_attr/t_fparam_avg|"
-        r"fitting_attr/t_fparam_istd|"
-        r"fitting_attr/t_aparam_avg|"
-        r"fitting_attr/t_aparam_istd|"
-        r"model_attr/t_tab_info|"
-        r"model_attr/t_tab_data|"
+# FLOAT_PREC
+dp_float_prec = os.environ.get("DP_INTERFACE_PREC", "high").lower()
+if dp_float_prec in ("high", ""):
+    # default is high
+    GLOBAL_NP_FLOAT_PRECISION = np.float64
+    GLOBAL_ENER_FLOAT_PRECISION = np.float64
+    global_float_prec = "double"
+elif dp_float_prec == "low":
+    GLOBAL_NP_FLOAT_PRECISION = np.float32
+    GLOBAL_ENER_FLOAT_PRECISION = np.float64
+    global_float_prec = "float"
+else:
+    raise RuntimeError(
+        "Unsupported float precision option: %s. Supported: high,"
+        "low. Please set precision with environmental variable "
+        "DP_INTERFACE_PREC." % dp_float_prec
     )
-)
-
-REMOVE_SUFFIX_DICT = {
-    "model_attr/sel_type_{}": "model_attr/sel_type",
-    "model_attr/output_dim_{}": "model_attr/output_dim",
-    "_{}/": "/",
-    # when atom_ener is set
-    "_{}_1/": "_1/",
-    "o_energy_{}": "o_energy",
-    "o_force_{}": "o_force",
-    "o_virial_{}": "o_virial",
-    "o_atom_energy_{}": "o_atom_energy",
-    "o_atom_virial_{}": "o_atom_virial",
-    "o_dipole_{}": "o_dipole",
-    "o_global_dipole_{}": "o_global_dipole",
-    "o_polar_{}": "o_polar",
-    "o_global_polar_{}": "o_global_polar",
-    "o_rmat_{}": "o_rmat",
-    "o_rmat_deriv_{}": "o_rmat_deriv",
-    "o_nlist_{}": "o_nlist",
-    "o_rij_{}": "o_rij",
-    "o_dm_force_{}": "o_dm_force",
-    "o_dm_virial_{}": "o_dm_virial",
-    "o_dm_av_{}": "o_dm_av",
-    "o_wfc_{}": "o_wfc",
-}
 
 
 def set_env_if_empty(key: str, value: str, verbose: bool = True):
@@ -231,224 +67,68 @@ def set_env_if_empty(key: str, value: str, verbose: bool = True):
     if os.environ.get(key) is None:
         os.environ[key] = value
         if verbose:
-            logging.warning(
+            log.warning(
                 f"Environment variable {key} is empty. Use the default value {value}"
             )
 
 
-def set_mkl():
-    """Tuning MKL for the best performance.
-
-    References
-    ----------
-    TF overview
-    https://www.tensorflow.org/guide/performance/overview
-
-    Fixing an issue in numpy built by MKL
-    https://github.com/ContinuumIO/anaconda-issues/issues/11367
-    https://github.com/numpy/numpy/issues/12374
-
-    check whether the numpy is built by mkl, see
-    https://github.com/numpy/numpy/issues/14751
-    """
-    try:
-        is_mkl = (
-            np.show_config("dicts")
-            .get("Build Dependencies", {})
-            .get("blas", {})
-            .get("name", "")
-            .lower()
-            .startswith("mkl")
-        )
-    except TypeError:
-        is_mkl = "mkl_rt" in np.__config__.get_info("blas_mkl_info").get(
-            "libraries", []
-        )
-    if is_mkl:
-        set_env_if_empty("KMP_BLOCKTIME", "0")
-        set_env_if_empty("KMP_AFFINITY", "granularity=fine,verbose,compact,1,0")
-        reload(np)
-
-
-def set_tf_default_nthreads():
-    """Set TF internal number of threads to default=automatic selection.
+def set_default_nthreads():
+    """Set internal number of threads to default=automatic selection.
 
     Notes
     -----
-    `TF_INTRA_OP_PARALLELISM_THREADS` and `TF_INTER_OP_PARALLELISM_THREADS`
-    control TF configuration of multithreading.
+    `DP_INTRA_OP_PARALLELISM_THREADS` and `DP_INTER_OP_PARALLELISM_THREADS`
+    control configuration of multithreading.
     """
     if (
         "OMP_NUM_THREADS" not in os.environ
-        or "TF_INTRA_OP_PARALLELISM_THREADS" not in os.environ
-        or "TF_INTER_OP_PARALLELISM_THREADS" not in os.environ
+        # for backward compatibility
+        or (
+            "DP_INTRA_OP_PARALLELISM_THREADS" not in os.environ
+            and "TF_INTRA_OP_PARALLELISM_THREADS" not in os.environ
+        )
+        or (
+            "DP_INTER_OP_PARALLELISM_THREADS" not in os.environ
+            and "TF_INTER_OP_PARALLELISM_THREADS" not in os.environ
+        )
     ):
-        logging.warning(
+        log.warning(
             "To get the best performance, it is recommended to adjust "
             "the number of threads by setting the environment variables "
-            "OMP_NUM_THREADS, TF_INTRA_OP_PARALLELISM_THREADS, and "
-            "TF_INTER_OP_PARALLELISM_THREADS. See "
+            "OMP_NUM_THREADS, DP_INTRA_OP_PARALLELISM_THREADS, and "
+            "DP_INTER_OP_PARALLELISM_THREADS. See "
             "https://deepmd.rtfd.io/parallelism/ for more information."
         )
-    set_env_if_empty("TF_INTRA_OP_PARALLELISM_THREADS", "0", verbose=False)
-    set_env_if_empty("TF_INTER_OP_PARALLELISM_THREADS", "0", verbose=False)
+    if "TF_INTRA_OP_PARALLELISM_THREADS" not in os.environ:
+        set_env_if_empty("DP_INTRA_OP_PARALLELISM_THREADS", "0", verbose=False)
+    if "TF_INTER_OP_PARALLELISM_THREADS" not in os.environ:
+        set_env_if_empty("DP_INTER_OP_PARALLELISM_THREADS", "0", verbose=False)
 
 
-def get_tf_default_nthreads() -> Tuple[int, int]:
-    """Get TF paralellism settings.
+def get_default_nthreads() -> Tuple[int, int]:
+    """Get paralellism settings.
 
-    Returns
-    -------
-    Tuple[int, int]
-        number of `TF_INTRA_OP_PARALLELISM_THREADS` and
-        `TF_INTER_OP_PARALLELISM_THREADS`
-    """
-    return int(os.environ.get("TF_INTRA_OP_PARALLELISM_THREADS", "0")), int(
-        os.environ.get("TF_INTER_OP_PARALLELISM_THREADS", "0")
-    )
-
-
-def get_tf_session_config() -> Any:
-    """Configure tensorflow session.
+    The method will first read the environment variables with the prefix `DP_`.
+    If not found, it will read the environment variables with the prefix `TF_`
+    for backward compatibility.
 
     Returns
     -------
-    Any
-        session configure object
+    Tuple[int, int]
+        number of `DP_INTRA_OP_PARALLELISM_THREADS` and
+        `DP_INTER_OP_PARALLELISM_THREADS`
     """
-    set_tf_default_nthreads()
-    intra, inter = get_tf_default_nthreads()
-    if int(os.environ.get("DP_JIT", 0)):
-        set_env_if_empty("TF_XLA_FLAGS", "--tf_xla_auto_jit=2")
-        # pip cuda package
-        if platform.system() == "Linux":
-            try:
-                m = import_module("nvidia.cuda_nvcc")
-            except ModuleNotFoundError:
-                pass
-            else:
-                cuda_data_dir = str(Path(m.__file__).parent.absolute())
-                set_env_if_empty(
-                    "XLA_FLAGS", "--xla_gpu_cuda_data_dir=" + cuda_data_dir
-                )
-    config = tf.ConfigProto(
-        gpu_options=tf.GPUOptions(allow_growth=True),
-        intra_op_parallelism_threads=intra,
-        inter_op_parallelism_threads=inter,
+    return int(
+        os.environ.get(
+            "DP_INTRA_OP_PARALLELISM_THREADS",
+            os.environ.get("TF_INTRA_OP_PARALLELISM_THREADS", "0"),
+        )
+    ), int(
+        os.environ.get(
+            "DP_INTER_OP_PARALLELISM_THREADS",
+            os.environ.get("TF_INTRA_OP_PARALLELISM_THREADS", "0"),
+        )
     )
-    if Version(tf_py_version) >= Version("1.15") and int(
-        os.environ.get("DP_AUTO_PARALLELIZATION", 0)
-    ):
-        config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
-    return config
-
-
-default_tf_session_config = get_tf_session_config()
-
-
-def reset_default_tf_session_config(cpu_only: bool):
-    """Limit tensorflow session to CPU or not.
-
-    Parameters
-    ----------
-    cpu_only : bool
-        If enabled, no GPU device is visible to the TensorFlow Session.
-    """
-    global default_tf_session_config
-    if cpu_only:
-        default_tf_session_config.device_count["GPU"] = 0
-    else:
-        if "GPU" in default_tf_session_config.device_count:
-            del default_tf_session_config.device_count["GPU"]
-
-
-def get_module(module_name: str) -> "ModuleType":
-    """Load force module.
-
-    Returns
-    -------
-    ModuleType
-        loaded force module
-
-    Raises
-    ------
-    FileNotFoundError
-        if module is not found in directory
-    """
-    if platform.system() == "Windows":
-        ext = ".dll"
-        prefix = ""
-    # elif platform.system() == "Darwin":
-    #    ext = ".dylib"
-    else:
-        ext = ".so"
-        prefix = "lib"
-
-    module_file = (SHARED_LIB_DIR / (prefix + module_name)).with_suffix(ext).resolve()
-
-    if not module_file.is_file():
-        raise FileNotFoundError(f"module {module_name} does not exist")
-    else:
-        try:
-            module = tf.load_op_library(str(module_file))
-        except tf.errors.NotFoundError as e:
-            # check CXX11_ABI_FLAG is compatiblity
-            # see https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html
-            # ABI should be the same
-            if "CXX11_ABI_FLAG" in tf.__dict__:
-                tf_cxx11_abi_flag = tf.CXX11_ABI_FLAG
-            else:
-                tf_cxx11_abi_flag = tf.sysconfig.CXX11_ABI_FLAG
-            if TF_CXX11_ABI_FLAG != tf_cxx11_abi_flag:
-                raise RuntimeError(
-                    "This deepmd-kit package was compiled with "
-                    "CXX11_ABI_FLAG=%d, but TensorFlow runtime was compiled "
-                    "with CXX11_ABI_FLAG=%d. These two library ABIs are "
-                    "incompatible and thus an error is raised when loading %s. "
-                    "You need to rebuild deepmd-kit against this TensorFlow "
-                    "runtime."
-                    % (
-                        TF_CXX11_ABI_FLAG,
-                        tf_cxx11_abi_flag,
-                        module_name,
-                    )
-                ) from e
-
-            # different versions may cause incompatibility
-            # see #406, #447, #557, #774, and #796 for example
-            # throw a message if versions are different
-            if TF_VERSION != tf_py_version:
-                raise RuntimeError(
-                    "The version of TensorFlow used to compile this "
-                    "deepmd-kit package is {}, but the version of TensorFlow "
-                    "runtime you are using is {}. These two versions are "
-                    "incompatible and thus an error is raised when loading {}. "
-                    "You need to install TensorFlow {}, or rebuild deepmd-kit "
-                    "against TensorFlow {}.\nIf you are using a wheel from "
-                    "pypi, you may consider to install deepmd-kit execuating "
-                    "`pip install deepmd-kit --no-binary deepmd-kit` "
-                    "instead.".format(
-                        TF_VERSION,
-                        tf_py_version,
-                        module_name,
-                        TF_VERSION,
-                        tf_py_version,
-                    )
-                ) from e
-            error_message = (
-                "This deepmd-kit package is inconsitent with TensorFlow "
-                f"Runtime, thus an error is raised when loading {module_name}. "
-                "You need to rebuild deepmd-kit against this TensorFlow "
-                "runtime."
-            )
-            if TF_CXX11_ABI_FLAG == 1:
-                # #1791
-                error_message += (
-                    "\nWARNING: devtoolset on RHEL6 and RHEL7 does not support _GLIBCXX_USE_CXX11_ABI=1. "
-                    "See https://bugzilla.redhat.com/show_bug.cgi?id=1546704"
-                )
-            raise RuntimeError(error_message) from e
-        return module
 
 
 def _get_package_constants(
@@ -466,50 +146,14 @@ def _get_package_constants(
     Dict[str, str]
         dictionary with package constants
     """
+    if not config_file.is_file():
+        raise FileNotFoundError(
+            f"CONFIG file not found at {config_file}. "
+            "Please check if the package is installed correctly."
+        )
     config = ConfigParser()
     config.read(config_file)
     return dict(config.items("CONFIG"))
 
 
 GLOBAL_CONFIG = _get_package_constants()
-MODEL_VERSION = GLOBAL_CONFIG["model_version"]
-TF_VERSION = GLOBAL_CONFIG["tf_version"]
-TF_CXX11_ABI_FLAG = int(GLOBAL_CONFIG["tf_cxx11_abi_flag"])
-
-op_module = get_module("deepmd_op")
-op_grads_module = get_module("op_grads")
-
-# FLOAT_PREC
-GLOBAL_TF_FLOAT_PRECISION = tf.dtypes.as_dtype(GLOBAL_NP_FLOAT_PRECISION)
-
-
-def global_cvt_2_tf_float(xx: tf.Tensor) -> tf.Tensor:
-    """Cast tensor to globally set TF precision.
-
-    Parameters
-    ----------
-    xx : tf.Tensor
-        input tensor
-
-    Returns
-    -------
-    tf.Tensor
-        output tensor cast to `GLOBAL_TF_FLOAT_PRECISION`
-    """
-    return tf.cast(xx, GLOBAL_TF_FLOAT_PRECISION)
-
-
-def global_cvt_2_ener_float(xx: tf.Tensor) -> tf.Tensor:
-    """Cast tensor to globally set energy precision.
-
-    Parameters
-    ----------
-    xx : tf.Tensor
-        input tensor
-
-    Returns
-    -------
-    tf.Tensor
-        output tensor cast to `GLOBAL_ENER_FLOAT_PRECISION`
-    """
-    return tf.cast(xx, GLOBAL_ENER_FLOAT_PRECISION)
diff --git a/deepmd/fit/fitting.py b/deepmd/fit/fitting.py
deleted file mode 100644
index a467ec1201..0000000000
--- a/deepmd/fit/fitting.py
+++ /dev/null
@@ -1,104 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-from abc import (
-    abstractmethod,
-)
-from typing import (
-    Callable,
-)
-
-from deepmd.env import (
-    tf,
-)
-from deepmd.loss.loss import (
-    Loss,
-)
-from deepmd.utils import (
-    Plugin,
-    PluginVariant,
-)
-
-
-class Fitting(PluginVariant):
-    __plugins = Plugin()
-
-    @staticmethod
-    def register(key: str) -> Callable:
-        """Register a Fitting plugin.
-
-        Parameters
-        ----------
-        key : str
-            the key of a Fitting
-
-        Returns
-        -------
-        Fitting
-            the registered Fitting
-
-        Examples
-        --------
-        >>> @Fitting.register("some_fitting")
-            class SomeFitting(Fitting):
-                pass
-        """
-        return Fitting.__plugins.register(key)
-
-    def __new__(cls, *args, **kwargs):
-        if cls is Fitting:
-            try:
-                fitting_type = kwargs["type"]
-            except KeyError:
-                raise KeyError("the type of fitting should be set by `type`")
-            if fitting_type in Fitting.__plugins.plugins:
-                cls = Fitting.__plugins.plugins[fitting_type]
-            else:
-                raise RuntimeError("Unknown descriptor type: " + fitting_type)
-        return super().__new__(cls)
-
-    @property
-    def precision(self) -> tf.DType:
-        """Precision of fitting network."""
-        return self.fitting_precision
-
-    def init_variables(
-        self,
-        graph: tf.Graph,
-        graph_def: tf.GraphDef,
-        suffix: str = "",
-    ) -> None:
-        """Init the fitting net variables with the given dict.
-
-        Parameters
-        ----------
-        graph : tf.Graph
-            The input frozen model graph
-        graph_def : tf.GraphDef
-            The input frozen model graph_def
-        suffix : str
-            suffix to name scope
-
-        Notes
-        -----
-        This method is called by others when the fitting supported initialization from the given variables.
-        """
-        raise NotImplementedError(
-            "Fitting %s doesn't support initialization from the given variables!"
-            % type(self).__name__
-        )
-
-    @abstractmethod
-    def get_loss(self, loss: dict, lr) -> Loss:
-        """Get the loss function.
-
-        Parameters
-        ----------
-        loss : dict
-            the loss dict
-        lr : LearningRateExp
-            the learning rate
-
-        Returns
-        -------
-        Loss
-            the loss function
-        """
diff --git a/deepmd/infer/__init__.py b/deepmd/infer/__init__.py
index c1071af35c..5678494023 100644
--- a/deepmd/infer/__init__.py
+++ b/deepmd/infer/__init__.py
@@ -1,146 +1,35 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Submodule containing all the implemented potentials."""
-
-from pathlib import (
-    Path,
-)
-from typing import (
-    Optional,
-    Union,
-)
-
-from .data_modifier import (
-    DipoleChargeModifier,
-)
-from .deep_dipole import (
-    DeepDipole,
-)
-from .deep_dos import (
-    DeepDOS,
-)
 from .deep_eval import (
     DeepEval,
 )
-from .deep_polar import (
-    DeepGlobalPolar,
-    DeepPolar,
-)
 from .deep_pot import (
     DeepPot,
 )
-from .deep_wfc import (
-    DeepWFC,
-)
-from .ewald_recp import (
-    EwaldRecp,
-)
 from .model_devi import (
     calc_model_devi,
 )
 
 __all__ = [
-    "DeepPotential",
-    "DeepDipole",
-    "DeepEval",
-    "DeepGlobalPolar",
-    "DeepPolar",
     "DeepPot",
-    "DeepDOS",
-    "DeepWFC",
-    "DipoleChargeModifier",
-    "EwaldRecp",
     "calc_model_devi",
+    "DeepEval",
+    "DeepPotential",
 ]
 
 
-def DeepPotential(
-    model_file: Union[str, Path],
-    load_prefix: str = "load",
-    default_tf_graph: bool = False,
-    input_map: Optional[dict] = None,
-    neighbor_list=None,
-) -> Union[DeepDipole, DeepGlobalPolar, DeepPolar, DeepPot, DeepDOS, DeepWFC]:
-    """Factory function that will inialize appropriate potential read from `model_file`.
+def DeepPotential(*args, **kwargs) -> "DeepEval":
+    """Factory function that forwards to DeepEval (for compatbility).
 
     Parameters
     ----------
-    model_file : str
-        The name of the frozen model file.
-    load_prefix : str
-        The prefix in the load computational graph
-    default_tf_graph : bool
-        If uses the default tf graph, otherwise build a new tf graph for evaluation
-    input_map : dict, optional
-        The input map for tf.import_graph_def. Only work with default tf graph
-    neighbor_list : ase.neighborlist.NeighborList, optional
-        The neighbor list object. If None, then build the native neighbor list.
+    *args
+        positional arguments
+    **kwargs
+        keyword arguments
 
     Returns
     -------
-    Union[DeepDipole, DeepGlobalPolar, DeepPolar, DeepPot, DeepWFC]
-        one of the available potentials
-
-    Raises
-    ------
-    RuntimeError
-        if model file does not correspond to any implementd potential
+    DeepEval
+        potentials
     """
-    mf = Path(model_file)
-
-    model_type = DeepEval(
-        mf,
-        load_prefix=load_prefix,
-        default_tf_graph=default_tf_graph,
-        input_map=input_map,
-    ).model_type
-
-    if model_type == "ener":
-        dp = DeepPot(
-            mf,
-            load_prefix=load_prefix,
-            default_tf_graph=default_tf_graph,
-            input_map=input_map,
-            neighbor_list=neighbor_list,
-        )
-    elif model_type == "dos":
-        dp = DeepDOS(
-            mf,
-            load_prefix=load_prefix,
-            default_tf_graph=default_tf_graph,
-            input_map=input_map,
-        )
-    elif model_type == "dipole":
-        dp = DeepDipole(
-            mf,
-            load_prefix=load_prefix,
-            default_tf_graph=default_tf_graph,
-            input_map=input_map,
-            neighbor_list=neighbor_list,
-        )
-    elif model_type == "polar":
-        dp = DeepPolar(
-            mf,
-            load_prefix=load_prefix,
-            default_tf_graph=default_tf_graph,
-            input_map=input_map,
-            neighbor_list=neighbor_list,
-        )
-    elif model_type == "global_polar":
-        dp = DeepGlobalPolar(
-            mf,
-            load_prefix=load_prefix,
-            default_tf_graph=default_tf_graph,
-            input_map=input_map,
-            neighbor_list=neighbor_list,
-        )
-    elif model_type == "wfc":
-        dp = DeepWFC(
-            mf,
-            load_prefix=load_prefix,
-            default_tf_graph=default_tf_graph,
-            input_map=input_map,
-        )
-    else:
-        raise RuntimeError(f"unknown model type {model_type}")
-
-    return dp
+    return DeepEval(*args, **kwargs)
diff --git a/deepmd/infer/deep_dipole.py b/deepmd/infer/deep_dipole.py
index aba098a9f3..b443b54417 100644
--- a/deepmd/infer/deep_dipole.py
+++ b/deepmd/infer/deep_dipole.py
@@ -1,73 +1,28 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-from typing import (
-    TYPE_CHECKING,
-    Optional,
-)
-
 from deepmd.infer.deep_tensor import (
     DeepTensor,
 )
 
-if TYPE_CHECKING:
-    from pathlib import (
-        Path,
-    )
-
 
 class DeepDipole(DeepTensor):
-    """Constructor.
+    """Deep dipole model.
 
     Parameters
     ----------
     model_file : Path
         The name of the frozen model file.
-    load_prefix: str
-        The prefix in the load computational graph
-    default_tf_graph : bool
-        If uses the default tf graph, otherwise build a new tf graph for evaluation
-    input_map : dict, optional
-        The input map for tf.import_graph_def. Only work with default tf graph
-    neighbor_list : ase.neighborlist.NeighborList, optional
-        The neighbor list object. If None, then build the native neighbor list.
-
-    Warnings
-    --------
-    For developers: `DeepTensor` initializer must be called at the end after
-    `self.tensors` are modified because it uses the data in `self.tensors` dict.
-    Do not chanage the order!
+    *args : list
+        Positional arguments.
+    auto_batch_size : bool or int or AutoBatchSize, default: True
+        If True, automatic batch size will be used. If int, it will be used
+        as the initial batch size.
+    neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional
+        The ASE neighbor list class to produce the neighbor list. If None, the
+        neighbor list will be built natively in the model.
+    **kwargs : dict
+        Keyword arguments.
     """
 
-    def __init__(
-        self,
-        model_file: "Path",
-        load_prefix: str = "load",
-        default_tf_graph: bool = False,
-        input_map: Optional[dict] = None,
-        neighbor_list=None,
-    ) -> None:
-        # use this in favor of dict update to move attribute from class to
-        # instance namespace
-        self.tensors = dict(
-            {
-                # output tensor
-                "t_tensor": "o_dipole:0",
-            },
-            **self.tensors,
-        )
-
-        DeepTensor.__init__(
-            self,
-            model_file,
-            load_prefix=load_prefix,
-            default_tf_graph=default_tf_graph,
-            input_map=input_map,
-            neighbor_list=neighbor_list,
-        )
-
-    def get_dim_fparam(self) -> int:
-        """Unsupported in this model."""
-        raise NotImplementedError("This model type does not support this attribute")
-
-    def get_dim_aparam(self) -> int:
-        """Unsupported in this model."""
-        raise NotImplementedError("This model type does not support this attribute")
+    @property
+    def output_tensor_name(self) -> str:
+        return "dipole"
diff --git a/deepmd/infer/deep_dos.py b/deepmd/infer/deep_dos.py
index 5f181bd336..7823f02999 100644
--- a/deepmd/infer/deep_dos.py
+++ b/deepmd/infer/deep_dos.py
@@ -1,8 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-import logging
 from typing import (
-    TYPE_CHECKING,
-    Callable,
+    Any,
+    Dict,
     List,
     Optional,
     Tuple,
@@ -11,496 +10,140 @@
 
 import numpy as np
 
-from deepmd.common import (
-    make_default_mesh,
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+    ModelOutputDef,
+    OutputVariableDef,
 )
-from deepmd.infer.deep_eval import (
+
+from .deep_eval import (
     DeepEval,
 )
-from deepmd.utils.batch_size import (
-    AutoBatchSize,
-)
-from deepmd.utils.sess import (
-    run_sess,
-)
-
-if TYPE_CHECKING:
-    from pathlib import (
-        Path,
-    )
-
-log = logging.getLogger(__name__)
 
 
 class DeepDOS(DeepEval):
-    """Constructor.
+    """Deep density of states model.
 
     Parameters
     ----------
     model_file : Path
         The name of the frozen model file.
-    load_prefix: str
-        The prefix in the load computational graph
-    default_tf_graph : bool
-        If uses the default tf graph, otherwise build a new tf graph for evaluation
-    auto_batch_size : bool or int or AutomaticBatchSize, default: True
+    *args : list
+        Positional arguments.
+    auto_batch_size : bool or int or AutoBatchSize, default: True
         If True, automatic batch size will be used. If int, it will be used
         as the initial batch size.
-    input_map : dict, optional
-        The input map for tf.import_graph_def. Only work with default tf graph
-
-    Warnings
-    --------
-    For developers: `DeepTensor` initializer must be called at the end after
-    `self.tensors` are modified because it uses the data in `self.tensors` dict.
-    Do not chanage the order!
+    neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional
+        The ASE neighbor list class to produce the neighbor list. If None, the
+        neighbor list will be built natively in the model.
+    **kwargs : dict
+        Keyword arguments.
     """
 
-    def __init__(
-        self,
-        model_file: "Path",
-        load_prefix: str = "load",
-        default_tf_graph: bool = False,
-        auto_batch_size: Union[bool, int, AutoBatchSize] = True,
-        input_map: Optional[dict] = None,
-    ) -> None:
-        # add these tensors on top of what is defined by DeepTensor Class
-        # use this in favor of dict update to move attribute from class to
-        # instance namespace
-        self.tensors = {
-            # descrpt attrs
-            "t_ntypes": "descrpt_attr/ntypes:0",
-            "t_rcut": "descrpt_attr/rcut:0",
-            # fitting attrs
-            "t_dfparam": "fitting_attr/dfparam:0",
-            "t_daparam": "fitting_attr/daparam:0",
-            "t_numb_dos": "fitting_attr/numb_dos:0",
-            # model attrs
-            "t_tmap": "model_attr/tmap:0",
-            # inputs
-            "t_coord": "t_coord:0",
-            "t_type": "t_type:0",
-            "t_natoms": "t_natoms:0",
-            "t_box": "t_box:0",
-            "t_mesh": "t_mesh:0",
-            # add output tensors
-            "t_dos": "o_dos:0",
-            "t_atom_dos": "o_atom_dos:0",
-            "t_descriptor": "o_descriptor:0",
-        }
-        DeepEval.__init__(
-            self,
-            model_file,
-            load_prefix=load_prefix,
-            default_tf_graph=default_tf_graph,
-            auto_batch_size=auto_batch_size,
-            input_map=input_map,
-        )
-
-        # load optional tensors
-        operations = [op.name for op in self.graph.get_operations()]
-        # check if the graph has these operations:
-        # if yes add them
-        if "load/t_fparam" in operations:
-            self.tensors.update({"t_fparam": "t_fparam:0"})
-            self.has_fparam = True
-        else:
-            log.debug("Could not get tensor 't_fparam:0'")
-            self.t_fparam = None
-            self.has_fparam = False
-
-        if "load/t_aparam" in operations:
-            self.tensors.update({"t_aparam": "t_aparam:0"})
-            self.has_aparam = True
-        else:
-            log.debug("Could not get tensor 't_aparam:0'")
-            self.t_aparam = None
-            self.has_aparam = False
-
-        # now load tensors to object attributes
-        for attr_name, tensor_name in self.tensors.items():
-            try:
-                self._get_tensor(tensor_name, attr_name)
-            except KeyError:
-                if attr_name != "t_descriptor":
-                    raise
-
-        self._run_default_sess()
-        self.tmap = self.tmap.decode("UTF-8").split()
-
-        # setup modifier
-        try:
-            t_modifier_type = self._get_tensor("modifier_attr/type:0")
-            self.modifier_type = run_sess(self.sess, t_modifier_type).decode("UTF-8")
-        except (ValueError, KeyError):
-            self.modifier_type = None
-
-    def _run_default_sess(self):
-        [
-            self.ntypes,
-            self.rcut,
-            self.numb_dos,
-            self.dfparam,
-            self.daparam,
-            self.tmap,
-        ] = run_sess(
-            self.sess,
-            [
-                self.t_ntypes,
-                self.t_rcut,
-                self.t_numb_dos,
-                self.t_dfparam,
-                self.t_daparam,
-                self.t_tmap,
-            ],
+    @property
+    def output_def(self) -> ModelOutputDef:
+        """Get the output definition of this model."""
+        return ModelOutputDef(
+            FittingOutputDef(
+                [
+                    OutputVariableDef(
+                        "dos",
+                        shape=[-1],
+                        reduciable=True,
+                        atomic=True,
+                    ),
+                ]
+            )
         )
 
-    def get_ntypes(self) -> int:
-        """Get the number of atom types of this model."""
-        return self.ntypes
-
-    def get_rcut(self) -> float:
-        """Get the cut-off radius of this model."""
-        return self.rcut
-
-    def get_numb_dos(self) -> int:
-        """Get the length of DOS output of this DP model."""
-        return self.numb_dos
-
-    def get_type_map(self) -> List[str]:
-        """Get the type map (element name of the atom types) of this model."""
-        return self.tmap
-
-    def get_sel_type(self) -> List[int]:
-        """Unsupported in this model."""
-        raise NotImplementedError("This model type does not support this attribute")
-
-    def get_dim_fparam(self) -> int:
-        """Get the number (dimension) of frame parameters of this DP."""
-        return self.dfparam
-
-    def get_dim_aparam(self) -> int:
-        """Get the number (dimension) of atomic parameters of this DP."""
-        return self.daparam
-
-    def _eval_func(self, inner_func: Callable, numb_test: int, natoms: int) -> Callable:
-        """Wrapper method with auto batch size.
-
-        Parameters
-        ----------
-        inner_func : Callable
-            the method to be wrapped
-        numb_test : int
-            number of tests
-        natoms : int
-            number of atoms
-
-        Returns
-        -------
-        Callable
-            the wrapper
-        """
-        if self.auto_batch_size is not None:
-
-            def eval_func(*args, **kwargs):
-                return self.auto_batch_size.execute_all(
-                    inner_func, numb_test, natoms, *args, **kwargs
-                )
-
-        else:
-            eval_func = inner_func
-        return eval_func
-
-    def _get_natoms_and_nframes(
-        self,
-        coords: np.ndarray,
-        atom_types: Union[List[int], np.ndarray],
-        mixed_type: bool = False,
-    ) -> Tuple[int, int]:
-        if mixed_type:
-            natoms = len(atom_types[0])
-        else:
-            natoms = len(atom_types)
-        coords = np.reshape(np.array(coords), [-1, natoms * 3])
-        nframes = coords.shape[0]
-        return natoms, nframes
+    @property
+    def numb_dos(self) -> int:
+        """Get the number of DOS."""
+        return self.get_numb_dos()
 
     def eval(
         self,
         coords: np.ndarray,
-        cells: np.ndarray,
-        atom_types: List[int],
+        cells: Optional[np.ndarray],
+        atom_types: Union[List[int], np.ndarray],
         atomic: bool = False,
         fparam: Optional[np.ndarray] = None,
         aparam: Optional[np.ndarray] = None,
         mixed_type: bool = False,
+        **kwargs: Dict[str, Any],
     ) -> Tuple[np.ndarray, ...]:
-        """Evaluate the dos, atom_dos by using this model.
+        """Evaluate energy, force, and virial. If atomic is True,
+        also return atomic energy and atomic virial.
 
         Parameters
         ----------
-        coords
-            The coordinates of atoms.
-            The array should be of size nframes x natoms x 3
-        cells
-            The cell of the region.
-            If None then non-PBC is assumed, otherwise using PBC.
-            The array should be of size nframes x 9
-        atom_types
-            The atom types
-            The list should contain natoms ints
-        atomic
-            Calculate the atomic energy and virial
-        fparam
-            The frame parameter.
-            The array can be of size :
-            - nframes x dim_fparam.
-            - dim_fparam. Then all frames are assumed to be provided with the same fparam.
-        aparam
-            The atomic parameter
-            The array can be of size :
-            - nframes x natoms x dim_aparam.
-            - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
-            - dim_aparam. Then all frames and atoms are provided with the same aparam.
-        mixed_type
-            Whether to perform the mixed_type mode.
-            If True, the input data has the mixed_type format (see doc/model/train_se_atten.md),
-            in which frames in a system may have different natoms_vec(s), with the same nloc.
+        coords : np.ndarray
+            The coordinates of the atoms, in shape (nframes, natoms, 3).
+        cells : np.ndarray
+            The cell vectors of the system, in shape (nframes, 9). If the system
+            is not periodic, set it to None.
+        atom_types : List[int] or np.ndarray
+            The types of the atoms. If mixed_type is False, the shape is (natoms,);
+            otherwise, the shape is (nframes, natoms).
+        atomic : bool, optional
+            Whether to return atomic energy and atomic virial, by default False.
+        fparam : np.ndarray, optional
+            The frame parameters, by default None.
+        aparam : np.ndarray, optional
+            The atomic parameters, by default None.
+        mixed_type : bool, optional
+            Whether the atom_types is mixed type, by default False.
+        **kwargs : Dict[str, Any]
+            Keyword arguments.
 
         Returns
         -------
-        dos
-            The electron density of state.
-        atom_dos
-            The atom-sited density of state. Only returned when atomic == True
+        energy
+            The energy of the system, in shape (nframes,).
+        force
+            The force of the system, in shape (nframes, natoms, 3).
+        virial
+            The virial of the system, in shape (nframes, 9).
+        atomic_energy
+            The atomic energy of the system, in shape (nframes, natoms). Only returned
+            when atomic is True.
+        atomic_virial
+            The atomic virial of the system, in shape (nframes, natoms, 9). Only returned
+            when atomic is True.
         """
-        # reshape coords before getting shape
-        natoms, numb_test = self._get_natoms_and_nframes(
-            coords, atom_types, mixed_type=mixed_type
-        )
-        output = self._eval_func(self._eval_inner, numb_test, natoms)(
+        (
             coords,
             cells,
             atom_types,
+            fparam,
+            aparam,
+            nframes,
+            natoms,
+        ) = self._standard_input(coords, cells, atom_types, fparam, aparam, mixed_type)
+        results = self.deep_eval.eval(
+            coords,
+            cells,
+            atom_types,
+            atomic,
             fparam=fparam,
             aparam=aparam,
-            atomic=atomic,
-            mixed_type=mixed_type,
-        )
-
-        return output
-
-    def _prepare_feed_dict(
-        self,
-        coords,
-        cells,
-        atom_types,
-        fparam=None,
-        aparam=None,
-        atomic=False,
-        mixed_type=False,
-    ):
-        # standarize the shape of inputs
-        natoms, nframes = self._get_natoms_and_nframes(
-            coords, atom_types, mixed_type=mixed_type
-        )
-        if mixed_type:
-            atom_types = np.array(atom_types, dtype=int).reshape([-1, natoms])
-        else:
-            atom_types = np.array(atom_types, dtype=int).reshape([-1])
-        coords = np.reshape(np.array(coords), [-1, natoms * 3])
-        if cells is None:
-            pbc = False
-            # make cells to work around the requirement of pbc
-            cells = np.tile(np.eye(3), [nframes, 1]).reshape([nframes, 9])
-        else:
-            pbc = True
-            cells = np.array(cells).reshape([nframes, 9])
-
-        if self.has_fparam:
-            assert fparam is not None
-            fparam = np.array(fparam)
-        if self.has_aparam:
-            assert aparam is not None
-            aparam = np.array(aparam)
-
-        # reshape the inputs
-        if self.has_fparam:
-            fdim = self.get_dim_fparam()
-            if fparam.size == nframes * fdim:
-                fparam = np.reshape(fparam, [nframes, fdim])
-            elif fparam.size == fdim:
-                fparam = np.tile(fparam.reshape([-1]), [nframes, 1])
-            else:
-                raise RuntimeError(
-                    "got wrong size of frame param, should be either %d x %d or %d"
-                    % (nframes, fdim, fdim)
-                )
-        if self.has_aparam:
-            fdim = self.get_dim_aparam()
-            if aparam.size == nframes * natoms * fdim:
-                aparam = np.reshape(aparam, [nframes, natoms * fdim])
-            elif aparam.size == natoms * fdim:
-                aparam = np.tile(aparam.reshape([-1]), [nframes, 1])
-            elif aparam.size == fdim:
-                aparam = np.tile(aparam.reshape([-1]), [nframes, natoms])
-            else:
-                raise RuntimeError(
-                    "got wrong size of frame param, should be either %d x %d x %d or %d x %d or %d"
-                    % (nframes, natoms, fdim, natoms, fdim, fdim)
-                )
-
-        # sort inputs
-        coords, atom_types, imap = self.sort_input(
-            coords, atom_types, mixed_type=mixed_type
+            **kwargs,
         )
+        # energy = results["dos_redu"].reshape(nframes, self.get_numb_dos())
+        atomic_energy = results["dos"].reshape(nframes, natoms, self.get_numb_dos())
+        # not same as dos_redu... why?
+        energy = np.sum(atomic_energy, axis=1)
 
-        # make natoms_vec and default_mesh
-        natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type)
-        assert natoms_vec[0] == natoms
-
-        # evaluate
-        feed_dict_test = {}
-        feed_dict_test[self.t_natoms] = natoms_vec
-        if mixed_type:
-            feed_dict_test[self.t_type] = atom_types.reshape([-1])
-        else:
-            feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape(
-                [-1]
-            )
-        feed_dict_test[self.t_coord] = np.reshape(coords, [-1])
-
-        if len(self.t_box.shape) == 1:
-            feed_dict_test[self.t_box] = np.reshape(cells, [-1])
-        elif len(self.t_box.shape) == 2:
-            feed_dict_test[self.t_box] = cells
-        else:
-            raise RuntimeError
-        feed_dict_test[self.t_mesh] = make_default_mesh(pbc, mixed_type)
-        if self.has_fparam:
-            feed_dict_test[self.t_fparam] = np.reshape(fparam, [-1])
-        if self.has_aparam:
-            feed_dict_test[self.t_aparam] = np.reshape(aparam, [-1])
-        return feed_dict_test, imap
-
-    def _eval_inner(
-        self,
-        coords,
-        cells,
-        atom_types,
-        fparam=None,
-        aparam=None,
-        atomic=False,
-        mixed_type=False,
-    ):
-        natoms, nframes = self._get_natoms_and_nframes(
-            coords, atom_types, mixed_type=mixed_type
-        )
-        feed_dict_test, imap = self._prepare_feed_dict(
-            coords, cells, atom_types, fparam, aparam, mixed_type=mixed_type
-        )
-        t_out = [self.t_dos]
-        if atomic:
-            t_out += [self.t_atom_dos]
-
-        v_out = run_sess(self.sess, t_out, feed_dict=feed_dict_test)
-        dos = v_out[0]
-        if atomic:
-            atom_dos = v_out[1]
-
-        # reverse map of the outputs
         if atomic:
-            atom_dos = self.reverse_map(
-                np.reshape(atom_dos, [nframes, -1, self.numb_dos]), imap
+            return (
+                energy,
+                atomic_energy,
             )
-            dos = np.sum(atom_dos, axis=1)
-
-        dos = np.reshape(dos, [nframes, self.numb_dos])
-        if atomic:
-            atom_dos = np.reshape(atom_dos, [nframes, natoms, self.numb_dos])
-            return dos, atom_dos
         else:
-            return dos
+            return (energy,)
 
-    def eval_descriptor(
-        self,
-        coords: np.ndarray,
-        cells: np.ndarray,
-        atom_types: List[int],
-        fparam: Optional[np.ndarray] = None,
-        aparam: Optional[np.ndarray] = None,
-        efield: Optional[np.ndarray] = None,
-        mixed_type: bool = False,
-    ) -> np.array:
-        """Evaluate descriptors by using this DP.
-
-        Parameters
-        ----------
-        coords
-            The coordinates of atoms.
-            The array should be of size nframes x natoms x 3
-        cells
-            The cell of the region.
-            If None then non-PBC is assumed, otherwise using PBC.
-            The array should be of size nframes x 9
-        atom_types
-            The atom types
-            The list should contain natoms ints
-        fparam
-            The frame parameter.
-            The array can be of size :
-            - nframes x dim_fparam.
-            - dim_fparam. Then all frames are assumed to be provided with the same fparam.
-        aparam
-            The atomic parameter
-            The array can be of size :
-            - nframes x natoms x dim_aparam.
-            - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
-            - dim_aparam. Then all frames and atoms are provided with the same aparam.
-        efield
-            The external field on atoms.
-            The array should be of size nframes x natoms x 3
-        mixed_type
-            Whether to perform the mixed_type mode.
-            If True, the input data has the mixed_type format (see doc/model/train_se_atten.md),
-            in which frames in a system may have different natoms_vec(s), with the same nloc.
+    def get_numb_dos(self) -> int:
+        return self.deep_eval.get_numb_dos()
 
-        Returns
-        -------
-        descriptor
-            Descriptors.
-        """
-        natoms, numb_test = self._get_natoms_and_nframes(
-            coords, atom_types, mixed_type=mixed_type
-        )
-        descriptor = self._eval_func(self._eval_descriptor_inner, numb_test, natoms)(
-            coords,
-            cells,
-            atom_types,
-            fparam=fparam,
-            aparam=aparam,
-            efield=efield,
-            mixed_type=mixed_type,
-        )
-        return descriptor
 
-    def _eval_descriptor_inner(
-        self,
-        coords: np.ndarray,
-        cells: np.ndarray,
-        atom_types: List[int],
-        fparam: Optional[np.ndarray] = None,
-        aparam: Optional[np.ndarray] = None,
-        efield: Optional[np.ndarray] = None,
-        mixed_type: bool = False,
-    ) -> np.array:
-        natoms, nframes = self._get_natoms_and_nframes(
-            coords, atom_types, mixed_type=mixed_type
-        )
-        feed_dict_test, imap = self._prepare_feed_dict(
-            coords, cells, atom_types, fparam, aparam, efield, mixed_type=mixed_type
-        )
-        (descriptor,) = run_sess(
-            self.sess, [self.t_descriptor], feed_dict=feed_dict_test
-        )
-        return self.reverse_map(np.reshape(descriptor, [nframes, natoms, -1]), imap)
+__all__ = ["DeepDOS"]
diff --git a/deepmd/infer/deep_eval.py b/deepmd/infer/deep_eval.py
index 0ca9f21a77..aae2082e13 100644
--- a/deepmd/infer/deep_eval.py
+++ b/deepmd/infer/deep_eval.py
@@ -1,235 +1,207 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-from functools import (
-    lru_cache,
+from abc import (
+    ABC,
+    abstractmethod,
 )
 from typing import (
     TYPE_CHECKING,
+    Any,
+    ClassVar,
+    Dict,
     List,
     Optional,
+    Tuple,
     Union,
 )
 
 import numpy as np
 
-from deepmd.env import (
-    MODEL_VERSION,
-    default_tf_session_config,
-    tf,
+from deepmd.backend.backend import (
+    Backend,
+)
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+    ModelOutputDef,
 )
 from deepmd.utils.batch_size import (
     AutoBatchSize,
 )
-from deepmd.utils.sess import (
-    run_sess,
-)
 
 if TYPE_CHECKING:
-    from pathlib import (
-        Path,
-    )
+    import ase.neighborlist
+
 
+class DeepEvalBackend(ABC):
+    """Low-level Deep Evaluator interface.
 
-class DeepEval:
-    """Common methods for DeepPot, DeepWFC, DeepPolar, ...
+    Backends should inherbit implement this interface. High-level interface
+    will be built on top of this.
 
     Parameters
     ----------
     model_file : Path
         The name of the frozen model file.
-    load_prefix: str
-        The prefix in the load computational graph
-    default_tf_graph : bool
-        If uses the default tf graph, otherwise build a new tf graph for evaluation
-    auto_batch_size : bool or int or AutomaticBatchSize, default: False
+    *args : list
+        Positional arguments.
+    auto_batch_size : bool or int or AutoBatchSize, default: True
         If True, automatic batch size will be used. If int, it will be used
         as the initial batch size.
-    input_map : dict, optional
-        The input map for tf.import_graph_def. Only work with default tf graph
     neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional
         The ASE neighbor list class to produce the neighbor list. If None, the
         neighbor list will be built natively in the model.
+    **kwargs : dict
+        Keyword arguments.
     """
 
-    load_prefix: str  # set by subclass
-
+    _OUTDEF_DP2BACKEND: ClassVar[dict] = {
+        "energy": "atom_energy",
+        "energy_redu": "energy",
+        "energy_derv_r": "force",
+        "energy_derv_r_mag": "force_mag",
+        "energy_derv_c": "atom_virial",
+        "energy_derv_c_mag": "atom_virial_mag",
+        "energy_derv_c_redu": "virial",
+        "polar": "polar",
+        "polar_redu": "global_polar",
+        "polar_derv_r": "force",
+        "polar_derv_c": "atom_virial",
+        "polar_derv_c_redu": "virial",
+        "dipole": "dipole",
+        "dipole_redu": "global_dipole",
+        "dipole_derv_r": "force",
+        "dipole_derv_c": "atom_virial",
+        "dipole_derv_c_redu": "virial",
+        "dos": "atom_dos",
+        "dos_redu": "dos",
+        "mask_mag": "mask_mag",
+        "mask": "mask",
+    }
+
+    @abstractmethod
     def __init__(
         self,
-        model_file: "Path",
-        load_prefix: str = "load",
-        default_tf_graph: bool = False,
-        auto_batch_size: Union[bool, int, AutoBatchSize] = False,
-        input_map: Optional[dict] = None,
-        neighbor_list=None,
-    ):
-        self.graph = self._load_graph(
-            model_file,
-            prefix=load_prefix,
-            default_tf_graph=default_tf_graph,
-            input_map=input_map,
-        )
-        self.load_prefix = load_prefix
-
-        # graph_compatable should be called after graph and prefix are set
-        if not self._graph_compatable():
-            raise RuntimeError(
-                f"model in graph (version {self.model_version}) is incompatible"
-                f"with the model (version {MODEL_VERSION}) supported by the current code."
-                "See https://deepmd.rtfd.io/compatability/ for details."
-            )
-
-        # set default to False, as subclasses may not support
-        if isinstance(auto_batch_size, bool):
-            if auto_batch_size:
-                self.auto_batch_size = AutoBatchSize()
-            else:
-                self.auto_batch_size = None
-        elif isinstance(auto_batch_size, int):
-            self.auto_batch_size = AutoBatchSize(auto_batch_size)
-        elif isinstance(auto_batch_size, AutoBatchSize):
-            self.auto_batch_size = auto_batch_size
-        else:
-            raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize")
-
-        self.neighbor_list = neighbor_list
-
-    @property
-    @lru_cache(maxsize=None)
-    def model_type(self) -> str:
-        """Get type of model.
-
-        :type:str
-        """
-        t_mt = self._get_tensor("model_attr/model_type:0")
-        [mt] = run_sess(self.sess, [t_mt], feed_dict={})
-        return mt.decode("utf-8")
+        model_file: str,
+        output_def: ModelOutputDef,
+        *args: List[Any],
+        auto_batch_size: Union[bool, int, AutoBatchSize] = True,
+        neighbor_list: Optional["ase.neighborlist.NewPrimitiveNeighborList"] = None,
+        **kwargs: Dict[str, Any],
+    ) -> None:
+        pass
+
+    def __new__(cls, model_file: str, *args, **kwargs):
+        if cls is DeepEvalBackend:
+            backend = Backend.detect_backend_by_model(model_file)
+            return super().__new__(backend().deep_eval)
+        return super().__new__(cls)
+
+    @abstractmethod
+    def eval(
+        self,
+        coords: np.ndarray,
+        cells: np.ndarray,
+        atom_types: np.ndarray,
+        atomic: bool = False,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+        **kwargs: Dict[str, Any],
+    ) -> Dict[str, np.ndarray]:
+        """Evaluate the energy, force and virial by using this DP.
 
-    @property
-    @lru_cache(maxsize=None)
-    def model_version(self) -> str:
-        """Get version of model.
+        Parameters
+        ----------
+        coords
+            The coordinates of atoms.
+            The array should be of size nframes x natoms x 3
+        cells
+            The cell of the region.
+            If None then non-PBC is assumed, otherwise using PBC.
+            The array should be of size nframes x 9
+        atom_types
+            The atom types
+            The list should contain natoms ints
+        atomic
+            Calculate the atomic energy and virial
+        fparam
+            The frame parameter.
+            The array can be of size :
+            - nframes x dim_fparam.
+            - dim_fparam. Then all frames are assumed to be provided with the same fparam.
+        aparam
+            The atomic parameter
+            The array can be of size :
+            - nframes x natoms x dim_aparam.
+            - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
+            - dim_aparam. Then all frames and atoms are provided with the same aparam.
+        **kwargs
+            Other parameters
 
         Returns
         -------
-        str
-            version of model
+        output_dict : dict
+            The output of the evaluation. The keys are the names of the output
+            variables, and the values are the corresponding output arrays.
         """
-        try:
-            t_mt = self._get_tensor("model_attr/model_version:0")
-        except KeyError:
-            # For deepmd-kit version 0.x - 1.x, set model version to 0.0
-            return "0.0"
-        else:
-            [mt] = run_sess(self.sess, [t_mt], feed_dict={})
-            return mt.decode("utf-8")
 
-    @property
-    @lru_cache(maxsize=None)
-    def sess(self) -> tf.Session:
-        """Get TF session."""
-        # start a tf session associated to the graph
-        return tf.Session(graph=self.graph, config=default_tf_session_config)
+    @abstractmethod
+    def get_rcut(self) -> float:
+        """Get the cutoff radius of this model."""
 
-    def _graph_compatable(self) -> bool:
-        """Check the model compatability.
+    @abstractmethod
+    def get_ntypes(self) -> int:
+        """Get the number of atom types of this model."""
 
-        Returns
-        -------
-        bool
-            If the model stored in the graph file is compatable with the current code
-        """
-        model_version_major = int(self.model_version.split(".")[0])
-        model_version_minor = int(self.model_version.split(".")[1])
-        MODEL_VERSION_MAJOR = int(MODEL_VERSION.split(".")[0])
-        MODEL_VERSION_MINOR = int(MODEL_VERSION.split(".")[1])
-        if (model_version_major != MODEL_VERSION_MAJOR) or (
-            model_version_minor > MODEL_VERSION_MINOR
-        ):
-            return False
-        else:
-            return True
+    @abstractmethod
+    def get_type_map(self) -> List[str]:
+        """Get the type map (element name of the atom types) of this model."""
 
-    def _get_tensor(
-        self, tensor_name: str, attr_name: Optional[str] = None
-    ) -> tf.Tensor:
-        """Get TF graph tensor and assign it to class namespace.
+    @abstractmethod
+    def get_dim_fparam(self) -> int:
+        """Get the number (dimension) of frame parameters of this DP."""
 
-        Parameters
-        ----------
-        tensor_name : str
-            name of tensor to get
-        attr_name : Optional[str], optional
-            if specified, class attribute with this name will be created and tensor will
-            be assigned to it, by default None
+    @abstractmethod
+    def get_dim_aparam(self) -> int:
+        """Get the number (dimension) of atomic parameters of this DP."""
 
-        Returns
-        -------
-        tf.Tensor
-            loaded tensor
-        """
-        # do not use os.path.join as it doesn't work on Windows
-        tensor_path = "/".join((self.load_prefix, tensor_name))
-        tensor = self.graph.get_tensor_by_name(tensor_path)
-        if attr_name:
-            setattr(self, attr_name, tensor)
-            return tensor
-        else:
-            return tensor
-
-    @staticmethod
-    def _load_graph(
-        frozen_graph_filename: "Path",
-        prefix: str = "load",
-        default_tf_graph: bool = False,
-        input_map: Optional[dict] = None,
-    ):
-        # We load the protobuf file from the disk and parse it to retrieve the
-        # unserialized graph_def
-        with tf.gfile.GFile(str(frozen_graph_filename), "rb") as f:
-            graph_def = tf.GraphDef()
-            graph_def.ParseFromString(f.read())
-
-            if default_tf_graph:
-                tf.import_graph_def(
-                    graph_def,
-                    input_map=input_map,
-                    return_elements=None,
-                    name=prefix,
-                    producer_op_list=None,
-                )
-                graph = tf.get_default_graph()
-            else:
-                # Then, we can use again a convenient built-in function to import
-                # a graph_def into the  current default Graph
-                with tf.Graph().as_default() as graph:
-                    tf.import_graph_def(
-                        graph_def,
-                        input_map=None,
-                        return_elements=None,
-                        name=prefix,
-                        producer_op_list=None,
-                    )
-
-            return graph
-
-    @staticmethod
-    def sort_input(
-        coord: np.ndarray,
-        atom_type: np.ndarray,
-        sel_atoms: Optional[List[int]] = None,
+    def eval_descriptor(
+        self,
+        coords: np.ndarray,
+        cells: np.ndarray,
+        atom_types: np.ndarray,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+        efield: Optional[np.ndarray] = None,
         mixed_type: bool = False,
-    ):
-        """Sort atoms in the system according their types.
+        **kwargs: Dict[str, Any],
+    ) -> np.ndarray:
+        """Evaluate descriptors by using this DP.
 
         Parameters
         ----------
-        coord
+        coords
             The coordinates of atoms.
-            Should be of shape [nframes, natoms, 3]
-        atom_type
-            The type of atoms
-            Should be of shape [natoms]
-        sel_atoms
-            The selected atoms by type
+            The array should be of size nframes x natoms x 3
+        cells
+            The cell of the region.
+            If None then non-PBC is assumed, otherwise using PBC.
+            The array should be of size nframes x 9
+        atom_types
+            The atom types
+            The list should contain natoms ints
+        fparam
+            The frame parameter.
+            The array can be of size :
+            - nframes x dim_fparam.
+            - dim_fparam. Then all frames are assumed to be provided with the same fparam.
+        aparam
+            The atomic parameter
+            The array can be of size :
+            - nframes x natoms x dim_aparam.
+            - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
+            - dim_aparam. Then all frames and atoms are provided with the same aparam.
+        efield
+            The external field on atoms.
+            The array should be of size nframes x natoms x 3
         mixed_type
             Whether to perform the mixed_type mode.
             If True, the input data has the mixed_type format (see doc/model/train_se_atten.md),
@@ -237,77 +209,214 @@ def sort_input(
 
         Returns
         -------
-        coord_out
-            The coordinates after sorting
-        atom_type_out
-            The atom types after sorting
-        idx_map
-            The index mapping from the input to the output.
-            For example coord_out = coord[:,idx_map,:]
-        sel_atom_type
-            Only output if sel_atoms is not None
-            The sorted selected atom types
-        sel_idx_map
-            Only output if sel_atoms is not None
-            The index mapping from the selected atoms to sorted selected atoms.
+        descriptor
+            Descriptors.
         """
-        if mixed_type:
-            # mixed_type need not to resort
-            natoms = atom_type[0].size
-            idx_map = np.arange(natoms)
-            return coord, atom_type, idx_map
-        if sel_atoms is not None:
-            selection = [False] * np.size(atom_type)
-            for ii in sel_atoms:
-                selection += atom_type == ii
-            sel_atom_type = atom_type[selection]
-        natoms = atom_type.size
-        idx = np.arange(natoms)
-        idx_map = np.lexsort((idx, atom_type))
-        nframes = coord.shape[0]
-        coord = coord.reshape([nframes, -1, 3])
-        coord = np.reshape(coord[:, idx_map, :], [nframes, -1])
-        atom_type = atom_type[idx_map]
-        if sel_atoms is not None:
-            sel_natoms = np.size(sel_atom_type)
-            sel_idx = np.arange(sel_natoms)
-            sel_idx_map = np.lexsort((sel_idx, sel_atom_type))
-            sel_atom_type = sel_atom_type[sel_idx_map]
-            return coord, atom_type, idx_map, sel_atom_type, sel_idx_map
-        else:
-            return coord, atom_type, idx_map
+        raise NotImplementedError
+
+    def eval_typeebd(self) -> np.ndarray:
+        """Evaluate output of type embedding network by using this model.
+
+        Returns
+        -------
+        np.ndarray
+            The output of type embedding network. The shape is [ntypes, o_size],
+            where ntypes is the number of types, and o_size is the number of nodes
+            in the output layer.
+
+        Raises
+        ------
+        KeyError
+            If the model does not enable type embedding.
+        """
+        raise NotImplementedError
+
+    def _check_mixed_types(self, atom_types: np.ndarray) -> bool:
+        """Check if atom types of all frames are the same.
 
-    @staticmethod
-    def reverse_map(vec: np.ndarray, imap: List[int]) -> np.ndarray:
-        """Reverse mapping of a vector according to the index map.
+        Traditional descriptors like se_e2_a requires all the frames to
+        have the same atom types.
 
         Parameters
         ----------
-        vec
-            Input vector. Be of shape [nframes, natoms, -1]
-        imap
-            Index map. Be of shape [natoms]
+        atom_types : np.ndarray
+            The atom types of all frames, in shape nframes * natoms.
+        """
+        if np.count_nonzero(atom_types[0] == -1) > 0:
+            # assume mixed_types if there are virtual types, even when
+            # the atom types of all frames are the same
+            return False
+        return np.all(np.equal(atom_types, atom_types[0]))
 
-        Returns
-        -------
-        vec_out
-            Reverse mapped vector.
+    @property
+    @abstractmethod
+    def model_type(self) -> "DeepEval":
+        """The the evaluator of the model type."""
+
+    @abstractmethod
+    def get_sel_type(self) -> List[int]:
+        """Get the selected atom types of this model.
+
+        Only atoms with selected atom types have atomic contribution
+        to the result of the model.
+        If returning an empty list, all atom types are selected.
         """
-        ret = np.zeros(vec.shape)
-        # for idx,ii in enumerate(imap) :
-        #     ret[:,ii,:] = vec[:,idx,:]
-        ret[:, imap, :] = vec
-        return ret
-
-    def make_natoms_vec(
-        self, atom_types: np.ndarray, mixed_type: bool = False
+
+    def get_numb_dos(self) -> int:
+        """Get the number of DOS."""
+        raise NotImplementedError
+
+    def get_has_efield(self):
+        """Check if the model has efield."""
+        return False
+
+    def get_has_spin(self):
+        """Check if the model has spin atom types."""
+        return False
+
+    @abstractmethod
+    def get_ntypes_spin(self) -> int:
+        """Get the number of spin atom types of this model. Only used in old implement."""
+
+
+class DeepEval(ABC):
+    """High-level Deep Evaluator interface.
+
+    The specific DeepEval, such as DeepPot and DeepTensor, should inherit
+    from this class. This class provides a high-level interface on the top
+    of the low-level interface.
+
+    Parameters
+    ----------
+    model_file : Path
+        The name of the frozen model file.
+    *args : list
+        Positional arguments.
+    auto_batch_size : bool or int or AutoBatchSize, default: True
+        If True, automatic batch size will be used. If int, it will be used
+        as the initial batch size.
+    neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional
+        The ASE neighbor list class to produce the neighbor list. If None, the
+        neighbor list will be built natively in the model.
+    **kwargs : dict
+        Keyword arguments.
+    """
+
+    def __new__(cls, model_file: str, *args, **kwargs):
+        if cls is DeepEval:
+            deep_eval = DeepEvalBackend(
+                model_file,
+                ModelOutputDef(FittingOutputDef([])),
+                *args,
+                **kwargs,
+            )
+            return super().__new__(deep_eval.model_type)
+        return super().__new__(cls)
+
+    def __init__(
+        self,
+        model_file: str,
+        *args: List[Any],
+        auto_batch_size: Union[bool, int, AutoBatchSize] = True,
+        neighbor_list: Optional["ase.neighborlist.NewPrimitiveNeighborList"] = None,
+        **kwargs: Dict[str, Any],
+    ) -> None:
+        self.deep_eval = DeepEvalBackend(
+            model_file,
+            self.output_def,
+            *args,
+            auto_batch_size=auto_batch_size,
+            neighbor_list=neighbor_list,
+            **kwargs,
+        )
+        if self.deep_eval.get_has_spin() and hasattr(self, "output_def_mag"):
+            self.deep_eval.output_def = self.output_def_mag
+
+    @property
+    @abstractmethod
+    def output_def(self) -> ModelOutputDef:
+        """Returns the output variable definitions."""
+
+    def get_rcut(self) -> float:
+        """Get the cutoff radius of this model."""
+        return self.deep_eval.get_rcut()
+
+    def get_ntypes(self) -> int:
+        """Get the number of atom types of this model."""
+        return self.deep_eval.get_ntypes()
+
+    def get_type_map(self) -> List[str]:
+        """Get the type map (element name of the atom types) of this model."""
+        return self.deep_eval.get_type_map()
+
+    def get_dim_fparam(self) -> int:
+        """Get the number (dimension) of frame parameters of this DP."""
+        return self.deep_eval.get_dim_fparam()
+
+    def get_dim_aparam(self) -> int:
+        """Get the number (dimension) of atomic parameters of this DP."""
+        return self.deep_eval.get_dim_aparam()
+
+    def _get_natoms_and_nframes(
+        self,
+        coords: np.ndarray,
+        atom_types: np.ndarray,
+        mixed_type: bool = False,
+    ) -> Tuple[int, int]:
+        if mixed_type or atom_types.ndim > 1:
+            natoms = len(atom_types[0])
+        else:
+            natoms = len(atom_types)
+        if natoms == 0:
+            assert coords.size == 0
+        else:
+            coords = np.reshape(np.array(coords), [-1, natoms * 3])
+        nframes = coords.shape[0]
+        return natoms, nframes
+
+    def _expande_atype(self, atype: np.ndarray, nframes: int, mixed_type: bool):
+        if not mixed_type:
+            atype = np.tile(atype.reshape(1, -1), (nframes, 1))
+        return atype
+
+    def eval_descriptor(
+        self,
+        coords: np.ndarray,
+        cells: Optional[np.ndarray],
+        atom_types: np.ndarray,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+        mixed_type: bool = False,
+        **kwargs: Dict[str, Any],
     ) -> np.ndarray:
-        """Make the natom vector used by deepmd-kit.
+        """Evaluate descriptors by using this DP.
 
         Parameters
         ----------
+        coords
+            The coordinates of atoms.
+            The array should be of size nframes x natoms x 3
+        cells
+            The cell of the region.
+            If None then non-PBC is assumed, otherwise using PBC.
+            The array should be of size nframes x 9
         atom_types
-            The type of atoms
+            The atom types
+            The list should contain natoms ints
+        fparam
+            The frame parameter.
+            The array can be of size :
+            - nframes x dim_fparam.
+            - dim_fparam. Then all frames are assumed to be provided with the same fparam.
+        aparam
+            The atomic parameter
+            The array can be of size :
+            - nframes x natoms x dim_aparam.
+            - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
+            - dim_aparam. Then all frames and atoms are provided with the same aparam.
+        efield
+            The external field on atoms.
+            The array should be of size nframes x natoms x 3
         mixed_type
             Whether to perform the mixed_type mode.
             If True, the input data has the mixed_type format (see doc/model/train_se_atten.md),
@@ -315,26 +424,27 @@ def make_natoms_vec(
 
         Returns
         -------
-        natoms
-            The number of atoms. This tensor has the length of Ntypes + 2
-            natoms[0]: number of local atoms
-            natoms[1]: total number of atoms held by this processor
-            natoms[i]: 2 <= i < Ntypes+2, number of type i atoms
-
+        descriptor
+            Descriptors.
         """
-        natoms_vec = np.zeros(self.ntypes + 2).astype(int)
-        if mixed_type:
-            natoms = atom_types[0].size
-        else:
-            natoms = atom_types.size
-        natoms_vec[0] = natoms
-        natoms_vec[1] = natoms
-        if mixed_type:
-            natoms_vec[2] = natoms
-            return natoms_vec
-        for ii in range(self.ntypes):
-            natoms_vec[ii + 2] = np.count_nonzero(atom_types == ii)
-        return natoms_vec
+        (
+            coords,
+            cells,
+            atom_types,
+            fparam,
+            aparam,
+            nframes,
+            natoms,
+        ) = self._standard_input(coords, cells, atom_types, fparam, aparam, mixed_type)
+        descriptor = self.deep_eval.eval_descriptor(
+            coords,
+            cells,
+            atom_types,
+            fparam=fparam,
+            aparam=aparam,
+            **kwargs,
+        )
+        return descriptor
 
     def eval_typeebd(self) -> np.ndarray:
         """Evaluate output of type embedding network by using this model.
@@ -353,105 +463,80 @@ def eval_typeebd(self) -> np.ndarray:
 
         See Also
         --------
-        deepmd.utils.type_embed.TypeEmbedNet : The type embedding network.
+        deepmd.tf.utils.type_embed.TypeEmbedNet : The type embedding network.
 
         Examples
         --------
         Get the output of type embedding network of `graph.pb`:
 
         >>> from deepmd.infer import DeepPotential
-        >>> dp = DeepPotential('graph.pb')
+        >>> dp = DeepPotential("graph.pb")
         >>> dp.eval_typeebd()
         """
-        t_typeebd = self._get_tensor("t_typeebd:0")
-        [typeebd] = run_sess(self.sess, [t_typeebd], feed_dict={})
-        return typeebd
-
-    def build_neighbor_list(
-        self,
-        coords: np.ndarray,
-        cell: Optional[np.ndarray],
-        atype: np.ndarray,
-        imap: np.ndarray,
-        neighbor_list,
-    ):
-        """Make the mesh with neighbor list for a single frame.
+        return self.deep_eval.eval_typeebd()
+
+    def _standard_input(self, coords, cells, atom_types, fparam, aparam, mixed_type):
+        coords = np.array(coords)
+        if cells is not None:
+            cells = np.array(cells)
+        atom_types = np.array(atom_types, dtype=np.int32)
+        if fparam is not None:
+            fparam = np.array(fparam)
+        if aparam is not None:
+            aparam = np.array(aparam)
+        natoms, nframes = self._get_natoms_and_nframes(coords, atom_types, mixed_type)
+        atom_types = self._expande_atype(atom_types, nframes, mixed_type)
+        coords = coords.reshape(nframes, natoms, 3)
+        if cells is not None:
+            cells = cells.reshape(nframes, 3, 3)
+        if fparam is not None:
+            fdim = self.get_dim_fparam()
+            if fparam.size == nframes * fdim:
+                fparam = np.reshape(fparam, [nframes, fdim])
+            elif fparam.size == fdim:
+                fparam = np.tile(fparam.reshape([-1]), [nframes, 1])
+            else:
+                raise RuntimeError(
+                    "got wrong size of frame param, should be either %d x %d or %d"
+                    % (nframes, fdim, fdim)
+                )
+        if aparam is not None:
+            fdim = self.get_dim_aparam()
+            if aparam.size == nframes * natoms * fdim:
+                aparam = np.reshape(aparam, [nframes, natoms * fdim])
+            elif aparam.size == natoms * fdim:
+                aparam = np.tile(aparam.reshape([-1]), [nframes, 1])
+            elif aparam.size == fdim:
+                aparam = np.tile(aparam.reshape([-1]), [nframes, natoms])
+            else:
+                raise RuntimeError(
+                    "got wrong size of frame param, should be either %d x %d x %d or %d x %d or %d"
+                    % (nframes, natoms, fdim, natoms, fdim, fdim)
+                )
+        return coords, cells, atom_types, fparam, aparam, nframes, natoms
 
-        Parameters
-        ----------
-        coords : np.ndarray
-            The coordinates of atoms. Should be of shape [natoms, 3]
-        cell : Optional[np.ndarray]
-            The cell of the system. Should be of shape [3, 3]
-        atype : np.ndarray
-            The type of atoms. Should be of shape [natoms]
-        imap : np.ndarray
-            The index map of atoms. Should be of shape [natoms]
-        neighbor_list : ase.neighborlist.NewPrimitiveNeighborList
-            ASE neighbor list. The following method or attribute will be
-            used/set: bothways, self_interaction, update, build, first_neigh,
-            pair_second, offset_vec.
+    def get_sel_type(self) -> List[int]:
+        """Get the selected atom types of this model.
 
-        Returns
-        -------
-        natoms_vec : np.ndarray
-            The number of atoms. This tensor has the length of Ntypes + 2
-            natoms[0]: nloc
-            natoms[1]: nall
-            natoms[i]: 2 <= i < Ntypes+2, number of type i atoms for nloc
-        coords : np.ndarray
-            The coordinates of atoms, including ghost atoms. Should be of
-            shape [nframes, nall, 3]
-        atype : np.ndarray
-            The type of atoms, including ghost atoms. Should be of shape [nall]
-        mesh : np.ndarray
-            The mesh in nei_mode=4.
-        imap : np.ndarray
-            The index map of atoms. Should be of shape [nall]
-        ghost_map : np.ndarray
-            The index map of ghost atoms. Should be of shape [nghost]
+        Only atoms with selected atom types have atomic contribution
+        to the result of the model.
+        If returning an empty list, all atom types are selected.
         """
-        pbc = np.repeat(cell is not None, 3)
-        cell = cell.reshape(3, 3)
-        positions = coords.reshape(-1, 3)
-        neighbor_list.bothways = True
-        neighbor_list.self_interaction = False
-        if neighbor_list.update(pbc, cell, positions):
-            neighbor_list.build(pbc, cell, positions)
-        first_neigh = neighbor_list.first_neigh.copy()
-        pair_second = neighbor_list.pair_second.copy()
-        offset_vec = neighbor_list.offset_vec.copy()
-        # get out-of-box neighbors
-        out_mask = np.any(offset_vec != 0, axis=1)
-        out_idx = pair_second[out_mask]
-        out_offset = offset_vec[out_mask]
-        out_coords = positions[out_idx] + out_offset.dot(cell)
-        atype = np.array(atype, dtype=int)
-        out_atype = atype[out_idx]
-
-        nloc = positions.shape[0]
-        nghost = out_idx.size
-        all_coords = np.concatenate((positions, out_coords), axis=0)
-        all_atype = np.concatenate((atype, out_atype), axis=0)
-        # convert neighbor indexes
-        ghost_map = pair_second[out_mask]
-        pair_second[out_mask] = np.arange(nloc, nloc + nghost)
-        # get the mesh
-        mesh = np.zeros(16 + nloc * 2 + pair_second.size, dtype=int)
-        mesh[0] = nloc
-        # ilist
-        mesh[16 : 16 + nloc] = np.arange(nloc)
-        # numnei
-        mesh[16 + nloc : 16 + nloc * 2] = first_neigh[1:] - first_neigh[:-1]
-        # jlist
-        mesh[16 + nloc * 2 :] = pair_second
-
-        # natoms_vec
-        natoms_vec = np.zeros(self.ntypes + 2).astype(int)
-        natoms_vec[0] = nloc
-        natoms_vec[1] = nloc + nghost
-        for ii in range(self.ntypes):
-            natoms_vec[ii + 2] = np.count_nonzero(atype == ii)
-        # imap append ghost atoms
-        imap = np.concatenate((imap, np.arange(nloc, nloc + nghost)))
-        return natoms_vec, all_coords, all_atype, mesh, imap, ghost_map
+        return self.deep_eval.get_sel_type()
+
+    def _get_sel_natoms(self, atype) -> int:
+        return np.sum(np.isin(atype, self.get_sel_type()).astype(int))
+
+    @property
+    def has_efield(self) -> bool:
+        """Check if the model has efield."""
+        return self.deep_eval.get_has_efield()
+
+    @property
+    def has_spin(self) -> bool:
+        """Check if the model has spin."""
+        return self.deep_eval.get_has_spin()
+
+    def get_ntypes_spin(self) -> int:
+        """Get the number of spin atom types of this model. Only used in old implement."""
+        return self.deep_eval.get_ntypes_spin()
diff --git a/deepmd/infer/deep_polar.py b/deepmd/infer/deep_polar.py
index c1f981ef86..f857619871 100644
--- a/deepmd/infer/deep_polar.py
+++ b/deepmd/infer/deep_polar.py
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 from typing import (
-    TYPE_CHECKING,
     List,
     Optional,
+    Union,
 )
 
 import numpy as np
@@ -11,118 +11,46 @@
     DeepTensor,
 )
 
-if TYPE_CHECKING:
-    from pathlib import (
-        Path,
-    )
-
 
 class DeepPolar(DeepTensor):
-    """Constructor.
+    """Deep polar model.
 
     Parameters
     ----------
     model_file : Path
         The name of the frozen model file.
-    load_prefix: str
-        The prefix in the load computational graph
-    default_tf_graph : bool
-        If uses the default tf graph, otherwise build a new tf graph for evaluation
-    input_map : dict, optional
-        The input map for tf.import_graph_def. Only work with default tf graph
-    neighbor_list : ase.neighborlist.NeighborList, optional
-        The neighbor list object. If None, then build the native neighbor list.
-
-    Warnings
-    --------
-    For developers: `DeepTensor` initializer must be called at the end after
-    `self.tensors` are modified because it uses the data in `self.tensors` dict.
-    Do not chanage the order!
+    *args : list
+        Positional arguments.
+    auto_batch_size : bool or int or AutoBatchSize, default: True
+        If True, automatic batch size will be used. If int, it will be used
+        as the initial batch size.
+    neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional
+        The ASE neighbor list class to produce the neighbor list. If None, the
+        neighbor list will be built natively in the model.
+    **kwargs : dict
+        Keyword arguments.
     """
 
-    def __init__(
-        self,
-        model_file: "Path",
-        load_prefix: str = "load",
-        default_tf_graph: bool = False,
-        input_map: Optional[dict] = None,
-        neighbor_list=None,
-    ) -> None:
-        # use this in favor of dict update to move attribute from class to
-        # instance namespace
-        self.tensors = dict(
-            {
-                # output tensor
-                "t_tensor": "o_polar:0",
-            },
-            **self.tensors,
-        )
-
-        DeepTensor.__init__(
-            self,
-            model_file,
-            load_prefix=load_prefix,
-            default_tf_graph=default_tf_graph,
-            input_map=input_map,
-            neighbor_list=neighbor_list,
-        )
-
-    def get_dim_fparam(self) -> int:
-        """Unsupported in this model."""
-        raise NotImplementedError("This model type does not support this attribute")
-
-    def get_dim_aparam(self) -> int:
-        """Unsupported in this model."""
-        raise NotImplementedError("This model type does not support this attribute")
+    @property
+    def output_tensor_name(self) -> str:
+        return "polar"
 
 
 class DeepGlobalPolar(DeepTensor):
-    """Constructor.
-
-    Parameters
-    ----------
-    model_file : str
-        The name of the frozen model file.
-    load_prefix: str
-        The prefix in the load computational graph
-    default_tf_graph : bool
-        If uses the default tf graph, otherwise build a new tf graph for evaluation
-    neighbor_list : ase.neighborlist.NeighborList, optional
-        The neighbor list object. If None, then build the native neighbor list.
-    """
-
-    def __init__(
-        self,
-        model_file: str,
-        load_prefix: str = "load",
-        default_tf_graph: bool = False,
-        neighbor_list=None,
-    ) -> None:
-        self.tensors.update(
-            {
-                "t_sel_type": "model_attr/sel_type:0",
-                # output tensor
-                "t_tensor": "o_global_polar:0",
-            }
-        )
-
-        DeepTensor.__init__(
-            self,
-            model_file,
-            load_prefix=load_prefix,
-            default_tf_graph=default_tf_graph,
-            neighbor_list=None,
-        )
+    @property
+    def output_tensor_name(self) -> str:
+        return "global_polar"
 
     def eval(
         self,
         coords: np.ndarray,
-        cells: np.ndarray,
-        atom_types: List[int],
+        cells: Optional[np.ndarray],
+        atom_types: Union[List[int], np.ndarray],
         atomic: bool = False,
         fparam: Optional[np.ndarray] = None,
         aparam: Optional[np.ndarray] = None,
-        efield: Optional[np.ndarray] = None,
+        mixed_type: bool = False,
+        **kwargs: dict,
     ) -> np.ndarray:
         """Evaluate the model.
 
@@ -135,31 +63,35 @@ def eval(
             The cell of the region.
             If None then non-PBC is assumed, otherwise using PBC.
             The array should be of size nframes x 9
-        atom_types
+        atom_types : list[int] or np.ndarray
             The atom types
             The list should contain natoms ints
         atomic
-            Not used in this model
+            If True (default), return the atomic tensor
+            Otherwise return the global tensor
         fparam
             Not used in this model
         aparam
             Not used in this model
-        efield
-            Not used in this model
+        mixed_type
+            Whether to perform the mixed_type mode.
+            If True, the input data has the mixed_type format (see doc/model/train_se_atten.md),
+            in which frames in a system may have different natoms_vec(s), with the same nloc.
 
         Returns
         -------
         tensor
             The returned tensor
-            If atomic == False then of size nframes x variable_dof
-            else of size nframes x natoms x variable_dof
+            If atomic == False then of size nframes x output_dim
+            else of size nframes x natoms x output_dim
         """
-        return DeepTensor.eval(self, coords, cells, atom_types, atomic=False)
-
-    def get_dim_fparam(self) -> int:
-        """Unsupported in this model."""
-        raise NotImplementedError("This model type does not support this attribute")
-
-    def get_dim_aparam(self) -> int:
-        """Unsupported in this model."""
-        raise NotImplementedError("This model type does not support this attribute")
+        return super().eval(
+            coords,
+            cells,
+            atom_types,
+            atomic=atomic,
+            fparam=fparam,
+            aparam=aparam,
+            mixed_type=mixed_type,
+            **kwargs,
+        )
diff --git a/deepmd/infer/deep_pot.py b/deepmd/infer/deep_pot.py
index 81cfdde7a8..bc0bfc9599 100644
--- a/deepmd/infer/deep_pot.py
+++ b/deepmd/infer/deep_pot.py
@@ -1,8 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-import logging
 from typing import (
-    TYPE_CHECKING,
-    Callable,
+    Any,
+    Dict,
     List,
     Optional,
     Tuple,
@@ -11,681 +10,195 @@
 
 import numpy as np
 
-from deepmd.common import (
-    make_default_mesh,
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+    ModelOutputDef,
+    OutputVariableDef,
 )
-from deepmd.infer.data_modifier import (
-    DipoleChargeModifier,
-)
-from deepmd.infer.deep_eval import (
+
+from .deep_eval import (
     DeepEval,
 )
-from deepmd.utils.batch_size import (
-    AutoBatchSize,
-)
-from deepmd.utils.sess import (
-    run_sess,
-)
-
-if TYPE_CHECKING:
-    from pathlib import (
-        Path,
-    )
-
-log = logging.getLogger(__name__)
 
 
 class DeepPot(DeepEval):
-    """Constructor.
+    """Potential energy model.
 
     Parameters
     ----------
     model_file : Path
         The name of the frozen model file.
-    load_prefix: str
-        The prefix in the load computational graph
-    default_tf_graph : bool
-        If uses the default tf graph, otherwise build a new tf graph for evaluation
-    auto_batch_size : bool or int or AutomaticBatchSize, default: True
+    *args : list
+        Positional arguments.
+    auto_batch_size : bool or int or AutoBatchSize, default: True
         If True, automatic batch size will be used. If int, it will be used
         as the initial batch size.
-    input_map : dict, optional
-        The input map for tf.import_graph_def. Only work with default tf graph
     neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional
         The ASE neighbor list class to produce the neighbor list. If None, the
         neighbor list will be built natively in the model.
+    **kwargs : dict
+        Keyword arguments.
 
     Examples
     --------
     >>> from deepmd.infer import DeepPot
     >>> import numpy as np
-    >>> dp = DeepPot('graph.pb')
-    >>> coord = np.array([[1,0,0], [0,0,1.5], [1,0,3]]).reshape([1, -1])
+    >>> dp = DeepPot("graph.pb")
+    >>> coord = np.array([[1, 0, 0], [0, 0, 1.5], [1, 0, 3]]).reshape([1, -1])
     >>> cell = np.diag(10 * np.ones(3)).reshape([1, -1])
-    >>> atype = [1,0,1]
+    >>> atype = [1, 0, 1]
     >>> e, f, v = dp.eval(coord, cell, atype)
 
     where `e`, `f` and `v` are predicted energy, force and virial of the system, respectively.
-
-    Warnings
-    --------
-    For developers: `DeepTensor` initializer must be called at the end after
-    `self.tensors` are modified because it uses the data in `self.tensors` dict.
-    Do not chanage the order!
     """
 
-    def __init__(
-        self,
-        model_file: "Path",
-        load_prefix: str = "load",
-        default_tf_graph: bool = False,
-        auto_batch_size: Union[bool, int, AutoBatchSize] = True,
-        input_map: Optional[dict] = None,
-        neighbor_list=None,
-    ) -> None:
-        # add these tensors on top of what is defined by DeepTensor Class
-        # use this in favor of dict update to move attribute from class to
-        # instance namespace
-        self.tensors = {
-            # descrpt attrs
-            "t_ntypes": "descrpt_attr/ntypes:0",
-            "t_rcut": "descrpt_attr/rcut:0",
-            # fitting attrs
-            "t_dfparam": "fitting_attr/dfparam:0",
-            "t_daparam": "fitting_attr/daparam:0",
-            # model attrs
-            "t_tmap": "model_attr/tmap:0",
-            # inputs
-            "t_coord": "t_coord:0",
-            "t_type": "t_type:0",
-            "t_natoms": "t_natoms:0",
-            "t_box": "t_box:0",
-            "t_mesh": "t_mesh:0",
-            # add output tensors
-            "t_energy": "o_energy:0",
-            "t_force": "o_force:0",
-            "t_virial": "o_virial:0",
-            "t_ae": "o_atom_energy:0",
-            "t_av": "o_atom_virial:0",
-            "t_descriptor": "o_descriptor:0",
-        }
-        DeepEval.__init__(
-            self,
-            model_file,
-            load_prefix=load_prefix,
-            default_tf_graph=default_tf_graph,
-            auto_batch_size=auto_batch_size,
-            input_map=input_map,
-            neighbor_list=neighbor_list,
-        )
-
-        # load optional tensors
-        operations = [op.name for op in self.graph.get_operations()]
-        # check if the graph has these operations:
-        # if yes add them
-
-        if ("%s/t_efield" % load_prefix) in operations:
-            self.tensors.update({"t_efield": "t_efield:0"})
-            self.has_efield = True
-        else:
-            log.debug("Could not get tensor 't_efield:0'")
-            self.t_efield = None
-            self.has_efield = False
-
-        if ("%s/t_fparam" % load_prefix) in operations:
-            self.tensors.update({"t_fparam": "t_fparam:0"})
-            self.has_fparam = True
-        else:
-            log.debug("Could not get tensor 't_fparam:0'")
-            self.t_fparam = None
-            self.has_fparam = False
-
-        if ("%s/t_aparam" % load_prefix) in operations:
-            self.tensors.update({"t_aparam": "t_aparam:0"})
-            self.has_aparam = True
-        else:
-            log.debug("Could not get tensor 't_aparam:0'")
-            self.t_aparam = None
-            self.has_aparam = False
-
-        if ("%s/spin_attr/ntypes_spin" % load_prefix) in operations:
-            self.tensors.update({"t_ntypes_spin": "spin_attr/ntypes_spin:0"})
-            self.has_spin = True
-        else:
-            self.ntypes_spin = 0
-            self.has_spin = False
-
-        # now load tensors to object attributes
-        for attr_name, tensor_name in self.tensors.items():
-            try:
-                self._get_tensor(tensor_name, attr_name)
-            except KeyError:
-                if attr_name != "t_descriptor":
-                    raise
-
-        self._run_default_sess()
-        self.tmap = self.tmap.decode("UTF-8").split()
-
-        # setup modifier
-        try:
-            t_modifier_type = self._get_tensor("modifier_attr/type:0")
-            self.modifier_type = run_sess(self.sess, t_modifier_type).decode("UTF-8")
-        except (ValueError, KeyError):
-            self.modifier_type = None
-
-        try:
-            t_jdata = self._get_tensor("train_attr/training_script:0")
-            jdata = run_sess(self.sess, t_jdata).decode("UTF-8")
-            import json
-
-            jdata = json.loads(jdata)
-            self.descriptor_type = jdata["model"]["descriptor"]["type"]
-        except (ValueError, KeyError):
-            self.descriptor_type = None
-
-        if self.modifier_type == "dipole_charge":
-            t_mdl_name = self._get_tensor("modifier_attr/mdl_name:0")
-            t_mdl_charge_map = self._get_tensor("modifier_attr/mdl_charge_map:0")
-            t_sys_charge_map = self._get_tensor("modifier_attr/sys_charge_map:0")
-            t_ewald_h = self._get_tensor("modifier_attr/ewald_h:0")
-            t_ewald_beta = self._get_tensor("modifier_attr/ewald_beta:0")
-            [mdl_name, mdl_charge_map, sys_charge_map, ewald_h, ewald_beta] = run_sess(
-                self.sess,
+    @property
+    def output_def(self) -> ModelOutputDef:
+        """Get the output definition of this model."""
+        return ModelOutputDef(
+            FittingOutputDef(
                 [
-                    t_mdl_name,
-                    t_mdl_charge_map,
-                    t_sys_charge_map,
-                    t_ewald_h,
-                    t_ewald_beta,
-                ],
-            )
-            mdl_name = mdl_name.decode("UTF-8")
-            mdl_charge_map = [int(ii) for ii in mdl_charge_map.decode("UTF-8").split()]
-            sys_charge_map = [int(ii) for ii in sys_charge_map.decode("UTF-8").split()]
-            self.dm = DipoleChargeModifier(
-                mdl_name,
-                mdl_charge_map,
-                sys_charge_map,
-                ewald_h=ewald_h,
-                ewald_beta=ewald_beta,
+                    OutputVariableDef(
+                        "energy",
+                        shape=[1],
+                        reduciable=True,
+                        r_differentiable=True,
+                        c_differentiable=True,
+                        atomic=True,
+                    ),
+                ]
             )
+        )
 
-    def _run_default_sess(self):
-        if self.has_spin is True:
-            [
-                self.ntypes,
-                self.ntypes_spin,
-                self.rcut,
-                self.dfparam,
-                self.daparam,
-                self.tmap,
-            ] = run_sess(
-                self.sess,
-                [
-                    self.t_ntypes,
-                    self.t_ntypes_spin,
-                    self.t_rcut,
-                    self.t_dfparam,
-                    self.t_daparam,
-                    self.t_tmap,
-                ],
-            )
-        else:
-            [self.ntypes, self.rcut, self.dfparam, self.daparam, self.tmap] = run_sess(
-                self.sess,
+    @property
+    def output_def_mag(self) -> ModelOutputDef:
+        """Get the output definition of this model with magnetic parts."""
+        return ModelOutputDef(
+            FittingOutputDef(
                 [
-                    self.t_ntypes,
-                    self.t_rcut,
-                    self.t_dfparam,
-                    self.t_daparam,
-                    self.t_tmap,
-                ],
+                    OutputVariableDef(
+                        "energy",
+                        shape=[1],
+                        reduciable=True,
+                        r_differentiable=True,
+                        c_differentiable=True,
+                        atomic=True,
+                        magnetic=True,
+                    ),
+                ]
             )
-
-    def get_ntypes(self) -> int:
-        """Get the number of atom types of this model."""
-        return self.ntypes
-
-    def get_ntypes_spin(self):
-        """Get the number of spin atom types of this model."""
-        return self.ntypes_spin
-
-    def get_rcut(self) -> float:
-        """Get the cut-off radius of this model."""
-        return self.rcut
-
-    def get_type_map(self) -> List[str]:
-        """Get the type map (element name of the atom types) of this model."""
-        return self.tmap
-
-    def get_sel_type(self) -> List[int]:
-        """Unsupported in this model."""
-        raise NotImplementedError("This model type does not support this attribute")
-
-    def get_descriptor_type(self) -> List[int]:
-        """Get the descriptor type of this model."""
-        return self.descriptor_type
-
-    def get_dim_fparam(self) -> int:
-        """Get the number (dimension) of frame parameters of this DP."""
-        return self.dfparam
-
-    def get_dim_aparam(self) -> int:
-        """Get the number (dimension) of atomic parameters of this DP."""
-        return self.daparam
-
-    def _eval_func(self, inner_func: Callable, numb_test: int, natoms: int) -> Callable:
-        """Wrapper method with auto batch size.
-
-        Parameters
-        ----------
-        inner_func : Callable
-            the method to be wrapped
-        numb_test : int
-            number of tests
-        natoms : int
-            number of atoms
-
-        Returns
-        -------
-        Callable
-            the wrapper
-        """
-        if self.auto_batch_size is not None:
-
-            def eval_func(*args, **kwargs):
-                return self.auto_batch_size.execute_all(
-                    inner_func, numb_test, natoms, *args, **kwargs
-                )
-
-        else:
-            eval_func = inner_func
-        return eval_func
-
-    def _get_natoms_and_nframes(
-        self,
-        coords: np.ndarray,
-        atom_types: Union[List[int], np.ndarray],
-        mixed_type: bool = False,
-    ) -> Tuple[int, int]:
-        if mixed_type:
-            natoms = len(atom_types[0])
-        else:
-            natoms = len(atom_types)
-        if natoms == 0:
-            assert coords.size == 0
-        else:
-            coords = np.reshape(np.array(coords), [-1, natoms * 3])
-        nframes = coords.shape[0]
-        return natoms, nframes
+        )
 
     def eval(
         self,
         coords: np.ndarray,
-        cells: np.ndarray,
-        atom_types: List[int],
+        cells: Optional[np.ndarray],
+        atom_types: Union[List[int], np.ndarray],
         atomic: bool = False,
         fparam: Optional[np.ndarray] = None,
         aparam: Optional[np.ndarray] = None,
-        efield: Optional[np.ndarray] = None,
         mixed_type: bool = False,
+        **kwargs: Dict[str, Any],
     ) -> Tuple[np.ndarray, ...]:
-        """Evaluate the energy, force and virial by using this DP.
+        """Evaluate energy, force, and virial. If atomic is True,
+        also return atomic energy and atomic virial.
 
         Parameters
         ----------
-        coords
-            The coordinates of atoms.
-            The array should be of size nframes x natoms x 3
-        cells
-            The cell of the region.
-            If None then non-PBC is assumed, otherwise using PBC.
-            The array should be of size nframes x 9
-        atom_types
-            The atom types
-            The list should contain natoms ints
-        atomic
-            Calculate the atomic energy and virial
-        fparam
-            The frame parameter.
-            The array can be of size :
-            - nframes x dim_fparam.
-            - dim_fparam. Then all frames are assumed to be provided with the same fparam.
-        aparam
-            The atomic parameter
-            The array can be of size :
-            - nframes x natoms x dim_aparam.
-            - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
-            - dim_aparam. Then all frames and atoms are provided with the same aparam.
-        efield
-            The external field on atoms.
-            The array should be of size nframes x natoms x 3
-        mixed_type
-            Whether to perform the mixed_type mode.
-            If True, the input data has the mixed_type format (see doc/model/train_se_atten.md),
-            in which frames in a system may have different natoms_vec(s), with the same nloc.
+        coords : np.ndarray
+            The coordinates of the atoms, in shape (nframes, natoms, 3).
+        cells : np.ndarray
+            The cell vectors of the system, in shape (nframes, 9). If the system
+            is not periodic, set it to None.
+        atom_types : List[int] or np.ndarray
+            The types of the atoms. If mixed_type is False, the shape is (natoms,);
+            otherwise, the shape is (nframes, natoms).
+        atomic : bool, optional
+            Whether to return atomic energy and atomic virial, by default False.
+        fparam : np.ndarray, optional
+            The frame parameters, by default None.
+        aparam : np.ndarray, optional
+            The atomic parameters, by default None.
+        mixed_type : bool, optional
+            Whether the atom_types is mixed type, by default False.
+        **kwargs : Dict[str, Any]
+            Keyword arguments.
 
         Returns
         -------
         energy
-            The system energy.
+            The energy of the system, in shape (nframes,).
         force
-            The force on each atom
+            The force of the system, in shape (nframes, natoms, 3).
         virial
-            The virial
-        atom_energy
-            The atomic energy. Only returned when atomic == True
-        atom_virial
-            The atomic virial. Only returned when atomic == True
+            The virial of the system, in shape (nframes, 9).
+        atomic_energy
+            The atomic energy of the system, in shape (nframes, natoms). Only returned
+            when atomic is True.
+        atomic_virial
+            The atomic virial of the system, in shape (nframes, natoms, 9). Only returned
+            when atomic is True.
         """
-        # reshape coords before getting shape
-        natoms, numb_test = self._get_natoms_and_nframes(
-            coords, atom_types, mixed_type=mixed_type
-        )
-        output = self._eval_func(self._eval_inner, numb_test, natoms)(
+        # This method has been used by:
+        # documentation python.md
+        # dp model_devi: +fparam, +aparam, +mixed_type
+        # dp test: +atomic, +fparam, +aparam, +efield, +mixed_type
+        # finetune: +mixed_type
+        # dpdata
+        # ase
+        (
+            coords,
+            cells,
+            atom_types,
+            fparam,
+            aparam,
+            nframes,
+            natoms,
+        ) = self._standard_input(coords, cells, atom_types, fparam, aparam, mixed_type)
+        results = self.deep_eval.eval(
             coords,
             cells,
             atom_types,
+            atomic,
             fparam=fparam,
             aparam=aparam,
-            atomic=atomic,
-            efield=efield,
-            mixed_type=mixed_type,
+            **kwargs,
         )
+        energy = results["energy_redu"].reshape(nframes, 1)
+        force = results["energy_derv_r"].reshape(nframes, natoms, 3)
+        virial = results["energy_derv_c_redu"].reshape(nframes, 9)
 
-        if self.modifier_type is not None:
-            if atomic:
-                raise RuntimeError("modifier does not support atomic modification")
-            me, mf, mv = self.dm.eval(coords, cells, atom_types)
-            output = list(output)  # tuple to list
-            e, f, v = output[:3]
-            output[0] += me.reshape(e.shape)
-            output[1] += mf.reshape(f.shape)
-            output[2] += mv.reshape(v.shape)
-            output = tuple(output)
-        return output
-
-    def _prepare_feed_dict(
-        self,
-        coords,
-        cells,
-        atom_types,
-        fparam=None,
-        aparam=None,
-        efield=None,
-        mixed_type=False,
-    ):
-        # standarize the shape of inputs
-        natoms, nframes = self._get_natoms_and_nframes(
-            coords, atom_types, mixed_type=mixed_type
-        )
-        if mixed_type:
-            atom_types = np.array(atom_types, dtype=int).reshape([-1, natoms])
-        else:
-            atom_types = np.array(atom_types, dtype=int).reshape([-1])
-        coords = np.reshape(np.array(coords), [nframes, natoms * 3])
-        if cells is None:
-            pbc = False
-            # make cells to work around the requirement of pbc
-            cells = np.tile(np.eye(3), [nframes, 1]).reshape([nframes, 9])
-        else:
-            pbc = True
-            cells = np.array(cells).reshape([nframes, 9])
-
-        if self.has_fparam:
-            assert fparam is not None
-            fparam = np.array(fparam)
-        if self.has_aparam:
-            assert aparam is not None
-            aparam = np.array(aparam)
-        if self.has_efield:
-            assert (
-                efield is not None
-            ), "you are using a model with external field, parameter efield should be provided"
-            efield = np.array(efield)
-
-        # reshape the inputs
-        if self.has_fparam:
-            fdim = self.get_dim_fparam()
-            if fparam.size == nframes * fdim:
-                fparam = np.reshape(fparam, [nframes, fdim])
-            elif fparam.size == fdim:
-                fparam = np.tile(fparam.reshape([-1]), [nframes, 1])
-            else:
-                raise RuntimeError(
-                    "got wrong size of frame param, should be either %d x %d or %d"
-                    % (nframes, fdim, fdim)
+        if atomic:
+            if self.get_ntypes_spin() > 0:
+                ntypes_real = self.get_ntypes() - self.get_ntypes_spin()
+                natoms_real = sum(
+                    [
+                        np.count_nonzero(np.array(atom_types[0]) == ii)
+                        for ii in range(ntypes_real)
+                    ]
                 )
-        if self.has_aparam:
-            fdim = self.get_dim_aparam()
-            if aparam.size == nframes * natoms * fdim:
-                aparam = np.reshape(aparam, [nframes, natoms * fdim])
-            elif aparam.size == natoms * fdim:
-                aparam = np.tile(aparam.reshape([-1]), [nframes, 1])
-            elif aparam.size == fdim:
-                aparam = np.tile(aparam.reshape([-1]), [nframes, natoms])
             else:
-                raise RuntimeError(
-                    "got wrong size of frame param, should be either %d x %d x %d or %d x %d or %d"
-                    % (nframes, natoms, fdim, natoms, fdim, fdim)
-                )
-
-        # sort inputs
-        coords, atom_types, imap = self.sort_input(
-            coords, atom_types, mixed_type=mixed_type
-        )
-        if self.has_efield:
-            efield = np.reshape(efield, [nframes, natoms, 3])
-            efield = efield[:, imap, :]
-            efield = np.reshape(efield, [nframes, natoms * 3])
-        if self.has_aparam:
-            aparam = np.reshape(aparam, [nframes, natoms, fdim])
-            aparam = aparam[:, imap, :]
-            aparam = np.reshape(aparam, [nframes, natoms * fdim])
-
-        # make natoms_vec and default_mesh
-        if self.neighbor_list is None:
-            natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type)
-            assert natoms_vec[0] == natoms
-            mesh = make_default_mesh(pbc, mixed_type)
-            ghost_map = None
-        else:
-            if nframes > 1:
-                raise NotImplementedError(
-                    "neighbor_list does not support multiple frames"
-                )
-            (
-                natoms_vec,
-                coords,
-                atom_types,
-                mesh,
-                imap,
-                ghost_map,
-            ) = self.build_neighbor_list(
-                coords,
-                cells if cells is not None else None,
-                atom_types,
-                imap,
-                self.neighbor_list,
-            )
-
-        # evaluate
-        feed_dict_test = {}
-        feed_dict_test[self.t_natoms] = natoms_vec
-        if mixed_type:
-            feed_dict_test[self.t_type] = atom_types.reshape([-1])
-        else:
-            feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape(
-                [-1]
+                natoms_real = natoms
+            atomic_energy = results["energy"].reshape(nframes, natoms_real, 1)
+            atomic_virial = results["energy_derv_c"].reshape(nframes, natoms, 9)
+            result = (
+                energy,
+                force,
+                virial,
+                atomic_energy,
+                atomic_virial,
             )
-        feed_dict_test[self.t_coord] = np.reshape(coords, [-1])
-
-        if len(self.t_box.shape) == 1:
-            feed_dict_test[self.t_box] = np.reshape(cells, [-1])
-        elif len(self.t_box.shape) == 2:
-            feed_dict_test[self.t_box] = cells
         else:
-            raise RuntimeError
-        if self.has_efield:
-            feed_dict_test[self.t_efield] = np.reshape(efield, [-1])
-        feed_dict_test[self.t_mesh] = mesh
-        if self.has_fparam:
-            feed_dict_test[self.t_fparam] = np.reshape(fparam, [-1])
-        if self.has_aparam:
-            feed_dict_test[self.t_aparam] = np.reshape(aparam, [-1])
-        return feed_dict_test, imap, natoms_vec, ghost_map
-
-    def _eval_inner(
-        self,
-        coords,
-        cells,
-        atom_types,
-        fparam=None,
-        aparam=None,
-        atomic=False,
-        efield=None,
-        mixed_type=False,
-    ):
-        natoms, nframes = self._get_natoms_and_nframes(
-            coords, atom_types, mixed_type=mixed_type
-        )
-        feed_dict_test, imap, natoms_vec, ghost_map = self._prepare_feed_dict(
-            coords, cells, atom_types, fparam, aparam, efield, mixed_type=mixed_type
-        )
-
-        nloc = natoms_vec[0]
-        nall = natoms_vec[1]
-
-        t_out = [self.t_energy, self.t_force, self.t_virial]
-        if atomic:
-            t_out += [self.t_ae, self.t_av]
-
-        v_out = run_sess(self.sess, t_out, feed_dict=feed_dict_test)
-        energy = v_out[0]
-        force = v_out[1]
-        virial = v_out[2]
-        if atomic:
-            ae = v_out[3]
-            av = v_out[4]
-
-        if self.has_spin:
-            ntypes_real = self.ntypes - self.ntypes_spin
-            natoms_real = sum(
-                [
-                    np.count_nonzero(np.array(atom_types) == ii)
-                    for ii in range(ntypes_real)
-                ]
+            result = (
+                energy,
+                force,
+                virial,
             )
-        else:
-            natoms_real = natoms
-        if ghost_map is not None:
-            # add the value of ghost atoms to real atoms
-            force = np.reshape(force, [nframes, -1, 3])
-            np.add.at(force[0], ghost_map, force[0, nloc:])
-            if atomic:
-                av = np.reshape(av, [nframes, -1, 9])
-                np.add.at(av[0], ghost_map, av[0, nloc:])
+        if self.deep_eval.get_has_spin():
+            force_mag = results["energy_derv_r_mag"].reshape(nframes, natoms, 3)
+            mask_mag = results["mask_mag"].reshape(nframes, natoms, 1)
+            result = (*list(result), force_mag, mask_mag)
+        return result
 
-        # reverse map of the outputs
-        force = self.reverse_map(np.reshape(force, [nframes, -1, 3]), imap)
-        if atomic:
-            ae = self.reverse_map(np.reshape(ae, [nframes, -1, 1]), imap[:natoms_real])
-            av = self.reverse_map(np.reshape(av, [nframes, -1, 9]), imap)
-
-        energy = np.reshape(energy, [nframes, 1])
-        force = np.reshape(force, [nframes, nall, 3])
-        if nloc < nall:
-            force = force[:, :nloc, :]
-        virial = np.reshape(virial, [nframes, 9])
-        if atomic:
-            ae = np.reshape(ae, [nframes, natoms_real, 1])
-            av = np.reshape(av, [nframes, nall, 9])
-            if nloc < nall:
-                av = av[:, :nloc, :]
-            return energy, force, virial, ae, av
-        else:
-            return energy, force, virial
-
-    def eval_descriptor(
-        self,
-        coords: np.ndarray,
-        cells: np.ndarray,
-        atom_types: List[int],
-        fparam: Optional[np.ndarray] = None,
-        aparam: Optional[np.ndarray] = None,
-        efield: Optional[np.ndarray] = None,
-        mixed_type: bool = False,
-    ) -> np.array:
-        """Evaluate descriptors by using this DP.
-
-        Parameters
-        ----------
-        coords
-            The coordinates of atoms.
-            The array should be of size nframes x natoms x 3
-        cells
-            The cell of the region.
-            If None then non-PBC is assumed, otherwise using PBC.
-            The array should be of size nframes x 9
-        atom_types
-            The atom types
-            The list should contain natoms ints
-        fparam
-            The frame parameter.
-            The array can be of size :
-            - nframes x dim_fparam.
-            - dim_fparam. Then all frames are assumed to be provided with the same fparam.
-        aparam
-            The atomic parameter
-            The array can be of size :
-            - nframes x natoms x dim_aparam.
-            - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
-            - dim_aparam. Then all frames and atoms are provided with the same aparam.
-        efield
-            The external field on atoms.
-            The array should be of size nframes x natoms x 3
-        mixed_type
-            Whether to perform the mixed_type mode.
-            If True, the input data has the mixed_type format (see doc/model/train_se_atten.md),
-            in which frames in a system may have different natoms_vec(s), with the same nloc.
-
-        Returns
-        -------
-        descriptor
-            Descriptors.
-        """
-        natoms, numb_test = self._get_natoms_and_nframes(
-            coords, atom_types, mixed_type=mixed_type
-        )
-        descriptor = self._eval_func(self._eval_descriptor_inner, numb_test, natoms)(
-            coords,
-            cells,
-            atom_types,
-            fparam=fparam,
-            aparam=aparam,
-            efield=efield,
-            mixed_type=mixed_type,
-        )
-        return descriptor
 
-    def _eval_descriptor_inner(
-        self,
-        coords: np.ndarray,
-        cells: np.ndarray,
-        atom_types: List[int],
-        fparam: Optional[np.ndarray] = None,
-        aparam: Optional[np.ndarray] = None,
-        efield: Optional[np.ndarray] = None,
-        mixed_type: bool = False,
-    ) -> np.array:
-        natoms, nframes = self._get_natoms_and_nframes(
-            coords, atom_types, mixed_type=mixed_type
-        )
-        feed_dict_test, imap, natoms_vec, ghost_map = self._prepare_feed_dict(
-            coords, cells, atom_types, fparam, aparam, efield, mixed_type=mixed_type
-        )
-        (descriptor,) = run_sess(
-            self.sess, [self.t_descriptor], feed_dict=feed_dict_test
-        )
-        imap = imap[:natoms]
-        return self.reverse_map(np.reshape(descriptor, [nframes, natoms, -1]), imap)
+__all__ = ["DeepPot"]
diff --git a/deepmd/infer/deep_tensor.py b/deepmd/infer/deep_tensor.py
index a803eb0c6b..14e13e7f84 100644
--- a/deepmd/infer/deep_tensor.py
+++ b/deepmd/infer/deep_tensor.py
@@ -1,160 +1,55 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+from abc import (
+    abstractmethod,
+)
 from typing import (
-    TYPE_CHECKING,
-    ClassVar,
-    Dict,
     List,
     Optional,
     Tuple,
+    Union,
 )
 
 import numpy as np
 
-from deepmd.common import (
-    make_default_mesh,
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+    ModelOutputDef,
+    OutputVariableDef,
 )
 from deepmd.infer.deep_eval import (
     DeepEval,
 )
-from deepmd.utils.sess import (
-    run_sess,
-)
-
-if TYPE_CHECKING:
-    from pathlib import (
-        Path,
-    )
 
 
 class DeepTensor(DeepEval):
-    """Evaluates a tensor model.
+    """Deep Tensor Model.
 
     Parameters
     ----------
-    model_file: str
+    model_file : Path
         The name of the frozen model file.
-    load_prefix: str
-        The prefix in the load computational graph
-    default_tf_graph : bool
-        If uses the default tf graph, otherwise build a new tf graph for evaluation
-    input_map : dict, optional
-        The input map for tf.import_graph_def. Only work with default tf graph
-    neighbor_list : ase.neighborlist.NeighborList, optional
-        The neighbor list object. If None, then build the native neighbor list.
+    *args : list
+        Positional arguments.
+    auto_batch_size : bool or int or AutoBatchSize, default: True
+        If True, automatic batch size will be used. If int, it will be used
+        as the initial batch size.
+    neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional
+        The ASE neighbor list class to produce the neighbor list. If None, the
+        neighbor list will be built natively in the model.
+    **kwargs : dict
+        Keyword arguments.
     """
 
-    tensors: ClassVar[Dict[str, str]] = {
-        # descriptor attrs
-        "t_ntypes": "descrpt_attr/ntypes:0",
-        "t_rcut": "descrpt_attr/rcut:0",
-        # model attrs
-        "t_tmap": "model_attr/tmap:0",
-        "t_sel_type": "model_attr/sel_type:0",
-        "t_ouput_dim": "model_attr/output_dim:0",
-        # inputs
-        "t_coord": "t_coord:0",
-        "t_type": "t_type:0",
-        "t_natoms": "t_natoms:0",
-        "t_box": "t_box:0",
-        "t_mesh": "t_mesh:0",
-    }
-
-    def __init__(
-        self,
-        model_file: "Path",
-        load_prefix: str = "load",
-        default_tf_graph: bool = False,
-        input_map: Optional[dict] = None,
-        neighbor_list=None,
-    ) -> None:
-        """Constructor."""
-        DeepEval.__init__(
-            self,
-            model_file,
-            load_prefix=load_prefix,
-            default_tf_graph=default_tf_graph,
-            input_map=input_map,
-            neighbor_list=neighbor_list,
-        )
-        # check model type
-        model_type = self.tensors["t_tensor"][2:-2]
-        assert (
-            self.model_type == model_type
-        ), f"expect {model_type} model but got {self.model_type}"
-
-        # now load tensors to object attributes
-        for attr_name, tensor_name in self.tensors.items():
-            self._get_tensor(tensor_name, attr_name)
-
-        # load optional tensors if possible
-        optional_tensors = {
-            "t_global_tensor": f"o_global_{model_type}:0",
-            "t_force": "o_force:0",
-            "t_virial": "o_virial:0",
-            "t_atom_virial": "o_atom_virial:0",
-        }
-        try:
-            # first make sure these tensor all exists (but do not modify self attr)
-            for attr_name, tensor_name in optional_tensors.items():
-                self._get_tensor(tensor_name)
-            # then put those into self.attrs
-            for attr_name, tensor_name in optional_tensors.items():
-                self._get_tensor(tensor_name, attr_name)
-        except KeyError:
-            self._support_gfv = False
-        else:
-            self.tensors.update(optional_tensors)
-            self._support_gfv = True
-
-        self._run_default_sess()
-        self.tmap = self.tmap.decode("UTF-8").split()
-
-    def _run_default_sess(self):
-        [self.ntypes, self.rcut, self.tmap, self.tselt, self.output_dim] = run_sess(
-            self.sess,
-            [
-                self.t_ntypes,
-                self.t_rcut,
-                self.t_tmap,
-                self.t_sel_type,
-                self.t_ouput_dim,
-            ],
-        )
-
-    def get_ntypes(self) -> int:
-        """Get the number of atom types of this model."""
-        return self.ntypes
-
-    def get_rcut(self) -> float:
-        """Get the cut-off radius of this model."""
-        return self.rcut
-
-    def get_type_map(self) -> List[str]:
-        """Get the type map (element name of the atom types) of this model."""
-        return self.tmap
-
-    def get_sel_type(self) -> List[int]:
-        """Get the selected atom types of this model."""
-        return self.tselt
-
-    def get_dim_fparam(self) -> int:
-        """Get the number (dimension) of frame parameters of this DP."""
-        return self.dfparam
-
-    def get_dim_aparam(self) -> int:
-        """Get the number (dimension) of atomic parameters of this DP."""
-        return self.daparam
-
     def eval(
         self,
         coords: np.ndarray,
-        cells: np.ndarray,
-        atom_types: List[int],
+        cells: Optional[np.ndarray],
+        atom_types: Union[List[int], np.ndarray],
         atomic: bool = True,
         fparam: Optional[np.ndarray] = None,
         aparam: Optional[np.ndarray] = None,
-        efield: Optional[np.ndarray] = None,
         mixed_type: bool = False,
+        **kwargs: dict,
     ) -> np.ndarray:
         """Evaluate the model.
 
@@ -167,7 +62,7 @@ def eval(
             The cell of the region.
             If None then non-PBC is assumed, otherwise using PBC.
             The array should be of size nframes x 9
-        atom_types
+        atom_types : list[int] or np.ndarray
             The atom types
             The list should contain natoms ints
         atomic
@@ -191,100 +86,39 @@ def eval(
             If atomic == False then of size nframes x output_dim
             else of size nframes x natoms x output_dim
         """
-        # standarize the shape of inputs
-        if mixed_type:
-            natoms = atom_types[0].size
-            atom_types = np.array(atom_types, dtype=int).reshape([-1, natoms])
-        else:
-            atom_types = np.array(atom_types, dtype=int).reshape([-1])
-            natoms = atom_types.size
-        coords = np.reshape(np.array(coords), [-1, natoms * 3])
-        nframes = coords.shape[0]
-        if cells is None:
-            pbc = False
-            cells = np.tile(np.eye(3), [nframes, 1]).reshape([nframes, 9])
-        else:
-            pbc = True
-            cells = np.array(cells).reshape([nframes, 9])
-
-        # sort inputs
-        coords, atom_types, imap, sel_at, sel_imap = self.sort_input(
-            coords, atom_types, sel_atoms=self.get_sel_type(), mixed_type=mixed_type
+        (
+            coords,
+            cells,
+            atom_types,
+            fparam,
+            aparam,
+            nframes,
+            natoms,
+        ) = self._standard_input(coords, cells, atom_types, fparam, aparam, mixed_type)
+        results = self.deep_eval.eval(
+            coords,
+            cells,
+            atom_types,
+            atomic,
+            fparam=fparam,
+            aparam=aparam,
+            **kwargs,
         )
-
-        # make natoms_vec and default_mesh
-        if self.neighbor_list is None:
-            natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type)
-            assert natoms_vec[0] == natoms
-            mesh = make_default_mesh(pbc, mixed_type)
-        else:
-            if nframes > 1:
-                raise NotImplementedError(
-                    "neighbor_list does not support multiple frames"
-                )
-            (
-                natoms_vec,
-                coords,
-                atom_types,
-                mesh,
-                imap,
-                _,
-            ) = self.build_neighbor_list(
-                coords,
-                cells if cells is not None else None,
-                atom_types,
-                imap,
-                self.neighbor_list,
-            )
-
-        # evaluate
-        feed_dict_test = {}
-        feed_dict_test[self.t_natoms] = natoms_vec
-        if mixed_type:
-            feed_dict_test[self.t_type] = atom_types.reshape([-1])
-        else:
-            feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape(
-                [-1]
-            )
-        feed_dict_test[self.t_coord] = np.reshape(coords, [-1])
-        feed_dict_test[self.t_box] = np.reshape(cells, [-1])
-        feed_dict_test[self.t_mesh] = mesh
-
         if atomic:
-            assert (
-                "global" not in self.model_type
-            ), f"cannot do atomic evaluation with model type {self.model_type}"
-            t_out = [self.t_tensor]
+            return results[self.output_tensor_name].reshape(nframes, natoms, -1)
         else:
-            assert (
-                self._support_gfv or "global" in self.model_type
-            ), f"do not support global tensor evaluation with old {self.model_type} model"
-            t_out = [self.t_global_tensor if self._support_gfv else self.t_tensor]
-        v_out = self.sess.run(t_out, feed_dict=feed_dict_test)
-        tensor = v_out[0]
-
-        # reverse map of the outputs
-        if atomic:
-            tensor = np.array(tensor)
-            tensor = self.reverse_map(
-                np.reshape(tensor, [nframes, -1, self.output_dim]), sel_imap
-            )
-            tensor = np.reshape(tensor, [nframes, len(sel_at), self.output_dim])
-        else:
-            tensor = np.reshape(tensor, [nframes, self.output_dim])
-
-        return tensor
+            return results[f"{self.output_tensor_name}_redu"].reshape(nframes, -1)
 
     def eval_full(
         self,
         coords: np.ndarray,
-        cells: np.ndarray,
-        atom_types: List[int],
+        cells: Optional[np.ndarray],
+        atom_types: np.ndarray,
         atomic: bool = False,
-        fparam: Optional[np.array] = None,
-        aparam: Optional[np.array] = None,
-        efield: Optional[np.array] = None,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
         mixed_type: bool = False,
+        **kwargs: dict,
     ) -> Tuple[np.ndarray, ...]:
         """Evaluate the model with interface similar to the energy model.
         Will return global tensor, component-wise force and virial
@@ -308,8 +142,6 @@ def eval_full(
             Not used in this model
         aparam
             Not used in this model
-        efield
-            Not used in this model
         mixed_type
             Whether to perform the mixed_type mode.
             If True, the input data has the mixed_type format (see doc/model/train_se_atten.md),
@@ -333,114 +165,72 @@ def eval_full(
             The atomic virial. Only returned when atomic == True
             shape: [nframes x nout x natoms x 9]
         """
-        assert self._support_gfv, "do not support eval_full with old tensor model"
-
-        # standarize the shape of inputs
-        if mixed_type:
-            natoms = atom_types[0].size
-            atom_types = np.array(atom_types, dtype=int).reshape([-1, natoms])
-        else:
-            atom_types = np.array(atom_types, dtype=int).reshape([-1])
-            natoms = atom_types.size
-        coords = np.reshape(np.array(coords), [-1, natoms * 3])
-        nframes = coords.shape[0]
-        if cells is None:
-            pbc = False
-            cells = np.tile(np.eye(3), [nframes, 1]).reshape([nframes, 9])
-        else:
-            pbc = True
-            cells = np.array(cells).reshape([nframes, 9])
-        nout = self.output_dim
-
-        # sort inputs
-        coords, atom_types, imap, sel_at, sel_imap = self.sort_input(
-            coords, atom_types, sel_atoms=self.get_sel_type(), mixed_type=mixed_type
+        (
+            coords,
+            cells,
+            atom_types,
+            fparam,
+            aparam,
+            nframes,
+            natoms,
+        ) = self._standard_input(coords, cells, atom_types, fparam, aparam, mixed_type)
+        results = self.deep_eval.eval(
+            coords,
+            cells,
+            atom_types,
+            atomic,
+            fparam=fparam,
+            aparam=aparam,
+            **kwargs,
         )
 
-        # make natoms_vec and default_mesh
-        if self.neighbor_list is None:
-            natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type)
-            assert natoms_vec[0] == natoms
-            mesh = make_default_mesh(pbc, mixed_type)
-            ghost_map = None
-        else:
-            if nframes > 1:
-                raise NotImplementedError(
-                    "neighbor_list does not support multiple frames"
-                )
-            (
-                natoms_vec,
-                coords,
-                atom_types,
-                mesh,
-                imap,
-                ghost_map,
-            ) = self.build_neighbor_list(
-                coords,
-                cells if cells is not None else None,
-                atom_types,
-                imap,
-                self.neighbor_list,
+        energy = results[f"{self.output_tensor_name}_redu"].reshape(nframes, -1)
+        force = results[f"{self.output_tensor_name}_derv_r"].reshape(
+            nframes, -1, natoms, 3
+        )
+        virial = results[f"{self.output_tensor_name}_derv_c_redu"].reshape(
+            nframes, -1, 9
+        )
+        if atomic:
+            atomic_energy = results[self.output_tensor_name].reshape(
+                nframes, natoms, -1
+            )
+            atomic_virial = results[f"{self.output_tensor_name}_derv_c"].reshape(
+                nframes, -1, natoms, 9
+            )
+            return (
+                energy,
+                force,
+                virial,
+                atomic_energy,
+                atomic_virial,
             )
-
-        # evaluate
-        feed_dict_test = {}
-        feed_dict_test[self.t_natoms] = natoms_vec
-        if mixed_type:
-            feed_dict_test[self.t_type] = atom_types.reshape([-1])
         else:
-            feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape(
-                [-1]
+            return (
+                energy,
+                force,
+                virial,
             )
-        feed_dict_test[self.t_coord] = np.reshape(coords, [-1])
-        feed_dict_test[self.t_box] = np.reshape(cells, [-1])
-        feed_dict_test[self.t_mesh] = mesh
-
-        t_out = [self.t_global_tensor, self.t_force, self.t_virial]
-        if atomic:
-            t_out += [self.t_tensor, self.t_atom_virial]
-
-        v_out = self.sess.run(t_out, feed_dict=feed_dict_test)
-        gt = v_out[0]  # global tensor
-        force = v_out[1]
-        virial = v_out[2]
-        if atomic:
-            at = v_out[3]  # atom tensor
-            av = v_out[4]  # atom virial
 
-        nloc = natoms_vec[0]
-        nall = natoms_vec[1]
-
-        if ghost_map is not None:
-            # add the value of ghost atoms to real atoms
-            force = np.reshape(force, [nframes * nout, -1, 3])
-            # TODO: is there some way not to use for loop?
-            for ii in range(nframes * nout):
-                np.add.at(force[ii], ghost_map, force[ii, nloc:])
-            if atomic:
-                av = np.reshape(av, [nframes * nout, -1, 9])
-                for ii in range(nframes * nout):
-                    np.add.at(av[ii], ghost_map, av[ii, nloc:])
-
-        # please note here the shape are wrong!
-        force = self.reverse_map(np.reshape(force, [nframes * nout, nall, 3]), imap)
-        if atomic:
-            at = self.reverse_map(
-                np.reshape(at, [nframes, len(sel_at), nout]), sel_imap
+    @property
+    @abstractmethod
+    def output_tensor_name(self) -> str:
+        """The name of the tensor."""
+
+    @property
+    def output_def(self) -> ModelOutputDef:
+        """Get the output definition of this model."""
+        return ModelOutputDef(
+            FittingOutputDef(
+                [
+                    OutputVariableDef(
+                        self.output_tensor_name,
+                        shape=[-1],
+                        reduciable=True,
+                        r_differentiable=True,
+                        c_differentiable=True,
+                        atomic=True,
+                    ),
+                ]
             )
-            av = self.reverse_map(np.reshape(av, [nframes * nout, nall, 9]), imap)
-
-        # make sure the shapes are correct here
-        gt = np.reshape(gt, [nframes, nout])
-        force = np.reshape(force, [nframes, nout, nall, 3])
-        if nloc < nall:
-            force = force[:, :, :nloc, :]
-        virial = np.reshape(virial, [nframes, nout, 9])
-        if atomic:
-            at = np.reshape(at, [nframes, len(sel_at), self.output_dim])
-            av = np.reshape(av, [nframes, nout, nall, 9])
-            if nloc < nall:
-                av = av[:, :, :nloc, :]
-            return gt, force, virial, at, av
-        else:
-            return gt, force, virial
+        )
diff --git a/deepmd/infer/deep_wfc.py b/deepmd/infer/deep_wfc.py
index ed682f642b..deed938e04 100644
--- a/deepmd/infer/deep_wfc.py
+++ b/deepmd/infer/deep_wfc.py
@@ -1,68 +1,28 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-from typing import (
-    TYPE_CHECKING,
-    Optional,
-)
-
 from deepmd.infer.deep_tensor import (
     DeepTensor,
 )
 
-if TYPE_CHECKING:
-    from pathlib import (
-        Path,
-    )
-
 
 class DeepWFC(DeepTensor):
-    """Constructor.
+    """Deep WFC model.
 
     Parameters
     ----------
     model_file : Path
         The name of the frozen model file.
-    load_prefix: str
-        The prefix in the load computational graph
-    default_tf_graph : bool
-        If uses the default tf graph, otherwise build a new tf graph for evaluation
-    input_map : dict, optional
-        The input map for tf.import_graph_def. Only work with default tf graph
-
-    Warnings
-    --------
-    For developers: `DeepTensor` initializer must be called at the end after
-    `self.tensors` are modified because it uses the data in `self.tensors` dict.
-    Do not chanage the order!
+    *args : list
+        Positional arguments.
+    auto_batch_size : bool or int or AutoBatchSize, default: True
+        If True, automatic batch size will be used. If int, it will be used
+        as the initial batch size.
+    neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional
+        The ASE neighbor list class to produce the neighbor list. If None, the
+        neighbor list will be built natively in the model.
+    **kwargs : dict
+        Keyword arguments.
     """
 
-    def __init__(
-        self,
-        model_file: "Path",
-        load_prefix: str = "load",
-        default_tf_graph: bool = False,
-        input_map: Optional[dict] = None,
-    ) -> None:
-        # use this in favor of dict update to move attribute from class to
-        # instance namespace
-        self.tensors = dict(
-            {
-                # output tensor
-                "t_tensor": "o_wfc:0",
-            },
-            **self.tensors,
-        )
-        DeepTensor.__init__(
-            self,
-            model_file,
-            load_prefix=load_prefix,
-            default_tf_graph=default_tf_graph,
-            input_map=input_map,
-        )
-
-    def get_dim_fparam(self) -> int:
-        """Unsupported in this model."""
-        raise NotImplementedError("This model type does not support this attribute")
-
-    def get_dim_aparam(self) -> int:
-        """Unsupported in this model."""
-        raise NotImplementedError("This model type does not support this attribute")
+    @property
+    def output_tensor_name(self) -> str:
+        return "wfc"
diff --git a/deepmd/infer/model_devi.py b/deepmd/infer/model_devi.py
index 8c329a0845..a37dfd34c5 100644
--- a/deepmd/infer/model_devi.py
+++ b/deepmd/infer/model_devi.py
@@ -10,16 +10,12 @@
 from deepmd.common import (
     expand_sys_str,
 )
-
-from ..utils.batch_size import (
-    AutoBatchSize,
+from deepmd.infer.deep_pot import (
+    DeepPot,
 )
-from ..utils.data import (
+from deepmd.utils.data import (
     DeepmdData,
 )
-from .deep_pot import (
-    DeepPot,
-)
 
 try:
     from typing import Literal  # python >=3.8
@@ -33,8 +29,7 @@ def calc_model_devi_f(
     real_f: Optional[np.ndarray] = None,
     relative: Optional[float] = None,
     atomic: Literal[False] = False,
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-    ...
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: ...
 
 
 @overload
@@ -44,8 +39,7 @@ def calc_model_devi_f(
     relative: Optional[float] = None,
     *,
     atomic: Literal[True],
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
-    ...
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: ...
 
 
 def calc_model_devi_f(
@@ -297,19 +291,19 @@ def calc_model_devi(
 
     Examples
     --------
-    >>> from deepmd.infer import calc_model_devi
-    >>> from deepmd.infer import DeepPot as DP
+    >>> from deepmd.tf.infer import calc_model_devi
+    >>> from deepmd.tf.infer import DeepPot as DP
     >>> import numpy as np
-    >>> coord = np.array([[1,0,0], [0,0,1.5], [1,0,3]]).reshape([1, -1])
+    >>> coord = np.array([[1, 0, 0], [0, 0, 1.5], [1, 0, 3]]).reshape([1, -1])
     >>> cell = np.diag(10 * np.ones(3)).reshape([1, -1])
-    >>> atype = [1,0,1]
+    >>> atype = [1, 0, 1]
     >>> graphs = [DP("graph.000.pb"), DP("graph.001.pb")]
     >>> model_devi = calc_model_devi(coord, cell, atype, graphs)
     """
     energies = []
     forces = []
     virials = []
-    natom = atype.shape[-1]
+    natom = np.array(atype).shape[-1]
     for dp in models:
         ret = dp.eval(
             coord,
@@ -396,9 +390,8 @@ def make_model_devi(
     **kwargs
         Arbitrary keyword arguments.
     """
-    auto_batch_size = AutoBatchSize()
     # init models
-    dp_models = [DeepPot(model, auto_batch_size=auto_batch_size) for model in models]
+    dp_models = [DeepPot(model, auto_batch_size=True) for model in models]
 
     # check type maps
     tmaps = [dp.get_type_map() for dp in dp_models]
diff --git a/deepmd/loggers/__init__.py b/deepmd/loggers/__init__.py
index 71057e3056..39aa76139d 100644
--- a/deepmd/loggers/__init__.py
+++ b/deepmd/loggers/__init__.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Alias of deepmd_utils.loggers for backward compatibility."""
+"""Module taking care of logging duties."""
 
-from deepmd_utils.loggers.loggers import (
+from .loggers import (
     set_log_handles,
 )
 
diff --git a/deepmd/loggers/loggers.py b/deepmd/loggers/loggers.py
index 74ca7de63e..33b9497507 100644
--- a/deepmd/loggers/loggers.py
+++ b/deepmd/loggers/loggers.py
@@ -1,7 +1,277 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Alias of deepmd_utils.loggers.loggers for backward compatibility."""
-from deepmd_utils.loggers.loggers import (
-    set_log_handles,
+"""Logger initialization for package."""
+
+import logging
+import os
+from typing import (
+    TYPE_CHECKING,
+    Optional,
 )
 
+if TYPE_CHECKING:
+    from pathlib import (
+        Path,
+    )
+
+    from mpi4py import (
+        MPI,
+    )
+
+    _MPI_APPEND_MODE = MPI.MODE_CREATE | MPI.MODE_APPEND
+
+logging.getLogger(__name__)
+
 __all__ = ["set_log_handles"]
+
+# logger formater
+FFORMATTER = logging.Formatter(
+    "[%(asctime)s] %(app_name)s %(levelname)-7s %(name)-45s %(message)s"
+)
+CFORMATTER = logging.Formatter(
+    #    "%(app_name)s %(levelname)-7s |-> %(name)-45s %(message)s"
+    "[%(asctime)s] %(app_name)s %(levelname)-7s %(message)s"
+)
+FFORMATTER_MPI = logging.Formatter(
+    "[%(asctime)s] %(app_name)s rank:%(rank)-2s %(levelname)-7s %(name)-45s %(message)s"
+)
+CFORMATTER_MPI = logging.Formatter(
+    #    "%(app_name)s rank:%(rank)-2s %(levelname)-7s |-> %(name)-45s %(message)s"
+    "[%(asctime)s] %(app_name)s rank:%(rank)-2s %(levelname)-7s %(message)s"
+)
+
+
+class _AppFilter(logging.Filter):
+    """Add field `app_name` to log messages."""
+
+    def filter(self, record):
+        record.app_name = "DEEPMD"
+        return True
+
+
+class _MPIRankFilter(logging.Filter):
+    """Add MPI rank number to log messages, adds field `rank`."""
+
+    def __init__(self, rank: int) -> None:
+        super().__init__(name="MPI_rank_id")
+        self.mpi_rank = str(rank)
+
+    def filter(self, record):
+        record.rank = self.mpi_rank
+        return True
+
+
+class _MPIMasterFilter(logging.Filter):
+    """Filter that lets through only messages emited from rank==0."""
+
+    def __init__(self, rank: int) -> None:
+        super().__init__(name="MPI_master_log")
+        self.mpi_rank = rank
+
+    def filter(self, record):
+        if self.mpi_rank == 0:
+            return True
+        else:
+            return False
+
+
+class _MPIFileStream:
+    """Wrap MPI.File` so it has the same API as python file streams.
+
+    Parameters
+    ----------
+    filename : Path
+        disk location of the file stream
+    MPI : MPI
+        MPI communicator object
+    mode : str, optional
+        file write mode, by default _MPI_APPEND_MODE
+    """
+
+    def __init__(
+        self, filename: "Path", MPI: "MPI", mode: str = "_MPI_APPEND_MODE"
+    ) -> None:
+        self.stream = MPI.File.Open(MPI.COMM_WORLD, filename, mode)
+        self.stream.Set_atomicity(True)
+        self.name = "MPIfilestream"
+
+    def write(self, msg: str):
+        """Write to MPI shared file stream.
+
+        Parameters
+        ----------
+        msg : str
+            message to write
+        """
+        b = bytearray()
+        b.extend(map(ord, msg))
+        self.stream.Write_shared(b)
+
+    def close(self):
+        """Synchronize and close MPI file stream."""
+        self.stream.Sync()
+        self.stream.Close()
+
+
+class _MPIHandler(logging.FileHandler):
+    """Emulate `logging.FileHandler` with MPI shared File that all ranks can write to.
+
+    Parameters
+    ----------
+    filename : Path
+        file path
+    MPI : MPI
+        MPI communicator object
+    mode : str, optional
+        file access mode, by default "_MPI_APPEND_MODE"
+    """
+
+    def __init__(
+        self,
+        filename: "Path",
+        MPI: "MPI",
+        mode: str = "_MPI_APPEND_MODE",
+    ) -> None:
+        self.MPI = MPI
+        super().__init__(filename, mode=mode, encoding=None, delay=False)
+
+    def _open(self):
+        return _MPIFileStream(self.baseFilename, self.MPI, self.mode)
+
+    def setStream(self, stream):
+        """Stream canot be reasigned in MPI mode."""
+        raise NotImplementedError("Unable to do for MPI file handler!")
+
+
+def set_log_handles(
+    level: int, log_path: Optional["Path"] = None, mpi_log: Optional[str] = None
+):
+    """Set desired level for package loggers and add file handlers.
+
+    Parameters
+    ----------
+    level : int
+        logging level
+    log_path : Optional[str]
+        path to log file, if None logs will be send only to console. If the parent
+        directory does not exist it will be automatically created, by default None
+    mpi_log : Optional[str], optional
+        mpi log type. Has three options. `master` will output logs to file and console
+        only from rank==0. `collect` will write messages from all ranks to one file
+        opened under rank==0 and to console. `workers` will open one log file for each
+        worker designated by its rank, console behaviour is the same as for `collect`.
+        If this argument is specified, package 'mpi4py' must be already installed.
+        by default None
+
+    Raises
+    ------
+    RuntimeError
+        If the argument `mpi_log` is specified, package `mpi4py` is not installed.
+
+    References
+    ----------
+    https://groups.google.com/g/mpi4py/c/SaNzc8bdj6U
+    https://stackoverflow.com/questions/35869137/avoid-tensorflow-print-on-standard-error
+    https://stackoverflow.com/questions/56085015/suppress-openmp-debug-messages-when-running-tensorflow-on-cpu
+
+    Notes
+    -----
+    Logging levels:
+
+    +---------+--------------+----------------+----------------+----------------+
+    |         | our notation | python logging | tensorflow cpp | OpenMP         |
+    +=========+==============+================+================+================+
+    | debug   | 10           | 10             | 0              | 1/on/true/yes  |
+    +---------+--------------+----------------+----------------+----------------+
+    | info    | 20           | 20             | 1              | 0/off/false/no |
+    +---------+--------------+----------------+----------------+----------------+
+    | warning | 30           | 30             | 2              | 0/off/false/no |
+    +---------+--------------+----------------+----------------+----------------+
+    | error   | 40           | 40             | 3              | 0/off/false/no |
+    +---------+--------------+----------------+----------------+----------------+
+
+    """
+    # silence logging for OpenMP when running on CPU if level is any other than debug
+    if level <= 10:
+        os.environ["KMP_WARNINGS"] = "FALSE"
+
+    # set TF cpp internal logging level
+    os.environ["TF_CPP_MIN_LOG_LEVEL"] = str(int((level / 10) - 1))
+
+    # get root logger
+    root_log = logging.getLogger("deepmd")
+    root_log.propagate = False
+
+    root_log.setLevel(level)
+
+    # check if arguments are present
+    MPI = None
+    if mpi_log:
+        try:
+            from mpi4py import (
+                MPI,
+            )
+        except ImportError as e:
+            raise RuntimeError(
+                "You cannot specify 'mpi_log' when mpi4py not installed"
+            ) from e
+
+    # * add console handler ************************************************************
+    ch = logging.StreamHandler()
+    if MPI:
+        rank = MPI.COMM_WORLD.Get_rank()
+        if mpi_log == "master":
+            ch.setFormatter(CFORMATTER)
+            ch.addFilter(_MPIMasterFilter(rank))
+        else:
+            ch.setFormatter(CFORMATTER_MPI)
+            ch.addFilter(_MPIRankFilter(rank))
+    else:
+        ch.setFormatter(CFORMATTER)
+
+    ch.setLevel(level)
+    ch.addFilter(_AppFilter())
+    # clean old handlers before adding new one
+    root_log.handlers.clear()
+    root_log.addHandler(ch)
+
+    # * add file handler ***************************************************************
+    if log_path:
+        # create directory
+        log_path.parent.mkdir(exist_ok=True, parents=True)
+
+        fh = None
+
+        if mpi_log == "master":
+            rank = MPI.COMM_WORLD.Get_rank()
+            if rank == 0:
+                fh = logging.FileHandler(log_path, mode="w")
+                fh.addFilter(_MPIMasterFilter(rank))
+                fh.setFormatter(FFORMATTER)
+        elif mpi_log == "collect":
+            rank = MPI.COMM_WORLD.Get_rank()
+            fh = _MPIHandler(log_path, MPI, mode=MPI.MODE_WRONLY | MPI.MODE_CREATE)
+            fh.addFilter(_MPIRankFilter(rank))
+            fh.setFormatter(FFORMATTER_MPI)
+        elif mpi_log == "workers":
+            rank = MPI.COMM_WORLD.Get_rank()
+            # if file has suffix than inser rank number before suffix
+            # e.g deepmd.log -> deepmd_<rank>.log
+            # if no suffix is present, insert rank as suffix
+            # e.g. deepmdlog -> deepmdlog.<rank>
+            if log_path.suffix:
+                worker_log = (log_path.parent / f"{log_path.stem}_{rank}").with_suffix(
+                    log_path.suffix
+                )
+            else:
+                worker_log = log_path.with_suffix(f".{rank}")
+
+            fh = logging.FileHandler(worker_log, mode="w")
+            fh.setFormatter(FFORMATTER)
+        else:
+            fh = logging.FileHandler(log_path, mode="w")
+            fh.setFormatter(FFORMATTER)
+
+        if fh:
+            fh.setLevel(level)
+            fh.addFilter(_AppFilter())
+            root_log.addHandler(fh)
diff --git a/deepmd/loggers/training.py b/deepmd/loggers/training.py
new file mode 100644
index 0000000000..954473e309
--- /dev/null
+++ b/deepmd/loggers/training.py
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Dict,
+    Optional,
+)
+
+
+def format_training_message(
+    batch: int,
+    wall_time: float,
+):
+    """Format a training message."""
+    return f"batch {batch:7d}: " f"total wall time = {wall_time:.2f} s"
+
+
+def format_training_message_per_task(
+    batch: int,
+    task_name: str,
+    rmse: Dict[str, float],
+    learning_rate: Optional[float],
+):
+    if task_name:
+        task_name += ": "
+    if learning_rate is None:
+        lr = ""
+    else:
+        lr = f", lr = {learning_rate:8.2e}"
+    # sort rmse
+    rmse = dict(sorted(rmse.items()))
+    return (
+        f"batch {batch:7d}: {task_name}"
+        f"{', '.join([f'{kk} = {vv:8.2e}' for kk, vv in rmse.items()])}"
+        f"{lr}"
+    )
diff --git a/deepmd_utils/main.py b/deepmd/main.py
similarity index 76%
rename from deepmd_utils/main.py
rename to deepmd/main.py
index 19afaeee1f..b503107c73 100644
--- a/deepmd_utils/main.py
+++ b/deepmd/main.py
@@ -4,16 +4,27 @@
 If only printing the help message, this module does not call
 the main DeePMD-kit module to avoid the slow import of TensorFlow.
 """
+
 import argparse
 import logging
+import os
 import textwrap
+from collections import (
+    defaultdict,
+)
 from typing import (
+    Dict,
     List,
     Optional,
+    Type,
+)
+
+from deepmd.backend.backend import (
+    Backend,
 )
 
 try:
-    from deepmd_utils._version import version as __version__
+    from deepmd._version import version as __version__
 except ImportError:
     __version__ = "unknown"
 
@@ -45,6 +56,19 @@ class RawTextArgumentDefaultsHelpFormatter(
     """This formatter is used to print multile-line help message with default value."""
 
 
+BACKENDS: Dict[str, Type[Backend]] = Backend.get_backends_by_feature(
+    Backend.Feature.ENTRY_POINT
+)
+BACKEND_TABLE: Dict[str, str] = {kk: vv.name.lower() for kk, vv in BACKENDS.items()}
+
+
+class BackendOption(argparse.Action):
+    """Map backend alias to unique name."""
+
+    def __call__(self, parser, namespace, values, option_string=None):
+        setattr(namespace, self.dest, BACKEND_TABLE[values])
+
+
 def main_parser() -> argparse.ArgumentParser:
     """DeePMD-Kit commandline options argument parser.
 
@@ -56,8 +80,49 @@ def main_parser() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser(
         description="DeePMD-kit: A deep learning package for many-body potential energy"
         " representation and molecular dynamics",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        formatter_class=RawTextArgumentDefaultsHelpFormatter,
+        epilog=textwrap.dedent(
+            """\
+        Use --tf or --pt to choose the backend:
+            dp --tf train input.json
+            dp --pt train input.json
+        """
+        ),
+    )
+
+    # default backend is TF for compatibility
+    default_backend = os.environ.get("DP_BACKEND", "tensorflow").lower()
+    if default_backend not in BACKEND_TABLE.keys():
+        raise ValueError(
+            f"Unknown backend {default_backend}. "
+            "Please set DP_BACKEND to either tensorflow or pytorch."
+        )
+
+    parser_backend = parser.add_mutually_exclusive_group()
+    parser_backend.add_argument(
+        "-b",
+        "--backend",
+        choices=list(BACKEND_TABLE.keys()),
+        action=BackendOption,
+        default=default_backend,
+        help=(
+            "The backend of the model. Default can be set by environment variable "
+            "DP_BACKEND."
+        ),
     )
+
+    BACKEND_ALIAS: Dict[str, List[str]] = defaultdict(list)
+    for alias, backend in BACKEND_TABLE.items():
+        BACKEND_ALIAS[backend].append(alias)
+    for backend, alias in BACKEND_ALIAS.items():
+        parser_backend.add_argument(
+            *[f"--{aa}" for aa in alias],
+            action="store_const",
+            dest="backend",
+            const=backend,
+            help=f"Alias for --backend {backend}",
+        )
+
     subparsers = parser.add_subparsers(title="Valid subcommands", dest="command")
 
     # * logging options parser *********************************************************
@@ -98,7 +163,9 @@ def main_parser() -> argparse.ArgumentParser:
 
     # * transfer script ****************************************************************
     parser_transfer = subparsers.add_parser(
-        "transfer", parents=[parser_log], help="pass parameters to another model"
+        "transfer",
+        parents=[parser_log],
+        help="(Supported backend: TensorFlow) pass parameters to another model",
     )
     parser_transfer.add_argument(
         "-r",
@@ -181,6 +248,18 @@ def main_parser() -> argparse.ArgumentParser:
         action="store_true",
         help="Skip calculating neighbor statistics. Sel checking, automatic sel, and model compression will be disabled.",
     )
+    parser_train.add_argument(
+        # -m has been used by mpi-log
+        "--model-branch",
+        type=str,
+        default="",
+        help="(Supported backend: PyTorch) Model branch chosen for fine-tuning if multi-task. If not specified, it will re-init the fitting net.",
+    )
+    parser_train.add_argument(
+        "--force-load",
+        action="store_true",
+        help="(Supported backend: PyTorch) Force load from ckpt, other missing tensors will init from scratch",
+    )
 
     # * freeze script ******************************************************************
     parser_frz = subparsers.add_parser(
@@ -199,36 +278,43 @@ def main_parser() -> argparse.ArgumentParser:
     parser_frz.add_argument(
         "-c",
         "--checkpoint-folder",
+        "--checkpoint",
         type=str,
         default=".",
-        help="path to checkpoint folder",
+        help="Path to checkpoint, either a folder containing checkpoint or the checkpoint prefix",
     )
     parser_frz.add_argument(
         "-o",
         "--output",
         type=str,
-        default="frozen_model.pb",
-        help="name of graph, will output to the checkpoint folder",
+        default="frozen_model",
+        help="Filename (prefix) of the output model file. TensorFlow backend: suffix is .pb; PyTorch backend: suffix is .pth",
     )
     parser_frz.add_argument(
         "-n",
         "--node-names",
         type=str,
         default=None,
-        help="the frozen nodes, if not set, determined from the model type",
+        help="(Supported backend: TensorFlow) the frozen nodes, if not set, determined from the model type",
     )
     parser_frz.add_argument(
         "-w",
         "--nvnmd-weight",
         type=str,
         default=None,
-        help="the name of weight file (.npy), if set, save the model's weight into the file",
+        help="(Supported backend: TensorFlow) the name of weight file (.npy), if set, save the model's weight into the file",
     )
     parser_frz.add_argument(
         "--united-model",
         action="store_true",
         default=False,
-        help="When in multi-task mode, freeze all nodes into one united model",
+        help="(Supported backend: TensorFlow) When in multi-task mode, freeze all nodes into one united model",
+    )
+    parser_frz.add_argument(
+        "--head",
+        default=None,
+        type=str,
+        help="(Supported backend: PyTorch) Task head to freeze if in multi-task mode.",
     )
 
     # * test script ********************************************************************
@@ -247,9 +333,9 @@ def main_parser() -> argparse.ArgumentParser:
     parser_tst.add_argument(
         "-m",
         "--model",
-        default="frozen_model.pb",
+        default="frozen_model",
         type=str,
-        help="Frozen model file to import",
+        help="Frozen model file (prefix) to import. TensorFlow backend: suffix is .pb; PyTorch backend: suffix is .pth.",
     )
     parser_tst_subgroup = parser_tst.add_mutually_exclusive_group()
     parser_tst_subgroup.add_argument(
@@ -267,7 +353,11 @@ def main_parser() -> argparse.ArgumentParser:
         help="The path to file of test list.",
     )
     parser_tst.add_argument(
-        "-S", "--set-prefix", default="set", type=str, help="The set prefix"
+        "-S",
+        "--set-prefix",
+        default="set",
+        type=str,
+        help="(Supported backend: TensorFlow) The set prefix",
     )
     parser_tst.add_argument(
         "-n",
@@ -277,7 +367,11 @@ def main_parser() -> argparse.ArgumentParser:
         help="The number of data for test. 0 means all data.",
     )
     parser_tst.add_argument(
-        "-r", "--rand-seed", type=int, default=None, help="The random seed"
+        "-r",
+        "--rand-seed",
+        type=int,
+        default=None,
+        help="(Supported backend: TensorFlow) The random seed",
     )
     parser_tst.add_argument(
         "--shuffle-test", action="store_true", default=False, help="Shuffle test data"
@@ -294,7 +388,19 @@ def main_parser() -> argparse.ArgumentParser:
         "--atomic",
         action="store_true",
         default=False,
-        help="Test the accuracy of atomic label, i.e. energy / tensor (dipole, polar)",
+        help="(Supported backend: TensorFlow) Test the accuracy of atomic label, i.e. energy / tensor (dipole, polar)",
+    )
+    parser_tst.add_argument(
+        "-i",
+        "--input_script",
+        type=str,
+        help="(Supported backend: PyTorch) The input script of the model",
+    )
+    parser_tst.add_argument(
+        "--head",
+        default=None,
+        type=str,
+        help="(Supported backend: PyTorch) Task head to test if in multi-task mode.",
     )
 
     # * compress model *****************************************************************
@@ -308,7 +414,7 @@ def main_parser() -> argparse.ArgumentParser:
     parser_compress = subparsers.add_parser(
         "compress",
         parents=[parser_log, parser_mpi_log],
-        help="compress a model",
+        help="(Supported backend: TensorFlow) compress a model",
         formatter_class=RawTextArgumentDefaultsHelpFormatter,
         epilog=textwrap.dedent(
             """\
@@ -409,10 +515,10 @@ def main_parser() -> argparse.ArgumentParser:
     parser_model_devi.add_argument(
         "-m",
         "--models",
-        default=["graph.000.pb", "graph.001.pb", "graph.002.pb", "graph.003.pb"],
+        default=["graph.000", "graph.001", "graph.002", "graph.003"],
         nargs="+",
         type=str,
-        help="Frozen models file to import",
+        help="Frozen models file (prefix) to import. TensorFlow backend: suffix is .pb; PyTorch backend: suffix is .pth.",
     )
     parser_model_devi.add_argument(
         "-s",
@@ -465,7 +571,7 @@ def main_parser() -> argparse.ArgumentParser:
     parser_transform = subparsers.add_parser(
         "convert-from",
         parents=[parser_log],
-        help="convert lower model version to supported version",
+        help="(Supported backend: TensorFlow) convert lower model version to supported version",
         formatter_class=RawTextArgumentDefaultsHelpFormatter,
         epilog=textwrap.dedent(
             """\
@@ -535,6 +641,7 @@ def main_parser() -> argparse.ArgumentParser:
         help="type map",
     )
     parser_neighbor_stat.add_argument(
+        "--mixed-type",
         "--one-type",
         action="store_true",
         default=False,
@@ -550,7 +657,7 @@ def main_parser() -> argparse.ArgumentParser:
     parser_train_nvnmd = subparsers.add_parser(
         "train-nvnmd",
         parents=[parser_log],
-        help="train nvnmd model",
+        help="(Supported backend: TensorFlow) train nvnmd model",
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
         epilog=textwrap.dedent(
             """\
@@ -615,6 +722,23 @@ def main_parser() -> argparse.ArgumentParser:
             "to the network on both IPv4 and IPv6 (where available)."
         ),
     )
+
+    # convert_backend
+    parser_convert_backend = subparsers.add_parser(
+        "convert-backend",
+        parents=[parser_log],
+        help="Convert model to another backend.",
+        formatter_class=RawTextArgumentDefaultsHelpFormatter,
+        epilog=textwrap.dedent(
+            """\
+        examples:
+            dp convert-backend model.pb model.pth
+            dp convert-backend model.pb model.dp
+        """
+        ),
+    )
+    parser_convert_backend.add_argument("INPUT", help="The input model file.")
+    parser_convert_backend.add_argument("OUTPUT", help="The output model file.")
     return parser
 
 
@@ -651,6 +775,33 @@ def main():
         if no command was input
     """
     args = parse_args()
-    from deepmd.entrypoints.main import main as deepmd_main
+
+    if args.backend not in BACKEND_TABLE:
+        raise ValueError(f"Unknown backend {args.backend}")
+
+    if args.command in (
+        "test",
+        "doc-train-input",
+        "model-devi",
+        "neighbor-stat",
+        "gui",
+        "convert-backend",
+    ):
+        # common entrypoints
+        from deepmd.entrypoints.main import main as deepmd_main
+    elif args.command in (
+        "train",
+        "freeze",
+        "transfer",
+        "compress",
+        "convert-from",
+        "train-nvnmd",
+    ):
+        deepmd_main = BACKENDS[args.backend]().entry_point_hook
+    elif args.command is None:
+        # help message has been printed in parse_args
+        return
+    else:
+        raise RuntimeError(f"unknown command {args.command}")
 
     deepmd_main(args)
diff --git a/deepmd/pt/__init__.py b/deepmd/pt/__init__.py
new file mode 100644
index 0000000000..ab61736198
--- /dev/null
+++ b/deepmd/pt/__init__.py
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+
+# import customized OPs globally
+from deepmd.pt.cxx_op import (
+    ENABLE_CUSTOMIZED_OP,
+)
+
+__all__ = [
+    "ENABLE_CUSTOMIZED_OP",
+]
diff --git a/deepmd/pt/cxx_op.py b/deepmd/pt/cxx_op.py
new file mode 100644
index 0000000000..7887b5722c
--- /dev/null
+++ b/deepmd/pt/cxx_op.py
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import platform
+
+import torch
+
+from deepmd.env import (
+    SHARED_LIB_DIR,
+)
+
+
+def load_library(module_name: str) -> bool:
+    """Load OP library.
+
+    Parameters
+    ----------
+    module_name : str
+        Name of the module
+
+    Returns
+    -------
+    bool
+        Whether the library is loaded successfully
+    """
+    if platform.system() == "Windows":
+        ext = ".dll"
+        prefix = ""
+    else:
+        ext = ".so"
+        prefix = "lib"
+
+    module_file = (SHARED_LIB_DIR / (prefix + module_name)).with_suffix(ext).resolve()
+
+    if module_file.is_file():
+        torch.ops.load_library(module_file)
+        return True
+    return False
+
+
+ENABLE_CUSTOMIZED_OP = load_library("deepmd_op_pt")
+
+__all__ = [
+    "ENABLE_CUSTOMIZED_OP",
+]
diff --git a/deepmd_utils/entrypoints/__init__.py b/deepmd/pt/entrypoints/__init__.py
similarity index 100%
rename from deepmd_utils/entrypoints/__init__.py
rename to deepmd/pt/entrypoints/__init__.py
diff --git a/deepmd/pt/entrypoints/main.py b/deepmd/pt/entrypoints/main.py
new file mode 100644
index 0000000000..adaec0968a
--- /dev/null
+++ b/deepmd/pt/entrypoints/main.py
@@ -0,0 +1,324 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import argparse
+import json
+import logging
+import os
+from copy import (
+    deepcopy,
+)
+from pathlib import (
+    Path,
+)
+from typing import (
+    List,
+    Optional,
+    Union,
+)
+
+import h5py
+import torch
+import torch.distributed as dist
+import torch.version
+from torch.distributed.elastic.multiprocessing.errors import (
+    record,
+)
+
+from deepmd import (
+    __version__,
+)
+from deepmd.loggers.loggers import (
+    set_log_handles,
+)
+from deepmd.main import (
+    parse_args,
+)
+from deepmd.pt.cxx_op import (
+    ENABLE_CUSTOMIZED_OP,
+)
+from deepmd.pt.infer import (
+    inference,
+)
+from deepmd.pt.model.model import (
+    BaseModel,
+)
+from deepmd.pt.train import (
+    training,
+)
+from deepmd.pt.utils.dataloader import (
+    DpLoaderSet,
+)
+from deepmd.pt.utils.env import (
+    DEVICE,
+)
+from deepmd.pt.utils.finetune import (
+    change_finetune_model_params,
+)
+from deepmd.pt.utils.multi_task import (
+    preprocess_shared_params,
+)
+from deepmd.utils.argcheck import (
+    normalize,
+)
+from deepmd.utils.compat import (
+    update_deepmd_input,
+)
+from deepmd.utils.data_system import (
+    process_systems,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.summary import SummaryPrinter as BaseSummaryPrinter
+
+log = logging.getLogger(__name__)
+
+
+def get_trainer(
+    config,
+    init_model=None,
+    restart_model=None,
+    finetune_model=None,
+    model_branch="",
+    force_load=False,
+    init_frz_model=None,
+    shared_links=None,
+):
+    multi_task = "model_dict" in config.get("model", {})
+
+    # Initialize DDP
+    local_rank = os.environ.get("LOCAL_RANK")
+    if local_rank is not None:
+        local_rank = int(local_rank)
+        assert dist.is_nccl_available()
+        dist.init_process_group(backend="nccl")
+
+    ckpt = init_model if init_model is not None else restart_model
+    finetune_links = None
+    if finetune_model is not None:
+        config["model"], finetune_links = change_finetune_model_params(
+            finetune_model,
+            config["model"],
+            model_branch=model_branch,
+        )
+    config["model"]["resuming"] = (finetune_model is not None) or (ckpt is not None)
+
+    def prepare_trainer_input_single(
+        model_params_single, data_dict_single, loss_dict_single, suffix="", rank=0
+    ):
+        training_dataset_params = data_dict_single["training_data"]
+        type_split = False
+        if model_params_single["descriptor"]["type"] in ["se_e2_a"]:
+            type_split = True
+        validation_dataset_params = data_dict_single.get("validation_data", None)
+        validation_systems = (
+            validation_dataset_params["systems"] if validation_dataset_params else None
+        )
+        training_systems = training_dataset_params["systems"]
+        training_systems = process_systems(training_systems)
+        if validation_systems is not None:
+            validation_systems = process_systems(validation_systems)
+
+        # stat files
+        stat_file_path_single = data_dict_single.get("stat_file", None)
+        if rank != 0:
+            stat_file_path_single = None
+        elif stat_file_path_single is not None:
+            if Path(stat_file_path_single).is_dir():
+                raise ValueError(
+                    f"stat_file should be a file, not a directory: {stat_file_path_single}"
+                )
+            if not Path(stat_file_path_single).is_file():
+                with h5py.File(stat_file_path_single, "w") as f:
+                    pass
+            stat_file_path_single = DPPath(stat_file_path_single, "a")
+
+        # validation and training data
+        validation_data_single = (
+            DpLoaderSet(
+                validation_systems,
+                validation_dataset_params["batch_size"],
+                model_params_single["type_map"],
+            )
+            if validation_systems
+            else None
+        )
+        if ckpt or finetune_model:
+            train_data_single = DpLoaderSet(
+                training_systems,
+                training_dataset_params["batch_size"],
+                model_params_single["type_map"],
+            )
+        else:
+            train_data_single = DpLoaderSet(
+                training_systems,
+                training_dataset_params["batch_size"],
+                model_params_single["type_map"],
+            )
+        return (
+            train_data_single,
+            validation_data_single,
+            stat_file_path_single,
+        )
+
+    rank = dist.get_rank() if dist.is_initialized() else 0
+    if not multi_task:
+        (
+            train_data,
+            validation_data,
+            stat_file_path,
+        ) = prepare_trainer_input_single(
+            config["model"],
+            config["training"],
+            config["loss"],
+            rank=rank,
+        )
+    else:
+        train_data, validation_data, stat_file_path = {}, {}, {}
+        for model_key in config["model"]["model_dict"]:
+            (
+                train_data[model_key],
+                validation_data[model_key],
+                stat_file_path[model_key],
+            ) = prepare_trainer_input_single(
+                config["model"]["model_dict"][model_key],
+                config["training"]["data_dict"][model_key],
+                config["loss_dict"][model_key],
+                suffix=f"_{model_key}",
+                rank=rank,
+            )
+
+    trainer = training.Trainer(
+        config,
+        train_data,
+        stat_file_path=stat_file_path,
+        validation_data=validation_data,
+        init_model=init_model,
+        restart_model=restart_model,
+        finetune_model=finetune_model,
+        force_load=force_load,
+        shared_links=shared_links,
+        finetune_links=finetune_links,
+        init_frz_model=init_frz_model,
+    )
+    return trainer
+
+
+class SummaryPrinter(BaseSummaryPrinter):
+    """Summary printer for PyTorch."""
+
+    def is_built_with_cuda(self) -> bool:
+        """Check if the backend is built with CUDA."""
+        return torch.version.cuda is not None
+
+    def is_built_with_rocm(self) -> bool:
+        """Check if the backend is built with ROCm."""
+        return torch.version.hip is not None
+
+    def get_compute_device(self) -> str:
+        """Get Compute device."""
+        return str(DEVICE)
+
+    def get_ngpus(self) -> int:
+        """Get the number of GPUs."""
+        return torch.cuda.device_count()
+
+    def get_backend_info(self) -> dict:
+        """Get backend information."""
+        return {
+            "Backend": "PyTorch",
+            "PT ver": f"v{torch.__version__}-g{torch.version.git_version[:11]}",
+            "Enable custom OP": ENABLE_CUSTOMIZED_OP,
+        }
+
+
+def train(FLAGS):
+    log.info("Configuration path: %s", FLAGS.INPUT)
+    SummaryPrinter()()
+    with open(FLAGS.INPUT) as fin:
+        config = json.load(fin)
+
+    # update multitask config
+    multi_task = "model_dict" in config["model"]
+    shared_links = None
+    if multi_task:
+        config["model"], shared_links = preprocess_shared_params(config["model"])
+
+    # argcheck
+    if not multi_task:
+        config = update_deepmd_input(config, warning=True, dump="input_v2_compat.json")
+        config = normalize(config)
+
+    # do neighbor stat
+    if not FLAGS.skip_neighbor_stat:
+        log.info(
+            "Calculate neighbor statistics... (add --skip-neighbor-stat to skip this step)"
+        )
+        if not multi_task:
+            config["model"] = BaseModel.update_sel(config, config["model"])
+        else:
+            training_jdata = deepcopy(config["training"])
+            training_jdata.pop("data_dict", {})
+            training_jdata.pop("model_prob", {})
+            for model_item in config["model"]["model_dict"]:
+                fake_global_jdata = {
+                    "model": deepcopy(config["model"]["model_dict"][model_item]),
+                    "training": deepcopy(config["training"]["data_dict"][model_item]),
+                }
+                fake_global_jdata["training"].update(training_jdata)
+                config["model"]["model_dict"][model_item] = BaseModel.update_sel(
+                    fake_global_jdata, config["model"]["model_dict"][model_item]
+                )
+
+    with open(FLAGS.output, "w") as fp:
+        json.dump(config, fp, indent=4)
+
+    trainer = get_trainer(
+        config,
+        FLAGS.init_model,
+        FLAGS.restart,
+        FLAGS.finetune,
+        FLAGS.model_branch,
+        FLAGS.force_load,
+        FLAGS.init_frz_model,
+        shared_links=shared_links,
+    )
+    trainer.run()
+
+
+def freeze(FLAGS):
+    model = torch.jit.script(inference.Tester(FLAGS.model, head=FLAGS.head).model)
+    torch.jit.save(
+        model,
+        FLAGS.output,
+        {},
+    )
+
+
+@record
+def main(args: Optional[Union[List[str], argparse.Namespace]] = None):
+    if not isinstance(args, argparse.Namespace):
+        FLAGS = parse_args(args=args)
+    else:
+        FLAGS = args
+
+    set_log_handles(FLAGS.log_level, FLAGS.log_path, mpi_log=None)
+    log.debug("Log handles were successfully set")
+    log.info("DeepMD version: %s", __version__)
+
+    if FLAGS.command == "train":
+        train(FLAGS)
+    elif FLAGS.command == "freeze":
+        if Path(FLAGS.checkpoint_folder).is_dir():
+            checkpoint_path = Path(FLAGS.checkpoint_folder)
+            latest_ckpt_file = (checkpoint_path / "checkpoint").read_text()
+            FLAGS.model = str(checkpoint_path.joinpath(latest_ckpt_file))
+        else:
+            FLAGS.model = FLAGS.checkpoint_folder
+        FLAGS.output = str(Path(FLAGS.output).with_suffix(".pth"))
+        freeze(FLAGS)
+    else:
+        raise RuntimeError(f"Invalid command {FLAGS.command}!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/deepmd/pt/infer/__init__.py b/deepmd/pt/infer/__init__.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/deepmd/pt/infer/__init__.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/deepmd/pt/infer/deep_eval.py b/deepmd/pt/infer/deep_eval.py
new file mode 100644
index 0000000000..8a3a61400d
--- /dev/null
+++ b/deepmd/pt/infer/deep_eval.py
@@ -0,0 +1,754 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Tuple,
+    Union,
+)
+
+import numpy as np
+import torch
+
+from deepmd.dpmodel.output_def import (
+    ModelOutputDef,
+    OutputVariableCategory,
+    OutputVariableDef,
+)
+from deepmd.infer.deep_dipole import (
+    DeepDipole,
+)
+from deepmd.infer.deep_dos import (
+    DeepDOS,
+)
+from deepmd.infer.deep_eval import DeepEval as DeepEvalWrapper
+from deepmd.infer.deep_eval import (
+    DeepEvalBackend,
+)
+from deepmd.infer.deep_polar import (
+    DeepGlobalPolar,
+    DeepPolar,
+)
+from deepmd.infer.deep_pot import (
+    DeepPot,
+)
+from deepmd.infer.deep_wfc import (
+    DeepWFC,
+)
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.train.wrapper import (
+    ModelWrapper,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.auto_batch_size import (
+    AutoBatchSize,
+)
+from deepmd.pt.utils.env import (
+    DEVICE,
+    GLOBAL_PT_FLOAT_PRECISION,
+)
+from deepmd.pt.utils.utils import (
+    to_torch_tensor,
+)
+
+if TYPE_CHECKING:
+    import ase.neighborlist
+
+
+class DeepEval(DeepEvalBackend):
+    """PyTorch backend implementaion of DeepEval.
+
+    Parameters
+    ----------
+    model_file : Path
+        The name of the frozen model file.
+    output_def : ModelOutputDef
+        The output definition of the model.
+    *args : list
+        Positional arguments.
+    auto_batch_size : bool or int or AutomaticBatchSize, default: False
+        If True, automatic batch size will be used. If int, it will be used
+        as the initial batch size.
+    neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional
+        The ASE neighbor list class to produce the neighbor list. If None, the
+        neighbor list will be built natively in the model.
+    **kwargs : dict
+        Keyword arguments.
+    """
+
+    def __init__(
+        self,
+        model_file: str,
+        output_def: ModelOutputDef,
+        *args: List[Any],
+        auto_batch_size: Union[bool, int, AutoBatchSize] = True,
+        neighbor_list: Optional["ase.neighborlist.NewPrimitiveNeighborList"] = None,
+        head: Optional[str] = None,
+        **kwargs: Dict[str, Any],
+    ):
+        self.output_def = output_def
+        self.model_path = model_file
+        if str(self.model_path).endswith(".pt"):
+            state_dict = torch.load(model_file, map_location=env.DEVICE)
+            if "model" in state_dict:
+                state_dict = state_dict["model"]
+            self.input_param = state_dict["_extra_state"]["model_params"]
+            self.multi_task = "model_dict" in self.input_param
+            if self.multi_task:
+                model_keys = list(self.input_param["model_dict"].keys())
+                assert (
+                    head is not None
+                ), f"Head must be set for multitask model! Available heads are: {model_keys}"
+                assert (
+                    head in model_keys
+                ), f"No head named {head} in model! Available heads are: {model_keys}"
+                self.input_param = self.input_param["model_dict"][head]
+                state_dict_head = {"_extra_state": state_dict["_extra_state"]}
+                for item in state_dict:
+                    if f"model.{head}." in item:
+                        state_dict_head[
+                            item.replace(f"model.{head}.", "model.Default.")
+                        ] = state_dict[item].clone()
+                state_dict = state_dict_head
+            self.input_param["resuming"] = True
+            model = get_model(self.input_param).to(DEVICE)
+            model = torch.jit.script(model)
+            self.dp = ModelWrapper(model)
+            self.dp.load_state_dict(state_dict)
+        elif str(self.model_path).endswith(".pth"):
+            model = torch.jit.load(model_file, map_location=env.DEVICE)
+            self.dp = ModelWrapper(model)
+        else:
+            raise ValueError("Unknown model file format!")
+        self.rcut = self.dp.model["Default"].get_rcut()
+        self.type_map = self.dp.model["Default"].get_type_map()
+        if isinstance(auto_batch_size, bool):
+            if auto_batch_size:
+                self.auto_batch_size = AutoBatchSize()
+            else:
+                self.auto_batch_size = None
+        elif isinstance(auto_batch_size, int):
+            self.auto_batch_size = AutoBatchSize(auto_batch_size)
+        elif isinstance(auto_batch_size, AutoBatchSize):
+            self.auto_batch_size = auto_batch_size
+        else:
+            raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize")
+        self._has_spin = getattr(self.dp.model["Default"], "has_spin", False)
+        if callable(self._has_spin):
+            self._has_spin = self._has_spin()
+
+    def get_rcut(self) -> float:
+        """Get the cutoff radius of this model."""
+        return self.rcut
+
+    def get_ntypes(self) -> int:
+        """Get the number of atom types of this model."""
+        return len(self.type_map)
+
+    def get_type_map(self) -> List[str]:
+        """Get the type map (element name of the atom types) of this model."""
+        return self.type_map
+
+    def get_dim_fparam(self) -> int:
+        """Get the number (dimension) of frame parameters of this DP."""
+        return self.dp.model["Default"].get_dim_fparam()
+
+    def get_dim_aparam(self) -> int:
+        """Get the number (dimension) of atomic parameters of this DP."""
+        return self.dp.model["Default"].get_dim_aparam()
+
+    @property
+    def model_type(self) -> "DeepEvalWrapper":
+        """The the evaluator of the model type."""
+        model_output_type = self.dp.model["Default"].model_output_type()
+        if "energy" in model_output_type:
+            return DeepPot
+        elif "dos" in model_output_type:
+            return DeepDOS
+        elif "dipole" in model_output_type:
+            return DeepDipole
+        elif "polar" in model_output_type:
+            return DeepPolar
+        elif "global_polar" in model_output_type:
+            return DeepGlobalPolar
+        elif "wfc" in model_output_type:
+            return DeepWFC
+        else:
+            raise RuntimeError("Unknown model type")
+
+    def get_sel_type(self) -> List[int]:
+        """Get the selected atom types of this model.
+
+        Only atoms with selected atom types have atomic contribution
+        to the result of the model.
+        If returning an empty list, all atom types are selected.
+        """
+        return self.dp.model["Default"].get_sel_type()
+
+    def get_numb_dos(self) -> int:
+        """Get the number of DOS."""
+        return self.dp.model["Default"].get_numb_dos()
+
+    def get_has_efield(self):
+        """Check if the model has efield."""
+        return False
+
+    def get_ntypes_spin(self):
+        """Get the number of spin atom types of this model. Only used in old implement."""
+        return 0
+
+    def get_has_spin(self):
+        """Check if the model has spin atom types."""
+        return self._has_spin
+
+    def eval(
+        self,
+        coords: np.ndarray,
+        cells: np.ndarray,
+        atom_types: np.ndarray,
+        atomic: bool = False,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+        **kwargs: Dict[str, Any],
+    ) -> Dict[str, np.ndarray]:
+        """Evaluate the energy, force and virial by using this DP.
+
+        Parameters
+        ----------
+        coords
+            The coordinates of atoms.
+            The array should be of size nframes x natoms x 3
+        cells
+            The cell of the region.
+            If None then non-PBC is assumed, otherwise using PBC.
+            The array should be of size nframes x 9
+        atom_types
+            The atom types
+            The list should contain natoms ints
+        atomic
+            Calculate the atomic energy and virial
+        fparam
+            The frame parameter.
+            The array can be of size :
+            - nframes x dim_fparam.
+            - dim_fparam. Then all frames are assumed to be provided with the same fparam.
+        aparam
+            The atomic parameter
+            The array can be of size :
+            - nframes x natoms x dim_aparam.
+            - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
+            - dim_aparam. Then all frames and atoms are provided with the same aparam.
+        **kwargs
+            Other parameters
+
+        Returns
+        -------
+        output_dict : dict
+            The output of the evaluation. The keys are the names of the output
+            variables, and the values are the corresponding output arrays.
+        """
+        # convert all of the input to numpy array
+        atom_types = np.array(atom_types, dtype=np.int32)
+        coords = np.array(coords)
+        if cells is not None:
+            cells = np.array(cells)
+        natoms, numb_test = self._get_natoms_and_nframes(
+            coords, atom_types, len(atom_types.shape) > 1
+        )
+        request_defs = self._get_request_defs(atomic)
+        if "spin" not in kwargs or kwargs["spin"] is None:
+            out = self._eval_func(self._eval_model, numb_test, natoms)(
+                coords, cells, atom_types, fparam, aparam, request_defs
+            )
+        else:
+            out = self._eval_func(self._eval_model_spin, numb_test, natoms)(
+                coords,
+                cells,
+                atom_types,
+                np.array(kwargs["spin"]),
+                fparam,
+                aparam,
+                request_defs,
+            )
+        return dict(
+            zip(
+                [x.name for x in request_defs],
+                out,
+            )
+        )
+
+    def _get_request_defs(self, atomic: bool) -> List[OutputVariableDef]:
+        """Get the requested output definitions.
+
+        When atomic is True, all output_def are requested.
+        When atomic is False, only energy (tensor), force, and virial
+        are requested.
+
+        Parameters
+        ----------
+        atomic : bool
+            Whether to request the atomic output.
+
+        Returns
+        -------
+        list[OutputVariableDef]
+            The requested output definitions.
+        """
+        if atomic:
+            return list(self.output_def.var_defs.values())
+        else:
+            return [
+                x
+                for x in self.output_def.var_defs.values()
+                if x.category
+                in (
+                    OutputVariableCategory.OUT,
+                    OutputVariableCategory.REDU,
+                    OutputVariableCategory.DERV_R,
+                    OutputVariableCategory.DERV_C_REDU,
+                )
+            ]
+
+    def _eval_func(self, inner_func: Callable, numb_test: int, natoms: int) -> Callable:
+        """Wrapper method with auto batch size.
+
+        Parameters
+        ----------
+        inner_func : Callable
+            the method to be wrapped
+        numb_test : int
+            number of tests
+        natoms : int
+            number of atoms
+
+        Returns
+        -------
+        Callable
+            the wrapper
+        """
+        if self.auto_batch_size is not None:
+
+            def eval_func(*args, **kwargs):
+                return self.auto_batch_size.execute_all(
+                    inner_func, numb_test, natoms, *args, **kwargs
+                )
+
+        else:
+            eval_func = inner_func
+        return eval_func
+
+    def _get_natoms_and_nframes(
+        self,
+        coords: np.ndarray,
+        atom_types: np.ndarray,
+        mixed_type: bool = False,
+    ) -> Tuple[int, int]:
+        if mixed_type:
+            natoms = len(atom_types[0])
+        else:
+            natoms = len(atom_types)
+        if natoms == 0:
+            assert coords.size == 0
+        else:
+            coords = np.reshape(np.array(coords), [-1, natoms * 3])
+        nframes = coords.shape[0]
+        return natoms, nframes
+
+    def _eval_model(
+        self,
+        coords: np.ndarray,
+        cells: Optional[np.ndarray],
+        atom_types: np.ndarray,
+        fparam: Optional[np.ndarray],
+        aparam: Optional[np.ndarray],
+        request_defs: List[OutputVariableDef],
+    ):
+        model = self.dp.to(DEVICE)
+
+        nframes = coords.shape[0]
+        if len(atom_types.shape) == 1:
+            natoms = len(atom_types)
+            atom_types = np.tile(atom_types, nframes).reshape(nframes, -1)
+        else:
+            natoms = len(atom_types[0])
+
+        coord_input = torch.tensor(
+            coords.reshape([-1, natoms, 3]),
+            dtype=GLOBAL_PT_FLOAT_PRECISION,
+            device=DEVICE,
+        )
+        type_input = torch.tensor(atom_types, dtype=torch.long, device=DEVICE)
+        if cells is not None:
+            box_input = torch.tensor(
+                cells.reshape([-1, 3, 3]),
+                dtype=GLOBAL_PT_FLOAT_PRECISION,
+                device=DEVICE,
+            )
+        else:
+            box_input = None
+        if fparam is not None:
+            fparam_input = to_torch_tensor(fparam.reshape(-1, self.get_dim_fparam()))
+        else:
+            fparam_input = None
+        if aparam is not None:
+            aparam_input = to_torch_tensor(
+                aparam.reshape(-1, natoms, self.get_dim_aparam())
+            )
+        else:
+            aparam_input = None
+        do_atomic_virial = any(
+            x.category == OutputVariableCategory.DERV_C for x in request_defs
+        )
+        batch_output = model(
+            coord_input,
+            type_input,
+            box=box_input,
+            do_atomic_virial=do_atomic_virial,
+            fparam=fparam_input,
+            aparam=aparam_input,
+        )
+        if isinstance(batch_output, tuple):
+            batch_output = batch_output[0]
+
+        results = []
+        for odef in request_defs:
+            pt_name = self._OUTDEF_DP2BACKEND[odef.name]
+            if pt_name in batch_output:
+                shape = self._get_output_shape(odef, nframes, natoms)
+                out = batch_output[pt_name].reshape(shape).detach().cpu().numpy()
+                results.append(out)
+            else:
+                shape = self._get_output_shape(odef, nframes, natoms)
+                results.append(np.full(np.abs(shape), np.nan))  # this is kinda hacky
+        return tuple(results)
+
+    def _eval_model_spin(
+        self,
+        coords: np.ndarray,
+        cells: Optional[np.ndarray],
+        atom_types: np.ndarray,
+        spins: np.ndarray,
+        fparam: Optional[np.ndarray],
+        aparam: Optional[np.ndarray],
+        request_defs: List[OutputVariableDef],
+    ):
+        model = self.dp.to(DEVICE)
+
+        nframes = coords.shape[0]
+        if len(atom_types.shape) == 1:
+            natoms = len(atom_types)
+            atom_types = np.tile(atom_types, nframes).reshape(nframes, -1)
+        else:
+            natoms = len(atom_types[0])
+
+        coord_input = torch.tensor(
+            coords.reshape([-1, natoms, 3]),
+            dtype=GLOBAL_PT_FLOAT_PRECISION,
+            device=DEVICE,
+        )
+        type_input = torch.tensor(atom_types, dtype=torch.long, device=DEVICE)
+        spin_input = torch.tensor(
+            spins.reshape([-1, natoms, 3]),
+            dtype=GLOBAL_PT_FLOAT_PRECISION,
+            device=DEVICE,
+        )
+        if cells is not None:
+            box_input = torch.tensor(
+                cells.reshape([-1, 3, 3]),
+                dtype=GLOBAL_PT_FLOAT_PRECISION,
+                device=DEVICE,
+            )
+        else:
+            box_input = None
+        if fparam is not None:
+            fparam_input = to_torch_tensor(fparam.reshape(-1, self.get_dim_fparam()))
+        else:
+            fparam_input = None
+        if aparam is not None:
+            aparam_input = to_torch_tensor(
+                aparam.reshape(-1, natoms, self.get_dim_aparam())
+            )
+        else:
+            aparam_input = None
+
+        do_atomic_virial = any(
+            x.category == OutputVariableCategory.DERV_C_REDU for x in request_defs
+        )
+        batch_output = model(
+            coord_input,
+            type_input,
+            spin=spin_input,
+            box=box_input,
+            do_atomic_virial=do_atomic_virial,
+            fparam=fparam_input,
+            aparam=aparam_input,
+        )
+        if isinstance(batch_output, tuple):
+            batch_output = batch_output[0]
+
+        results = []
+        for odef in request_defs:
+            pt_name = self._OUTDEF_DP2BACKEND[odef.name]
+            if pt_name in batch_output:
+                shape = self._get_output_shape(odef, nframes, natoms)
+                out = batch_output[pt_name].reshape(shape).detach().cpu().numpy()
+                results.append(out)
+            else:
+                shape = self._get_output_shape(odef, nframes, natoms)
+                results.append(np.full(np.abs(shape), np.nan))  # this is kinda hacky
+        return tuple(results)
+
+    def _get_output_shape(self, odef, nframes, natoms):
+        if odef.category == OutputVariableCategory.DERV_C_REDU:
+            # virial
+            return [nframes, *odef.shape[:-1], 9]
+        elif odef.category == OutputVariableCategory.REDU:
+            # energy
+            return [nframes, *odef.shape, 1]
+        elif odef.category == OutputVariableCategory.DERV_C:
+            # atom_virial
+            return [nframes, *odef.shape[:-1], natoms, 9]
+        elif odef.category == OutputVariableCategory.DERV_R:
+            # force
+            return [nframes, *odef.shape[:-1], natoms, 3]
+        elif odef.category == OutputVariableCategory.OUT:
+            # atom_energy, atom_tensor
+            # Something wrong here?
+            # return [nframes, *shape, natoms, 1]
+            return [nframes, natoms, *odef.shape, 1]
+        else:
+            raise RuntimeError("unknown category")
+
+
+# For tests only
+def eval_model(
+    model,
+    coords: Union[np.ndarray, torch.Tensor],
+    cells: Optional[Union[np.ndarray, torch.Tensor]],
+    atom_types: Union[np.ndarray, torch.Tensor, List[int]],
+    spins: Optional[Union[np.ndarray, torch.Tensor]] = None,
+    atomic: bool = False,
+    infer_batch_size: int = 2,
+    denoise: bool = False,
+):
+    model = model.to(DEVICE)
+    energy_out = []
+    atomic_energy_out = []
+    force_out = []
+    force_mag_out = []
+    virial_out = []
+    atomic_virial_out = []
+    updated_coord_out = []
+    logits_out = []
+    err_msg = (
+        f"All inputs should be the same format, "
+        f"but found {type(coords)}, {type(cells)}, {type(atom_types)} instead! "
+    )
+    return_tensor = True
+    if isinstance(coords, torch.Tensor):
+        if cells is not None:
+            assert isinstance(cells, torch.Tensor), err_msg
+        if spins is not None:
+            assert isinstance(spins, torch.Tensor), err_msg
+        assert isinstance(atom_types, torch.Tensor) or isinstance(atom_types, list)
+        atom_types = torch.tensor(atom_types, dtype=torch.long, device=DEVICE)
+    elif isinstance(coords, np.ndarray):
+        if cells is not None:
+            assert isinstance(cells, np.ndarray), err_msg
+        if spins is not None:
+            assert isinstance(spins, np.ndarray), err_msg
+        assert isinstance(atom_types, np.ndarray) or isinstance(atom_types, list)
+        atom_types = np.array(atom_types, dtype=np.int32)
+        return_tensor = False
+
+    nframes = coords.shape[0]
+    if len(atom_types.shape) == 1:
+        natoms = len(atom_types)
+        if isinstance(atom_types, torch.Tensor):
+            atom_types = torch.tile(atom_types.unsqueeze(0), [nframes, 1]).reshape(
+                nframes, -1
+            )
+        else:
+            atom_types = np.tile(atom_types, nframes).reshape(nframes, -1)
+    else:
+        natoms = len(atom_types[0])
+
+    coord_input = torch.tensor(
+        coords.reshape([-1, natoms, 3]), dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE
+    )
+    spin_input = None
+    if spins is not None:
+        spin_input = torch.tensor(
+            spins.reshape([-1, natoms, 3]),
+            dtype=GLOBAL_PT_FLOAT_PRECISION,
+            device=DEVICE,
+        )
+    has_spin = getattr(model, "has_spin", False)
+    if callable(has_spin):
+        has_spin = has_spin()
+    type_input = torch.tensor(atom_types, dtype=torch.long, device=DEVICE)
+    box_input = None
+    if cells is None:
+        pbc = False
+    else:
+        pbc = True
+        box_input = torch.tensor(
+            cells.reshape([-1, 3, 3]), dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE
+        )
+    num_iter = int((nframes + infer_batch_size - 1) / infer_batch_size)
+
+    for ii in range(num_iter):
+        batch_coord = coord_input[ii * infer_batch_size : (ii + 1) * infer_batch_size]
+        batch_atype = type_input[ii * infer_batch_size : (ii + 1) * infer_batch_size]
+        batch_box = None
+        batch_spin = None
+        if spin_input is not None:
+            batch_spin = spin_input[ii * infer_batch_size : (ii + 1) * infer_batch_size]
+        if pbc:
+            batch_box = box_input[ii * infer_batch_size : (ii + 1) * infer_batch_size]
+        input_dict = {
+            "coord": batch_coord,
+            "atype": batch_atype,
+            "box": batch_box,
+            "do_atomic_virial": atomic,
+        }
+        if has_spin:
+            input_dict["spin"] = batch_spin
+        batch_output = model(**input_dict)
+        if isinstance(batch_output, tuple):
+            batch_output = batch_output[0]
+        if not return_tensor:
+            if "energy" in batch_output:
+                energy_out.append(batch_output["energy"].detach().cpu().numpy())
+            if "atom_energy" in batch_output:
+                atomic_energy_out.append(
+                    batch_output["atom_energy"].detach().cpu().numpy()
+                )
+            if "force" in batch_output:
+                force_out.append(batch_output["force"].detach().cpu().numpy())
+            if "force_mag" in batch_output:
+                force_mag_out.append(batch_output["force_mag"].detach().cpu().numpy())
+            if "virial" in batch_output:
+                virial_out.append(batch_output["virial"].detach().cpu().numpy())
+            if "atom_virial" in batch_output:
+                atomic_virial_out.append(
+                    batch_output["atom_virial"].detach().cpu().numpy()
+                )
+            if "updated_coord" in batch_output:
+                updated_coord_out.append(
+                    batch_output["updated_coord"].detach().cpu().numpy()
+                )
+            if "logits" in batch_output:
+                logits_out.append(batch_output["logits"].detach().cpu().numpy())
+        else:
+            if "energy" in batch_output:
+                energy_out.append(batch_output["energy"])
+            if "atom_energy" in batch_output:
+                atomic_energy_out.append(batch_output["atom_energy"])
+            if "force" in batch_output:
+                force_out.append(batch_output["force"])
+            if "force_mag" in batch_output:
+                force_mag_out.append(batch_output["force_mag"])
+            if "virial" in batch_output:
+                virial_out.append(batch_output["virial"])
+            if "atom_virial" in batch_output:
+                atomic_virial_out.append(batch_output["atom_virial"])
+            if "updated_coord" in batch_output:
+                updated_coord_out.append(batch_output["updated_coord"])
+            if "logits" in batch_output:
+                logits_out.append(batch_output["logits"])
+    if not return_tensor:
+        energy_out = (
+            np.concatenate(energy_out) if energy_out else np.zeros([nframes, 1])
+        )
+        atomic_energy_out = (
+            np.concatenate(atomic_energy_out)
+            if atomic_energy_out
+            else np.zeros([nframes, natoms, 1])
+        )
+        force_out = (
+            np.concatenate(force_out) if force_out else np.zeros([nframes, natoms, 3])
+        )
+        force_mag_out = (
+            np.concatenate(force_mag_out)
+            if force_mag_out
+            else np.zeros([nframes, natoms, 3])
+        )
+        virial_out = (
+            np.concatenate(virial_out) if virial_out else np.zeros([nframes, 3, 3])
+        )
+        atomic_virial_out = (
+            np.concatenate(atomic_virial_out)
+            if atomic_virial_out
+            else np.zeros([nframes, natoms, 3, 3])
+        )
+        updated_coord_out = (
+            np.concatenate(updated_coord_out) if updated_coord_out else None
+        )
+        logits_out = np.concatenate(logits_out) if logits_out else None
+    else:
+        energy_out = (
+            torch.cat(energy_out)
+            if energy_out
+            else torch.zeros(
+                [nframes, 1], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE
+            )
+        )
+        atomic_energy_out = (
+            torch.cat(atomic_energy_out)
+            if atomic_energy_out
+            else torch.zeros(
+                [nframes, natoms, 1], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE
+            )
+        )
+        force_out = (
+            torch.cat(force_out)
+            if force_out
+            else torch.zeros(
+                [nframes, natoms, 3], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE
+            )
+        )
+        force_mag_out = (
+            torch.cat(force_mag_out)
+            if force_mag_out
+            else torch.zeros(
+                [nframes, natoms, 3], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE
+            )
+        )
+        virial_out = (
+            torch.cat(virial_out)
+            if virial_out
+            else torch.zeros(
+                [nframes, 3, 3], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE
+            )
+        )
+        atomic_virial_out = (
+            torch.cat(atomic_virial_out)
+            if atomic_virial_out
+            else torch.zeros(
+                [nframes, natoms, 3, 3], dtype=GLOBAL_PT_FLOAT_PRECISION, device=DEVICE
+            )
+        )
+        updated_coord_out = torch.cat(updated_coord_out) if updated_coord_out else None
+        logits_out = torch.cat(logits_out) if logits_out else None
+    if denoise:
+        return updated_coord_out, logits_out
+    else:
+        results_dict = {
+            "energy": energy_out,
+            "force": force_out,
+            "virial": virial_out,
+        }
+        if has_spin:
+            results_dict["force_mag"] = force_mag_out
+        if atomic:
+            results_dict["atom_energy"] = atomic_energy_out
+            results_dict["atom_virial"] = atomic_virial_out
+        return results_dict
diff --git a/deepmd/pt/infer/inference.py b/deepmd/pt/infer/inference.py
new file mode 100644
index 0000000000..6c13b363bc
--- /dev/null
+++ b/deepmd/pt/infer/inference.py
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
+from copy import (
+    deepcopy,
+)
+
+import torch
+
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.train.wrapper import (
+    ModelWrapper,
+)
+from deepmd.pt.utils.env import (
+    DEVICE,
+    JIT,
+)
+
+if torch.__version__.startswith("2"):
+    import torch._dynamo
+log = logging.getLogger(__name__)
+
+
+class Tester:
+    def __init__(
+        self,
+        model_ckpt,
+        head=None,
+    ):
+        """Construct a DeePMD tester.
+
+        Args:
+        - config: The Dict-like configuration with training options.
+        """
+        # Model
+        state_dict = torch.load(model_ckpt, map_location=DEVICE)
+        if "model" in state_dict:
+            state_dict = state_dict["model"]
+        model_params = state_dict["_extra_state"]["model_params"]
+        self.multi_task = "model_dict" in model_params
+        if self.multi_task:
+            assert head is not None, "Head must be specified in multitask mode!"
+            self.head = head
+            assert head in model_params["model_dict"], (
+                f"Specified head {head} not found in model {model_ckpt}! "
+                f"Available ones are {list(model_params['model_dict'].keys())}."
+            )
+            model_params = model_params["model_dict"][head]
+            state_dict_head = {"_extra_state": state_dict["_extra_state"]}
+            for item in state_dict:
+                if f"model.{head}." in item:
+                    state_dict_head[
+                        item.replace(f"model.{head}.", "model.Default.")
+                    ] = state_dict[item].clone()
+            state_dict = state_dict_head
+
+        self.model_params = deepcopy(model_params)
+        model_params["resuming"] = True
+        self.model = get_model(model_params).to(DEVICE)
+
+        # Model Wrapper
+        self.wrapper = ModelWrapper(self.model)  # inference only
+        if JIT:
+            self.wrapper = torch.jit.script(self.wrapper)
+        self.wrapper.load_state_dict(state_dict)
diff --git a/deepmd/pt/loss/__init__.py b/deepmd/pt/loss/__init__.py
new file mode 100644
index 0000000000..e64a129d51
--- /dev/null
+++ b/deepmd/pt/loss/__init__.py
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from .denoise import (
+    DenoiseLoss,
+)
+from .dos import (
+    DOSLoss,
+)
+from .ener import (
+    EnergyStdLoss,
+)
+from .ener_spin import (
+    EnergySpinLoss,
+)
+from .loss import (
+    TaskLoss,
+)
+from .tensor import (
+    TensorLoss,
+)
+
+__all__ = [
+    "DenoiseLoss",
+    "EnergyStdLoss",
+    "EnergySpinLoss",
+    "TensorLoss",
+    "TaskLoss",
+    "DOSLoss",
+]
diff --git a/deepmd/pt/loss/denoise.py b/deepmd/pt/loss/denoise.py
new file mode 100644
index 0000000000..57691558cb
--- /dev/null
+++ b/deepmd/pt/loss/denoise.py
@@ -0,0 +1,109 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import torch
+import torch.nn.functional as F
+
+from deepmd.pt.loss.loss import (
+    TaskLoss,
+)
+from deepmd.pt.utils import (
+    env,
+)
+
+
+class DenoiseLoss(TaskLoss):
+    def __init__(
+        self,
+        ntypes,
+        masked_token_loss=1.0,
+        masked_coord_loss=1.0,
+        norm_loss=0.01,
+        use_l1=True,
+        beta=1.00,
+        mask_loss_coord=True,
+        mask_loss_token=True,
+        **kwargs,
+    ):
+        """Construct a layer to compute loss on coord, and type reconstruction."""
+        super().__init__()
+        self.ntypes = ntypes
+        self.masked_token_loss = masked_token_loss
+        self.masked_coord_loss = masked_coord_loss
+        self.norm_loss = norm_loss
+        self.has_coord = self.masked_coord_loss > 0.0
+        self.has_token = self.masked_token_loss > 0.0
+        self.has_norm = self.norm_loss > 0.0
+        self.use_l1 = use_l1
+        self.beta = beta
+        self.frac_beta = 1.00 / self.beta
+        self.mask_loss_coord = mask_loss_coord
+        self.mask_loss_token = mask_loss_token
+
+    def forward(self, model_pred, label, natoms, learning_rate, mae=False):
+        """Return loss on coord and type denoise.
+
+        Returns
+        -------
+        - loss: Loss to minimize.
+        """
+        updated_coord = model_pred["updated_coord"]
+        logits = model_pred["logits"]
+        clean_coord = label["clean_coord"]
+        clean_type = label["clean_type"]
+        coord_mask = label["coord_mask"]
+        type_mask = label["type_mask"]
+
+        loss = torch.zeros(1, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE)[0]
+        more_loss = {}
+        if self.has_coord:
+            if self.mask_loss_coord:
+                masked_updated_coord = updated_coord[coord_mask]
+                masked_clean_coord = clean_coord[coord_mask]
+                if masked_updated_coord.size(0) > 0:
+                    coord_loss = F.smooth_l1_loss(
+                        masked_updated_coord.view(-1, 3),
+                        masked_clean_coord.view(-1, 3),
+                        reduction="mean",
+                        beta=self.beta,
+                    )
+                else:
+                    coord_loss = torch.zeros(
+                        1, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE
+                    )[0]
+            else:
+                coord_loss = F.smooth_l1_loss(
+                    updated_coord.view(-1, 3),
+                    clean_coord.view(-1, 3),
+                    reduction="mean",
+                    beta=self.beta,
+                )
+            loss += self.masked_coord_loss * coord_loss
+            more_loss["coord_l1_error"] = coord_loss.detach()
+        if self.has_token:
+            if self.mask_loss_token:
+                masked_logits = logits[type_mask]
+                masked_target = clean_type[type_mask]
+                if masked_logits.size(0) > 0:
+                    token_loss = F.nll_loss(
+                        F.log_softmax(masked_logits, dim=-1),
+                        masked_target,
+                        reduction="mean",
+                    )
+                else:
+                    token_loss = torch.zeros(
+                        1, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE
+                    )[0]
+            else:
+                token_loss = F.nll_loss(
+                    F.log_softmax(logits.view(-1, self.ntypes - 1), dim=-1),
+                    clean_type.view(-1),
+                    reduction="mean",
+                )
+            loss += self.masked_token_loss * token_loss
+            more_loss["token_error"] = token_loss.detach()
+        if self.has_norm:
+            norm_x = model_pred["norm_x"]
+            norm_delta_pair_rep = model_pred["norm_delta_pair_rep"]
+            loss += self.norm_loss * (norm_x + norm_delta_pair_rep)
+            more_loss["norm_loss"] = norm_x.detach() + norm_delta_pair_rep.detach()
+
+        return loss, more_loss
diff --git a/deepmd/pt/loss/dos.py b/deepmd/pt/loss/dos.py
new file mode 100644
index 0000000000..7fd2e04ff2
--- /dev/null
+++ b/deepmd/pt/loss/dos.py
@@ -0,0 +1,256 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    List,
+)
+
+import torch
+
+from deepmd.pt.loss.loss import (
+    TaskLoss,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.utils.data import (
+    DataRequirementItem,
+)
+
+
+class DOSLoss(TaskLoss):
+    def __init__(
+        self,
+        starter_learning_rate: float,
+        numb_dos: int,
+        start_pref_dos: float = 1.00,
+        limit_pref_dos: float = 1.00,
+        start_pref_cdf: float = 1000,
+        limit_pref_cdf: float = 1.00,
+        start_pref_ados: float = 0.0,
+        limit_pref_ados: float = 0.0,
+        start_pref_acdf: float = 0.0,
+        limit_pref_acdf: float = 0.0,
+        inference=False,
+        **kwargs,
+    ):
+        r"""Construct a loss for local and global tensors.
+
+        Parameters
+        ----------
+        tensor_name : str
+            The name of the tensor in the model predictions to compute the loss.
+        tensor_size : int
+            The size (dimension) of the tensor.
+        label_name : str
+            The name of the tensor in the labels to compute the loss.
+        pref_atomic : float
+            The prefactor of the weight of atomic loss. It should be larger than or equal to 0.
+        pref : float
+            The prefactor of the weight of global loss. It should be larger than or equal to 0.
+        inference : bool
+            If true, it will output all losses found in output, ignoring the pre-factors.
+        **kwargs
+            Other keyword arguments.
+        """
+        super().__init__()
+        self.starter_learning_rate = starter_learning_rate
+        self.numb_dos = numb_dos
+        self.inference = inference
+
+        self.start_pref_dos = start_pref_dos
+        self.limit_pref_dos = limit_pref_dos
+        self.start_pref_cdf = start_pref_cdf
+        self.limit_pref_cdf = limit_pref_cdf
+
+        self.start_pref_ados = start_pref_ados
+        self.limit_pref_ados = limit_pref_ados
+        self.start_pref_acdf = start_pref_acdf
+        self.limit_pref_acdf = limit_pref_acdf
+
+        assert (
+            self.start_pref_dos >= 0.0
+            and self.limit_pref_dos >= 0.0
+            and self.start_pref_cdf >= 0.0
+            and self.limit_pref_cdf >= 0.0
+            and self.start_pref_ados >= 0.0
+            and self.limit_pref_ados >= 0.0
+            and self.start_pref_acdf >= 0.0
+            and self.limit_pref_acdf >= 0.0
+        ), "Can not assign negative weight to `pref` and `pref_atomic`"
+
+        self.has_dos = (start_pref_dos != 0.0 and limit_pref_dos != 0.0) or inference
+        self.has_cdf = (start_pref_cdf != 0.0 and limit_pref_cdf != 0.0) or inference
+        self.has_ados = (start_pref_ados != 0.0 and limit_pref_ados != 0.0) or inference
+        self.has_acdf = (start_pref_acdf != 0.0 and limit_pref_acdf != 0.0) or inference
+
+        assert (
+            self.has_dos or self.has_cdf or self.has_ados or self.has_acdf
+        ), AssertionError("Can not assian zero weight both to `pref` and `pref_atomic`")
+
+    def forward(self, input_dict, model, label, natoms, learning_rate=0.0, mae=False):
+        """Return loss on local and global tensors.
+
+        Parameters
+        ----------
+        input_dict : dict[str, torch.Tensor]
+            Model inputs.
+        model : torch.nn.Module
+            Model to be used to output the predictions.
+        label : dict[str, torch.Tensor]
+            Labels.
+        natoms : int
+            The local atom number.
+
+        Returns
+        -------
+        model_pred: dict[str, torch.Tensor]
+            Model predictions.
+        loss: torch.Tensor
+            Loss for model to minimize.
+        more_loss: dict[str, torch.Tensor]
+            Other losses for display.
+        """
+        model_pred = model(**input_dict)
+
+        coef = learning_rate / self.starter_learning_rate
+        pref_dos = (
+            self.limit_pref_dos + (self.start_pref_dos - self.limit_pref_dos) * coef
+        )
+        pref_cdf = (
+            self.limit_pref_cdf + (self.start_pref_cdf - self.limit_pref_cdf) * coef
+        )
+        pref_ados = (
+            self.limit_pref_ados + (self.start_pref_ados - self.limit_pref_ados) * coef
+        )
+        pref_acdf = (
+            self.limit_pref_acdf + (self.start_pref_acdf - self.limit_pref_acdf) * coef
+        )
+
+        loss = torch.zeros(1, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE)[0]
+        more_loss = {}
+        if self.has_ados and "atom_dos" in model_pred and "atom_dos" in label:
+            find_local = label.get("find_atom_dos", 0.0)
+            pref_ados = pref_ados * find_local
+            local_tensor_pred_dos = model_pred["atom_dos"].reshape(
+                [-1, natoms, self.numb_dos]
+            )
+            local_tensor_label_dos = label["atom_dos"].reshape(
+                [-1, natoms, self.numb_dos]
+            )
+            diff = (local_tensor_pred_dos - local_tensor_label_dos).reshape(
+                [-1, self.numb_dos]
+            )
+            if "mask" in model_pred:
+                diff = diff[model_pred["mask"].reshape([-1]).bool()]
+            l2_local_loss_dos = torch.mean(torch.square(diff))
+            if not self.inference:
+                more_loss["l2_local_dos_loss"] = self.display_if_exist(
+                    l2_local_loss_dos.detach(), find_local
+                )
+            loss += pref_ados * l2_local_loss_dos
+            rmse_local_dos = l2_local_loss_dos.sqrt()
+            more_loss["rmse_local_dos"] = self.display_if_exist(
+                rmse_local_dos.detach(), find_local
+            )
+        if self.has_acdf and "atom_dos" in model_pred and "atom_dos" in label:
+            find_local = label.get("find_atom_dos", 0.0)
+            pref_acdf = pref_acdf * find_local
+            local_tensor_pred_cdf = torch.cusum(
+                model_pred["atom_dos"].reshape([-1, natoms, self.numb_dos]), dim=-1
+            )
+            local_tensor_label_cdf = torch.cusum(
+                label["atom_dos"].reshape([-1, natoms, self.numb_dos]), dim=-1
+            )
+            diff = (local_tensor_pred_cdf - local_tensor_label_cdf).reshape(
+                [-1, self.numb_dos]
+            )
+            if "mask" in model_pred:
+                diff = diff[model_pred["mask"].reshape([-1]).bool()]
+            l2_local_loss_cdf = torch.mean(torch.square(diff))
+            if not self.inference:
+                more_loss["l2_local_cdf_loss"] = self.display_if_exist(
+                    l2_local_loss_cdf.detach(), find_local
+                )
+            loss += pref_acdf * l2_local_loss_cdf
+            rmse_local_cdf = l2_local_loss_cdf.sqrt()
+            more_loss["rmse_local_cdf"] = self.display_if_exist(
+                rmse_local_cdf.detach(), find_local
+            )
+        if self.has_dos and "dos" in model_pred and "dos" in label:
+            find_global = label.get("find_dos", 0.0)
+            pref_dos = pref_dos * find_global
+            global_tensor_pred_dos = model_pred["dos"].reshape([-1, self.numb_dos])
+            global_tensor_label_dos = label["dos"].reshape([-1, self.numb_dos])
+            diff = global_tensor_pred_dos - global_tensor_label_dos
+            if "mask" in model_pred:
+                atom_num = model_pred["mask"].sum(-1, keepdim=True)
+                l2_global_loss_dos = torch.mean(
+                    torch.sum(torch.square(diff) * atom_num, dim=0) / atom_num.sum()
+                )
+                atom_num = torch.mean(atom_num.float())
+            else:
+                atom_num = natoms
+                l2_global_loss_dos = torch.mean(torch.square(diff))
+            if not self.inference:
+                more_loss["l2_global_dos_loss"] = self.display_if_exist(
+                    l2_global_loss_dos.detach(), find_global
+                )
+            loss += pref_dos * l2_global_loss_dos
+            rmse_global_dos = l2_global_loss_dos.sqrt() / atom_num
+            more_loss["rmse_global_dos"] = self.display_if_exist(
+                rmse_global_dos.detach(), find_global
+            )
+        if self.has_cdf and "dos" in model_pred and "dos" in label:
+            find_global = label.get("find_dos", 0.0)
+            pref_cdf = pref_cdf * find_global
+            global_tensor_pred_cdf = torch.cusum(
+                model_pred["dos"].reshape([-1, self.numb_dos]), dim=-1
+            )
+            global_tensor_label_cdf = torch.cusum(
+                label["dos"].reshape([-1, self.numb_dos]), dim=-1
+            )
+            diff = global_tensor_pred_cdf - global_tensor_label_cdf
+            if "mask" in model_pred:
+                atom_num = model_pred["mask"].sum(-1, keepdim=True)
+                l2_global_loss_cdf = torch.mean(
+                    torch.sum(torch.square(diff) * atom_num, dim=0) / atom_num.sum()
+                )
+                atom_num = torch.mean(atom_num.float())
+            else:
+                atom_num = natoms
+                l2_global_loss_cdf = torch.mean(torch.square(diff))
+            if not self.inference:
+                more_loss["l2_global_cdf_loss"] = self.display_if_exist(
+                    l2_global_loss_cdf.detach(), find_global
+                )
+            loss += pref_cdf * l2_global_loss_cdf
+            rmse_global_dos = l2_global_loss_cdf.sqrt() / atom_num
+            more_loss["rmse_global_cdf"] = self.display_if_exist(
+                rmse_global_dos.detach(), find_global
+            )
+        return model_pred, loss, more_loss
+
+    @property
+    def label_requirement(self) -> List[DataRequirementItem]:
+        """Return data label requirements needed for this loss calculation."""
+        label_requirement = []
+        if self.has_ados or self.has_acdf:
+            label_requirement.append(
+                DataRequirementItem(
+                    "atom_dos",
+                    ndof=self.numb_dos,
+                    atomic=True,
+                    must=False,
+                    high_prec=False,
+                )
+            )
+        if self.has_dos or self.has_cdf:
+            label_requirement.append(
+                DataRequirementItem(
+                    "dos",
+                    ndof=self.numb_dos,
+                    atomic=False,
+                    must=False,
+                    high_prec=False,
+                )
+            )
+        return label_requirement
diff --git a/deepmd/pt/loss/ener.py b/deepmd/pt/loss/ener.py
new file mode 100644
index 0000000000..ccc23b690c
--- /dev/null
+++ b/deepmd/pt/loss/ener.py
@@ -0,0 +1,291 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    List,
+)
+
+import torch
+import torch.nn.functional as F
+
+from deepmd.pt.loss.loss import (
+    TaskLoss,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.env import (
+    GLOBAL_PT_FLOAT_PRECISION,
+)
+from deepmd.utils.data import (
+    DataRequirementItem,
+)
+
+
+class EnergyStdLoss(TaskLoss):
+    def __init__(
+        self,
+        starter_learning_rate=1.0,
+        start_pref_e=0.0,
+        limit_pref_e=0.0,
+        start_pref_f=0.0,
+        limit_pref_f=0.0,
+        start_pref_v=0.0,
+        limit_pref_v=0.0,
+        start_pref_ae: float = 0.0,
+        limit_pref_ae: float = 0.0,
+        start_pref_pf: float = 0.0,
+        limit_pref_pf: float = 0.0,
+        use_l1_all: bool = False,
+        inference=False,
+        **kwargs,
+    ):
+        r"""Construct a layer to compute loss on energy, force and virial.
+
+        Parameters
+        ----------
+        starter_learning_rate : float
+            The learning rate at the start of the training.
+        start_pref_e : float
+            The prefactor of energy loss at the start of the training.
+        limit_pref_e : float
+            The prefactor of energy loss at the end of the training.
+        start_pref_f : float
+            The prefactor of force loss at the start of the training.
+        limit_pref_f : float
+            The prefactor of force loss at the end of the training.
+        start_pref_v : float
+            The prefactor of virial loss at the start of the training.
+        limit_pref_v : float
+            The prefactor of virial loss at the end of the training.
+        start_pref_ae : float
+            The prefactor of atomic energy loss at the start of the training.
+        limit_pref_ae : float
+            The prefactor of atomic energy loss at the end of the training.
+        start_pref_pf : float
+            The prefactor of atomic prefactor force loss at the start of the training.
+        limit_pref_pf : float
+            The prefactor of atomic prefactor force loss at the end of the training.
+        use_l1_all : bool
+            Whether to use L1 loss, if False (default), it will use L2 loss.
+        inference : bool
+            If true, it will output all losses found in output, ignoring the pre-factors.
+        **kwargs
+            Other keyword arguments.
+        """
+        super().__init__()
+        self.starter_learning_rate = starter_learning_rate
+        self.has_e = (start_pref_e != 0.0 and limit_pref_e != 0.0) or inference
+        self.has_f = (start_pref_f != 0.0 and limit_pref_f != 0.0) or inference
+        self.has_v = (start_pref_v != 0.0 and limit_pref_v != 0.0) or inference
+
+        # TODO EnergyStdLoss need support for atomic energy and atomic pref
+        self.has_ae = (start_pref_ae != 0.0 and limit_pref_ae != 0.0) or inference
+        self.has_pf = (start_pref_pf != 0.0 and limit_pref_pf != 0.0) or inference
+
+        self.start_pref_e = start_pref_e
+        self.limit_pref_e = limit_pref_e
+        self.start_pref_f = start_pref_f
+        self.limit_pref_f = limit_pref_f
+        self.start_pref_v = start_pref_v
+        self.limit_pref_v = limit_pref_v
+        self.use_l1_all = use_l1_all
+        self.inference = inference
+
+    def forward(self, input_dict, model, label, natoms, learning_rate, mae=False):
+        """Return loss on energy and force.
+
+        Parameters
+        ----------
+        input_dict : dict[str, torch.Tensor]
+            Model inputs.
+        model : torch.nn.Module
+            Model to be used to output the predictions.
+        label : dict[str, torch.Tensor]
+            Labels.
+        natoms : int
+            The local atom number.
+
+        Returns
+        -------
+        model_pred: dict[str, torch.Tensor]
+            Model predictions.
+        loss: torch.Tensor
+            Loss for model to minimize.
+        more_loss: dict[str, torch.Tensor]
+            Other losses for display.
+        """
+        model_pred = model(**input_dict)
+        coef = learning_rate / self.starter_learning_rate
+        pref_e = self.limit_pref_e + (self.start_pref_e - self.limit_pref_e) * coef
+        pref_f = self.limit_pref_f + (self.start_pref_f - self.limit_pref_f) * coef
+        pref_v = self.limit_pref_v + (self.start_pref_v - self.limit_pref_v) * coef
+        loss = torch.zeros(1, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE)[0]
+        more_loss = {}
+        # more_loss['log_keys'] = []  # showed when validation on the fly
+        # more_loss['test_keys'] = []  # showed when doing dp test
+        atom_norm = 1.0 / natoms
+        if self.has_e and "energy" in model_pred and "energy" in label:
+            find_energy = label.get("find_energy", 0.0)
+            pref_e = pref_e * find_energy
+            if not self.use_l1_all:
+                l2_ener_loss = torch.mean(
+                    torch.square(model_pred["energy"] - label["energy"])
+                )
+                if not self.inference:
+                    more_loss["l2_ener_loss"] = self.display_if_exist(
+                        l2_ener_loss.detach(), find_energy
+                    )
+                loss += atom_norm * (pref_e * l2_ener_loss)
+                rmse_e = l2_ener_loss.sqrt() * atom_norm
+                more_loss["rmse_e"] = self.display_if_exist(
+                    rmse_e.detach(), find_energy
+                )
+                # more_loss['log_keys'].append('rmse_e')
+            else:  # use l1 and for all atoms
+                l1_ener_loss = F.l1_loss(
+                    model_pred["energy"].reshape(-1),
+                    label["energy"].reshape(-1),
+                    reduction="sum",
+                )
+                loss += pref_e * l1_ener_loss
+                more_loss["mae_e"] = self.display_if_exist(
+                    F.l1_loss(
+                        model_pred["energy"].reshape(-1),
+                        label["energy"].reshape(-1),
+                        reduction="mean",
+                    ).detach(),
+                    find_energy,
+                )
+                # more_loss['log_keys'].append('rmse_e')
+            if mae:
+                mae_e = (
+                    torch.mean(torch.abs(model_pred["energy"] - label["energy"]))
+                    * atom_norm
+                )
+                more_loss["mae_e"] = self.display_if_exist(mae_e.detach(), find_energy)
+                mae_e_all = torch.mean(
+                    torch.abs(model_pred["energy"] - label["energy"])
+                )
+                more_loss["mae_e_all"] = self.display_if_exist(
+                    mae_e_all.detach(), find_energy
+                )
+
+        if self.has_f and "force" in model_pred and "force" in label:
+            find_force = label.get("find_force", 0.0)
+            pref_f = pref_f * find_force
+            if "force_target_mask" in model_pred:
+                force_target_mask = model_pred["force_target_mask"]
+            else:
+                force_target_mask = None
+            if not self.use_l1_all:
+                if force_target_mask is not None:
+                    diff_f = (label["force"] - model_pred["force"]) * force_target_mask
+                    force_cnt = force_target_mask.squeeze(-1).sum(-1)
+                    l2_force_loss = torch.mean(
+                        torch.square(diff_f).mean(-1).sum(-1) / force_cnt
+                    )
+                else:
+                    diff_f = label["force"] - model_pred["force"]
+                    l2_force_loss = torch.mean(torch.square(diff_f))
+                if not self.inference:
+                    more_loss["l2_force_loss"] = self.display_if_exist(
+                        l2_force_loss.detach(), find_force
+                    )
+                loss += (pref_f * l2_force_loss).to(GLOBAL_PT_FLOAT_PRECISION)
+                rmse_f = l2_force_loss.sqrt()
+                more_loss["rmse_f"] = self.display_if_exist(rmse_f.detach(), find_force)
+            else:
+                l1_force_loss = F.l1_loss(
+                    label["force"], model_pred["force"], reduction="none"
+                )
+                if force_target_mask is not None:
+                    l1_force_loss *= force_target_mask
+                    force_cnt = force_target_mask.squeeze(-1).sum(-1)
+                    more_loss["mae_f"] = self.display_if_exist(
+                        (l1_force_loss.mean(-1).sum(-1) / force_cnt).mean(), find_force
+                    )
+                    l1_force_loss = (l1_force_loss.sum(-1).sum(-1) / force_cnt).sum()
+                else:
+                    more_loss["mae_f"] = self.display_if_exist(
+                        l1_force_loss.mean().detach(), find_force
+                    )
+                    l1_force_loss = l1_force_loss.sum(-1).mean(-1).sum()
+                loss += (pref_f * l1_force_loss).to(GLOBAL_PT_FLOAT_PRECISION)
+            if mae:
+                mae_f = torch.mean(torch.abs(diff_f))
+                more_loss["mae_f"] = self.display_if_exist(mae_f.detach(), find_force)
+
+        if self.has_v and "virial" in model_pred and "virial" in label:
+            find_virial = label.get("find_virial", 0.0)
+            pref_v = pref_v * find_virial
+            diff_v = label["virial"] - model_pred["virial"].reshape(-1, 9)
+            l2_virial_loss = torch.mean(torch.square(diff_v))
+            if not self.inference:
+                more_loss["l2_virial_loss"] = self.display_if_exist(
+                    l2_virial_loss.detach(), find_virial
+                )
+            loss += atom_norm * (pref_v * l2_virial_loss)
+            rmse_v = l2_virial_loss.sqrt() * atom_norm
+            more_loss["rmse_v"] = self.display_if_exist(rmse_v.detach(), find_virial)
+            if mae:
+                mae_v = torch.mean(torch.abs(diff_v)) * atom_norm
+                more_loss["mae_v"] = self.display_if_exist(mae_v.detach(), find_virial)
+        if not self.inference:
+            more_loss["rmse"] = torch.sqrt(loss.detach())
+        return model_pred, loss, more_loss
+
+    @property
+    def label_requirement(self) -> List[DataRequirementItem]:
+        """Return data label requirements needed for this loss calculation."""
+        label_requirement = []
+        if self.has_e:
+            label_requirement.append(
+                DataRequirementItem(
+                    "energy",
+                    ndof=1,
+                    atomic=False,
+                    must=False,
+                    high_prec=True,
+                )
+            )
+        if self.has_f:
+            label_requirement.append(
+                DataRequirementItem(
+                    "force",
+                    ndof=3,
+                    atomic=True,
+                    must=False,
+                    high_prec=False,
+                )
+            )
+        if self.has_v:
+            label_requirement.append(
+                DataRequirementItem(
+                    "virial",
+                    ndof=9,
+                    atomic=False,
+                    must=False,
+                    high_prec=False,
+                )
+            )
+        if self.has_ae:
+            label_requirement.append(
+                DataRequirementItem(
+                    "atom_ener",
+                    ndof=1,
+                    atomic=True,
+                    must=False,
+                    high_prec=False,
+                )
+            )
+        if self.has_pf:
+            label_requirement.append(
+                DataRequirementItem(
+                    "atom_pref",
+                    ndof=1,
+                    atomic=True,
+                    must=False,
+                    high_prec=False,
+                    repeat=3,
+                )
+            )
+        return label_requirement
diff --git a/deepmd/pt/loss/ener_spin.py b/deepmd/pt/loss/ener_spin.py
new file mode 100644
index 0000000000..3bd81adf77
--- /dev/null
+++ b/deepmd/pt/loss/ener_spin.py
@@ -0,0 +1,281 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    List,
+)
+
+import torch
+import torch.nn.functional as F
+
+from deepmd.pt.loss.loss import (
+    TaskLoss,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.env import (
+    GLOBAL_PT_FLOAT_PRECISION,
+)
+from deepmd.utils.data import (
+    DataRequirementItem,
+)
+
+
+class EnergySpinLoss(TaskLoss):
+    def __init__(
+        self,
+        starter_learning_rate=1.0,
+        start_pref_e=0.0,
+        limit_pref_e=0.0,
+        start_pref_fr=0.0,
+        limit_pref_fr=0.0,
+        start_pref_fm=0.0,
+        limit_pref_fm=0.0,
+        start_pref_v=0.0,
+        limit_pref_v=0.0,
+        start_pref_ae: float = 0.0,
+        limit_pref_ae: float = 0.0,
+        start_pref_pf: float = 0.0,
+        limit_pref_pf: float = 0.0,
+        use_l1_all: bool = False,
+        inference=False,
+        **kwargs,
+    ):
+        """Construct a layer to compute loss on energy, real force, magnetic force and virial."""
+        super().__init__()
+        self.starter_learning_rate = starter_learning_rate
+        self.has_e = (start_pref_e != 0.0 and limit_pref_e != 0.0) or inference
+        self.has_fr = (start_pref_fr != 0.0 and limit_pref_fr != 0.0) or inference
+        self.has_fm = (start_pref_fm != 0.0 and limit_pref_fm != 0.0) or inference
+
+        # TODO EnergySpinLoss needs support for virial, atomic energy and atomic pref
+        self.has_v = (start_pref_v != 0.0 and limit_pref_v != 0.0) or inference
+        self.has_ae = (start_pref_ae != 0.0 and limit_pref_ae != 0.0) or inference
+        self.has_pf = (start_pref_pf != 0.0 and limit_pref_pf != 0.0) or inference
+
+        self.start_pref_e = start_pref_e
+        self.limit_pref_e = limit_pref_e
+        self.start_pref_fr = start_pref_fr
+        self.limit_pref_fr = limit_pref_fr
+        self.start_pref_fm = start_pref_fm
+        self.limit_pref_fm = limit_pref_fm
+        self.start_pref_v = start_pref_v
+        self.limit_pref_v = limit_pref_v
+        self.use_l1_all = use_l1_all
+        self.inference = inference
+
+    def forward(self, input_dict, model, label, natoms, learning_rate, mae=False):
+        """Return energy loss with magnetic labels.
+
+        Parameters
+        ----------
+        input_dict : dict[str, torch.Tensor]
+            Model inputs.
+        model : torch.nn.Module
+            Model to be used to output the predictions.
+        label : dict[str, torch.Tensor]
+            Labels.
+        natoms : int
+            The local atom number.
+
+        Returns
+        -------
+        model_pred: dict[str, torch.Tensor]
+            Model predictions.
+        loss: torch.Tensor
+            Loss for model to minimize.
+        more_loss: dict[str, torch.Tensor]
+            Other losses for display.
+        """
+        model_pred = model(**input_dict)
+        coef = learning_rate / self.starter_learning_rate
+        pref_e = self.limit_pref_e + (self.start_pref_e - self.limit_pref_e) * coef
+        pref_fr = self.limit_pref_fr + (self.start_pref_fr - self.limit_pref_fr) * coef
+        pref_fm = self.limit_pref_fm + (self.start_pref_fm - self.limit_pref_fm) * coef
+        pref_v = self.limit_pref_v + (self.start_pref_v - self.limit_pref_v) * coef
+        loss = torch.tensor(0.0, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE)
+        more_loss = {}
+        # more_loss['log_keys'] = []  # showed when validation on the fly
+        # more_loss['test_keys'] = []  # showed when doing dp test
+        atom_norm = 1.0 / natoms
+        if self.has_e and "energy" in model_pred and "energy" in label:
+            find_energy = label.get("find_energy", 0.0)
+            pref_e = pref_e * find_energy
+            if not self.use_l1_all:
+                l2_ener_loss = torch.mean(
+                    torch.square(model_pred["energy"] - label["energy"])
+                )
+                if not self.inference:
+                    more_loss["l2_ener_loss"] = self.display_if_exist(
+                        l2_ener_loss.detach(), find_energy
+                    )
+                loss += atom_norm * (pref_e * l2_ener_loss)
+                rmse_e = l2_ener_loss.sqrt() * atom_norm
+                more_loss["rmse_e"] = self.display_if_exist(
+                    rmse_e.detach(), find_energy
+                )
+                # more_loss['log_keys'].append('rmse_e')
+            else:  # use l1 and for all atoms
+                l1_ener_loss = F.l1_loss(
+                    model_pred["energy"].reshape(-1),
+                    label["energy"].reshape(-1),
+                    reduction="sum",
+                )
+                loss += pref_e * l1_ener_loss
+                more_loss["mae_e"] = self.display_if_exist(
+                    F.l1_loss(
+                        model_pred["energy"].reshape(-1),
+                        label["energy"].reshape(-1),
+                        reduction="mean",
+                    ).detach(),
+                    find_energy,
+                )
+                # more_loss['log_keys'].append('rmse_e')
+            if mae:
+                mae_e = (
+                    torch.mean(torch.abs(model_pred["energy"] - label["energy"]))
+                    * atom_norm
+                )
+                more_loss["mae_e"] = self.display_if_exist(mae_e.detach(), find_energy)
+                mae_e_all = torch.mean(
+                    torch.abs(model_pred["energy"] - label["energy"])
+                )
+                more_loss["mae_e_all"] = self.display_if_exist(
+                    mae_e_all.detach(), find_energy
+                )
+
+        if self.has_fr and "force" in model_pred and "force" in label:
+            find_force_r = label.get("find_force", 0.0)
+            pref_fr = pref_fr * find_force_r
+            if not self.use_l1_all:
+                diff_fr = label["force"] - model_pred["force"]
+                l2_force_real_loss = torch.mean(torch.square(diff_fr))
+                if not self.inference:
+                    more_loss["l2_force_r_loss"] = self.display_if_exist(
+                        l2_force_real_loss.detach(), find_force_r
+                    )
+                loss += (pref_fr * l2_force_real_loss).to(GLOBAL_PT_FLOAT_PRECISION)
+                rmse_fr = l2_force_real_loss.sqrt()
+                more_loss["rmse_fr"] = self.display_if_exist(
+                    rmse_fr.detach(), find_force_r
+                )
+                if mae:
+                    mae_fr = torch.mean(torch.abs(diff_fr))
+                    more_loss["mae_fr"] = self.display_if_exist(
+                        mae_fr.detach(), find_force_r
+                    )
+            else:
+                l1_force_real_loss = F.l1_loss(
+                    label["force"], model_pred["force"], reduction="none"
+                )
+                more_loss["mae_fr"] = self.display_if_exist(
+                    l1_force_real_loss.mean().detach(), find_force_r
+                )
+                l1_force_real_loss = l1_force_real_loss.sum(-1).mean(-1).sum()
+                loss += (pref_fr * l1_force_real_loss).to(GLOBAL_PT_FLOAT_PRECISION)
+
+        if self.has_fm and "force_mag" in model_pred and "force_mag" in label:
+            find_force_m = label.get("find_force_mag", 0.0)
+            pref_fm = pref_fm * find_force_m
+            nframes = model_pred["force_mag"].shape[0]
+            atomic_mask = model_pred["mask_mag"].expand([-1, -1, 3])
+            label_force_mag = label["force_mag"][atomic_mask].view(nframes, -1, 3)
+            model_pred_force_mag = model_pred["force_mag"][atomic_mask].view(
+                nframes, -1, 3
+            )
+            if not self.use_l1_all:
+                diff_fm = label_force_mag - model_pred_force_mag
+                l2_force_mag_loss = torch.mean(torch.square(diff_fm))
+                if not self.inference:
+                    more_loss["l2_force_m_loss"] = self.display_if_exist(
+                        l2_force_mag_loss.detach(), find_force_m
+                    )
+                loss += (pref_fm * l2_force_mag_loss).to(GLOBAL_PT_FLOAT_PRECISION)
+                rmse_fm = l2_force_mag_loss.sqrt()
+                more_loss["rmse_fm"] = self.display_if_exist(
+                    rmse_fm.detach(), find_force_m
+                )
+                if mae:
+                    mae_fm = torch.mean(torch.abs(diff_fm))
+                    more_loss["mae_fm"] = self.display_if_exist(
+                        mae_fm.detach(), find_force_m
+                    )
+            else:
+                l1_force_mag_loss = F.l1_loss(
+                    label_force_mag, model_pred_force_mag, reduction="none"
+                )
+                more_loss["mae_fm"] = self.display_if_exist(
+                    l1_force_mag_loss.mean().detach(), find_force_m
+                )
+                l1_force_mag_loss = l1_force_mag_loss.sum(-1).mean(-1).sum()
+                loss += (pref_fm * l1_force_mag_loss).to(GLOBAL_PT_FLOAT_PRECISION)
+
+        if not self.inference:
+            more_loss["rmse"] = torch.sqrt(loss.detach())
+        return model_pred, loss, more_loss
+
+    @property
+    def label_requirement(self) -> List[DataRequirementItem]:
+        """Return data label requirements needed for this loss calculation."""
+        label_requirement = []
+        if self.has_e:
+            label_requirement.append(
+                DataRequirementItem(
+                    "energy",
+                    ndof=1,
+                    atomic=False,
+                    must=False,
+                    high_prec=True,
+                )
+            )
+        if self.has_fr:
+            label_requirement.append(
+                DataRequirementItem(
+                    "force",
+                    ndof=3,
+                    atomic=True,
+                    must=False,
+                    high_prec=False,
+                )
+            )
+        if self.has_fm:
+            label_requirement.append(
+                DataRequirementItem(
+                    "force_mag",
+                    ndof=3,
+                    atomic=True,
+                    must=False,
+                    high_prec=False,
+                )
+            )
+        if self.has_v:
+            label_requirement.append(
+                DataRequirementItem(
+                    "virial",
+                    ndof=9,
+                    atomic=False,
+                    must=False,
+                    high_prec=False,
+                )
+            )
+        if self.has_ae:
+            label_requirement.append(
+                DataRequirementItem(
+                    "atom_ener",
+                    ndof=1,
+                    atomic=True,
+                    must=False,
+                    high_prec=False,
+                )
+            )
+        if self.has_pf:
+            label_requirement.append(
+                DataRequirementItem(
+                    "atom_pref",
+                    ndof=1,
+                    atomic=True,
+                    must=False,
+                    high_prec=False,
+                    repeat=3,
+                )
+            )
+        return label_requirement
diff --git a/deepmd/pt/loss/loss.py b/deepmd/pt/loss/loss.py
new file mode 100644
index 0000000000..7e26f6571a
--- /dev/null
+++ b/deepmd/pt/loss/loss.py
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from abc import (
+    ABC,
+    abstractmethod,
+)
+from typing import (
+    List,
+)
+
+import torch
+
+from deepmd.utils.data import (
+    DataRequirementItem,
+)
+
+
+class TaskLoss(torch.nn.Module, ABC):
+    def __init__(self, **kwargs):
+        """Construct loss."""
+        super().__init__()
+
+    def forward(self, input_dict, model, label, natoms, learning_rate):
+        """Return loss ."""
+        raise NotImplementedError
+
+    @property
+    @abstractmethod
+    def label_requirement(self) -> List[DataRequirementItem]:
+        """Return data label requirements needed for this loss calculation."""
+        pass
+
+    @staticmethod
+    def display_if_exist(loss: torch.Tensor, find_property: float) -> torch.Tensor:
+        """Display NaN if labeled property is not found.
+
+        Parameters
+        ----------
+        loss : torch.Tensor
+            the loss tensor
+        find_property : float
+            whether the property is found
+        """
+        return loss if bool(find_property) else torch.nan
diff --git a/deepmd/pt/loss/tensor.py b/deepmd/pt/loss/tensor.py
new file mode 100644
index 0000000000..3dd91d203e
--- /dev/null
+++ b/deepmd/pt/loss/tensor.py
@@ -0,0 +1,177 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    List,
+)
+
+import torch
+
+from deepmd.pt.loss.loss import (
+    TaskLoss,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.utils.data import (
+    DataRequirementItem,
+)
+
+
+class TensorLoss(TaskLoss):
+    def __init__(
+        self,
+        tensor_name: str,
+        tensor_size: int,
+        label_name: str,
+        pref_atomic: float = 0.0,
+        pref: float = 0.0,
+        inference=False,
+        **kwargs,
+    ):
+        r"""Construct a loss for local and global tensors.
+
+        Parameters
+        ----------
+        tensor_name : str
+            The name of the tensor in the model predictions to compute the loss.
+        tensor_size : int
+            The size (dimension) of the tensor.
+        label_name : str
+            The name of the tensor in the labels to compute the loss.
+        pref_atomic : float
+            The prefactor of the weight of atomic loss. It should be larger than or equal to 0.
+        pref : float
+            The prefactor of the weight of global loss. It should be larger than or equal to 0.
+        inference : bool
+            If true, it will output all losses found in output, ignoring the pre-factors.
+        **kwargs
+            Other keyword arguments.
+        """
+        super().__init__()
+        self.tensor_name = tensor_name
+        self.tensor_size = tensor_size
+        self.label_name = label_name
+        self.local_weight = pref_atomic
+        self.global_weight = pref
+        self.inference = inference
+
+        assert (
+            self.local_weight >= 0.0 and self.global_weight >= 0.0
+        ), "Can not assign negative weight to `pref` and `pref_atomic`"
+        self.has_local_weight = self.local_weight > 0.0 or inference
+        self.has_global_weight = self.global_weight > 0.0 or inference
+        assert self.has_local_weight or self.has_global_weight, AssertionError(
+            "Can not assian zero weight both to `pref` and `pref_atomic`"
+        )
+
+    def forward(self, input_dict, model, label, natoms, learning_rate=0.0, mae=False):
+        """Return loss on local and global tensors.
+
+        Parameters
+        ----------
+        input_dict : dict[str, torch.Tensor]
+            Model inputs.
+        model : torch.nn.Module
+            Model to be used to output the predictions.
+        label : dict[str, torch.Tensor]
+            Labels.
+        natoms : int
+            The local atom number.
+
+        Returns
+        -------
+        model_pred: dict[str, torch.Tensor]
+            Model predictions.
+        loss: torch.Tensor
+            Loss for model to minimize.
+        more_loss: dict[str, torch.Tensor]
+            Other losses for display.
+        """
+        model_pred = model(**input_dict)
+        del learning_rate, mae
+        loss = torch.zeros(1, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE)[0]
+        more_loss = {}
+        if (
+            self.has_local_weight
+            and self.tensor_name in model_pred
+            and "atomic_" + self.label_name in label
+        ):
+            find_local = label.get("find_" + "atomic_" + self.label_name, 0.0)
+            local_weight = self.local_weight * find_local
+            local_tensor_pred = model_pred[self.tensor_name].reshape(
+                [-1, natoms, self.tensor_size]
+            )
+            local_tensor_label = label["atomic_" + self.label_name].reshape(
+                [-1, natoms, self.tensor_size]
+            )
+            diff = (local_tensor_pred - local_tensor_label).reshape(
+                [-1, self.tensor_size]
+            )
+            if "mask" in model_pred:
+                diff = diff[model_pred["mask"].reshape([-1]).bool()]
+            l2_local_loss = torch.mean(torch.square(diff))
+            if not self.inference:
+                more_loss[f"l2_local_{self.tensor_name}_loss"] = self.display_if_exist(
+                    l2_local_loss.detach(), find_local
+                )
+            loss += local_weight * l2_local_loss
+            rmse_local = l2_local_loss.sqrt()
+            more_loss[f"rmse_local_{self.tensor_name}"] = self.display_if_exist(
+                rmse_local.detach(), find_local
+            )
+        if (
+            self.has_global_weight
+            and "global_" + self.tensor_name in model_pred
+            and self.label_name in label
+        ):
+            find_global = label.get("find_" + self.label_name, 0.0)
+            global_weight = self.global_weight * find_global
+            global_tensor_pred = model_pred["global_" + self.tensor_name].reshape(
+                [-1, self.tensor_size]
+            )
+            global_tensor_label = label[self.label_name].reshape([-1, self.tensor_size])
+            diff = global_tensor_pred - global_tensor_label
+            if "mask" in model_pred:
+                atom_num = model_pred["mask"].sum(-1, keepdim=True)
+                l2_global_loss = torch.mean(
+                    torch.sum(torch.square(diff) * atom_num, dim=0) / atom_num.sum()
+                )
+                atom_num = torch.mean(atom_num.float())
+            else:
+                atom_num = natoms
+                l2_global_loss = torch.mean(torch.square(diff))
+            if not self.inference:
+                more_loss[f"l2_global_{self.tensor_name}_loss"] = self.display_if_exist(
+                    l2_global_loss.detach(), find_global
+                )
+            loss += global_weight * l2_global_loss
+            rmse_global = l2_global_loss.sqrt() / atom_num
+            more_loss[f"rmse_global_{self.tensor_name}"] = self.display_if_exist(
+                rmse_global.detach(), find_global
+            )
+        return model_pred, loss, more_loss
+
+    @property
+    def label_requirement(self) -> List[DataRequirementItem]:
+        """Return data label requirements needed for this loss calculation."""
+        label_requirement = []
+        if self.has_local_weight:
+            label_requirement.append(
+                DataRequirementItem(
+                    "atomic_" + self.label_name,
+                    ndof=self.tensor_size,
+                    atomic=True,
+                    must=False,
+                    high_prec=False,
+                )
+            )
+        if self.has_global_weight:
+            label_requirement.append(
+                DataRequirementItem(
+                    self.label_name,
+                    ndof=self.tensor_size,
+                    atomic=False,
+                    must=False,
+                    high_prec=False,
+                )
+            )
+        return label_requirement
diff --git a/deepmd/pt/model/__init__.py b/deepmd/pt/model/__init__.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/deepmd/pt/model/__init__.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/deepmd/pt/model/atomic_model/__init__.py b/deepmd/pt/model/atomic_model/__init__.py
new file mode 100644
index 0000000000..a747f28556
--- /dev/null
+++ b/deepmd/pt/model/atomic_model/__init__.py
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""The atomic model provides the prediction of some property on each
+atom.  All the atomic models are not supposed to be directly accessed
+by users, but it provides a convenient interface for the
+implementation of models.
+
+Taking the energy models for example, the developeres only needs to
+implement the atomic energy prediction via an atomic model, and the
+model can be automatically made by the `deepmd.dpmodel.make_model`
+method. The `DPModel` is made by
+```
+DPModel = make_model(DPAtomicModel)
+```
+
+"""
+
+from .base_atomic_model import (
+    BaseAtomicModel,
+)
+from .dp_atomic_model import (
+    DPAtomicModel,
+)
+from .linear_atomic_model import (
+    DPZBLLinearEnergyAtomicModel,
+    LinearEnergyAtomicModel,
+)
+from .pairtab_atomic_model import (
+    PairTabAtomicModel,
+)
+
+__all__ = [
+    "BaseAtomicModel",
+    "DPAtomicModel",
+    "PairTabAtomicModel",
+    "LinearEnergyAtomicModel",
+    "DPZBLLinearEnergyAtomicModel",
+]
diff --git a/deepmd/pt/model/atomic_model/base_atomic_model.py b/deepmd/pt/model/atomic_model/base_atomic_model.py
new file mode 100644
index 0000000000..129b8dc11d
--- /dev/null
+++ b/deepmd/pt/model/atomic_model/base_atomic_model.py
@@ -0,0 +1,304 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+
+
+import logging
+from typing import (
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Tuple,
+)
+
+import numpy as np
+import torch
+
+from deepmd.dpmodel.atomic_model import (
+    make_base_atomic_model,
+)
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+    OutputVariableDef,
+)
+from deepmd.pt.utils import (
+    AtomExcludeMask,
+    PairExcludeMask,
+)
+from deepmd.pt.utils.nlist import (
+    extend_input_and_build_neighbor_list,
+)
+from deepmd.pt.utils.stat import (
+    compute_output_stats,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+
+log = logging.getLogger(__name__)
+
+BaseAtomicModel_ = make_base_atomic_model(torch.Tensor)
+
+
+class BaseAtomicModel(BaseAtomicModel_):
+    def __init__(
+        self,
+        atom_exclude_types: List[int] = [],
+        pair_exclude_types: List[Tuple[int, int]] = [],
+    ):
+        super().__init__()
+        self.reinit_atom_exclude(atom_exclude_types)
+        self.reinit_pair_exclude(pair_exclude_types)
+
+    def reinit_atom_exclude(
+        self,
+        exclude_types: List[int] = [],
+    ):
+        self.atom_exclude_types = exclude_types
+        if exclude_types == []:
+            self.atom_excl = None
+        else:
+            self.atom_excl = AtomExcludeMask(self.get_ntypes(), self.atom_exclude_types)
+
+    def reinit_pair_exclude(
+        self,
+        exclude_types: List[Tuple[int, int]] = [],
+    ):
+        self.pair_exclude_types = exclude_types
+        if exclude_types == []:
+            self.pair_excl = None
+        else:
+            self.pair_excl = PairExcludeMask(self.get_ntypes(), self.pair_exclude_types)
+
+    # to make jit happy...
+    def make_atom_mask(
+        self,
+        atype: torch.Tensor,
+    ) -> torch.Tensor:
+        """The atoms with type < 0 are treated as virutal atoms,
+        which serves as place-holders for multi-frame calculations
+        with different number of atoms in different frames.
+
+        Parameters
+        ----------
+        atype
+            Atom types. >= 0 for real atoms <0 for virtual atoms.
+
+        Returns
+        -------
+        mask
+            True for real atoms and False for virutal atoms.
+
+        """
+        # supposed to be supported by all backends
+        return atype >= 0
+
+    def atomic_output_def(self) -> FittingOutputDef:
+        old_def = self.fitting_output_def()
+        old_list = list(old_def.get_data().values())
+        return FittingOutputDef(
+            old_list  # noqa:RUF005
+            + [
+                OutputVariableDef(
+                    name="mask",
+                    shape=[1],
+                    reduciable=False,
+                    r_differentiable=False,
+                    c_differentiable=False,
+                )
+            ]
+        )
+
+    def forward_common_atomic(
+        self,
+        extended_coord: torch.Tensor,
+        extended_atype: torch.Tensor,
+        nlist: torch.Tensor,
+        mapping: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+    ) -> Dict[str, torch.Tensor]:
+        """Common interface for atomic inference.
+
+        This method accept extended coordinates, extended atom typs, neighbor list,
+        and predict the atomic contribution of the fit property.
+
+        Parameters
+        ----------
+        extended_coord
+            extended coodinates, shape: nf x (nall x 3)
+        extended_atype
+            extended atom typs, shape: nf x nall
+            for a type < 0 indicating the atomic is virtual.
+        nlist
+            neighbor list, shape: nf x nloc x nsel
+        mapping
+            extended to local index mapping, shape: nf x nall
+        fparam
+            frame parameters, shape: nf x dim_fparam
+        aparam
+            atomic parameter, shape: nf x nloc x dim_aparam
+
+        Returns
+        -------
+        ret_dict
+            dict of output atomic properties.
+            should implement the definition of `fitting_output_def`.
+            ret_dict["mask"] of shape nf x nloc will be provided.
+            ret_dict["mask"][ff,ii] == 1 indicating the ii-th atom of the ff-th frame is real.
+            ret_dict["mask"][ff,ii] == 0 indicating the ii-th atom of the ff-th frame is virtual.
+
+        """
+        _, nloc, _ = nlist.shape
+        atype = extended_atype[:, :nloc]
+
+        if self.pair_excl is not None:
+            pair_mask = self.pair_excl(nlist, extended_atype)
+            # exclude neighbors in the nlist
+            nlist = torch.where(pair_mask == 1, nlist, -1)
+
+        ext_atom_mask = self.make_atom_mask(extended_atype)
+        ret_dict = self.forward_atomic(
+            extended_coord,
+            torch.where(ext_atom_mask, extended_atype, 0),
+            nlist,
+            mapping=mapping,
+            fparam=fparam,
+            aparam=aparam,
+        )
+
+        # nf x nloc
+        atom_mask = ext_atom_mask[:, :nloc].to(torch.int32)
+        if self.atom_excl is not None:
+            atom_mask *= self.atom_excl(atype)
+
+        for kk in ret_dict.keys():
+            out_shape = ret_dict[kk].shape
+            ret_dict[kk] = (
+                ret_dict[kk].reshape([out_shape[0], out_shape[1], -1])
+                * atom_mask[:, :, None]
+            ).view(out_shape)
+        ret_dict["mask"] = atom_mask
+
+        return ret_dict
+
+    def serialize(self) -> dict:
+        return {
+            "atom_exclude_types": self.atom_exclude_types,
+            "pair_exclude_types": self.pair_exclude_types,
+        }
+
+    def get_forward_wrapper_func(self) -> Callable[..., torch.Tensor]:
+        """Get a forward wrapper of the atomic model for output bias calculation."""
+
+        def model_forward(coord, atype, box, fparam=None, aparam=None):
+            with torch.no_grad():  # it's essential for pure torch forward function to use auto_batchsize
+                (
+                    extended_coord,
+                    extended_atype,
+                    mapping,
+                    nlist,
+                ) = extend_input_and_build_neighbor_list(
+                    coord,
+                    atype,
+                    self.get_rcut(),
+                    self.get_sel(),
+                    mixed_types=self.mixed_types(),
+                    box=box,
+                )
+                atomic_ret = self.forward_common_atomic(
+                    extended_coord,
+                    extended_atype,
+                    nlist,
+                    mapping=mapping,
+                    fparam=fparam,
+                    aparam=aparam,
+                )
+                return {kk: vv.detach() for kk, vv in atomic_ret.items()}
+
+        return model_forward
+
+    def compute_or_load_stat(
+        self,
+        sampled_func,
+        stat_file_path: Optional[DPPath] = None,
+    ):
+        """
+        Compute or load the statistics parameters of the model,
+        such as mean and standard deviation of descriptors or the energy bias of the fitting net.
+        When `sampled` is provided, all the statistics parameters will be calculated (or re-calculated for update),
+        and saved in the `stat_file_path`(s).
+        When `sampled` is not provided, it will check the existence of `stat_file_path`(s)
+        and load the calculated statistics parameters.
+
+        Parameters
+        ----------
+        sampled_func
+            The sampled data frames from different data systems.
+        stat_file_path
+            The path to the statistics files.
+        """
+        raise NotImplementedError
+
+    def change_out_bias(
+        self,
+        merged,
+        origin_type_map,
+        full_type_map,
+        bias_adjust_mode="change-by-statistic",
+    ) -> None:
+        """Change the output bias according to the input data and the pretrained model.
+
+        Parameters
+        ----------
+        merged : Union[Callable[[], List[dict]], List[dict]]
+            - List[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], List[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        origin_type_map : List[str]
+            The original type_map in dataset, they are targets to change the output bias.
+        full_type_map : List[str]
+            The full type_map in pre-trained model
+        bias_adjust_mode : str
+            The mode for changing output bias : ['change-by-statistic', 'set-by-statistic']
+            'change-by-statistic' : perform predictions on labels of target dataset,
+                    and do least square on the errors to obtain the target shift as bias.
+            'set-by-statistic' : directly use the statistic output bias in the target dataset.
+        """
+        sorter = np.argsort(full_type_map)
+        missing_types = [t for t in origin_type_map if t not in full_type_map]
+        assert (
+            not missing_types
+        ), f"Some types are not in the pre-trained model: {list(missing_types)} !"
+        idx_type_map = sorter[
+            np.searchsorted(full_type_map, origin_type_map, sorter=sorter)
+        ]
+        original_bias = self.get_out_bias()
+        if bias_adjust_mode == "change-by-statistic":
+            delta_bias = compute_output_stats(
+                merged,
+                self.get_ntypes(),
+                keys=["energy"],
+                model_forward=self.get_forward_wrapper_func(),
+            )["energy"]
+            self.set_out_bias(delta_bias, add=True)
+        elif bias_adjust_mode == "set-by-statistic":
+            bias_atom = compute_output_stats(
+                merged,
+                self.get_ntypes(),
+                keys=["energy"],
+            )["energy"]
+            self.set_out_bias(bias_atom)
+        else:
+            raise RuntimeError("Unknown bias_adjust_mode mode: " + bias_adjust_mode)
+        bias_atom = self.get_out_bias()
+        log.info(
+            f"Change output bias of {origin_type_map!s} "
+            f"from {to_numpy_array(original_bias[idx_type_map]).reshape(-1)!s} "
+            f"to {to_numpy_array(bias_atom[idx_type_map]).reshape(-1)!s}."
+        )
diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py
new file mode 100644
index 0000000000..13b8f09a79
--- /dev/null
+++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py
@@ -0,0 +1,269 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import functools
+import logging
+from typing import (
+    Dict,
+    List,
+    Optional,
+)
+
+import torch
+
+from deepmd.dpmodel import (
+    FittingOutputDef,
+)
+from deepmd.pt.model.descriptor.base_descriptor import (
+    BaseDescriptor,
+)
+from deepmd.pt.model.task.base_fitting import (
+    BaseFitting,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+from .base_atomic_model import (
+    BaseAtomicModel,
+)
+
+log = logging.getLogger(__name__)
+
+
+@BaseAtomicModel.register("standard")
+class DPAtomicModel(torch.nn.Module, BaseAtomicModel):
+    """Model give atomic prediction of some physical property.
+
+    Parameters
+    ----------
+    descriptor
+            Descriptor
+    fitting_net
+            Fitting net
+    type_map
+            Mapping atom type to the name (str) of the type.
+            For example `type_map[1]` gives the name of the type 1.
+    """
+
+    def __init__(
+        self,
+        descriptor,
+        fitting,
+        type_map: List[str],
+        **kwargs,
+    ):
+        torch.nn.Module.__init__(self)
+        ntypes = len(type_map)
+        self.type_map = type_map
+        self.ntypes = ntypes
+        self.descriptor = descriptor
+        self.rcut = self.descriptor.get_rcut()
+        self.sel = self.descriptor.get_sel()
+        self.fitting_net = fitting
+        # order matters ntypes and type_map should be initialized first.
+        BaseAtomicModel.__init__(self, **kwargs)
+
+    def fitting_output_def(self) -> FittingOutputDef:
+        """Get the output def of the fitting net."""
+        return (
+            self.fitting_net.output_def()
+            if self.fitting_net is not None
+            else self.coord_denoise_net.output_def()
+        )
+
+    @torch.jit.export
+    def get_rcut(self) -> float:
+        """Get the cut-off radius."""
+        return self.rcut
+
+    @torch.jit.export
+    def get_type_map(self) -> List[str]:
+        """Get the type map."""
+        return self.type_map
+
+    def get_sel(self) -> List[int]:
+        """Get the neighbor selection."""
+        return self.sel
+
+    def mixed_types(self) -> bool:
+        """If true, the model
+        1. assumes total number of atoms aligned across frames;
+        2. uses a neighbor list that does not distinguish different atomic types.
+
+        If false, the model
+        1. assumes total number of atoms of each atom type aligned across frames;
+        2. uses a neighbor list that distinguishes different atomic types.
+
+        """
+        return self.descriptor.mixed_types()
+
+    def serialize(self) -> dict:
+        dd = BaseAtomicModel.serialize(self)
+        dd.update(
+            {
+                "@class": "Model",
+                "@version": 1,
+                "type": "standard",
+                "type_map": self.type_map,
+                "descriptor": self.descriptor.serialize(),
+                "fitting": self.fitting_net.serialize(),
+            }
+        )
+        return dd
+
+    @classmethod
+    def deserialize(cls, data) -> "DPAtomicModel":
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data.pop("@class", None)
+        data.pop("type", None)
+        descriptor_obj = BaseDescriptor.deserialize(data.pop("descriptor"))
+        fitting_obj = BaseFitting.deserialize(data.pop("fitting"))
+        type_map = data.pop("type_map", None)
+        obj = cls(descriptor_obj, fitting_obj, type_map=type_map, **data)
+        return obj
+
+    def forward_atomic(
+        self,
+        extended_coord,
+        extended_atype,
+        nlist,
+        mapping: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+    ) -> Dict[str, torch.Tensor]:
+        """Return atomic prediction.
+
+        Parameters
+        ----------
+        extended_coord
+            coodinates in extended region
+        extended_atype
+            atomic type in extended region
+        nlist
+            neighbor list. nf x nloc x nsel
+        mapping
+            mapps the extended indices to local indices
+        fparam
+            frame parameter. nf x ndf
+        aparam
+            atomic parameter. nf x nloc x nda
+
+        Returns
+        -------
+        result_dict
+            the result dict, defined by the `FittingOutputDef`.
+
+        """
+        nframes, nloc, nnei = nlist.shape
+        atype = extended_atype[:, :nloc]
+        if self.do_grad_r() or self.do_grad_c():
+            extended_coord.requires_grad_(True)
+        descriptor, rot_mat, g2, h2, sw = self.descriptor(
+            extended_coord,
+            extended_atype,
+            nlist,
+            mapping=mapping,
+        )
+        assert descriptor is not None
+        # energy, force
+        fit_ret = self.fitting_net(
+            descriptor,
+            atype,
+            gr=rot_mat,
+            g2=g2,
+            h2=h2,
+            fparam=fparam,
+            aparam=aparam,
+        )
+        return fit_ret
+
+    def compute_or_load_stat(
+        self,
+        sampled_func,
+        stat_file_path: Optional[DPPath] = None,
+    ):
+        """
+        Compute or load the statistics parameters of the model,
+        such as mean and standard deviation of descriptors or the energy bias of the fitting net.
+        When `sampled` is provided, all the statistics parameters will be calculated (or re-calculated for update),
+        and saved in the `stat_file_path`(s).
+        When `sampled` is not provided, it will check the existence of `stat_file_path`(s)
+        and load the calculated statistics parameters.
+
+        Parameters
+        ----------
+        sampled_func
+            The lazy sampled function to get data frames from different data systems.
+        stat_file_path
+            The dictionary of paths to the statistics files.
+        """
+        if stat_file_path is not None and self.type_map is not None:
+            # descriptors and fitting net with different type_map
+            # should not share the same parameters
+            stat_file_path /= " ".join(self.type_map)
+
+        @functools.lru_cache
+        def wrapped_sampler():
+            sampled = sampled_func()
+            if self.pair_excl is not None:
+                pair_exclude_types = self.pair_excl.get_exclude_types()
+                for sample in sampled:
+                    sample["pair_exclude_types"] = list(pair_exclude_types)
+            if self.atom_excl is not None:
+                atom_exclude_types = self.atom_excl.get_exclude_types()
+                for sample in sampled:
+                    sample["atom_exclude_types"] = list(atom_exclude_types)
+            return sampled
+
+        self.descriptor.compute_input_stats(wrapped_sampler, stat_file_path)
+        if self.fitting_net is not None:
+            self.fitting_net.compute_output_stats(wrapped_sampler, stat_file_path)
+
+    def set_out_bias(self, out_bias: torch.Tensor, add=False) -> None:
+        """
+        Modify the output bias for the atomic model.
+
+        Parameters
+        ----------
+        out_bias : torch.Tensor
+            The new bias to be applied.
+        add : bool, optional
+            Whether to add the new bias to the existing one.
+            If False, the output bias will be directly replaced by the new bias.
+            If True, the new bias will be added to the existing one.
+        """
+        self.fitting_net["bias_atom_e"] = (
+            out_bias + self.fitting_net["bias_atom_e"] if add else out_bias
+        )
+
+    def get_out_bias(self) -> torch.Tensor:
+        """Return the output bias of the atomic model."""
+        return self.fitting_net["bias_atom_e"]
+
+    def get_dim_fparam(self) -> int:
+        """Get the number (dimension) of frame parameters of this atomic model."""
+        return self.fitting_net.get_dim_fparam()
+
+    def get_dim_aparam(self) -> int:
+        """Get the number (dimension) of atomic parameters of this atomic model."""
+        return self.fitting_net.get_dim_aparam()
+
+    def get_sel_type(self) -> List[int]:
+        """Get the selected atom types of this model.
+
+        Only atoms with selected atom types have atomic contribution
+        to the result of the model.
+        If returning an empty list, all atom types are selected.
+        """
+        return self.fitting_net.get_sel_type()
+
+    def is_aparam_nall(self) -> bool:
+        """Check whether the shape of atomic parameters is (nframes, nall, ndim).
+
+        If False, the shape is (nframes, nloc, ndim).
+        """
+        return False
diff --git a/deepmd/pt/model/atomic_model/linear_atomic_model.py b/deepmd/pt/model/atomic_model/linear_atomic_model.py
new file mode 100644
index 0000000000..f599399e66
--- /dev/null
+++ b/deepmd/pt/model/atomic_model/linear_atomic_model.py
@@ -0,0 +1,506 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+from typing import (
+    Dict,
+    List,
+    Optional,
+    Tuple,
+)
+
+import torch
+
+from deepmd.dpmodel import (
+    FittingOutputDef,
+    OutputVariableDef,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.nlist import (
+    build_multiple_neighbor_list,
+    get_multiple_nlist_key,
+    nlist_distinguish_types,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+from .base_atomic_model import (
+    BaseAtomicModel,
+)
+from .dp_atomic_model import (
+    DPAtomicModel,
+)
+from .pairtab_atomic_model import (
+    PairTabAtomicModel,
+)
+
+
+class LinearEnergyAtomicModel(torch.nn.Module, BaseAtomicModel):
+    """Linear model make linear combinations of several existing models.
+
+    Parameters
+    ----------
+    models : list[DPAtomicModel or PairTabAtomicModel]
+        A list of models to be combined. PairTabAtomicModel must be used together with a DPAtomicModel.
+    type_map : list[str]
+        Mapping atom type to the name (str) of the type.
+        For example `type_map[1]` gives the name of the type 1.
+    """
+
+    def __init__(
+        self,
+        models: List[BaseAtomicModel],
+        type_map: List[str],
+        **kwargs,
+    ):
+        torch.nn.Module.__init__(self)
+        self.models = torch.nn.ModuleList(models)
+        sub_model_type_maps = [md.get_type_map() for md in models]
+        err_msg = []
+        self.mapping_list = []
+        common_type_map = set(type_map)
+        self.type_map = type_map
+        for tpmp in sub_model_type_maps:
+            if not common_type_map.issubset(set(tpmp)):
+                err_msg.append(
+                    f"type_map {tpmp} is not a subset of type_map {type_map}"
+                )
+            self.mapping_list.append(self.remap_atype(tpmp, self.type_map))
+        assert len(err_msg) == 0, "\n".join(err_msg)
+
+        self.atomic_bias = None
+        self.mixed_types_list = [model.mixed_types() for model in self.models]
+        self.rcuts = torch.tensor(
+            self.get_model_rcuts(), dtype=torch.float64, device=env.DEVICE
+        )
+        self.nsels = torch.tensor(self.get_model_nsels(), device=env.DEVICE)
+        BaseAtomicModel.__init__(self, **kwargs)
+
+    def mixed_types(self) -> bool:
+        """If true, the model
+        1. assumes total number of atoms aligned across frames;
+        2. uses a neighbor list that does not distinguish different atomic types.
+
+        If false, the model
+        1. assumes total number of atoms of each atom type aligned across frames;
+        2. uses a neighbor list that distinguishes different atomic types.
+
+        """
+        return True
+
+    def get_rcut(self) -> float:
+        """Get the cut-off radius."""
+        return max(self.get_model_rcuts())
+
+    def get_type_map(self) -> List[str]:
+        """Get the type map."""
+        return self.type_map
+
+    def get_model_rcuts(self) -> List[float]:
+        """Get the cut-off radius for each individual models."""
+        return [model.get_rcut() for model in self.models]
+
+    def get_sel(self) -> List[int]:
+        return [max([model.get_nsel() for model in self.models])]
+
+    def get_model_nsels(self) -> List[int]:
+        """Get the processed sels for each individual models. Not distinguishing types."""
+        return [model.get_nsel() for model in self.models]
+
+    def get_model_sels(self) -> List[List[int]]:
+        """Get the sels for each individual models."""
+        return [model.get_sel() for model in self.models]
+
+    def _sort_rcuts_sels(self) -> Tuple[List[float], List[int]]:
+        # sort the pair of rcut and sels in ascending order, first based on sel, then on rcut.
+        zipped = torch.stack(
+            [
+                self.rcuts,
+                self.nsels,
+            ],
+            dim=0,
+        ).T
+        inner_sorting = torch.argsort(zipped[:, 1], dim=0)
+        inner_sorted = zipped[inner_sorting]
+        outer_sorting = torch.argsort(inner_sorted[:, 0], stable=True)
+        outer_sorted = inner_sorted[outer_sorting]
+        sorted_rcuts: List[float] = outer_sorted[:, 0].tolist()
+        sorted_sels: List[int] = outer_sorted[:, 1].to(torch.int64).tolist()
+        return sorted_rcuts, sorted_sels
+
+    def forward_atomic(
+        self,
+        extended_coord: torch.Tensor,
+        extended_atype: torch.Tensor,
+        nlist: torch.Tensor,
+        mapping: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+    ) -> Dict[str, torch.Tensor]:
+        """Return atomic prediction.
+
+        Parameters
+        ----------
+        extended_coord
+            coodinates in extended region, (nframes, nall * 3)
+        extended_atype
+            atomic type in extended region, (nframes, nall)
+        nlist
+            neighbor list, (nframes, nloc, nsel).
+        mapping
+            mapps the extended indices to local indices.
+        fparam
+            frame parameter. (nframes, ndf)
+        aparam
+            atomic parameter. (nframes, nloc, nda)
+
+        Returns
+        -------
+        result_dict
+            the result dict, defined by the fitting net output def.
+        """
+        nframes, nloc, nnei = nlist.shape
+        if self.do_grad_r() or self.do_grad_c():
+            extended_coord.requires_grad_(True)
+        extended_coord = extended_coord.view(nframes, -1, 3)
+        sorted_rcuts, sorted_sels = self._sort_rcuts_sels()
+        nlists = build_multiple_neighbor_list(
+            extended_coord,
+            nlist,
+            sorted_rcuts,
+            sorted_sels,
+        )
+        raw_nlists = [
+            nlists[get_multiple_nlist_key(rcut, sel)]
+            for rcut, sel in zip(self.get_model_rcuts(), self.get_model_nsels())
+        ]
+        nlists_ = [
+            nl if mt else nlist_distinguish_types(nl, extended_atype, sel)
+            for mt, nl, sel in zip(
+                self.mixed_types_list, raw_nlists, self.get_model_sels()
+            )
+        ]
+        ener_list = []
+
+        for i, model in enumerate(self.models):
+            mapping = self.mapping_list[i]
+            ener_list.append(
+                model.forward_atomic(
+                    extended_coord,
+                    mapping[extended_atype],
+                    nlists_[i],
+                    mapping,
+                    fparam,
+                    aparam,
+                )["energy"]
+            )
+
+        weights = self._compute_weight(extended_coord, extended_atype, nlists_)
+
+        atype = extended_atype[:, :nloc]
+        for idx, model in enumerate(self.models):
+            # TODO: provide interfaces for atomic models to access bias_atom_e
+            if isinstance(model, DPAtomicModel):
+                bias_atom_e = model.fitting_net.bias_atom_e
+            elif isinstance(model, PairTabAtomicModel):
+                bias_atom_e = model.bias_atom_e
+            else:
+                bias_atom_e = None
+            if bias_atom_e is not None:
+                ener_list[idx] += bias_atom_e[atype]
+
+        fit_ret = {
+            "energy": torch.sum(torch.stack(ener_list) * torch.stack(weights), dim=0),
+        }  # (nframes, nloc, 1)
+        return fit_ret
+
+    @staticmethod
+    def remap_atype(ori_map: List[str], new_map: List[str]) -> torch.Tensor:
+        """
+        This method is used to map the atype from the common type_map to the original type_map of
+        indivial AtomicModels. It creates a index mapping for the conversion.
+
+        Parameters
+        ----------
+        ori_map : List[str]
+            The original type map of an AtomicModel.
+        new_map : List[str]
+            The common type map of the DPZBLLinearEnergyAtomicModel, created by the `get_type_map` method,
+            must be a subset of the ori_map.
+
+        Returns
+        -------
+        torch.Tensor
+        """
+        type_2_idx = {atp: idx for idx, atp in enumerate(ori_map)}
+        # this maps the atype in the new map to the original map
+        mapping = torch.tensor(
+            [type_2_idx[new_map[idx]] for idx in range(len(new_map))], device=env.DEVICE
+        )
+        return mapping
+
+    def fitting_output_def(self) -> FittingOutputDef:
+        return FittingOutputDef(
+            [
+                OutputVariableDef(
+                    name="energy",
+                    shape=[1],
+                    reduciable=True,
+                    r_differentiable=True,
+                    c_differentiable=True,
+                )
+            ]
+        )
+
+    def serialize(self) -> dict:
+        return {
+            "@class": "Model",
+            "@version": 1,
+            "type": "linear",
+            "models": [model.serialize() for model in self.models],
+            "type_map": self.type_map,
+        }
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "LinearEnergyAtomicModel":
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data.pop("@class")
+        data.pop("type")
+        type_map = data.pop("type_map")
+        models = [
+            BaseAtomicModel.get_class_by_type(model["type"]).deserialize(model)
+            for model in data["models"]
+        ]
+        data.pop("models")
+        return cls(models, type_map, **data)
+
+    def _compute_weight(
+        self, extended_coord, extended_atype, nlists_
+    ) -> List[torch.Tensor]:
+        """This should be a list of user defined weights that matches the number of models to be combined."""
+        nmodels = len(self.models)
+        return [
+            torch.ones(1, dtype=torch.float64, device=env.DEVICE) / nmodels
+            for _ in range(nmodels)
+        ]
+
+    def set_out_bias(self, out_bias: torch.Tensor, add=False) -> None:
+        """
+        Modify the output bias for all the models in the linear atomic model.
+
+        Parameters
+        ----------
+        out_bias : torch.Tensor
+            The new bias to be applied.
+        add : bool, optional
+            Whether to add the new bias to the existing one.
+            If False, the output bias will be directly replaced by the new bias.
+            If True, the new bias will be added to the existing one.
+        """
+        for model in self.models:
+            model.set_out_bias(out_bias, add=add)
+
+    def get_out_bias(self) -> torch.Tensor:
+        """Return the weighted output bias of the linear atomic model."""
+        # TODO add get_out_bias for linear atomic model
+        raise NotImplementedError
+
+    def get_dim_fparam(self) -> int:
+        """Get the number (dimension) of frame parameters of this atomic model."""
+        # tricky...
+        return max([model.get_dim_fparam() for model in self.models])
+
+    def get_dim_aparam(self) -> int:
+        """Get the number (dimension) of atomic parameters of this atomic model."""
+        return max([model.get_dim_aparam() for model in self.models])
+
+    def get_sel_type(self) -> List[int]:
+        """Get the selected atom types of this model.
+
+        Only atoms with selected atom types have atomic contribution
+        to the result of the model.
+        If returning an empty list, all atom types are selected.
+        """
+        if any(model.get_sel_type() == [] for model in self.models):
+            return []
+        # join all the selected types
+        # make torch.jit happy...
+        return torch.unique(
+            torch.cat(
+                [
+                    torch.as_tensor(model.get_sel_type(), dtype=torch.int32)
+                    for model in self.models
+                ]
+            )
+        ).tolist()
+
+    def is_aparam_nall(self) -> bool:
+        """Check whether the shape of atomic parameters is (nframes, nall, ndim).
+
+        If False, the shape is (nframes, nloc, ndim).
+        """
+        return False
+
+
+class DPZBLLinearEnergyAtomicModel(LinearEnergyAtomicModel):
+    """Model linearly combine a list of AtomicModels.
+
+    Parameters
+    ----------
+    dp_model
+        The DPAtomicModel being combined.
+    zbl_model
+        The PairTable model being combined.
+    sw_rmin
+        The lower boundary of the interpolation between short-range tabulated interaction and DP.
+    sw_rmax
+        The upper boundary of the interpolation between short-range tabulated interaction and DP.
+    type_map
+        Mapping atom type to the name (str) of the type.
+        For example `type_map[1]` gives the name of the type 1.
+    smin_alpha
+        The short-range tabulated interaction will be swithed according to the distance of the nearest neighbor.
+        This distance is calculated by softmin.
+    """
+
+    def __init__(
+        self,
+        dp_model: DPAtomicModel,
+        zbl_model: PairTabAtomicModel,
+        sw_rmin: float,
+        sw_rmax: float,
+        type_map: List[str],
+        smin_alpha: Optional[float] = 0.1,
+        **kwargs,
+    ):
+        models = [dp_model, zbl_model]
+        super().__init__(models, type_map, **kwargs)
+
+        self.sw_rmin = sw_rmin
+        self.sw_rmax = sw_rmax
+        self.smin_alpha = smin_alpha
+
+        # this is a placeholder being updated in _compute_weight, to handle Jit attribute init error.
+        self.zbl_weight = torch.empty(0, dtype=torch.float64, device=env.DEVICE)
+
+    def compute_or_load_stat(
+        self,
+        sampled_func,
+        stat_file_path: Optional[DPPath] = None,
+    ):
+        """
+        Compute or load the statistics parameters of the model,
+        such as mean and standard deviation of descriptors or the energy bias of the fitting net.
+        When `sampled` is provided, all the statistics parameters will be calculated (or re-calculated for update),
+        and saved in the `stat_file_path`(s).
+        When `sampled` is not provided, it will check the existence of `stat_file_path`(s)
+        and load the calculated statistics parameters.
+
+        Parameters
+        ----------
+        sampled_func
+            The lazy sampled function to get data frames from different data systems.
+        stat_file_path
+            The dictionary of paths to the statistics files.
+        """
+        self.models[0].compute_or_load_stat(sampled_func, stat_file_path)
+        self.models[1].compute_or_load_stat(sampled_func, stat_file_path)
+
+    def serialize(self) -> dict:
+        dd = BaseAtomicModel.serialize(self)
+        dd.update(
+            {
+                "@class": "Model",
+                "@version": 2,
+                "type": "zbl",
+                "models": LinearEnergyAtomicModel(
+                    models=[self.models[0], self.models[1]], type_map=self.type_map
+                ).serialize(),
+                "sw_rmin": self.sw_rmin,
+                "sw_rmax": self.sw_rmax,
+                "smin_alpha": self.smin_alpha,
+            }
+        )
+        return dd
+
+    @classmethod
+    def deserialize(cls, data) -> "DPZBLLinearEnergyAtomicModel":
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 2, 1)
+        sw_rmin = data.pop("sw_rmin")
+        sw_rmax = data.pop("sw_rmax")
+        smin_alpha = data.pop("smin_alpha")
+        linear_model = LinearEnergyAtomicModel.deserialize(data.pop("models"))
+        dp_model, zbl_model = linear_model.models
+        type_map = linear_model.type_map
+
+        data.pop("@class", None)
+        data.pop("type", None)
+        return cls(
+            dp_model=dp_model,
+            zbl_model=zbl_model,
+            sw_rmin=sw_rmin,
+            sw_rmax=sw_rmax,
+            type_map=type_map,
+            smin_alpha=smin_alpha,
+            **data,
+        )
+
+    def _compute_weight(
+        self,
+        extended_coord: torch.Tensor,
+        extended_atype: torch.Tensor,
+        nlists_: List[torch.Tensor],
+    ) -> List[torch.Tensor]:
+        """ZBL weight.
+
+        Returns
+        -------
+        List[torch.Tensor]
+            the atomic ZBL weight for interpolation. (nframes, nloc, 1)
+        """
+        assert (
+            self.sw_rmax > self.sw_rmin
+        ), "The upper boundary `sw_rmax` must be greater than the lower boundary `sw_rmin`."
+
+        dp_nlist = nlists_[0]
+        zbl_nlist = nlists_[1]
+
+        zbl_nnei = zbl_nlist.shape[-1]
+        dp_nnei = dp_nlist.shape[-1]
+
+        # use the larger rr based on nlist
+        nlist_larger = zbl_nlist if zbl_nnei >= dp_nnei else dp_nlist
+        masked_nlist = torch.clamp(nlist_larger, 0)
+        pairwise_rr = PairTabAtomicModel._get_pairwise_dist(
+            extended_coord, masked_nlist
+        )
+        numerator = torch.sum(
+            pairwise_rr * torch.exp(-pairwise_rr / self.smin_alpha), dim=-1
+        )  # masked nnei will be zero, no need to handle
+        denominator = torch.sum(
+            torch.where(
+                nlist_larger != -1,
+                torch.exp(-pairwise_rr / self.smin_alpha),
+                torch.zeros_like(nlist_larger),
+            ),
+            dim=-1,
+        )  # handle masked nnei.
+
+        sigma = numerator / torch.clamp(denominator, 1e-20)  # nfrmes, nloc
+        u = (sigma - self.sw_rmin) / (self.sw_rmax - self.sw_rmin)
+        coef = torch.zeros_like(u)
+        left_mask = sigma < self.sw_rmin
+        mid_mask = (self.sw_rmin <= sigma) & (sigma < self.sw_rmax)
+        right_mask = sigma >= self.sw_rmax
+        coef[left_mask] = 1
+        smooth = -6 * u**5 + 15 * u**4 - 10 * u**3 + 1
+        coef[mid_mask] = smooth[mid_mask]
+        coef[right_mask] = 0
+        self.zbl_weight = coef  # nframes, nloc
+        return [1 - coef.unsqueeze(-1), coef.unsqueeze(-1)]  # to match the model order.
diff --git a/deepmd/pt/model/atomic_model/pairtab_atomic_model.py b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py
new file mode 100644
index 0000000000..4db77790e9
--- /dev/null
+++ b/deepmd/pt/model/atomic_model/pairtab_atomic_model.py
@@ -0,0 +1,505 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+from typing import (
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Union,
+)
+
+import torch
+
+from deepmd.dpmodel import (
+    FittingOutputDef,
+    OutputVariableDef,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.stat import (
+    compute_output_stats,
+)
+from deepmd.utils.pair_tab import (
+    PairTab,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+from .base_atomic_model import (
+    BaseAtomicModel,
+)
+
+
+@BaseAtomicModel.register("pairtab")
+class PairTabAtomicModel(torch.nn.Module, BaseAtomicModel):
+    """Pairwise tabulation energy model.
+
+    This model can be used to tabulate the pairwise energy between atoms for either
+    short-range or long-range interactions, such as D3, LJ, ZBL, etc. It should not
+    be used alone, but rather as one submodel of a linear (sum) model, such as
+    DP+D3.
+
+    Do not put the model on the first model of a linear model, since the linear
+    model fetches the type map from the first model.
+
+    At this moment, the model does not smooth the energy at the cutoff radius, so
+    one needs to make sure the energy has been smoothed to zero.
+
+    Parameters
+    ----------
+    tab_file : str
+        The path to the tabulation file.
+    rcut : float
+        The cutoff radius.
+    sel : int or list[int]
+        The maxmum number of atoms in the cut-off radius.
+    type_map : List[str]
+        Mapping atom type to the name (str) of the type.
+        For example `type_map[1]` gives the name of the type 1.
+    rcond : float, optional
+        The condition number for the regression of atomic energy.
+    atom_ener
+        Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descrptor should be set.
+
+    """
+
+    def __init__(
+        self,
+        tab_file: str,
+        rcut: float,
+        sel: Union[int, List[int]],
+        type_map: List[str],
+        rcond: Optional[float] = None,
+        atom_ener: Optional[List[float]] = None,
+        **kwargs,
+    ):
+        torch.nn.Module.__init__(self)
+        self.tab_file = tab_file
+        self.rcut = rcut
+        self.tab = self._set_pairtab(tab_file, rcut)
+
+        BaseAtomicModel.__init__(self, **kwargs)
+        self.rcond = rcond
+        self.atom_ener = atom_ener
+        self.type_map = type_map
+        self.ntypes = len(type_map)
+
+        # handle deserialization with no input file
+        if self.tab_file is not None:
+            (
+                tab_info,
+                tab_data,
+            ) = self.tab.get()  # this returns -> Tuple[np.array, np.array]
+            nspline, ntypes_tab = tab_info[-2:].astype(int)
+            self.register_buffer("tab_info", torch.from_numpy(tab_info))
+            self.register_buffer(
+                "tab_data",
+                torch.from_numpy(tab_data).reshape(ntypes_tab, ntypes_tab, nspline, 4),
+            )
+            if self.ntypes != ntypes_tab:
+                raise ValueError(
+                    "The `type_map` provided does not match the number of columns in the table."
+                )
+        else:
+            self.register_buffer("tab_info", None)
+            self.register_buffer("tab_data", None)
+        self.bias_atom_e = torch.zeros(
+            self.ntypes, 1, dtype=env.GLOBAL_PT_ENER_FLOAT_PRECISION, device=env.DEVICE
+        )
+
+        # self.model_type = "ener"
+        # self.model_version = MODEL_VERSION ## this shoud be in the parent class
+
+        if isinstance(sel, int):
+            self.sel = sel
+        elif isinstance(sel, list):
+            self.sel = sum(sel)
+        else:
+            raise TypeError("sel must be int or list[int]")
+
+    @torch.jit.ignore
+    def _set_pairtab(self, tab_file: str, rcut: float) -> PairTab:
+        return PairTab(tab_file, rcut)
+
+    def fitting_output_def(self) -> FittingOutputDef:
+        return FittingOutputDef(
+            [
+                OutputVariableDef(
+                    name="energy",
+                    shape=[1],
+                    reduciable=True,
+                    r_differentiable=True,
+                    c_differentiable=True,
+                )
+            ]
+        )
+
+    def get_rcut(self) -> float:
+        return self.rcut
+
+    def get_type_map(self) -> List[str]:
+        return self.type_map
+
+    def get_sel(self) -> List[int]:
+        return [self.sel]
+
+    def get_nsel(self) -> int:
+        return self.sel
+
+    def mixed_types(self) -> bool:
+        """If true, the model
+        1. assumes total number of atoms aligned across frames;
+        2. uses a neighbor list that does not distinguish different atomic types.
+
+        If false, the model
+        1. assumes total number of atoms of each atom type aligned across frames;
+        2. uses a neighbor list that distinguishes different atomic types.
+
+        """
+        # to match DPA1 and DPA2.
+        return True
+
+    def serialize(self) -> dict:
+        dd = BaseAtomicModel.serialize(self)
+        dd.update(
+            {
+                "@class": "Model",
+                "@version": 1,
+                "type": "pairtab",
+                "tab": self.tab.serialize(),
+                "rcut": self.rcut,
+                "sel": self.sel,
+                "type_map": self.type_map,
+                "rcond": self.rcond,
+                "atom_ener": self.atom_ener,
+            }
+        )
+        return dd
+
+    @classmethod
+    def deserialize(cls, data) -> "PairTabAtomicModel":
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        rcut = data.pop("rcut")
+        sel = data.pop("sel")
+        type_map = data.pop("type_map")
+        rcond = data.pop("rcond")
+        atom_ener = data.pop("atom_ener")
+        tab = PairTab.deserialize(data.pop("tab"))
+        data.pop("@class", None)
+        data.pop("type", None)
+        tab_model = cls(None, rcut, sel, type_map, rcond, atom_ener, **data)
+
+        tab_model.tab = tab
+        tab_model.register_buffer("tab_info", torch.from_numpy(tab_model.tab.tab_info))
+        nspline, ntypes = tab_model.tab.tab_info[-2:].astype(int)
+        tab_model.register_buffer(
+            "tab_data",
+            torch.from_numpy(tab_model.tab.tab_data).reshape(
+                ntypes, ntypes, nspline, 4
+            ),
+        )
+        return tab_model
+
+    def compute_or_load_stat(
+        self,
+        merged: Union[Callable[[], List[dict]], List[dict]],
+        stat_file_path: Optional[DPPath] = None,
+    ):
+        """
+        Compute the output statistics (e.g. energy bias) for the fitting net from packed data.
+
+        Parameters
+        ----------
+        merged : Union[Callable[[], List[dict]], List[dict]]
+            - List[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], List[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        stat_file_path : Optional[DPPath]
+            The path to the stat file.
+
+        """
+        bias_atom_e = compute_output_stats(
+            merged,
+            self.ntypes,
+            keys=["energy"],
+            stat_file_path=stat_file_path,
+            rcond=self.rcond,
+            atom_ener=self.atom_ener,
+        )["energy"]
+        self.bias_atom_e.copy_(
+            torch.tensor(bias_atom_e, device=env.DEVICE).view([self.ntypes, 1])
+        )
+
+    def set_out_bias(self, out_bias: torch.Tensor, add=False) -> None:
+        """
+        Modify the output bias for the atomic model.
+
+        Parameters
+        ----------
+        out_bias : torch.Tensor
+            The new bias to be applied.
+        add : bool, optional
+            Whether to add the new bias to the existing one.
+            If False, the output bias will be directly replaced by the new bias.
+            If True, the new bias will be added to the existing one.
+        """
+        self.bias_atom_e = out_bias + self.bias_atom_e if add else out_bias
+
+    def get_out_bias(self) -> torch.Tensor:
+        """Return the output bias of the atomic model."""
+        return self.bias_atom_e
+
+    def forward_atomic(
+        self,
+        extended_coord: torch.Tensor,
+        extended_atype: torch.Tensor,
+        nlist: torch.Tensor,
+        mapping: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        do_atomic_virial: bool = False,
+    ) -> Dict[str, torch.Tensor]:
+        nframes, nloc, nnei = nlist.shape
+        extended_coord = extended_coord.view(nframes, -1, 3)
+        if self.do_grad_r() or self.do_grad_c():
+            extended_coord.requires_grad_(True)
+
+        # this will mask all -1 in the nlist
+        mask = nlist >= 0
+        masked_nlist = nlist * mask
+
+        atype = extended_atype[:, :nloc]  # (nframes, nloc)
+        pairwise_rr = self._get_pairwise_dist(
+            extended_coord, masked_nlist
+        )  # (nframes, nloc, nnei)
+        self.tab_data = self.tab_data.to(device=extended_coord.device).view(
+            int(self.tab_info[-1]), int(self.tab_info[-1]), int(self.tab_info[2]), 4
+        )
+
+        # to calculate the atomic_energy, we need 3 tensors, i_type, j_type, pairwise_rr
+        # i_type : (nframes, nloc), this is atype.
+        # j_type : (nframes, nloc, nnei)
+        j_type = extended_atype[
+            torch.arange(extended_atype.size(0), device=extended_coord.device)[
+                :, None, None
+            ],
+            masked_nlist,
+        ]
+
+        raw_atomic_energy = self._pair_tabulated_inter(
+            nlist, atype, j_type, pairwise_rr
+        )
+
+        atomic_energy = 0.5 * torch.sum(
+            torch.where(
+                nlist != -1, raw_atomic_energy, torch.zeros_like(raw_atomic_energy)
+            ),
+            dim=-1,
+        ).unsqueeze(-1)
+
+        return {"energy": atomic_energy}
+
+    def _pair_tabulated_inter(
+        self,
+        nlist: torch.Tensor,
+        i_type: torch.Tensor,
+        j_type: torch.Tensor,
+        rr: torch.Tensor,
+    ) -> torch.Tensor:
+        """Pairwise tabulated energy.
+
+        Parameters
+        ----------
+        nlist : torch.Tensor
+            The unmasked neighbour list. (nframes, nloc)
+        i_type : torch.Tensor
+            The integer representation of atom type for all local atoms for all frames. (nframes, nloc)
+        j_type : torch.Tensor
+            The integer representation of atom type for all neighbour atoms of all local atoms for all frames. (nframes, nloc, nnei)
+        rr : torch.Tensor
+            The salar distance vector between two atoms. (nframes, nloc, nnei)
+
+        Returns
+        -------
+        torch.Tensor
+            The masked atomic energy for all local atoms for all frames. (nframes, nloc, nnei)
+
+        Raises
+        ------
+        Exception
+            If the distance is beyond the table.
+
+        Notes
+        -----
+        This function is used to calculate the pairwise energy between two atoms.
+        It uses a table containing cubic spline coefficients calculated in PairTab.
+        """
+        nframes, nloc, nnei = nlist.shape
+        rmin = self.tab_info[0]
+        hh = self.tab_info[1]
+        hi = 1.0 / hh
+
+        nspline = int(self.tab_info[2] + 0.1)
+
+        uu = (rr - rmin) * hi  # this is broadcasted to (nframes,nloc,nnei)
+
+        # if nnei of atom 0 has -1 in the nlist, uu would be 0.
+        # this is to handle the nlist where the mask is set to 0, so that we don't raise exception for those atoms.
+        uu = torch.where(nlist != -1, uu, nspline + 1)
+
+        if torch.any(uu < 0):
+            raise Exception("coord go beyond table lower boundary")
+
+        idx = uu.to(torch.int)
+
+        uu -= idx
+
+        table_coef = self._extract_spline_coefficient(
+            i_type, j_type, idx, self.tab_data, nspline
+        )
+        table_coef = table_coef.view(nframes, nloc, nnei, 4)
+        ener = self._calculate_ener(table_coef, uu)
+
+        # here we need to overwrite energy to zero at rcut and beyond.
+        mask_beyond_rcut = rr >= self.rcut
+        # also overwrite values beyond extrapolation to zero
+        extrapolation_mask = rr >= rmin + nspline * hh
+        ener[mask_beyond_rcut] = 0
+        ener[extrapolation_mask] = 0
+
+        return ener
+
+    @staticmethod
+    def _get_pairwise_dist(coords: torch.Tensor, nlist: torch.Tensor) -> torch.Tensor:
+        """Get pairwise distance `dr`.
+
+        Parameters
+        ----------
+        coords : torch.Tensor
+            The coordinate of the atoms, shape of (nframes, nall, 3).
+        nlist
+            The masked nlist, shape of (nframes, nloc, nnei)
+
+        Returns
+        -------
+        torch.Tensor
+            The pairwise distance between the atoms (nframes, nloc, nnei).
+        """
+        nframes, nloc, nnei = nlist.shape
+        coord_l = coords[:, :nloc].view(nframes, -1, 1, 3)
+        index = nlist.view(nframes, -1).unsqueeze(-1).expand(-1, -1, 3)
+        coord_r = torch.gather(coords, 1, index)
+        coord_r = coord_r.view(nframes, nloc, nnei, 3)
+        diff = coord_r - coord_l
+        pairwise_rr = torch.linalg.norm(diff, dim=-1, keepdim=True).squeeze(-1)
+        return pairwise_rr
+
+    @staticmethod
+    def _extract_spline_coefficient(
+        i_type: torch.Tensor,
+        j_type: torch.Tensor,
+        idx: torch.Tensor,
+        tab_data: torch.Tensor,
+        nspline: int,
+    ) -> torch.Tensor:
+        """Extract the spline coefficient from the table.
+
+        Parameters
+        ----------
+        i_type : torch.Tensor
+            The integer representation of atom type for all local atoms for all frames. (nframes, nloc)
+        j_type : torch.Tensor
+            The integer representation of atom type for all neighbour atoms of all local atoms for all frames. (nframes, nloc, nnei)
+        idx : torch.Tensor
+            The index of the spline coefficient. (nframes, nloc, nnei)
+        tab_data : torch.Tensor
+            The table storing all the spline coefficient. (ntype, ntype, nspline, 4)
+        nspline : int
+            The number of splines in the table.
+
+        Returns
+        -------
+        torch.Tensor
+            The spline coefficient. (nframes, nloc, nnei, 4), shape may be squeezed.
+
+        """
+        # (nframes, nloc, nnei)
+        expanded_i_type = i_type.unsqueeze(-1).expand(-1, -1, j_type.shape[-1])
+
+        # handle the case where idx is beyond the number of splines
+        clipped_indices = torch.clamp(idx, 0, nspline - 1).to(torch.int64)
+
+        nframes = i_type.shape[0]
+        nloc = i_type.shape[1]
+        nnei = j_type.shape[2]
+        ntypes = tab_data.shape[0]
+        # tab_data_idx: (nframes, nloc, nnei)
+        tab_data_idx = (
+            expanded_i_type * ntypes * nspline + j_type * nspline + clipped_indices
+        )
+        # tab_data: (ntype, ntype, nspline, 4)
+        tab_data = tab_data.view(ntypes * ntypes * nspline, 4)
+        # tab_data_idx: (nframes * nloc * nnei, 4)
+        tab_data_idx = tab_data_idx.view(nframes * nloc * nnei, 1).expand(-1, 4)
+        # (nframes, nloc, nnei, 4)
+        final_coef = torch.gather(tab_data, 0, tab_data_idx).view(
+            nframes, nloc, nnei, 4
+        )
+
+        # when the spline idx is beyond the table, all spline coefficients are set to `0`, and the resulting ener corresponding to the idx is also `0`.
+        final_coef[idx > nspline] = 0
+        return final_coef
+
+    @staticmethod
+    def _calculate_ener(coef: torch.Tensor, uu: torch.Tensor) -> torch.Tensor:
+        """Calculate energy using spline coeeficients.
+
+        Parameters
+        ----------
+        coef : torch.Tensor
+            The spline coefficients. (nframes, nloc, nnei, 4)
+        uu : torch.Tensor
+            The atom displancemnt used in interpolation and extrapolation (nframes, nloc, nnei)
+
+        Returns
+        -------
+        torch.Tensor
+            The atomic energy for all local atoms for all frames. (nframes, nloc, nnei)
+        """
+        a3, a2, a1, a0 = torch.unbind(coef, dim=-1)
+        etmp = (a3 * uu + a2) * uu + a1  # this should be elementwise operations.
+        ener = etmp * uu + a0  # this energy has the extrapolated value when rcut > rmax
+        return ener
+
+    def get_dim_fparam(self) -> int:
+        """Get the number (dimension) of frame parameters of this atomic model."""
+        return 0
+
+    def get_dim_aparam(self) -> int:
+        """Get the number (dimension) of atomic parameters of this atomic model."""
+        return 0
+
+    def get_sel_type(self) -> List[int]:
+        """Get the selected atom types of this model.
+
+        Only atoms with selected atom types have atomic contribution
+        to the result of the model.
+        If returning an empty list, all atom types are selected.
+        """
+        return []
+
+    def is_aparam_nall(self) -> bool:
+        """Check whether the shape of atomic parameters is (nframes, nall, ndim).
+
+        If False, the shape is (nframes, nloc, ndim).
+        """
+        return False
diff --git a/deepmd/pt/model/backbone/__init__.py b/deepmd/pt/model/backbone/__init__.py
new file mode 100644
index 0000000000..a76bdb2a2d
--- /dev/null
+++ b/deepmd/pt/model/backbone/__init__.py
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from .backbone import (
+    BackBone,
+)
+from .evoformer2b import (
+    Evoformer2bBackBone,
+)
+
+__all__ = [
+    "BackBone",
+    "Evoformer2bBackBone",
+]
diff --git a/deepmd/pt/model/backbone/backbone.py b/deepmd/pt/model/backbone/backbone.py
new file mode 100644
index 0000000000..ddeedfeff5
--- /dev/null
+++ b/deepmd/pt/model/backbone/backbone.py
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import torch
+
+
+class BackBone(torch.nn.Module):
+    def __init__(self, **kwargs):
+        """BackBone base method."""
+        super().__init__()
+
+    def forward(self, **kwargs):
+        """Calculate backBone."""
+        raise NotImplementedError
diff --git a/deepmd/pt/model/backbone/evoformer2b.py b/deepmd/pt/model/backbone/evoformer2b.py
new file mode 100644
index 0000000000..1146b3a298
--- /dev/null
+++ b/deepmd/pt/model/backbone/evoformer2b.py
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.pt.model.backbone import (
+    BackBone,
+)
+from deepmd.pt.model.network.network import (
+    Evoformer2bEncoder,
+)
+
+
+class Evoformer2bBackBone(BackBone):
+    def __init__(
+        self,
+        nnei,
+        layer_num=6,
+        attn_head=8,
+        atomic_dim=1024,
+        pair_dim=100,
+        feature_dim=1024,
+        ffn_dim=2048,
+        post_ln=False,
+        final_layer_norm=True,
+        final_head_layer_norm=False,
+        emb_layer_norm=False,
+        atomic_residual=False,
+        evo_residual=False,
+        residual_factor=1.0,
+        activation_function="gelu",
+        **kwargs,
+    ):
+        """Construct an evoformer backBone."""
+        super().__init__()
+        self.nnei = nnei
+        self.layer_num = layer_num
+        self.attn_head = attn_head
+        self.atomic_dim = atomic_dim
+        self.pair_dim = pair_dim
+        self.feature_dim = feature_dim
+        self.head_dim = feature_dim // attn_head
+        assert (
+            feature_dim % attn_head == 0
+        ), f"feature_dim {feature_dim} must be divided by attn_head {attn_head}!"
+        self.ffn_dim = ffn_dim
+        self.post_ln = post_ln
+        self.final_layer_norm = final_layer_norm
+        self.final_head_layer_norm = final_head_layer_norm
+        self.emb_layer_norm = emb_layer_norm
+        self.activation_function = activation_function
+        self.atomic_residual = atomic_residual
+        self.evo_residual = evo_residual
+        self.residual_factor = float(residual_factor)
+        self.encoder = Evoformer2bEncoder(
+            nnei=self.nnei,
+            layer_num=self.layer_num,
+            attn_head=self.attn_head,
+            atomic_dim=self.atomic_dim,
+            pair_dim=self.pair_dim,
+            feature_dim=self.feature_dim,
+            ffn_dim=self.ffn_dim,
+            post_ln=self.post_ln,
+            final_layer_norm=self.final_layer_norm,
+            final_head_layer_norm=self.final_head_layer_norm,
+            emb_layer_norm=self.emb_layer_norm,
+            atomic_residual=self.atomic_residual,
+            evo_residual=self.evo_residual,
+            residual_factor=self.residual_factor,
+            activation_function=self.activation_function,
+        )
+
+    def forward(self, atomic_rep, pair_rep, nlist, nlist_type, nlist_mask):
+        """Encoder the atomic and pair representations.
+
+        Args:
+        - atomic_rep: Atomic representation with shape [nframes, nloc, atomic_dim].
+        - pair_rep: Pair representation with shape [nframes, nloc, nnei, pair_dim].
+        - nlist: Neighbor list with shape [nframes, nloc, nnei].
+        - nlist_type: Neighbor types with shape [nframes, nloc, nnei].
+        - nlist_mask: Neighbor mask with shape [nframes, nloc, nnei], `False` if blank.
+
+        Returns
+        -------
+        - atomic_rep: Atomic representation after encoder with shape [nframes, nloc, feature_dim].
+        - transformed_atomic_rep: Transformed atomic representation after encoder with shape [nframes, nloc, atomic_dim].
+        - pair_rep: Pair representation after encoder with shape [nframes, nloc, nnei, attn_head].
+        - delta_pair_rep: Delta pair representation after encoder with shape [nframes, nloc, nnei, attn_head].
+        - norm_x: Normalization loss of atomic_rep.
+        - norm_delta_pair_rep: Normalization loss of delta_pair_rep.
+        """
+        (
+            atomic_rep,
+            transformed_atomic_rep,
+            pair_rep,
+            delta_pair_rep,
+            norm_x,
+            norm_delta_pair_rep,
+        ) = self.encoder(atomic_rep, pair_rep, nlist, nlist_type, nlist_mask)
+        return (
+            atomic_rep,
+            transformed_atomic_rep,
+            pair_rep,
+            delta_pair_rep,
+            norm_x,
+            norm_delta_pair_rep,
+        )
diff --git a/deepmd/pt/model/descriptor/__init__.py b/deepmd/pt/model/descriptor/__init__.py
new file mode 100644
index 0000000000..325cf29e42
--- /dev/null
+++ b/deepmd/pt/model/descriptor/__init__.py
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from .base_descriptor import (
+    BaseDescriptor,
+)
+from .descriptor import (
+    DescriptorBlock,
+    make_default_type_embedding,
+)
+from .dpa1 import (
+    DescrptBlockSeAtten,
+    DescrptDPA1,
+)
+from .dpa2 import (
+    DescrptDPA2,
+)
+from .env_mat import (
+    prod_env_mat,
+)
+from .gaussian_lcc import (
+    DescrptGaussianLcc,
+)
+from .hybrid import (
+    DescrptBlockHybrid,
+    DescrptHybrid,
+)
+from .repformers import (
+    DescrptBlockRepformers,
+)
+from .se_a import (
+    DescrptBlockSeA,
+    DescrptSeA,
+)
+from .se_r import (
+    DescrptSeR,
+)
+
+__all__ = [
+    "BaseDescriptor",
+    "DescriptorBlock",
+    "make_default_type_embedding",
+    "DescrptBlockSeA",
+    "DescrptBlockSeAtten",
+    "DescrptSeA",
+    "DescrptSeR",
+    "DescrptDPA1",
+    "DescrptDPA2",
+    "DescrptHybrid",
+    "prod_env_mat",
+    "DescrptGaussianLcc",
+    "DescrptBlockHybrid",
+    "DescrptBlockRepformers",
+]
diff --git a/deepmd/pt/model/descriptor/base_descriptor.py b/deepmd/pt/model/descriptor/base_descriptor.py
new file mode 100644
index 0000000000..aa142b3acb
--- /dev/null
+++ b/deepmd/pt/model/descriptor/base_descriptor.py
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import torch
+
+from deepmd.dpmodel.descriptor import (
+    make_base_descriptor,
+)
+
+BaseDescriptor = make_base_descriptor(torch.Tensor, "forward")
diff --git a/deepmd/pt/model/descriptor/descriptor.py b/deepmd/pt/model/descriptor/descriptor.py
new file mode 100644
index 0000000000..5aae848aa4
--- /dev/null
+++ b/deepmd/pt/model/descriptor/descriptor.py
@@ -0,0 +1,168 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
+from abc import (
+    ABC,
+    abstractmethod,
+)
+from typing import (
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Union,
+)
+
+import torch
+
+from deepmd.pt.model.network.network import (
+    TypeEmbedNet,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.env_mat_stat import (
+    EnvMatStatSe,
+)
+from deepmd.utils.env_mat_stat import (
+    StatItem,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.plugin import (
+    make_plugin_registry,
+)
+
+log = logging.getLogger(__name__)
+
+
+class DescriptorBlock(torch.nn.Module, ABC, make_plugin_registry("DescriptorBlock")):
+    """The building block of descriptor.
+    Given the input descriptor, provide with the atomic coordinates,
+    atomic types and neighbor list, calculate the new descriptor.
+    """
+
+    local_cluster = False
+
+    def __new__(cls, *args, **kwargs):
+        if cls is DescriptorBlock:
+            try:
+                descrpt_type = kwargs["type"]
+            except KeyError:
+                raise KeyError("the type of DescriptorBlock should be set by `type`")
+            cls = cls.get_class_by_type(descrpt_type)
+        return super().__new__(cls)
+
+    @abstractmethod
+    def get_rcut(self) -> float:
+        """Returns the cut-off radius."""
+        pass
+
+    @abstractmethod
+    def get_nsel(self) -> int:
+        """Returns the number of selected atoms in the cut-off radius."""
+        pass
+
+    @abstractmethod
+    def get_sel(self) -> List[int]:
+        """Returns the number of selected atoms for each type."""
+        pass
+
+    @abstractmethod
+    def get_ntypes(self) -> int:
+        """Returns the number of element types."""
+        pass
+
+    @abstractmethod
+    def get_dim_out(self) -> int:
+        """Returns the output dimension."""
+        pass
+
+    @abstractmethod
+    def get_dim_in(self) -> int:
+        """Returns the output dimension."""
+        pass
+
+    @abstractmethod
+    def get_dim_emb(self) -> int:
+        """Returns the embedding dimension."""
+        pass
+
+    def compute_input_stats(
+        self,
+        merged: Union[Callable[[], List[dict]], List[dict]],
+        path: Optional[DPPath] = None,
+    ):
+        """
+        Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data.
+
+        Parameters
+        ----------
+        merged : Union[Callable[[], List[dict]], List[dict]]
+            - List[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], List[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        path : Optional[DPPath]
+            The path to the stat file.
+
+        """
+        raise NotImplementedError
+
+    def get_stats(self) -> Dict[str, StatItem]:
+        """Get the statistics of the descriptor."""
+        raise NotImplementedError
+
+    def share_params(self, base_class, shared_level, resume=False):
+        """
+        Share the parameters of self to the base_class with shared_level during multitask training.
+        If not start from checkpoint (resume is False),
+        some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes.
+        """
+        assert (
+            self.__class__ == base_class.__class__
+        ), "Only descriptors of the same type can share params!"
+        if shared_level == 0:
+            # link buffers
+            if hasattr(self, "mean"):
+                if not resume:
+                    # in case of change params during resume
+                    base_env = EnvMatStatSe(base_class)
+                    base_env.stats = base_class.stats
+                    for kk in base_class.get_stats():
+                        base_env.stats[kk] += self.get_stats()[kk]
+                    mean, stddev = base_env()
+                    if not base_class.set_davg_zero:
+                        base_class.mean.copy_(torch.tensor(mean, device=env.DEVICE))
+                    base_class.stddev.copy_(torch.tensor(stddev, device=env.DEVICE))
+                # must share, even if not do stat
+                self.mean = base_class.mean
+                self.stddev = base_class.stddev
+            # self.load_state_dict(base_class.state_dict()) # this does not work, because it only inits the model
+            # the following will successfully link all the params except buffers
+            for item in self._modules:
+                self._modules[item] = base_class._modules[item]
+        else:
+            raise NotImplementedError
+
+    @abstractmethod
+    def forward(
+        self,
+        nlist: torch.Tensor,
+        extended_coord: torch.Tensor,
+        extended_atype: torch.Tensor,
+        extended_atype_embd: Optional[torch.Tensor] = None,
+        mapping: Optional[torch.Tensor] = None,
+    ):
+        """Calculate DescriptorBlock."""
+        pass
+
+
+def make_default_type_embedding(
+    ntypes,
+):
+    aux = {}
+    aux["tebd_dim"] = 8
+    return TypeEmbedNet(ntypes, aux["tebd_dim"]), aux
diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py
new file mode 100644
index 0000000000..21275317dc
--- /dev/null
+++ b/deepmd/pt/model/descriptor/dpa1.py
@@ -0,0 +1,282 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Callable,
+    List,
+    Optional,
+    Tuple,
+    Union,
+)
+
+import torch
+
+from deepmd.pt.model.network.network import (
+    TypeEmbedNet,
+)
+from deepmd.pt.utils.update_sel import (
+    UpdateSel,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+
+from .base_descriptor import (
+    BaseDescriptor,
+)
+from .se_atten import (
+    DescrptBlockSeAtten,
+)
+
+
+@BaseDescriptor.register("dpa1")
+@BaseDescriptor.register("se_atten")
+class DescrptDPA1(BaseDescriptor, torch.nn.Module):
+    def __init__(
+        self,
+        rcut,
+        rcut_smth,
+        sel,
+        ntypes: int,
+        neuron: list = [25, 50, 100],
+        axis_neuron: int = 16,
+        tebd_dim: int = 8,
+        tebd_input_mode: str = "concat",
+        # set_davg_zero: bool = False,
+        set_davg_zero: bool = True,  # TODO
+        attn: int = 128,
+        attn_layer: int = 2,
+        attn_dotr: bool = True,
+        attn_mask: bool = False,
+        post_ln=True,
+        ffn=False,
+        ffn_embed_dim=1024,
+        activation_function="tanh",
+        scaling_factor=1.0,
+        head_num=1,
+        normalize=True,
+        temperature=None,
+        return_rot=False,
+        concat_output_tebd: bool = True,
+        env_protection: float = 0.0,
+        type: Optional[str] = None,
+        # not implemented
+        resnet_dt: bool = False,
+        type_one_side: bool = True,
+        precision: str = "default",
+        trainable: bool = True,
+        exclude_types: List[Tuple[int, int]] = [],
+        stripped_type_embedding: bool = False,
+        smooth_type_embdding: bool = False,
+    ):
+        super().__init__()
+        if resnet_dt:
+            raise NotImplementedError("resnet_dt is not supported.")
+        if not type_one_side:
+            raise NotImplementedError("type_one_side is not supported.")
+        if precision != "default" and precision != "float64":
+            raise NotImplementedError("precison is not supported.")
+        if stripped_type_embedding:
+            raise NotImplementedError("stripped_type_embedding is not supported.")
+        if smooth_type_embdding:
+            raise NotImplementedError("smooth_type_embdding is not supported.")
+        del type
+        self.se_atten = DescrptBlockSeAtten(
+            rcut,
+            rcut_smth,
+            sel,
+            ntypes,
+            neuron=neuron,
+            axis_neuron=axis_neuron,
+            tebd_dim=tebd_dim,
+            tebd_input_mode=tebd_input_mode,
+            set_davg_zero=set_davg_zero,
+            attn=attn,
+            attn_layer=attn_layer,
+            attn_dotr=attn_dotr,
+            attn_mask=attn_mask,
+            post_ln=post_ln,
+            ffn=ffn,
+            ffn_embed_dim=ffn_embed_dim,
+            activation_function=activation_function,
+            scaling_factor=scaling_factor,
+            head_num=head_num,
+            normalize=normalize,
+            temperature=temperature,
+            return_rot=return_rot,
+            exclude_types=exclude_types,
+            env_protection=env_protection,
+        )
+        self.type_embedding = TypeEmbedNet(ntypes, tebd_dim)
+        self.tebd_dim = tebd_dim
+        self.concat_output_tebd = concat_output_tebd
+        # set trainable
+        for param in self.parameters():
+            param.requires_grad = trainable
+
+    def get_rcut(self) -> float:
+        """Returns the cut-off radius."""
+        return self.se_atten.get_rcut()
+
+    def get_nsel(self) -> int:
+        """Returns the number of selected atoms in the cut-off radius."""
+        return self.se_atten.get_nsel()
+
+    def get_sel(self) -> List[int]:
+        """Returns the number of selected atoms for each type."""
+        return self.se_atten.get_sel()
+
+    def get_ntypes(self) -> int:
+        """Returns the number of element types."""
+        return self.se_atten.get_ntypes()
+
+    def get_dim_out(self) -> int:
+        """Returns the output dimension."""
+        ret = self.se_atten.get_dim_out()
+        if self.concat_output_tebd:
+            ret += self.tebd_dim
+        return ret
+
+    def get_dim_emb(self) -> int:
+        return self.se_atten.dim_emb
+
+    def mixed_types(self) -> bool:
+        """If true, the discriptor
+        1. assumes total number of atoms aligned across frames;
+        2. requires a neighbor list that does not distinguish different atomic types.
+
+        If false, the discriptor
+        1. assumes total number of atoms of each atom type aligned across frames;
+        2. requires a neighbor list that distinguishes different atomic types.
+
+        """
+        return self.se_atten.mixed_types()
+
+    def share_params(self, base_class, shared_level, resume=False):
+        """
+        Share the parameters of self to the base_class with shared_level during multitask training.
+        If not start from checkpoint (resume is False),
+        some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes.
+        """
+        assert (
+            self.__class__ == base_class.__class__
+        ), "Only descriptors of the same type can share params!"
+        # For DPA1 descriptors, the user-defined share-level
+        # shared_level: 0
+        # share all parameters in both type_embedding and se_atten
+        if shared_level == 0:
+            self._modules["type_embedding"] = base_class._modules["type_embedding"]
+            self.se_atten.share_params(base_class.se_atten, 0, resume=resume)
+        # shared_level: 1
+        # share all parameters in type_embedding
+        elif shared_level == 1:
+            self._modules["type_embedding"] = base_class._modules["type_embedding"]
+        # Other shared levels
+        else:
+            raise NotImplementedError
+
+    @property
+    def dim_out(self):
+        return self.get_dim_out()
+
+    @property
+    def dim_emb(self):
+        return self.get_dim_emb()
+
+    def compute_input_stats(
+        self,
+        merged: Union[Callable[[], List[dict]], List[dict]],
+        path: Optional[DPPath] = None,
+    ):
+        """
+        Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data.
+
+        Parameters
+        ----------
+        merged : Union[Callable[[], List[dict]], List[dict]]
+            - List[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], List[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        path : Optional[DPPath]
+            The path to the stat file.
+
+        """
+        return self.se_atten.compute_input_stats(merged, path)
+
+    def serialize(self) -> dict:
+        """Serialize the obj to dict."""
+        raise NotImplementedError
+
+    @classmethod
+    def deserialize(cls) -> "DescrptDPA1":
+        """Deserialize from a dict."""
+        raise NotImplementedError
+
+    def forward(
+        self,
+        extended_coord: torch.Tensor,
+        extended_atype: torch.Tensor,
+        nlist: torch.Tensor,
+        mapping: Optional[torch.Tensor] = None,
+    ):
+        """Compute the descriptor.
+
+        Parameters
+        ----------
+        coord_ext
+            The extended coordinates of atoms. shape: nf x (nallx3)
+        atype_ext
+            The extended aotm types. shape: nf x nall
+        nlist
+            The neighbor list. shape: nf x nloc x nnei
+        mapping
+            The index mapping, not required by this descriptor.
+
+        Returns
+        -------
+        descriptor
+            The descriptor. shape: nf x nloc x (ng x axis_neuron)
+        gr
+            The rotationally equivariant and permutationally invariant single particle
+            representation. shape: nf x nloc x ng x 3
+        g2
+            The rotationally invariant pair-partical representation.
+            shape: nf x nloc x nnei x ng
+        h2
+            The rotationally equivariant pair-partical representation.
+            shape: nf x nloc x nnei x 3
+        sw
+            The smooth switch function. shape: nf x nloc x nnei
+
+        """
+        del mapping
+        nframes, nloc, nnei = nlist.shape
+        nall = extended_coord.view(nframes, -1).shape[1] // 3
+        g1_ext = self.type_embedding(extended_atype)
+        g1_inp = g1_ext[:, :nloc, :]
+        g1, g2, h2, rot_mat, sw = self.se_atten(
+            nlist,
+            extended_coord,
+            extended_atype,
+            g1_ext,
+            mapping=None,
+        )
+        if self.concat_output_tebd:
+            g1 = torch.cat([g1, g1_inp], dim=-1)
+
+        return g1, rot_mat, g2, h2, sw
+
+    @classmethod
+    def update_sel(cls, global_jdata: dict, local_jdata: dict):
+        """Update the selection and perform neighbor statistics.
+
+        Parameters
+        ----------
+        global_jdata : dict
+            The global data, containing the training section
+        local_jdata : dict
+            The local data refer to the current class
+        """
+        local_jdata_cpy = local_jdata.copy()
+        return UpdateSel().update_one_sel(global_jdata, local_jdata_cpy, True)
diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py
new file mode 100644
index 0000000000..fb792a51e2
--- /dev/null
+++ b/deepmd/pt/model/descriptor/dpa2.py
@@ -0,0 +1,501 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Callable,
+    List,
+    Optional,
+    Tuple,
+    Union,
+)
+
+import torch
+
+from deepmd.pt.model.network.network import (
+    Identity,
+    Linear,
+    TypeEmbedNet,
+)
+from deepmd.pt.utils.nlist import (
+    build_multiple_neighbor_list,
+    get_multiple_nlist_key,
+)
+from deepmd.pt.utils.update_sel import (
+    UpdateSel,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+
+from .base_descriptor import (
+    BaseDescriptor,
+)
+from .repformers import (
+    DescrptBlockRepformers,
+)
+from .se_atten import (
+    DescrptBlockSeAtten,
+)
+
+
+@BaseDescriptor.register("dpa2")
+class DescrptDPA2(torch.nn.Module, BaseDescriptor):
+    def __init__(
+        self,
+        ntypes: int,
+        repinit_rcut: float,
+        repinit_rcut_smth: float,
+        repinit_nsel: int,
+        repformer_rcut: float,
+        repformer_rcut_smth: float,
+        repformer_nsel: int,
+        # kwargs
+        tebd_dim: int = 8,
+        concat_output_tebd: bool = True,
+        repinit_neuron: List[int] = [25, 50, 100],
+        repinit_axis_neuron: int = 16,
+        repinit_set_davg_zero: bool = True,  # TODO
+        repinit_activation="tanh",
+        # repinit still unclear:
+        # ffn, ffn_embed_dim, scaling_factor, normalize,
+        repformer_nlayers: int = 3,
+        repformer_g1_dim: int = 128,
+        repformer_g2_dim: int = 16,
+        repformer_axis_dim: int = 4,
+        repformer_do_bn_mode: str = "no",
+        repformer_bn_momentum: float = 0.1,
+        repformer_update_g1_has_conv: bool = True,
+        repformer_update_g1_has_drrd: bool = True,
+        repformer_update_g1_has_grrg: bool = True,
+        repformer_update_g1_has_attn: bool = True,
+        repformer_update_g2_has_g1g1: bool = True,
+        repformer_update_g2_has_attn: bool = True,
+        repformer_update_h2: bool = False,
+        repformer_attn1_hidden: int = 64,
+        repformer_attn1_nhead: int = 4,
+        repformer_attn2_hidden: int = 16,
+        repformer_attn2_nhead: int = 4,
+        repformer_attn2_has_gate: bool = False,
+        repformer_activation: str = "tanh",
+        repformer_update_style: str = "res_avg",
+        repformer_set_davg_zero: bool = True,  # TODO
+        repformer_add_type_ebd_to_seq: bool = False,
+        env_protection: float = 0.0,
+        trainable: bool = True,
+        exclude_types: List[Tuple[int, int]] = [],
+        type: Optional[
+            str
+        ] = None,  # work around the bad design in get_trainer and DpLoaderSet!
+        rcut: Optional[
+            float
+        ] = None,  # work around the bad design in get_trainer and DpLoaderSet!
+        rcut_smth: Optional[
+            float
+        ] = None,  # work around the bad design in get_trainer and DpLoaderSet!
+        sel: Optional[
+            int
+        ] = None,  # work around the bad design in get_trainer and DpLoaderSet!
+    ):
+        r"""The DPA-2 descriptor. see https://arxiv.org/abs/2312.15492.
+
+        Parameters
+        ----------
+        ntypes : int
+            Number of atom types
+        repinit_rcut : float
+            The cut-off radius of the repinit block
+        repinit_rcut_smth : float
+            From this position the inverse distance smoothly decays
+            to 0 at the cut-off. Use in the repinit block.
+        repinit_nsel : int
+            Maximally possible number of neighbors for repinit block.
+        repformer_rcut : float
+            The cut-off radius of the repformer block
+        repformer_rcut_smth : float
+            From this position the inverse distance smoothly decays
+            to 0 at the cut-off. Use in the repformer block.
+        repformer_nsel : int
+            Maximally possible number of neighbors for repformer block.
+        tebd_dim : int
+            The dimension of atom type embedding
+        concat_output_tebd : bool
+            Whether to concat type embedding at the output of the descriptor.
+        repinit_neuron : List[int]
+            repinit block: the number of neurons in the embedding net.
+        repinit_axis_neuron : int
+            repinit block: the number of dimension of split  in the
+            symmetrization op.
+        repinit_activation : str
+            repinit block: the activation function in the embedding net
+        repformer_nlayers : int
+            repformers block: the number of repformer layers
+        repformer_g1_dim : int
+            repformers block: the dimension of single-atom rep
+        repformer_g2_dim : int
+            repformers block: the dimension of invariant pair-atom rep
+        repformer_axis_dim : int
+            repformers block: the number of dimension of split  in the
+            symmetrization ops.
+        repformer_do_bn_mode : bool
+            repformers block: do batch norm in the repformer layers
+        repformer_bn_momentum : float
+            repformers block: moment in the batch normalization
+        repformer_update_g1_has_conv : bool
+            repformers block: update the g1 rep with convolution term
+        repformer_update_g1_has_drrd : bool
+            repformers block: update the g1 rep with the drrd term
+        repformer_update_g1_has_grrg : bool
+            repformers block: update the g1 rep with the grrg term
+        repformer_update_g1_has_attn : bool
+            repformers block: update the g1 rep with the localized
+            self-attention
+        repformer_update_g2_has_g1g1 : bool
+            repformers block: update the g2 rep with the g1xg1 term
+        repformer_update_g2_has_attn : bool
+            repformers block: update the g2 rep with the gated self-attention
+        repformer_update_h2 : bool
+            repformers block: update the h2 rep
+        repformer_attn1_hidden : int
+            repformers block: the hidden dimension of localized self-attention
+        repformer_attn1_nhead : int
+            repformers block: the number of heads in localized self-attention
+        repformer_attn2_hidden : int
+            repformers block: the hidden dimension of gated self-attention
+        repformer_attn2_nhead : int
+            repformers block: the number of heads in gated self-attention
+        repformer_attn2_has_gate : bool
+            repformers block: has gate in the gated self-attention
+        repformer_activation : str
+            repformers block: the activation function in the MLPs.
+        repformer_update_style : str
+            repformers block: style of update a rep.
+            can be res_avg or res_incr.
+            res_avg updates a rep `u` with:
+                    u = 1/\sqrt{n+1} (u + u_1 + u_2 + ... + u_n)
+            res_incr updates a rep `u` with:
+                    u = u + 1/\sqrt{n} (u_1 + u_2 + ... + u_n)
+        repformer_set_davg_zero : bool
+            repformers block: set the avg to zero in statistics
+        repformer_add_type_ebd_to_seq : bool
+            repformers block: concatenate the type embedding at the output.
+        trainable : bool
+            If the parameters in the descriptor are trainable.
+        exclude_types : List[Tuple[int, int]] = [],
+            The excluded pairs of types which have no interaction with each other.
+            For example, `[[0, 1]]` means no interaction between type 0 and type 1.
+
+        Returns
+        -------
+        descriptor:         torch.Tensor
+            the descriptor of shape nb x nloc x g1_dim.
+            invariant single-atom representation.
+        g2:                 torch.Tensor
+            invariant pair-atom representation.
+        h2:                 torch.Tensor
+            equivariant pair-atom representation.
+        rot_mat:            torch.Tensor
+            rotation matrix for equivariant fittings
+        sw:                 torch.Tensor
+            The switch function for decaying inverse distance.
+
+        """
+        super().__init__()
+        del type, rcut, rcut_smth, sel
+        self.repinit = DescrptBlockSeAtten(
+            repinit_rcut,
+            repinit_rcut_smth,
+            repinit_nsel,
+            ntypes,
+            attn_layer=0,
+            neuron=repinit_neuron,
+            axis_neuron=repinit_axis_neuron,
+            tebd_dim=tebd_dim,
+            tebd_input_mode="concat",
+            # tebd_input_mode='dot_residual_s',
+            set_davg_zero=repinit_set_davg_zero,
+            exclude_types=exclude_types,
+            env_protection=env_protection,
+            activation_function=repinit_activation,
+        )
+        self.repformers = DescrptBlockRepformers(
+            repformer_rcut,
+            repformer_rcut_smth,
+            repformer_nsel,
+            ntypes,
+            nlayers=repformer_nlayers,
+            g1_dim=repformer_g1_dim,
+            g2_dim=repformer_g2_dim,
+            axis_dim=repformer_axis_dim,
+            direct_dist=False,
+            do_bn_mode=repformer_do_bn_mode,
+            bn_momentum=repformer_bn_momentum,
+            update_g1_has_conv=repformer_update_g1_has_conv,
+            update_g1_has_drrd=repformer_update_g1_has_drrd,
+            update_g1_has_grrg=repformer_update_g1_has_grrg,
+            update_g1_has_attn=repformer_update_g1_has_attn,
+            update_g2_has_g1g1=repformer_update_g2_has_g1g1,
+            update_g2_has_attn=repformer_update_g2_has_attn,
+            update_h2=repformer_update_h2,
+            attn1_hidden=repformer_attn1_hidden,
+            attn1_nhead=repformer_attn1_nhead,
+            attn2_hidden=repformer_attn2_hidden,
+            attn2_nhead=repformer_attn2_nhead,
+            attn2_has_gate=repformer_attn2_has_gate,
+            activation_function=repformer_activation,
+            update_style=repformer_update_style,
+            set_davg_zero=repformer_set_davg_zero,
+            smooth=True,
+            add_type_ebd_to_seq=repformer_add_type_ebd_to_seq,
+            exclude_types=exclude_types,
+            env_protection=env_protection,
+        )
+        self.type_embedding = TypeEmbedNet(ntypes, tebd_dim)
+        if self.repinit.dim_out == self.repformers.dim_in:
+            self.g1_shape_tranform = Identity()
+        else:
+            self.g1_shape_tranform = Linear(
+                self.repinit.dim_out,
+                self.repformers.dim_in,
+                bias=False,
+                init="glorot",
+            )
+        assert self.repinit.rcut > self.repformers.rcut
+        assert self.repinit.sel[0] > self.repformers.sel[0]
+        self.concat_output_tebd = concat_output_tebd
+        self.tebd_dim = tebd_dim
+        self.rcut = self.repinit.get_rcut()
+        self.ntypes = ntypes
+        self.sel = self.repinit.sel
+        # set trainable
+        for param in self.parameters():
+            param.requires_grad = trainable
+
+    def get_rcut(self) -> float:
+        """Returns the cut-off radius."""
+        return self.rcut
+
+    def get_nsel(self) -> int:
+        """Returns the number of selected atoms in the cut-off radius."""
+        return sum(self.sel)
+
+    def get_sel(self) -> List[int]:
+        """Returns the number of selected atoms for each type."""
+        return self.sel
+
+    def get_ntypes(self) -> int:
+        """Returns the number of element types."""
+        return self.ntypes
+
+    def get_dim_out(self) -> int:
+        """Returns the output dimension of this descriptor."""
+        ret = self.repformers.dim_out
+        if self.concat_output_tebd:
+            ret += self.tebd_dim
+        return ret
+
+    def get_dim_emb(self) -> int:
+        """Returns the embedding dimension of this descriptor."""
+        return self.repformers.dim_emb
+
+    def mixed_types(self) -> bool:
+        """If true, the discriptor
+        1. assumes total number of atoms aligned across frames;
+        2. requires a neighbor list that does not distinguish different atomic types.
+
+        If false, the discriptor
+        1. assumes total number of atoms of each atom type aligned across frames;
+        2. requires a neighbor list that distinguishes different atomic types.
+
+        """
+        return True
+
+    def share_params(self, base_class, shared_level, resume=False):
+        """
+        Share the parameters of self to the base_class with shared_level during multitask training.
+        If not start from checkpoint (resume is False),
+        some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes.
+        """
+        assert (
+            self.__class__ == base_class.__class__
+        ), "Only descriptors of the same type can share params!"
+        # For DPA2 descriptors, the user-defined share-level
+        # shared_level: 0
+        # share all parameters in type_embedding, repinit and repformers
+        if shared_level == 0:
+            self._modules["type_embedding"] = base_class._modules["type_embedding"]
+            self.repinit.share_params(base_class.repinit, 0, resume=resume)
+            self._modules["g1_shape_tranform"] = base_class._modules[
+                "g1_shape_tranform"
+            ]
+            self.repformers.share_params(base_class.repformers, 0, resume=resume)
+        # shared_level: 1
+        # share all parameters in type_embedding and repinit
+        elif shared_level == 1:
+            self._modules["type_embedding"] = base_class._modules["type_embedding"]
+            self.repinit.share_params(base_class.repinit, 0, resume=resume)
+        # shared_level: 2
+        # share all parameters in type_embedding and repformers
+        elif shared_level == 2:
+            self._modules["type_embedding"] = base_class._modules["type_embedding"]
+            self._modules["g1_shape_tranform"] = base_class._modules[
+                "g1_shape_tranform"
+            ]
+            self.repformers.share_params(base_class.repformers, 0, resume=resume)
+        # shared_level: 3
+        # share all parameters in type_embedding
+        elif shared_level == 3:
+            self._modules["type_embedding"] = base_class._modules["type_embedding"]
+        # Other shared levels
+        else:
+            raise NotImplementedError
+
+    @property
+    def dim_out(self):
+        return self.get_dim_out()
+
+    @property
+    def dim_emb(self):
+        """Returns the embedding dimension g2."""
+        return self.get_dim_emb()
+
+    def compute_input_stats(
+        self,
+        merged: Union[Callable[[], List[dict]], List[dict]],
+        path: Optional[DPPath] = None,
+    ):
+        """
+        Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data.
+
+        Parameters
+        ----------
+        merged : Union[Callable[[], List[dict]], List[dict]]
+            - List[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], List[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        path : Optional[DPPath]
+            The path to the stat file.
+
+        """
+        for ii, descrpt in enumerate([self.repinit, self.repformers]):
+            descrpt.compute_input_stats(merged, path)
+
+    def serialize(self) -> dict:
+        """Serialize the obj to dict."""
+        raise NotImplementedError
+
+    @classmethod
+    def deserialize(cls) -> "DescrptDPA2":
+        """Deserialize from a dict."""
+        raise NotImplementedError
+
+    def forward(
+        self,
+        extended_coord: torch.Tensor,
+        extended_atype: torch.Tensor,
+        nlist: torch.Tensor,
+        mapping: Optional[torch.Tensor] = None,
+    ):
+        """Compute the descriptor.
+
+        Parameters
+        ----------
+        coord_ext
+            The extended coordinates of atoms. shape: nf x (nallx3)
+        atype_ext
+            The extended aotm types. shape: nf x nall
+        nlist
+            The neighbor list. shape: nf x nloc x nnei
+        mapping
+            The index mapping, mapps extended region index to local region.
+
+        Returns
+        -------
+        descriptor
+            The descriptor. shape: nf x nloc x (ng x axis_neuron)
+        gr
+            The rotationally equivariant and permutationally invariant single particle
+            representation. shape: nf x nloc x ng x 3
+        g2
+            The rotationally invariant pair-partical representation.
+            shape: nf x nloc x nnei x ng
+        h2
+            The rotationally equivariant pair-partical representation.
+            shape: nf x nloc x nnei x 3
+        sw
+            The smooth switch function. shape: nf x nloc x nnei
+
+        """
+        nframes, nloc, nnei = nlist.shape
+        nall = extended_coord.view(nframes, -1).shape[1] // 3
+        # nlists
+        nlist_dict = build_multiple_neighbor_list(
+            extended_coord,
+            nlist,
+            [self.repformers.get_rcut(), self.repinit.get_rcut()],
+            [self.repformers.get_nsel(), self.repinit.get_nsel()],
+        )
+        # repinit
+        g1_ext = self.type_embedding(extended_atype)
+        g1_inp = g1_ext[:, :nloc, :]
+        g1, _, _, _, _ = self.repinit(
+            nlist_dict[
+                get_multiple_nlist_key(self.repinit.get_rcut(), self.repinit.get_nsel())
+            ],
+            extended_coord,
+            extended_atype,
+            g1_ext,
+            mapping,
+        )
+        # linear to change shape
+        g1 = self.g1_shape_tranform(g1)
+        # mapping g1
+        assert mapping is not None
+        mapping_ext = (
+            mapping.view(nframes, nall).unsqueeze(-1).expand(-1, -1, g1.shape[-1])
+        )
+        g1_ext = torch.gather(g1, 1, mapping_ext)
+        # repformer
+        g1, g2, h2, rot_mat, sw = self.repformers(
+            nlist_dict[
+                get_multiple_nlist_key(
+                    self.repformers.get_rcut(), self.repformers.get_nsel()
+                )
+            ],
+            extended_coord,
+            extended_atype,
+            g1_ext,
+            mapping,
+        )
+        if self.concat_output_tebd:
+            g1 = torch.cat([g1, g1_inp], dim=-1)
+        return g1, rot_mat, g2, h2, sw
+
+    @classmethod
+    def update_sel(cls, global_jdata: dict, local_jdata: dict):
+        """Update the selection and perform neighbor statistics.
+
+        Parameters
+        ----------
+        global_jdata : dict
+            The global data, containing the training section
+        local_jdata : dict
+            The local data refer to the current class
+        """
+        local_jdata_cpy = local_jdata.copy()
+        update_sel = UpdateSel()
+        local_jdata_cpy = update_sel.update_one_sel(
+            global_jdata,
+            local_jdata_cpy,
+            True,
+            rcut_key="repinit_rcut",
+            sel_key="repinit_nsel",
+        )
+        local_jdata_cpy = update_sel.update_one_sel(
+            global_jdata,
+            local_jdata_cpy,
+            True,
+            rcut_key="repformer_rcut",
+            sel_key="repformer_nsel",
+        )
+        return local_jdata_cpy
diff --git a/deepmd/pt/model/descriptor/env_mat.py b/deepmd/pt/model/descriptor/env_mat.py
new file mode 100644
index 0000000000..e89e7467d3
--- /dev/null
+++ b/deepmd/pt/model/descriptor/env_mat.py
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+
+import torch
+
+from deepmd.pt.utils.preprocess import (
+    compute_smooth_weight,
+)
+
+
+def _make_env_mat(
+    nlist,
+    coord,
+    rcut: float,
+    ruct_smth: float,
+    radial_only: bool = False,
+    protection: float = 0.0,
+):
+    """Make smooth environment matrix."""
+    bsz, natoms, nnei = nlist.shape
+    coord = coord.view(bsz, -1, 3)
+    nall = coord.shape[1]
+    mask = nlist >= 0
+    # nlist = nlist * mask  ## this impl will contribute nans in Hessian calculation.
+    nlist = torch.where(mask, nlist, nall - 1)
+    coord_l = coord[:, :natoms].view(bsz, -1, 1, 3)
+    index = nlist.view(bsz, -1).unsqueeze(-1).expand(-1, -1, 3)
+    coord_r = torch.gather(coord, 1, index)
+    coord_r = coord_r.view(bsz, natoms, nnei, 3)
+    diff = coord_r - coord_l
+    length = torch.linalg.norm(diff, dim=-1, keepdim=True)
+    # for index 0 nloc atom
+    length = length + ~mask.unsqueeze(-1)
+    t0 = 1 / (length + protection)
+    t1 = diff / (length + protection) ** 2
+    weight = compute_smooth_weight(length, ruct_smth, rcut)
+    weight = weight * mask.unsqueeze(-1)
+    if radial_only:
+        env_mat = t0 * weight
+    else:
+        env_mat = torch.cat([t0, t1], dim=-1) * weight
+    return env_mat, diff * mask.unsqueeze(-1), weight
+
+
+def prod_env_mat(
+    extended_coord,
+    nlist,
+    atype,
+    mean,
+    stddev,
+    rcut: float,
+    rcut_smth: float,
+    radial_only: bool = False,
+    protection: float = 0.0,
+):
+    """Generate smooth environment matrix from atom coordinates and other context.
+
+    Args:
+    - extended_coord: Copied atom coordinates with shape [nframes, nall*3].
+    - atype: Atom types with shape [nframes, nloc].
+    - mean: Average value of descriptor per element type with shape [len(sec), nnei, 4 or 1].
+    - stddev: Standard deviation of descriptor per element type with shape [len(sec), nnei, 4 or 1].
+    - rcut: Cut-off radius.
+    - rcut_smth: Smooth hyper-parameter for pair force & energy.
+    - radial_only: Whether to return a full description or a radial-only descriptor.
+    - protection: Protection parameter to prevent division by zero errors during calculations.
+
+    Returns
+    -------
+    - env_mat: Shape is [nframes, natoms[1]*nnei*4].
+    """
+    _env_mat_se_a, diff, switch = _make_env_mat(
+        nlist,
+        extended_coord,
+        rcut,
+        rcut_smth,
+        radial_only,
+        protection=protection,
+    )  # shape [n_atom, dim, 4 or 1]
+    t_avg = mean[atype]  # [n_atom, dim, 4 or 1]
+    t_std = stddev[atype]  # [n_atom, dim, 4 or 1]
+    env_mat_se_a = (_env_mat_se_a - t_avg) / t_std
+    return env_mat_se_a, diff, switch
diff --git a/deepmd/pt/model/descriptor/gaussian_lcc.py b/deepmd/pt/model/descriptor/gaussian_lcc.py
new file mode 100644
index 0000000000..e0708dd9e0
--- /dev/null
+++ b/deepmd/pt/model/descriptor/gaussian_lcc.py
@@ -0,0 +1,320 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    List,
+    Optional,
+)
+
+import torch
+import torch.nn as nn
+
+from deepmd.pt.model.descriptor.base_descriptor import (
+    BaseDescriptor,
+)
+from deepmd.pt.model.network.network import (
+    Evoformer3bEncoder,
+    GaussianEmbedding,
+    TypeEmbedNet,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+
+
+class DescrptGaussianLcc(torch.nn.Module, BaseDescriptor):
+    def __init__(
+        self,
+        rcut,
+        rcut_smth,
+        sel: int,
+        ntypes: int,
+        num_pair: int,
+        embed_dim: int = 768,
+        kernel_num: int = 128,
+        pair_embed_dim: int = 64,
+        num_block: int = 1,
+        layer_num: int = 12,
+        attn_head: int = 48,
+        pair_hidden_dim: int = 16,
+        ffn_embedding_dim: int = 768,
+        dropout: float = 0.0,
+        droppath_prob: float = 0.1,
+        pair_dropout: float = 0.25,
+        attention_dropout: float = 0.1,
+        activation_dropout: float = 0.1,
+        pre_ln: bool = True,
+        do_tag_embedding: bool = False,
+        tag_ener_pref: bool = False,
+        atomic_sum_gbf: bool = False,
+        pre_add_seq: bool = True,
+        tri_update: bool = True,
+        **kwargs,
+    ):
+        """Construct a descriptor of Gaussian Based Local Cluster.
+
+        Args:
+        - rcut: Cut-off radius.
+        - rcut_smth: Smooth hyper-parameter for pair force & energy. **Not used in this descriptor**.
+        - sel: For each element type, how many atoms is selected as neighbors.
+        - ntypes: Number of atom types.
+        - num_pair: Number of atom type pairs. Default is 2 * ntypes.
+        - kernel_num: Number of gaussian kernels.
+        - embed_dim: Dimension of atomic representation.
+        - pair_embed_dim: Dimension of pair representation.
+        - num_block: Number of evoformer blocks.
+        - layer_num: Number of attention layers.
+        - attn_head: Number of attention heads.
+        - pair_hidden_dim: Hidden dimension of pair representation during attention process.
+        - ffn_embedding_dim: Dimension during feed forward network.
+        - dropout: Dropout probability of atomic representation.
+        - droppath_prob: If not zero, it will use drop paths (Stochastic Depth) per sample and ignore `dropout`.
+        - pair_dropout: Dropout probability of pair representation during triangular update.
+        - attention_dropout: Dropout probability during attetion process.
+        - activation_dropout: Dropout probability of pair feed forward network.
+        - pre_ln: Do previous layer norm or not.
+        - do_tag_embedding: Add tag embedding to atomic and pair representations. (`tags`, `tags2`, `tags3` must exist)
+        - atomic_sum_gbf: Add sum of gaussian outputs to atomic representation or not.
+        - pre_add_seq: Add output of other descriptor (if has) to the atomic representation before attention.
+        """
+        super().__init__()
+        self.rcut = rcut
+        self.rcut_smth = rcut_smth
+        self.embed_dim = embed_dim
+        self.num_pair = num_pair
+        self.kernel_num = kernel_num
+        self.pair_embed_dim = pair_embed_dim
+        self.num_block = num_block
+        self.layer_num = layer_num
+        self.attention_heads = attn_head
+        self.pair_hidden_dim = pair_hidden_dim
+        self.ffn_embedding_dim = ffn_embedding_dim
+        self.dropout = dropout
+        self.droppath_prob = droppath_prob
+        self.pair_dropout = pair_dropout
+        self.attention_dropout = attention_dropout
+        self.activation_dropout = activation_dropout
+        self.pre_ln = pre_ln
+        self.do_tag_embedding = do_tag_embedding
+        self.tag_ener_pref = tag_ener_pref
+        self.atomic_sum_gbf = atomic_sum_gbf
+        self.local_cluster = True
+        self.pre_add_seq = pre_add_seq
+        self.tri_update = tri_update
+
+        if isinstance(sel, int):
+            sel = [sel]
+
+        self.ntypes = ntypes
+        self.sec = torch.tensor(sel)
+        self.nnei = sum(sel)
+
+        if self.do_tag_embedding:
+            self.tag_encoder = nn.Embedding(3, self.embed_dim)
+            self.tag_encoder2 = nn.Embedding(2, self.embed_dim)
+            self.tag_type_embedding = TypeEmbedNet(10, pair_embed_dim)
+        self.edge_type_embedding = nn.Embedding(
+            (ntypes + 1) * (ntypes + 1),
+            pair_embed_dim,
+            padding_idx=(ntypes + 1) * (ntypes + 1) - 1,
+            dtype=env.GLOBAL_PT_FLOAT_PRECISION,
+        )
+        self.gaussian_encoder = GaussianEmbedding(
+            rcut,
+            kernel_num,
+            num_pair,
+            embed_dim,
+            pair_embed_dim,
+            sel,
+            ntypes,
+            atomic_sum_gbf,
+        )
+        self.backbone = Evoformer3bEncoder(
+            self.nnei,
+            layer_num=self.layer_num,
+            attn_head=self.attention_heads,
+            atomic_dim=self.embed_dim,
+            pair_dim=self.pair_embed_dim,
+            pair_hidden_dim=self.pair_hidden_dim,
+            ffn_embedding_dim=self.ffn_embedding_dim,
+            dropout=self.dropout,
+            droppath_prob=self.droppath_prob,
+            pair_dropout=self.pair_dropout,
+            attention_dropout=self.attention_dropout,
+            activation_dropout=self.activation_dropout,
+            pre_ln=self.pre_ln,
+            tri_update=self.tri_update,
+        )
+
+    @property
+    def dim_out(self):
+        """Returns the output dimension of atomic representation."""
+        return self.embed_dim
+
+    @property
+    def dim_in(self):
+        """Returns the atomic input dimension of this descriptor."""
+        return self.embed_dim
+
+    @property
+    def dim_emb(self):
+        """Returns the output dimension of pair representation."""
+        return self.pair_embed_dim
+
+    def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None):
+        """Update mean and stddev for descriptor elements."""
+        pass
+
+    def forward(
+        self,
+        extended_coord,
+        nlist,
+        atype,
+        nlist_type,
+        nlist_loc=None,
+        atype_tebd=None,
+        nlist_tebd=None,
+        seq_input=None,
+    ):
+        """Calculate the atomic and pair representations of this descriptor.
+
+        Args:
+        - extended_coord: Copied atom coordinates with shape [nframes, nall, 3].
+        - nlist: Neighbor list with shape [nframes, nloc, nnei].
+        - atype: Atom type with shape [nframes, nloc].
+        - nlist_type: Atom type of neighbors with shape [nframes, nloc, nnei].
+        - nlist_loc: Local index of neighbor list with shape [nframes, nloc, nnei].
+        - atype_tebd: Atomic type embedding with shape [nframes, nloc, tebd_dim].
+        - nlist_tebd: Type embeddings of neighbor with shape [nframes, nloc, nnei, tebd_dim].
+        - seq_input: The sequential input from other descriptor with
+                    shape [nframes, nloc, tebd_dim] or [nframes * nloc, 1 + nnei, tebd_dim]
+
+        Returns
+        -------
+        - result: descriptor with shape [nframes, nloc, self.filter_neuron[-1] * self.axis_neuron].
+        - ret: environment matrix with shape [nframes, nloc, self.neei, out_size]
+        """
+        nframes, nloc = nlist.shape[:2]
+        nall = extended_coord.shape[1]
+        nlist2 = torch.cat(
+            [
+                torch.arange(0, nloc, device=nlist.device)
+                .reshape(1, nloc, 1)
+                .expand(nframes, -1, -1),
+                nlist,
+            ],
+            dim=-1,
+        )
+        nlist_loc2 = torch.cat(
+            [
+                torch.arange(0, nloc, device=nlist_loc.device)
+                .reshape(1, nloc, 1)
+                .expand(nframes, -1, -1),
+                nlist_loc,
+            ],
+            dim=-1,
+        )
+        nlist_type2 = torch.cat([atype.reshape(nframes, nloc, 1), nlist_type], dim=-1)
+        nnei2_mask = nlist2 != -1
+        padding_mask = nlist2 == -1
+        nlist2 = nlist2 * nnei2_mask
+        nlist_loc2 = nlist_loc2 * nnei2_mask
+
+        # nframes x nloc x (1 + nnei2) x (1 + nnei2)
+        pair_mask = nnei2_mask.unsqueeze(-1) * nnei2_mask.unsqueeze(-2)
+        # nframes x nloc x (1 + nnei2) x (1 + nnei2) x head
+        attn_mask = torch.zeros(
+            [nframes, nloc, 1 + self.nnei, 1 + self.nnei, self.attention_heads],
+            device=nlist.device,
+            dtype=extended_coord.dtype,
+        )
+        attn_mask.masked_fill_(padding_mask.unsqueeze(2).unsqueeze(-1), float("-inf"))
+        # (nframes x nloc) x head x (1 + nnei2) x (1 + nnei2)
+        attn_mask = (
+            attn_mask.reshape(
+                nframes * nloc, 1 + self.nnei, 1 + self.nnei, self.attention_heads
+            )
+            .permute(0, 3, 1, 2)
+            .contiguous()
+        )
+
+        # Atomic feature
+        # [(nframes x nloc) x (1 + nnei2) x tebd_dim]
+        atom_feature = torch.gather(
+            atype_tebd,
+            dim=1,
+            index=nlist_loc2.reshape(nframes, -1)
+            .unsqueeze(-1)
+            .expand(-1, -1, self.embed_dim),
+        ).reshape(nframes * nloc, 1 + self.nnei, self.embed_dim)
+        if self.pre_add_seq and seq_input is not None:
+            first_dim = seq_input.shape[0]
+            if first_dim == nframes * nloc:
+                atom_feature += seq_input
+            elif first_dim == nframes:
+                atom_feature_seq = torch.gather(
+                    seq_input,
+                    dim=1,
+                    index=nlist_loc2.reshape(nframes, -1)
+                    .unsqueeze(-1)
+                    .expand(-1, -1, self.embed_dim),
+                ).reshape(nframes * nloc, 1 + self.nnei, self.embed_dim)
+                atom_feature += atom_feature_seq
+            else:
+                raise RuntimeError
+        atom_feature = atom_feature * nnei2_mask.reshape(
+            nframes * nloc, 1 + self.nnei, 1
+        )
+
+        # Pair feature
+        # [(nframes x nloc) x (1 + nnei2)]
+        nlist_type2_reshape = nlist_type2.reshape(nframes * nloc, 1 + self.nnei)
+        # [(nframes x nloc) x (1 + nnei2) x (1 + nnei2)]
+        edge_type = nlist_type2_reshape.unsqueeze(-1) * (
+            self.ntypes + 1
+        ) + nlist_type2_reshape.unsqueeze(-2)
+        # [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x pair_dim]
+        edge_feature = self.edge_type_embedding(edge_type)
+
+        # [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x 2]
+        edge_type_2dim = torch.cat(
+            [
+                nlist_type2_reshape.view(nframes * nloc, 1 + self.nnei, 1, 1).expand(
+                    -1, -1, 1 + self.nnei, -1
+                ),
+                nlist_type2_reshape.view(nframes * nloc, 1, 1 + self.nnei, 1).expand(
+                    -1, 1 + self.nnei, -1, -1
+                )
+                + self.ntypes,
+            ],
+            dim=-1,
+        )
+        # [(nframes x nloc) x (1 + nnei2) x 3]
+        coord_selected = torch.gather(
+            extended_coord.unsqueeze(1)
+            .expand(-1, nloc, -1, -1)
+            .reshape(nframes * nloc, nall, 3),
+            dim=1,
+            index=nlist2.reshape(nframes * nloc, 1 + self.nnei, 1).expand(-1, -1, 3),
+        )
+
+        # Update pair features (or and atomic features) with gbf features
+        # delta_pos: [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x 3].
+        atomic_feature, pair_feature, delta_pos = self.gaussian_encoder(
+            coord_selected, atom_feature, edge_type_2dim, edge_feature
+        )
+        # [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x pair_dim]
+        attn_bias = pair_feature
+
+        # output: [(nframes x nloc) x (1 + nnei2) x tebd_dim]
+        # pair: [(nframes x nloc) x (1 + nnei2) x (1 + nnei2) x pair_dim]
+        output, pair = self.backbone(
+            atomic_feature,
+            pair=attn_bias,
+            attn_mask=attn_mask,
+            pair_mask=pair_mask,
+            atom_mask=nnei2_mask.reshape(nframes * nloc, 1 + self.nnei),
+        )
+
+        return output, pair, delta_pos, None
diff --git a/deepmd/pt/model/descriptor/hybrid.py b/deepmd/pt/model/descriptor/hybrid.py
new file mode 100644
index 0000000000..204ca7589d
--- /dev/null
+++ b/deepmd/pt/model/descriptor/hybrid.py
@@ -0,0 +1,525 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Union,
+)
+
+import numpy as np
+import torch
+
+from deepmd.pt.model.descriptor import (
+    DescriptorBlock,
+)
+from deepmd.pt.model.descriptor.base_descriptor import (
+    BaseDescriptor,
+)
+from deepmd.pt.model.network.network import (
+    Identity,
+    Linear,
+)
+from deepmd.pt.utils.nlist import (
+    nlist_distinguish_types,
+)
+from deepmd.pt.utils.utils import (
+    to_torch_tensor,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+
+@BaseDescriptor.register("hybrid")
+class DescrptHybrid(BaseDescriptor, torch.nn.Module):
+    """Concate a list of descriptors to form a new descriptor.
+
+    Parameters
+    ----------
+    list : list : List[Union[BaseDescriptor, Dict[str, Any]]]
+        Build a descriptor from the concatenation of the list of descriptors.
+        The descriptor can be either an object or a dictionary.
+    """
+
+    def __init__(
+        self,
+        list: List[Union[BaseDescriptor, Dict[str, Any]]],
+        **kwargs,
+    ) -> None:
+        super().__init__()
+        # warning: list is conflict with built-in list
+        descrpt_list = list
+        if descrpt_list == [] or descrpt_list is None:
+            raise RuntimeError(
+                "cannot build descriptor from an empty list of descriptors."
+            )
+        formatted_descript_list: List[BaseDescriptor] = []
+        for ii in descrpt_list:
+            if isinstance(ii, BaseDescriptor):
+                formatted_descript_list.append(ii)
+            elif isinstance(ii, dict):
+                formatted_descript_list.append(
+                    # pass other arguments (e.g. ntypes) to the descriptor
+                    BaseDescriptor(**ii, **kwargs)
+                )
+            else:
+                raise NotImplementedError
+        self.descrpt_list = torch.nn.ModuleList(formatted_descript_list)
+        self.numb_descrpt = len(self.descrpt_list)
+        for ii in range(1, self.numb_descrpt):
+            assert (
+                self.descrpt_list[ii].get_ntypes() == self.descrpt_list[0].get_ntypes()
+            ), f"number of atom types in {ii}th descrptor does not match others"
+        # if hybrid sel is larger than sub sel, the nlist needs to be cut for each type
+        self.nlist_cut_idx: List[torch.Tensor] = []
+        if self.mixed_types() and not all(
+            descrpt.mixed_types() for descrpt in self.descrpt_list
+        ):
+            self.sel_no_mixed_types = np.max(
+                [
+                    descrpt.get_sel()
+                    for descrpt in self.descrpt_list
+                    if not descrpt.mixed_types()
+                ],
+                axis=0,
+            ).tolist()
+        else:
+            self.sel_no_mixed_types = None
+        for ii in range(self.numb_descrpt):
+            if self.mixed_types() == self.descrpt_list[ii].mixed_types():
+                hybrid_sel = self.get_sel()
+            else:
+                assert self.sel_no_mixed_types is not None
+                hybrid_sel = self.sel_no_mixed_types
+            sub_sel = self.descrpt_list[ii].get_sel()
+            start_idx = np.cumsum(np.pad(hybrid_sel, (1, 0), "constant"))[:-1]
+            end_idx = start_idx + np.array(sub_sel)
+            cut_idx = np.concatenate(
+                [range(ss, ee) for ss, ee in zip(start_idx, end_idx)]
+            ).astype(np.int64)
+            self.nlist_cut_idx.append(to_torch_tensor(cut_idx))
+
+    def get_rcut(self) -> float:
+        """Returns the cut-off radius."""
+        # do not use numpy here - jit is not happy
+        return max([descrpt.get_rcut() for descrpt in self.descrpt_list])
+
+    def get_sel(self) -> List[int]:
+        """Returns the number of selected atoms for each type."""
+        if self.mixed_types():
+            return [
+                np.max(
+                    [descrpt.get_nsel() for descrpt in self.descrpt_list], axis=0
+                ).item()
+            ]
+        else:
+            return np.max(
+                [descrpt.get_sel() for descrpt in self.descrpt_list], axis=0
+            ).tolist()
+
+    def get_ntypes(self) -> int:
+        """Returns the number of element types."""
+        return self.descrpt_list[0].get_ntypes()
+
+    def get_dim_out(self) -> int:
+        """Returns the output dimension."""
+        return sum([descrpt.get_dim_out() for descrpt in self.descrpt_list])
+
+    def get_dim_emb(self) -> int:
+        """Returns the output dimension."""
+        return sum([descrpt.get_dim_emb() for descrpt in self.descrpt_list])
+
+    def mixed_types(self):
+        """Returns if the descriptor requires a neighbor list that distinguish different
+        atomic types or not.
+        """
+        return any(descrpt.mixed_types() for descrpt in self.descrpt_list)
+
+    def share_params(self, base_class, shared_level, resume=False):
+        """
+        Share the parameters of self to the base_class with shared_level during multitask training.
+        If not start from checkpoint (resume is False),
+        some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes.
+        """
+        assert (
+            self.__class__ == base_class.__class__
+        ), "Only descriptors of the same type can share params!"
+        if shared_level == 0:
+            for ii, des in enumerate(self.descrpt_list):
+                self.descrpt_list[ii].share_params(
+                    base_class.descrpt_list[ii], shared_level, resume=resume
+                )
+        else:
+            raise NotImplementedError
+
+    def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None):
+        """Update mean and stddev for descriptor elements."""
+        for descrpt in self.descrpt_list:
+            descrpt.compute_input_stats(merged, path)
+
+    def forward(
+        self,
+        coord_ext: torch.Tensor,
+        atype_ext: torch.Tensor,
+        nlist: torch.Tensor,
+        mapping: Optional[torch.Tensor] = None,
+    ):
+        """Compute the descriptor.
+
+        Parameters
+        ----------
+        coord_ext
+            The extended coordinates of atoms. shape: nf x (nallx3)
+        atype_ext
+            The extended aotm types. shape: nf x nall
+        nlist
+            The neighbor list. shape: nf x nloc x nnei
+        mapping
+            The index mapping, not required by this descriptor.
+
+        Returns
+        -------
+        descriptor
+            The descriptor. shape: nf x nloc x (ng x axis_neuron)
+        gr
+            The rotationally equivariant and permutationally invariant single particle
+            representation. shape: nf x nloc x ng x 3. This descriptor returns None
+        g2
+            The rotationally invariant pair-partical representation.
+            this descriptor returns None
+        h2
+            The rotationally equivariant pair-partical representation.
+            this descriptor returns None
+        sw
+            The smooth switch function. this descriptor returns None
+        """
+        out_descriptor = []
+        out_gr = []
+        out_g2: Optional[torch.Tensor] = None
+        out_h2: Optional[torch.Tensor] = None
+        out_sw: Optional[torch.Tensor] = None
+        if self.sel_no_mixed_types is not None:
+            nl_distinguish_types = nlist_distinguish_types(
+                nlist,
+                atype_ext,
+                self.sel_no_mixed_types,
+            )
+        else:
+            nl_distinguish_types = None
+        # make jit happy
+        # for descrpt, nci in zip(self.descrpt_list, self.nlist_cut_idx):
+        for ii, descrpt in enumerate(self.descrpt_list):
+            # cut the nlist to the correct length
+            if self.mixed_types() == descrpt.mixed_types():
+                nl = nlist[:, :, self.nlist_cut_idx[ii]]
+            else:
+                # mixed_types is True, but descrpt.mixed_types is False
+                assert nl_distinguish_types is not None
+                nl = nl_distinguish_types[:, :, self.nlist_cut_idx[ii]]
+            odescriptor, gr, g2, h2, sw = descrpt(coord_ext, atype_ext, nl, mapping)
+            out_descriptor.append(odescriptor)
+            if gr is not None:
+                out_gr.append(gr)
+        out_descriptor = torch.cat(out_descriptor, dim=-1)
+        out_gr = torch.cat(out_gr, dim=-2) if out_gr else None
+        return out_descriptor, out_gr, out_g2, out_h2, out_sw
+
+    @classmethod
+    def update_sel(cls, global_jdata: dict, local_jdata: dict) -> dict:
+        """Update the selection and perform neighbor statistics.
+
+        Parameters
+        ----------
+        global_jdata : dict
+            The global data, containing the training section
+        local_jdata : dict
+            The local data refer to the current class
+        """
+        local_jdata_cpy = local_jdata.copy()
+        local_jdata_cpy["list"] = [
+            BaseDescriptor.update_sel(global_jdata, sub_jdata)
+            for sub_jdata in local_jdata["list"]
+        ]
+        return local_jdata_cpy
+
+    def serialize(self) -> dict:
+        return {
+            "@class": "Descriptor",
+            "type": "hybrid",
+            "@version": 1,
+            "list": [descrpt.serialize() for descrpt in self.descrpt_list],
+        }
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "DescrptHybrid":
+        data = data.copy()
+        class_name = data.pop("@class")
+        assert class_name == "Descriptor"
+        class_type = data.pop("type")
+        assert class_type == "hybrid"
+        check_version_compatibility(data.pop("@version"), 1, 1)
+        obj = cls(
+            list=[BaseDescriptor.deserialize(ii) for ii in data["list"]],
+        )
+        return obj
+
+
+@DescriptorBlock.register("hybrid")
+class DescrptBlockHybrid(DescriptorBlock):
+    def __init__(
+        self,
+        list,
+        ntypes: int,
+        tebd_dim: int = 8,
+        tebd_input_mode: str = "concat",
+        hybrid_mode: str = "concat",
+        **kwargs,
+    ):
+        """Construct a hybrid descriptor.
+
+        Args:
+        - descriptor_list: list of descriptors.
+        - descriptor_param: descriptor configs.
+        """
+        super().__init__()
+        supported_descrpt = ["se_atten", "se_uni"]
+        descriptor_list = []
+        for descriptor_param_item in list:
+            descriptor_type_tmp = descriptor_param_item["type"]
+            assert (
+                descriptor_type_tmp in supported_descrpt
+            ), f"Only descriptors in {supported_descrpt} are supported for `hybrid` descriptor!"
+            descriptor_param_item["ntypes"] = ntypes
+            if descriptor_type_tmp == "se_atten":
+                descriptor_param_item["tebd_dim"] = tebd_dim
+                descriptor_param_item["tebd_input_mode"] = tebd_input_mode
+            descriptor_list.append(DescriptorBlock(**descriptor_param_item))
+        self.descriptor_list = torch.nn.ModuleList(descriptor_list)
+        self.descriptor_param = list
+        self.rcut = [descrpt.rcut for descrpt in self.descriptor_list]
+        self.sec = [descrpt.sec for descrpt in self.descriptor_list]
+        self.sel = [descrpt.sel for descrpt in self.descriptor_list]
+        self.split_sel = [sum(ii) for ii in self.sel]
+        self.local_cluster_list = [
+            descrpt.local_cluster for descrpt in self.descriptor_list
+        ]
+        self.local_cluster = True in self.local_cluster_list
+        self.hybrid_mode = hybrid_mode
+        self.tebd_dim = tebd_dim
+        assert self.hybrid_mode in ["concat", "sequential"]
+        sequential_transform = []
+        if self.hybrid_mode == "sequential":
+            for ii in range(len(descriptor_list) - 1):
+                if descriptor_list[ii].dim_out == descriptor_list[ii + 1].dim_in:
+                    sequential_transform.append(Identity())
+                else:
+                    sequential_transform.append(
+                        Linear(
+                            descriptor_list[ii].dim_out,
+                            descriptor_list[ii + 1].dim_in,
+                            bias=False,
+                            init="glorot",
+                        )
+                    )
+            sequential_transform.append(Identity())
+        self.sequential_transform = torch.nn.ModuleList(sequential_transform)
+        self.ntypes = ntypes
+
+    def get_rcut(self) -> float:
+        """Returns the cut-off radius."""
+        return self.rcut
+
+    def get_nsel(self) -> int:
+        """Returns the number of selected atoms in the cut-off radius."""
+        return [sum(ii) for ii in self.get_sel()]
+
+    def get_sel(self) -> List[int]:
+        """Returns the number of selected atoms for each type."""
+        return self.sel
+
+    def get_ntypes(self) -> int:
+        """Returns the number of element types."""
+        return self.ntypes
+
+    def get_dim_out(self) -> int:
+        """Returns the output dimension."""
+        return self.dim_out
+
+    def get_dim_in(self) -> int:
+        """Returns the input dimension."""
+        return self.dim_in
+
+    def get_dim_emb(self):
+        return self.dim_emb
+
+    def mixed_types(self) -> bool:
+        """If true, the discriptor
+        1. assumes total number of atoms aligned across frames;
+        2. requires a neighbor list that does not distinguish different atomic types.
+
+        If false, the discriptor
+        1. assumes total number of atoms of each atom type aligned across frames;
+        2. requires a neighbor list that distinguishes different atomic types.
+
+        """
+        return all(descriptor.mixed_types() for descriptor in self.descriptor_list)
+
+    @property
+    def dim_out(self):
+        """Returns the output dimension of this descriptor."""
+        if self.hybrid_mode == "concat":
+            return sum([descrpt.dim_out for descrpt in self.descriptor_list])
+        elif self.hybrid_mode == "sequential":
+            return self.descriptor_list[-1].dim_out
+        else:
+            raise RuntimeError
+
+    @property
+    def dim_emb_list(self) -> List[int]:
+        """Returns the output dimension list of embeddings."""
+        return [descrpt.dim_emb for descrpt in self.descriptor_list]
+
+    @property
+    def dim_emb(self):
+        """Returns the output dimension of embedding."""
+        if self.hybrid_mode == "concat":
+            return sum(self.dim_emb_list)
+        elif self.hybrid_mode == "sequential":
+            return self.descriptor_list[-1].dim_emb
+        else:
+            raise RuntimeError
+
+    def share_params(self, base_class, shared_level, resume=False):
+        """
+        Share the parameters of self to the base_class with shared_level during multitask training.
+        If not start from checkpoint (resume is False),
+        some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes.
+        """
+        assert (
+            self.__class__ == base_class.__class__
+        ), "Only descriptors of the same type can share params!"
+        if shared_level == 0:
+            for ii, des in enumerate(self.descriptor_list):
+                self.descriptor_list[ii].share_params(
+                    base_class.descriptor_list[ii], shared_level, resume=resume
+                )
+        else:
+            raise NotImplementedError
+
+    def compute_input_stats(
+        self,
+        merged: Union[Callable[[], List[dict]], List[dict]],
+        path: Optional[DPPath] = None,
+    ):
+        """
+        Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data.
+
+        Parameters
+        ----------
+        merged : Union[Callable[[], List[dict]], List[dict]]
+            - List[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], List[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        path : Optional[DPPath]
+            The path to the stat file.
+
+        """
+        for ii, descrpt in enumerate(self.descriptor_list):
+            # need support for hybrid descriptors
+            descrpt.compute_input_stats(merged, path)
+
+    def forward(
+        self,
+        nlist: torch.Tensor,
+        extended_coord: torch.Tensor,
+        extended_atype: torch.Tensor,
+        extended_atype_embd: Optional[torch.Tensor] = None,
+        mapping: Optional[torch.Tensor] = None,
+    ):
+        """Calculate decoded embedding for each atom.
+
+        Args:
+        - extended_coord: Tell atom coordinates with shape [nframes, natoms[1]*3].
+        - nlist: Tell atom types with shape [nframes, natoms[1]].
+        - atype: Tell atom count and element count. Its shape is [2+self.ntypes].
+        - nlist_type: Tell simulation box with shape [nframes, 9].
+        - atype_tebd: Tell simulation box with shape [nframes, 9].
+        - nlist_tebd: Tell simulation box with shape [nframes, 9].
+
+        Returns
+        -------
+        - result: descriptor with shape [nframes, nloc, self.filter_neuron[-1] * self.axis_neuron].
+        - ret: environment matrix with shape [nframes, nloc, self.neei, out_size]
+        """
+        nlist_list = list(torch.split(nlist, self.split_sel, -1))
+        nframes, nloc, nnei = nlist.shape
+        concat_rot_mat = True
+        if self.hybrid_mode == "concat":
+            out_descriptor = []
+            # out_env_mat = []
+            out_rot_mat_list = []
+            # out_diff = []
+            for ii, descrpt in enumerate(self.descriptor_list):
+                descriptor, env_mat, diff, rot_mat, sw = descrpt(
+                    nlist_list[ii],
+                    extended_coord,
+                    extended_atype,
+                    extended_atype_embd,
+                    mapping,
+                )
+                if descriptor.shape[0] == nframes * nloc:
+                    # [nframes * nloc, 1 + nnei, emb_dim]
+                    descriptor = descriptor[:, 0, :].reshape(nframes, nloc, -1)
+                out_descriptor.append(descriptor)
+                # out_env_mat.append(env_mat)
+                # out_diff.append(diff)
+                out_rot_mat_list.append(rot_mat)
+                if rot_mat is None:
+                    concat_rot_mat = False
+            out_descriptor = torch.concat(out_descriptor, dim=-1)
+            if concat_rot_mat:
+                out_rot_mat = torch.concat(out_rot_mat_list, dim=-2)
+            else:
+                out_rot_mat = None
+            return out_descriptor, None, None, out_rot_mat, sw
+        elif self.hybrid_mode == "sequential":
+            assert extended_atype_embd is not None
+            assert mapping is not None
+            nframes, nloc, nnei = nlist.shape
+            nall = extended_coord.view(nframes, -1).shape[1] // 3
+            seq_input_ext = extended_atype_embd
+            seq_input = (
+                seq_input_ext[:, :nloc, :] if len(self.descriptor_list) == 0 else None
+            )
+            env_mat, diff, rot_mat, sw = None, None, None, None
+            env_mat_list, diff_list = [], []
+            for ii, (descrpt, seq_transform) in enumerate(
+                zip(self.descriptor_list, self.sequential_transform)
+            ):
+                seq_output, env_mat, diff, rot_mat, sw = descrpt(
+                    nlist_list[ii],
+                    extended_coord,
+                    extended_atype,
+                    seq_input_ext,
+                    mapping,
+                )
+                seq_input = seq_transform(seq_output)
+                mapping_ext = (
+                    mapping.view(nframes, nall)
+                    .unsqueeze(-1)
+                    .expand(-1, -1, seq_input.shape[-1])
+                )
+                seq_input_ext = torch.gather(seq_input, 1, mapping_ext)
+                env_mat_list.append(env_mat)
+                diff_list.append(diff)
+            return seq_input, env_mat_list, diff_list, rot_mat, sw
+        else:
+            raise RuntimeError
diff --git a/deepmd/pt/model/descriptor/repformer_layer.py b/deepmd/pt/model/descriptor/repformer_layer.py
new file mode 100644
index 0000000000..a58d6b0e2c
--- /dev/null
+++ b/deepmd/pt/model/descriptor/repformer_layer.py
@@ -0,0 +1,749 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Callable,
+    List,
+)
+
+import torch
+
+from deepmd.pt.model.network.network import (
+    SimpleLinear,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.utils import (
+    ActivationFn,
+)
+
+
+def torch_linear(*args, **kwargs):
+    return torch.nn.Linear(
+        *args, **kwargs, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE
+    )
+
+
+def _make_nei_g1(
+    g1_ext: torch.Tensor,
+    nlist: torch.Tensor,
+) -> torch.Tensor:
+    # nlist: nb x nloc x nnei
+    nb, nloc, nnei = nlist.shape
+    # g1_ext: nb x nall x ng1
+    ng1 = g1_ext.shape[-1]
+    # index: nb x (nloc x nnei) x ng1
+    index = nlist.reshape(nb, nloc * nnei).unsqueeze(-1).expand(-1, -1, ng1)
+    # gg1  : nb x (nloc x nnei) x ng1
+    gg1 = torch.gather(g1_ext, dim=1, index=index)
+    # gg1  : nb x nloc x nnei x ng1
+    gg1 = gg1.view(nb, nloc, nnei, ng1)
+    return gg1
+
+
+def _apply_nlist_mask(
+    gg: torch.Tensor,
+    nlist_mask: torch.Tensor,
+) -> torch.Tensor:
+    # gg:  nf x nloc x nnei x ng
+    # msk: nf x nloc x nnei
+    return gg.masked_fill(~nlist_mask.unsqueeze(-1), 0.0)
+
+
+def _apply_switch(gg: torch.Tensor, sw: torch.Tensor) -> torch.Tensor:
+    # gg:  nf x nloc x nnei x ng
+    # sw:  nf x nloc x nnei
+    return gg * sw.unsqueeze(-1)
+
+
+def _apply_h_norm(
+    hh: torch.Tensor,  # nf x nloc x nnei x 3
+) -> torch.Tensor:
+    """Normalize h by the std of vector length.
+    do not have an idea if this is a good way.
+    """
+    nf, nl, nnei, _ = hh.shape
+    # nf x nloc x nnei
+    normh = torch.linalg.norm(hh, dim=-1)
+    # nf x nloc
+    std = torch.std(normh, dim=-1)
+    # nf x nloc x nnei x 3
+    hh = hh[:, :, :, :] / (1.0 + std[:, :, None, None])
+    return hh
+
+
+class Atten2Map(torch.nn.Module):
+    def __init__(
+        self,
+        ni: int,
+        nd: int,
+        nh: int,
+        has_gate: bool = False,  # apply gate to attn map
+        smooth: bool = True,
+        attnw_shift: float = 20.0,
+    ):
+        super().__init__()
+        self.ni = ni
+        self.nd = nd
+        self.nh = nh
+        self.mapqk = SimpleLinear(ni, nd * 2 * nh, bias=False)
+        self.has_gate = has_gate
+        self.smooth = smooth
+        self.attnw_shift = attnw_shift
+
+    def forward(
+        self,
+        g2: torch.Tensor,  # nb x nloc x nnei x ng2
+        h2: torch.Tensor,  # nb x nloc x nnei x 3
+        nlist_mask: torch.Tensor,  # nb x nloc x nnei
+        sw: torch.Tensor,  # nb x nloc x nnei
+    ) -> torch.Tensor:
+        (
+            nb,
+            nloc,
+            nnei,
+            _,
+        ) = g2.shape
+        nd, nh = self.nd, self.nh
+        # nb x nloc x nnei x nd x (nh x 2)
+        g2qk = self.mapqk(g2).view(nb, nloc, nnei, nd, nh * 2)
+        # nb x nloc x (nh x 2) x nnei x nd
+        g2qk = torch.permute(g2qk, (0, 1, 4, 2, 3))
+        # nb x nloc x nh x nnei x nd
+        g2q, g2k = torch.split(g2qk, nh, dim=2)
+        # g2q = torch.nn.functional.normalize(g2q, dim=-1)
+        # g2k = torch.nn.functional.normalize(g2k, dim=-1)
+        # nb x nloc x nh x nnei x nnei
+        attnw = torch.matmul(g2q, torch.transpose(g2k, -1, -2)) / nd**0.5
+        if self.has_gate:
+            gate = torch.matmul(h2, torch.transpose(h2, -1, -2)).unsqueeze(-3)
+            attnw = attnw * gate
+        # mask the attenmap, nb x nloc x 1 x 1 x nnei
+        attnw_mask = ~nlist_mask.unsqueeze(2).unsqueeze(2)
+        # mask the attenmap, nb x nloc x 1 x nnei x 1
+        attnw_mask_c = ~nlist_mask.unsqueeze(2).unsqueeze(-1)
+        if self.smooth:
+            attnw = (attnw + self.attnw_shift) * sw[:, :, None, :, None] * sw[
+                :, :, None, None, :
+            ] - self.attnw_shift
+        else:
+            attnw = attnw.masked_fill(
+                attnw_mask,
+                float("-inf"),
+            )
+        attnw = torch.softmax(attnw, dim=-1)
+        attnw = attnw.masked_fill(
+            attnw_mask,
+            0.0,
+        )
+        # nb x nloc x nh x nnei x nnei
+        attnw = attnw.masked_fill(
+            attnw_mask_c,
+            0.0,
+        )
+        if self.smooth:
+            attnw = attnw * sw[:, :, None, :, None] * sw[:, :, None, None, :]
+        # nb x nloc x nnei x nnei
+        h2h2t = torch.matmul(h2, torch.transpose(h2, -1, -2)) / 3.0**0.5
+        # nb x nloc x nh x nnei x nnei
+        ret = attnw * h2h2t[:, :, None, :, :]
+        # ret = torch.softmax(g2qk, dim=-1)
+        # nb x nloc x nnei x nnei x nh
+        ret = torch.permute(ret, (0, 1, 3, 4, 2))
+        return ret
+
+
+class Atten2MultiHeadApply(torch.nn.Module):
+    def __init__(
+        self,
+        ni: int,
+        nh: int,
+    ):
+        super().__init__()
+        self.ni = ni
+        self.nh = nh
+        self.mapv = SimpleLinear(ni, ni * nh, bias=False)
+        self.head_map = SimpleLinear(ni * nh, ni)
+
+    def forward(
+        self,
+        AA: torch.Tensor,  # nf x nloc x nnei x nnei x nh
+        g2: torch.Tensor,  # nf x nloc x nnei x ng2
+    ) -> torch.Tensor:
+        nf, nloc, nnei, ng2 = g2.shape
+        nh = self.nh
+        # nf x nloc x nnei x ng2 x nh
+        g2v = self.mapv(g2).view(nf, nloc, nnei, ng2, nh)
+        # nf x nloc x nh x nnei x ng2
+        g2v = torch.permute(g2v, (0, 1, 4, 2, 3))
+        # g2v = torch.nn.functional.normalize(g2v, dim=-1)
+        # nf x nloc x nh x nnei x nnei
+        AA = torch.permute(AA, (0, 1, 4, 2, 3))
+        # nf x nloc x nh x nnei x ng2
+        ret = torch.matmul(AA, g2v)
+        # nf x nloc x nnei x ng2 x nh
+        ret = torch.permute(ret, (0, 1, 3, 4, 2)).reshape(nf, nloc, nnei, (ng2 * nh))
+        # nf x nloc x nnei x ng2
+        return self.head_map(ret)
+
+
+class Atten2EquiVarApply(torch.nn.Module):
+    def __init__(
+        self,
+        ni: int,
+        nh: int,
+    ):
+        super().__init__()
+        self.ni = ni
+        self.nh = nh
+        self.head_map = SimpleLinear(nh, 1, bias=False)
+
+    def forward(
+        self,
+        AA: torch.Tensor,  # nf x nloc x nnei x nnei x nh
+        h2: torch.Tensor,  # nf x nloc x nnei x 3
+    ) -> torch.Tensor:
+        nf, nloc, nnei, _ = h2.shape
+        nh = self.nh
+        # nf x nloc x nh x nnei x nnei
+        AA = torch.permute(AA, (0, 1, 4, 2, 3))
+        h2m = torch.unsqueeze(h2, dim=2)
+        # nf x nloc x nh x nnei x 3
+        h2m = torch.tile(h2m, [1, 1, nh, 1, 1])
+        # nf x nloc x nh x nnei x 3
+        ret = torch.matmul(AA, h2m)
+        # nf x nloc x nnei x 3 x nh
+        ret = torch.permute(ret, (0, 1, 3, 4, 2)).view(nf, nloc, nnei, 3, nh)
+        # nf x nloc x nnei x 3
+        return torch.squeeze(self.head_map(ret), dim=-1)
+
+
+class LocalAtten(torch.nn.Module):
+    def __init__(
+        self,
+        ni: int,
+        nd: int,
+        nh: int,
+        smooth: bool = True,
+        attnw_shift: float = 20.0,
+    ):
+        super().__init__()
+        self.ni = ni
+        self.nd = nd
+        self.nh = nh
+        self.mapq = SimpleLinear(ni, nd * 1 * nh, bias=False)
+        self.mapkv = SimpleLinear(ni, (nd + ni) * nh, bias=False)
+        self.head_map = SimpleLinear(ni * nh, ni)
+        self.smooth = smooth
+        self.attnw_shift = attnw_shift
+
+    def forward(
+        self,
+        g1: torch.Tensor,  # nb x nloc x ng1
+        gg1: torch.Tensor,  # nb x nloc x nnei x ng1
+        nlist_mask: torch.Tensor,  # nb x nloc x nnei
+        sw: torch.Tensor,  # nb x nloc x nnei
+    ) -> torch.Tensor:
+        nb, nloc, nnei = nlist_mask.shape
+        ni, nd, nh = self.ni, self.nd, self.nh
+        assert ni == g1.shape[-1]
+        assert ni == gg1.shape[-1]
+        # nb x nloc x nd x nh
+        g1q = self.mapq(g1).view(nb, nloc, nd, nh)
+        # nb x nloc x nh x nd
+        g1q = torch.permute(g1q, (0, 1, 3, 2))
+        # nb x nloc x nnei x (nd+ni) x nh
+        gg1kv = self.mapkv(gg1).view(nb, nloc, nnei, nd + ni, nh)
+        gg1kv = torch.permute(gg1kv, (0, 1, 4, 2, 3))
+        # nb x nloc x nh x nnei x nd, nb x nloc x nh x nnei x ng1
+        gg1k, gg1v = torch.split(gg1kv, [nd, ni], dim=-1)
+
+        # nb x nloc x nh x 1 x nnei
+        attnw = torch.matmul(g1q.unsqueeze(-2), torch.transpose(gg1k, -1, -2)) / nd**0.5
+        # nb x nloc x nh x nnei
+        attnw = attnw.squeeze(-2)
+        # mask the attenmap, nb x nloc x 1 x nnei
+        attnw_mask = ~nlist_mask.unsqueeze(-2)
+        # nb x nloc x nh x nnei
+        if self.smooth:
+            attnw = (attnw + self.attnw_shift) * sw.unsqueeze(-2) - self.attnw_shift
+        else:
+            attnw = attnw.masked_fill(
+                attnw_mask,
+                float("-inf"),
+            )
+        attnw = torch.softmax(attnw, dim=-1)
+        attnw = attnw.masked_fill(
+            attnw_mask,
+            0.0,
+        )
+        if self.smooth:
+            attnw = attnw * sw.unsqueeze(-2)
+
+        # nb x nloc x nh x ng1
+        ret = (
+            torch.matmul(attnw.unsqueeze(-2), gg1v).squeeze(-2).view(nb, nloc, nh * ni)
+        )
+        # nb x nloc x ng1
+        ret = self.head_map(ret)
+        return ret
+
+
+class RepformerLayer(torch.nn.Module):
+    def __init__(
+        self,
+        rcut,
+        rcut_smth,
+        sel: int,
+        ntypes: int,
+        g1_dim=128,
+        g2_dim=16,
+        axis_dim: int = 4,
+        update_chnnl_2: bool = True,
+        do_bn_mode: str = "no",
+        bn_momentum: float = 0.1,
+        update_g1_has_conv: bool = True,
+        update_g1_has_drrd: bool = True,
+        update_g1_has_grrg: bool = True,
+        update_g1_has_attn: bool = True,
+        update_g2_has_g1g1: bool = True,
+        update_g2_has_attn: bool = True,
+        update_h2: bool = False,
+        attn1_hidden: int = 64,
+        attn1_nhead: int = 4,
+        attn2_hidden: int = 16,
+        attn2_nhead: int = 4,
+        attn2_has_gate: bool = False,
+        activation_function: str = "tanh",
+        update_style: str = "res_avg",
+        set_davg_zero: bool = True,  # TODO
+        smooth: bool = True,
+    ):
+        super().__init__()
+        self.epsilon = 1e-4  # protection of 1./nnei
+        self.rcut = rcut
+        self.rcut_smth = rcut_smth
+        self.ntypes = ntypes
+        sel = [sel] if isinstance(sel, int) else sel
+        self.nnei = sum(sel)
+        assert len(sel) == 1
+        self.sel = torch.tensor(sel, device=env.DEVICE)
+        self.sec = self.sel
+        self.axis_dim = axis_dim
+        self.set_davg_zero = set_davg_zero
+        self.do_bn_mode = do_bn_mode
+        self.bn_momentum = bn_momentum
+        self.act = ActivationFn(activation_function)
+        self.update_g1_has_grrg = update_g1_has_grrg
+        self.update_g1_has_drrd = update_g1_has_drrd
+        self.update_g1_has_conv = update_g1_has_conv
+        self.update_g1_has_attn = update_g1_has_attn
+        self.update_chnnl_2 = update_chnnl_2
+        self.update_g2_has_g1g1 = update_g2_has_g1g1 if self.update_chnnl_2 else False
+        self.update_g2_has_attn = update_g2_has_attn if self.update_chnnl_2 else False
+        self.update_h2 = update_h2 if self.update_chnnl_2 else False
+        del update_g2_has_g1g1, update_g2_has_attn, update_h2
+        self.update_style = update_style
+        self.smooth = smooth
+        self.g1_dim = g1_dim
+        self.g2_dim = g2_dim
+
+        g1_in_dim = self.cal_1_dim(g1_dim, g2_dim, self.axis_dim)
+        self.linear1 = SimpleLinear(g1_in_dim, g1_dim)
+        self.linear2 = None
+        self.proj_g1g2 = None
+        self.proj_g1g1g2 = None
+        self.attn2g_map = None
+        self.attn2_mh_apply = None
+        self.attn2_lm = None
+        self.attn2h_map = None
+        self.attn2_ev_apply = None
+        self.loc_attn = None
+
+        if self.update_chnnl_2:
+            self.linear2 = SimpleLinear(g2_dim, g2_dim)
+        if self.update_g1_has_conv:
+            self.proj_g1g2 = SimpleLinear(g1_dim, g2_dim, bias=False)
+        if self.update_g2_has_g1g1:
+            self.proj_g1g1g2 = SimpleLinear(g1_dim, g2_dim, bias=False)
+        if self.update_g2_has_attn:
+            self.attn2g_map = Atten2Map(
+                g2_dim, attn2_hidden, attn2_nhead, attn2_has_gate, self.smooth
+            )
+            self.attn2_mh_apply = Atten2MultiHeadApply(g2_dim, attn2_nhead)
+            self.attn2_lm = torch.nn.LayerNorm(
+                g2_dim,
+                elementwise_affine=True,
+                device=env.DEVICE,
+                dtype=env.GLOBAL_PT_FLOAT_PRECISION,
+            )
+        if self.update_h2:
+            self.attn2h_map = Atten2Map(
+                g2_dim, attn2_hidden, attn2_nhead, attn2_has_gate, self.smooth
+            )
+            self.attn2_ev_apply = Atten2EquiVarApply(g2_dim, attn2_nhead)
+        if self.update_g1_has_attn:
+            self.loc_attn = LocalAtten(g1_dim, attn1_hidden, attn1_nhead, self.smooth)
+
+        if self.do_bn_mode == "uniform":
+            self.bn1 = self._bn_layer()
+            self.bn2 = self._bn_layer()
+        elif self.do_bn_mode == "component":
+            self.bn1 = self._bn_layer(nf=g1_dim)
+            self.bn2 = self._bn_layer(nf=g2_dim)
+        elif self.do_bn_mode == "no":
+            self.bn1, self.bn2 = None, None
+        else:
+            raise RuntimeError(f"unknown bn_mode {self.do_bn_mode}")
+
+    def cal_1_dim(self, g1d: int, g2d: int, ax: int) -> int:
+        ret = g1d
+        if self.update_g1_has_grrg:
+            ret += g2d * ax
+        if self.update_g1_has_drrd:
+            ret += g1d * ax
+        if self.update_g1_has_conv:
+            ret += g2d
+        return ret
+
+    def _update_h2(
+        self,
+        g2: torch.Tensor,
+        h2: torch.Tensor,
+        nlist_mask: torch.Tensor,
+        sw: torch.Tensor,
+    ) -> torch.Tensor:
+        assert self.attn2h_map is not None
+        assert self.attn2_ev_apply is not None
+        nb, nloc, nnei, _ = g2.shape
+        # # nb x nloc x nnei x nh2
+        # h2_1 = self.attn2_ev_apply(AA, h2)
+        # h2_update.append(h2_1)
+        # nb x nloc x nnei x nnei x nh
+        AAh = self.attn2h_map(g2, h2, nlist_mask, sw)
+        # nb x nloc x nnei x nh2
+        h2_1 = self.attn2_ev_apply(AAh, h2)
+        return h2_1
+
+    def _update_g1_conv(
+        self,
+        gg1: torch.Tensor,
+        g2: torch.Tensor,
+        nlist_mask: torch.Tensor,
+        sw: torch.Tensor,
+    ) -> torch.Tensor:
+        assert self.proj_g1g2 is not None
+        nb, nloc, nnei, _ = g2.shape
+        ng1 = gg1.shape[-1]
+        ng2 = g2.shape[-1]
+        # gg1  : nb x nloc x nnei x ng2
+        gg1 = self.proj_g1g2(gg1).view(nb, nloc, nnei, ng2)
+        # nb x nloc x nnei x ng2
+        gg1 = _apply_nlist_mask(gg1, nlist_mask)
+        if not self.smooth:
+            # normalized by number of neighbors, not smooth
+            # nb x nloc x 1
+            invnnei = 1.0 / (self.epsilon + torch.sum(nlist_mask, dim=-1)).unsqueeze(-1)
+        else:
+            gg1 = _apply_switch(gg1, sw)
+            invnnei = (1.0 / float(nnei)) * torch.ones(
+                (nb, nloc, 1), dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=gg1.device
+            )
+        # nb x nloc x ng2
+        g1_11 = torch.sum(g2 * gg1, dim=2) * invnnei
+        return g1_11
+
+    def _cal_h2g2(
+        self,
+        g2: torch.Tensor,
+        h2: torch.Tensor,
+        nlist_mask: torch.Tensor,
+        sw: torch.Tensor,
+    ) -> torch.Tensor:
+        # g2:  nf x nloc x nnei x ng2
+        # h2:  nf x nloc x nnei x 3
+        # msk: nf x nloc x nnei
+        nb, nloc, nnei, _ = g2.shape
+        ng2 = g2.shape[-1]
+        # nb x nloc x nnei x ng2
+        g2 = _apply_nlist_mask(g2, nlist_mask)
+        if not self.smooth:
+            # nb x nloc
+            invnnei = 1.0 / (self.epsilon + torch.sum(nlist_mask, dim=-1))
+            # nb x nloc x 1 x 1
+            invnnei = invnnei.unsqueeze(-1).unsqueeze(-1)
+        else:
+            g2 = _apply_switch(g2, sw)
+            invnnei = (1.0 / float(nnei)) * torch.ones(
+                (nb, nloc, 1, 1), dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=g2.device
+            )
+        # nb x nloc x 3 x ng2
+        h2g2 = torch.matmul(torch.transpose(h2, -1, -2), g2) * invnnei
+        return h2g2
+
+    def _cal_grrg(self, h2g2: torch.Tensor) -> torch.Tensor:
+        # nb x nloc x 3 x ng2
+        nb, nloc, _, ng2 = h2g2.shape
+        # nb x nloc x 3 x axis
+        h2g2m = torch.split(h2g2, self.axis_dim, dim=-1)[0]
+        # nb x nloc x axis x ng2
+        g1_13 = torch.matmul(torch.transpose(h2g2m, -1, -2), h2g2) / (3.0**1)
+        # nb x nloc x (axisxng2)
+        g1_13 = g1_13.view(nb, nloc, self.axis_dim * ng2)
+        return g1_13
+
+    def _update_g1_grrg(
+        self,
+        g2: torch.Tensor,
+        h2: torch.Tensor,
+        nlist_mask: torch.Tensor,
+        sw: torch.Tensor,
+    ) -> torch.Tensor:
+        # g2:  nf x nloc x nnei x ng2
+        # h2:  nf x nloc x nnei x 3
+        # msk: nf x nloc x nnei
+        nb, nloc, nnei, _ = g2.shape
+        ng2 = g2.shape[-1]
+        # nb x nloc x 3 x ng2
+        h2g2 = self._cal_h2g2(g2, h2, nlist_mask, sw)
+        # nb x nloc x (axisxng2)
+        g1_13 = self._cal_grrg(h2g2)
+        return g1_13
+
+    def _update_g2_g1g1(
+        self,
+        g1: torch.Tensor,  # nb x nloc x ng1
+        gg1: torch.Tensor,  # nb x nloc x nnei x ng1
+        nlist_mask: torch.Tensor,  # nb x nloc x nnei
+        sw: torch.Tensor,  # nb x nloc x nnei
+    ) -> torch.Tensor:
+        ret = g1.unsqueeze(-2) * gg1
+        # nb x nloc x nnei x ng1
+        ret = _apply_nlist_mask(ret, nlist_mask)
+        if self.smooth:
+            ret = _apply_switch(ret, sw)
+        return ret
+
+    def _apply_bn(
+        self,
+        bn_number: int,
+        gg: torch.Tensor,
+    ):
+        if self.do_bn_mode == "uniform":
+            return self._apply_bn_uni(bn_number, gg)
+        elif self.do_bn_mode == "component":
+            return self._apply_bn_comp(bn_number, gg)
+        else:
+            return gg
+
+    def _apply_nb_1(self, bn_number: int, gg: torch.Tensor) -> torch.Tensor:
+        nb, nl, nf = gg.shape
+        gg = gg.view([nb, 1, nl * nf])
+        if bn_number == 1:
+            assert self.bn1 is not None
+            gg = self.bn1(gg)
+        else:
+            assert self.bn2 is not None
+            gg = self.bn2(gg)
+        return gg.view([nb, nl, nf])
+
+    def _apply_nb_2(
+        self,
+        bn_number: int,
+        gg: torch.Tensor,
+    ) -> torch.Tensor:
+        nb, nl, nnei, nf = gg.shape
+        gg = gg.view([nb, 1, nl * nnei * nf])
+        if bn_number == 1:
+            assert self.bn1 is not None
+            gg = self.bn1(gg)
+        else:
+            assert self.bn2 is not None
+            gg = self.bn2(gg)
+        return gg.view([nb, nl, nnei, nf])
+
+    def _apply_bn_uni(
+        self,
+        bn_number: int,
+        gg: torch.Tensor,
+        mode: str = "1",
+    ) -> torch.Tensor:
+        if len(gg.shape) == 3:
+            return self._apply_nb_1(bn_number, gg)
+        elif len(gg.shape) == 4:
+            return self._apply_nb_2(bn_number, gg)
+        else:
+            raise RuntimeError(f"unsupported input shape {gg.shape}")
+
+    def _apply_bn_comp(
+        self,
+        bn_number: int,
+        gg: torch.Tensor,
+    ) -> torch.Tensor:
+        ss = gg.shape
+        nf = ss[-1]
+        gg = gg.view([-1, nf])
+        if bn_number == 1:
+            assert self.bn1 is not None
+            gg = self.bn1(gg).view(ss)
+        else:
+            assert self.bn2 is not None
+            gg = self.bn2(gg).view(ss)
+        return gg
+
+    def forward(
+        self,
+        g1_ext: torch.Tensor,  # nf x nall x ng1
+        g2: torch.Tensor,  # nf x nloc x nnei x ng2
+        h2: torch.Tensor,  # nf x nloc x nnei x 3
+        nlist: torch.Tensor,  # nf x nloc x nnei
+        nlist_mask: torch.Tensor,  # nf x nloc x nnei
+        sw: torch.Tensor,  # switch func, nf x nloc x nnei
+    ):
+        """
+        Parameters
+        ----------
+        g1_ext : nf x nall x ng1         extended single-atom chanel
+        g2 : nf x nloc x nnei x ng2  pair-atom channel, invariant
+        h2 : nf x nloc x nnei x 3    pair-atom channel, equivariant
+        nlist : nf x nloc x nnei        neighbor list (padded neis are set to 0)
+        nlist_mask : nf x nloc x nnei   masks of the neighbor list. real nei 1 otherwise 0
+        sw : nf x nloc x nnei        switch function
+
+        Returns
+        -------
+        g1:     nf x nloc x ng1         updated single-atom chanel
+        g2:     nf x nloc x nnei x ng2  updated pair-atom channel, invariant
+        h2:     nf x nloc x nnei x 3    updated pair-atom channel, equivariant
+        """
+        cal_gg1 = (
+            self.update_g1_has_drrd
+            or self.update_g1_has_conv
+            or self.update_g1_has_attn
+            or self.update_g2_has_g1g1
+        )
+
+        nb, nloc, nnei, _ = g2.shape
+        nall = g1_ext.shape[1]
+        g1, _ = torch.split(g1_ext, [nloc, nall - nloc], dim=1)
+        assert (nb, nloc) == g1.shape[:2]
+        assert (nb, nloc, nnei) == h2.shape[:3]
+        ng1 = g1.shape[-1]
+        ng2 = g2.shape[-1]
+        nh2 = h2.shape[-1]
+
+        if self.bn1 is not None:
+            g1 = self._apply_bn(1, g1)
+        if self.bn2 is not None:
+            g2 = self._apply_bn(2, g2)
+        if self.update_h2:
+            h2 = _apply_h_norm(h2)
+
+        g2_update: List[torch.Tensor] = [g2]
+        h2_update: List[torch.Tensor] = [h2]
+        g1_update: List[torch.Tensor] = [g1]
+        g1_mlp: List[torch.Tensor] = [g1]
+
+        if cal_gg1:
+            gg1 = _make_nei_g1(g1_ext, nlist)
+        else:
+            gg1 = None
+
+        if self.update_chnnl_2:
+            # nb x nloc x nnei x ng2
+            assert self.linear2 is not None
+            g2_1 = self.act(self.linear2(g2))
+            g2_update.append(g2_1)
+
+            if self.update_g2_has_g1g1:
+                assert gg1 is not None
+                assert self.proj_g1g1g2 is not None
+                g2_update.append(
+                    self.proj_g1g1g2(self._update_g2_g1g1(g1, gg1, nlist_mask, sw))
+                )
+
+            if self.update_g2_has_attn:
+                assert self.attn2g_map is not None
+                assert self.attn2_mh_apply is not None
+                assert self.attn2_lm is not None
+                # nb x nloc x nnei x nnei x nh
+                AAg = self.attn2g_map(g2, h2, nlist_mask, sw)
+                # nb x nloc x nnei x ng2
+                g2_2 = self.attn2_mh_apply(AAg, g2)
+                g2_2 = self.attn2_lm(g2_2)
+                g2_update.append(g2_2)
+
+            if self.update_h2:
+                h2_update.append(self._update_h2(g2, h2, nlist_mask, sw))
+
+        if self.update_g1_has_conv:
+            assert gg1 is not None
+            g1_mlp.append(self._update_g1_conv(gg1, g2, nlist_mask, sw))
+
+        if self.update_g1_has_grrg:
+            g1_mlp.append(self._update_g1_grrg(g2, h2, nlist_mask, sw))
+
+        if self.update_g1_has_drrd:
+            assert gg1 is not None
+            g1_mlp.append(self._update_g1_grrg(gg1, h2, nlist_mask, sw))
+
+        # nb x nloc x [ng1+ng2+(axisxng2)+(axisxng1)]
+        #                  conv   grrg      drrd
+        g1_1 = self.act(self.linear1(torch.cat(g1_mlp, dim=-1)))
+        g1_update.append(g1_1)
+
+        if self.update_g1_has_attn:
+            assert gg1 is not None
+            assert self.loc_attn is not None
+            g1_update.append(self.loc_attn(g1, gg1, nlist_mask, sw))
+
+        # update
+        if self.update_chnnl_2:
+            g2_new = self.list_update(g2_update)
+            h2_new = self.list_update(h2_update)
+        else:
+            g2_new, h2_new = g2, h2
+        g1_new = self.list_update(g1_update)
+        return g1_new, g2_new, h2_new
+
+    @torch.jit.export
+    def list_update_res_avg(
+        self,
+        update_list: List[torch.Tensor],
+    ) -> torch.Tensor:
+        nitem = len(update_list)
+        uu = update_list[0]
+        for ii in range(1, nitem):
+            uu = uu + update_list[ii]
+        return uu / (float(nitem) ** 0.5)
+
+    @torch.jit.export
+    def list_update_res_incr(self, update_list: List[torch.Tensor]) -> torch.Tensor:
+        nitem = len(update_list)
+        uu = update_list[0]
+        scale = 1.0 / (float(nitem - 1) ** 0.5) if nitem > 1 else 0.0
+        for ii in range(1, nitem):
+            uu = uu + scale * update_list[ii]
+        return uu
+
+    @torch.jit.export
+    def list_update(self, update_list: List[torch.Tensor]) -> torch.Tensor:
+        if self.update_style == "res_avg":
+            return self.list_update_res_avg(update_list)
+        elif self.update_style == "res_incr":
+            return self.list_update_res_incr(update_list)
+        else:
+            raise RuntimeError(f"unknown update style {self.update_style}")
+
+    def _bn_layer(
+        self,
+        nf: int = 1,
+    ) -> Callable:
+        return torch.nn.BatchNorm1d(
+            nf,
+            eps=1e-5,
+            momentum=self.bn_momentum,
+            affine=False,
+            track_running_stats=True,
+            device=env.DEVICE,
+            dtype=env.GLOBAL_PT_FLOAT_PRECISION,
+        )
diff --git a/deepmd/pt/model/descriptor/repformers.py b/deepmd/pt/model/descriptor/repformers.py
new file mode 100644
index 0000000000..16a38052b1
--- /dev/null
+++ b/deepmd/pt/model/descriptor/repformers.py
@@ -0,0 +1,345 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Tuple,
+    Union,
+)
+
+import torch
+
+from deepmd.pt.model.descriptor.descriptor import (
+    DescriptorBlock,
+)
+from deepmd.pt.model.descriptor.env_mat import (
+    prod_env_mat,
+)
+from deepmd.pt.model.network.network import (
+    SimpleLinear,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.env_mat_stat import (
+    EnvMatStatSe,
+)
+from deepmd.pt.utils.exclude_mask import (
+    PairExcludeMask,
+)
+from deepmd.pt.utils.utils import (
+    ActivationFn,
+)
+from deepmd.utils.env_mat_stat import (
+    StatItem,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+
+from .repformer_layer import (
+    RepformerLayer,
+)
+
+mydtype = env.GLOBAL_PT_FLOAT_PRECISION
+mydev = env.DEVICE
+
+
+def torch_linear(*args, **kwargs):
+    return torch.nn.Linear(*args, **kwargs, dtype=mydtype, device=mydev)
+
+
+simple_linear = SimpleLinear
+mylinear = simple_linear
+
+
+@DescriptorBlock.register("se_repformer")
+@DescriptorBlock.register("se_uni")
+class DescrptBlockRepformers(DescriptorBlock):
+    def __init__(
+        self,
+        rcut,
+        rcut_smth,
+        sel: int,
+        ntypes: int,
+        nlayers: int = 3,
+        g1_dim=128,
+        g2_dim=16,
+        axis_dim: int = 4,
+        direct_dist: bool = False,
+        do_bn_mode: str = "no",
+        bn_momentum: float = 0.1,
+        update_g1_has_conv: bool = True,
+        update_g1_has_drrd: bool = True,
+        update_g1_has_grrg: bool = True,
+        update_g1_has_attn: bool = True,
+        update_g2_has_g1g1: bool = True,
+        update_g2_has_attn: bool = True,
+        update_h2: bool = False,
+        attn1_hidden: int = 64,
+        attn1_nhead: int = 4,
+        attn2_hidden: int = 16,
+        attn2_nhead: int = 4,
+        attn2_has_gate: bool = False,
+        activation_function: str = "tanh",
+        update_style: str = "res_avg",
+        set_davg_zero: bool = True,  # TODO
+        smooth: bool = True,
+        add_type_ebd_to_seq: bool = False,
+        exclude_types: List[Tuple[int, int]] = [],
+        env_protection: float = 0.0,
+        type: Optional[str] = None,
+    ):
+        """
+        smooth:
+            If strictly smooth, cannot be used with update_g1_has_attn
+        add_type_ebd_to_seq:
+            At the presence of seq_input (optional input to forward),
+            whether or not add an type embedding to seq_input.
+            If no seq_input is given, it has no effect.
+        """
+        super().__init__()
+        del type
+        self.epsilon = 1e-4  # protection of 1./nnei
+        self.rcut = rcut
+        self.rcut_smth = rcut_smth
+        self.ntypes = ntypes
+        self.nlayers = nlayers
+        sel = [sel] if isinstance(sel, int) else sel
+        self.nnei = sum(sel)
+        self.ndescrpt = self.nnei * 4  # use full descriptor.
+        assert len(sel) == 1
+        self.sel = sel
+        self.sec = self.sel
+        self.split_sel = self.sel
+        self.axis_dim = axis_dim
+        self.set_davg_zero = set_davg_zero
+        self.g1_dim = g1_dim
+        self.g2_dim = g2_dim
+        self.act = ActivationFn(activation_function)
+        self.direct_dist = direct_dist
+        self.add_type_ebd_to_seq = add_type_ebd_to_seq
+        # order matters, placed after the assignment of self.ntypes
+        self.reinit_exclude(exclude_types)
+        self.env_protection = env_protection
+
+        self.g2_embd = mylinear(1, self.g2_dim)
+        layers = []
+        for ii in range(nlayers):
+            layers.append(
+                RepformerLayer(
+                    rcut,
+                    rcut_smth,
+                    sel,
+                    ntypes,
+                    self.g1_dim,
+                    self.g2_dim,
+                    axis_dim=self.axis_dim,
+                    update_chnnl_2=(ii != nlayers - 1),
+                    do_bn_mode=do_bn_mode,
+                    bn_momentum=bn_momentum,
+                    update_g1_has_conv=update_g1_has_conv,
+                    update_g1_has_drrd=update_g1_has_drrd,
+                    update_g1_has_grrg=update_g1_has_grrg,
+                    update_g1_has_attn=update_g1_has_attn,
+                    update_g2_has_g1g1=update_g2_has_g1g1,
+                    update_g2_has_attn=update_g2_has_attn,
+                    update_h2=update_h2,
+                    attn1_hidden=attn1_hidden,
+                    attn1_nhead=attn1_nhead,
+                    attn2_has_gate=attn2_has_gate,
+                    attn2_hidden=attn2_hidden,
+                    attn2_nhead=attn2_nhead,
+                    activation_function=activation_function,
+                    update_style=update_style,
+                    smooth=smooth,
+                )
+            )
+        self.layers = torch.nn.ModuleList(layers)
+
+        sshape = (self.ntypes, self.nnei, 4)
+        mean = torch.zeros(sshape, dtype=mydtype, device=mydev)
+        stddev = torch.ones(sshape, dtype=mydtype, device=mydev)
+        self.register_buffer("mean", mean)
+        self.register_buffer("stddev", stddev)
+        self.stats = None
+
+    def get_rcut(self) -> float:
+        """Returns the cut-off radius."""
+        return self.rcut
+
+    def get_nsel(self) -> int:
+        """Returns the number of selected atoms in the cut-off radius."""
+        return sum(self.sel)
+
+    def get_sel(self) -> List[int]:
+        """Returns the number of selected atoms for each type."""
+        return self.sel
+
+    def get_ntypes(self) -> int:
+        """Returns the number of element types."""
+        return self.ntypes
+
+    def get_dim_out(self) -> int:
+        """Returns the output dimension."""
+        return self.dim_out
+
+    def get_dim_in(self) -> int:
+        """Returns the input dimension."""
+        return self.dim_in
+
+    def get_dim_emb(self) -> int:
+        """Returns the embedding dimension g2."""
+        return self.g2_dim
+
+    def mixed_types(self) -> bool:
+        """If true, the discriptor
+        1. assumes total number of atoms aligned across frames;
+        2. requires a neighbor list that does not distinguish different atomic types.
+
+        If false, the discriptor
+        1. assumes total number of atoms of each atom type aligned across frames;
+        2. requires a neighbor list that distinguishes different atomic types.
+
+        """
+        return True
+
+    @property
+    def dim_out(self):
+        """Returns the output dimension of this descriptor."""
+        return self.g1_dim
+
+    @property
+    def dim_in(self):
+        """Returns the atomic input dimension of this descriptor."""
+        return self.g1_dim
+
+    @property
+    def dim_emb(self):
+        """Returns the embedding dimension g2."""
+        return self.get_dim_emb()
+
+    def reinit_exclude(
+        self,
+        exclude_types: List[Tuple[int, int]] = [],
+    ):
+        self.exclude_types = exclude_types
+        self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types)
+
+    def forward(
+        self,
+        nlist: torch.Tensor,
+        extended_coord: torch.Tensor,
+        extended_atype: torch.Tensor,
+        extended_atype_embd: Optional[torch.Tensor] = None,
+        mapping: Optional[torch.Tensor] = None,
+    ):
+        assert mapping is not None
+        assert extended_atype_embd is not None
+        nframes, nloc, nnei = nlist.shape
+        nall = extended_coord.view(nframes, -1).shape[1] // 3
+        atype = extended_atype[:, :nloc]
+        # nb x nloc x nnei x 4, nb x nloc x nnei x 3, nb x nloc x nnei x 1
+        dmatrix, diff, sw = prod_env_mat(
+            extended_coord,
+            nlist,
+            atype,
+            self.mean,
+            self.stddev,
+            self.rcut,
+            self.rcut_smth,
+            protection=self.env_protection,
+        )
+        nlist_mask = nlist != -1
+        sw = torch.squeeze(sw, -1)
+        # beyond the cutoff sw should be 0.0
+        sw = sw.masked_fill(~nlist_mask, 0.0)
+
+        # [nframes, nloc, tebd_dim]
+        atype_embd = extended_atype_embd[:, :nloc, :]
+        assert list(atype_embd.shape) == [nframes, nloc, self.g1_dim]
+
+        g1 = self.act(atype_embd)
+        # nb x nloc x nnei x 1,  nb x nloc x nnei x 3
+        if not self.direct_dist:
+            g2, h2 = torch.split(dmatrix, [1, 3], dim=-1)
+        else:
+            g2, h2 = torch.linalg.norm(diff, dim=-1, keepdim=True), diff
+            g2 = g2 / self.rcut
+            h2 = h2 / self.rcut
+        # nb x nloc x nnei x ng2
+        g2 = self.act(self.g2_embd(g2))
+
+        # set all padding positions to index of 0
+        # if the a neighbor is real or not is indicated by nlist_mask
+        nlist[nlist == -1] = 0
+        # nb x nall x ng1
+        mapping = mapping.view(nframes, nall).unsqueeze(-1).expand(-1, -1, self.g1_dim)
+        for idx, ll in enumerate(self.layers):
+            # g1:     nb x nloc x ng1
+            # g1_ext: nb x nall x ng1
+            g1_ext = torch.gather(g1, 1, mapping)
+            g1, g2, h2 = ll.forward(
+                g1_ext,
+                g2,
+                h2,
+                nlist,
+                nlist_mask,
+                sw,
+            )
+
+        # uses the last layer.
+        # nb x nloc x 3 x ng2
+        h2g2 = ll._cal_h2g2(g2, h2, nlist_mask, sw)
+        # (nb x nloc) x ng2 x 3
+        rot_mat = torch.permute(h2g2, (0, 1, 3, 2))
+
+        return g1, g2, h2, rot_mat.view(-1, nloc, self.dim_emb, 3), sw
+
+    def compute_input_stats(
+        self,
+        merged: Union[Callable[[], List[dict]], List[dict]],
+        path: Optional[DPPath] = None,
+    ):
+        """
+        Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data.
+
+        Parameters
+        ----------
+        merged : Union[Callable[[], List[dict]], List[dict]]
+            - List[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], List[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        path : Optional[DPPath]
+            The path to the stat file.
+
+        """
+        env_mat_stat = EnvMatStatSe(self)
+        if path is not None:
+            path = path / env_mat_stat.get_hash()
+        if path is None or not path.is_dir():
+            if callable(merged):
+                # only get data for once
+                sampled = merged()
+            else:
+                sampled = merged
+        else:
+            sampled = []
+        env_mat_stat.load_or_compute_stats(sampled, path)
+        self.stats = env_mat_stat.stats
+        mean, stddev = env_mat_stat()
+        if not self.set_davg_zero:
+            self.mean.copy_(torch.tensor(mean, device=env.DEVICE))
+        self.stddev.copy_(torch.tensor(stddev, device=env.DEVICE))
+
+    def get_stats(self) -> Dict[str, StatItem]:
+        """Get the statistics of the descriptor."""
+        if self.stats is None:
+            raise RuntimeError(
+                "The statistics of the descriptor has not been computed."
+            )
+        return self.stats
diff --git a/deepmd/pt/model/descriptor/se_a.py b/deepmd/pt/model/descriptor/se_a.py
new file mode 100644
index 0000000000..e17b7c5d54
--- /dev/null
+++ b/deepmd/pt/model/descriptor/se_a.py
@@ -0,0 +1,657 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import itertools
+from typing import (
+    Callable,
+    ClassVar,
+    Dict,
+    List,
+    Optional,
+    Tuple,
+    Union,
+)
+
+import numpy as np
+import torch
+
+from deepmd.pt.model.descriptor import (
+    DescriptorBlock,
+    prod_env_mat,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.env import (
+    PRECISION_DICT,
+    RESERVED_PRECISON_DICT,
+)
+from deepmd.pt.utils.env_mat_stat import (
+    EnvMatStatSe,
+)
+from deepmd.pt.utils.update_sel import (
+    UpdateSel,
+)
+from deepmd.utils.env_mat_stat import (
+    StatItem,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+try:
+    from typing import (
+        Final,
+    )
+except ImportError:
+    from torch.jit import Final
+
+from deepmd.dpmodel.utils import EnvMat as DPEnvMat
+from deepmd.pt.model.network.mlp import (
+    EmbeddingNet,
+    NetworkCollection,
+)
+from deepmd.pt.model.network.network import (
+    TypeFilter,
+)
+from deepmd.pt.utils.exclude_mask import (
+    PairExcludeMask,
+)
+
+from .base_descriptor import (
+    BaseDescriptor,
+)
+
+
+@BaseDescriptor.register("se_e2_a")
+@BaseDescriptor.register("se_a")
+class DescrptSeA(BaseDescriptor, torch.nn.Module):
+    def __init__(
+        self,
+        rcut,
+        rcut_smth,
+        sel,
+        neuron=[25, 50, 100],
+        axis_neuron=16,
+        set_davg_zero: bool = False,
+        activation_function: str = "tanh",
+        precision: str = "float64",
+        resnet_dt: bool = False,
+        exclude_types: List[Tuple[int, int]] = [],
+        env_protection: float = 0.0,
+        old_impl: bool = False,
+        type_one_side: bool = True,
+        **kwargs,
+    ):
+        super().__init__()
+        self.sea = DescrptBlockSeA(
+            rcut,
+            rcut_smth,
+            sel,
+            neuron=neuron,
+            axis_neuron=axis_neuron,
+            set_davg_zero=set_davg_zero,
+            activation_function=activation_function,
+            precision=precision,
+            resnet_dt=resnet_dt,
+            exclude_types=exclude_types,
+            env_protection=env_protection,
+            old_impl=old_impl,
+            type_one_side=type_one_side,
+            **kwargs,
+        )
+
+    def get_rcut(self) -> float:
+        """Returns the cut-off radius."""
+        return self.sea.get_rcut()
+
+    def get_nsel(self) -> int:
+        """Returns the number of selected atoms in the cut-off radius."""
+        return self.sea.get_nsel()
+
+    def get_sel(self) -> List[int]:
+        """Returns the number of selected atoms for each type."""
+        return self.sea.get_sel()
+
+    def get_ntypes(self) -> int:
+        """Returns the number of element types."""
+        return self.sea.get_ntypes()
+
+    def get_dim_out(self) -> int:
+        """Returns the output dimension."""
+        return self.sea.get_dim_out()
+
+    def get_dim_emb(self) -> int:
+        """Returns the output dimension."""
+        return self.sea.get_dim_emb()
+
+    def mixed_types(self):
+        """Returns if the descriptor requires a neighbor list that distinguish different
+        atomic types or not.
+        """
+        return self.sea.mixed_types()
+
+    def share_params(self, base_class, shared_level, resume=False):
+        """
+        Share the parameters of self to the base_class with shared_level during multitask training.
+        If not start from checkpoint (resume is False),
+        some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes.
+        """
+        assert (
+            self.__class__ == base_class.__class__
+        ), "Only descriptors of the same type can share params!"
+        # For SeA descriptors, the user-defined share-level
+        # shared_level: 0
+        # share all parameters in sea
+        if shared_level == 0:
+            self.sea.share_params(base_class.sea, 0, resume=resume)
+        # Other shared levels
+        else:
+            raise NotImplementedError
+
+    @property
+    def dim_out(self):
+        """Returns the output dimension of this descriptor."""
+        return self.sea.dim_out
+
+    def compute_input_stats(
+        self,
+        merged: Union[Callable[[], List[dict]], List[dict]],
+        path: Optional[DPPath] = None,
+    ):
+        """
+        Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data.
+
+        Parameters
+        ----------
+        merged : Union[Callable[[], List[dict]], List[dict]]
+            - List[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], List[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        path : Optional[DPPath]
+            The path to the stat file.
+
+        """
+        return self.sea.compute_input_stats(merged, path)
+
+    def reinit_exclude(
+        self,
+        exclude_types: List[Tuple[int, int]] = [],
+    ):
+        """Update the type exclusions."""
+        self.sea.reinit_exclude(exclude_types)
+
+    def forward(
+        self,
+        coord_ext: torch.Tensor,
+        atype_ext: torch.Tensor,
+        nlist: torch.Tensor,
+        mapping: Optional[torch.Tensor] = None,
+    ):
+        """Compute the descriptor.
+
+        Parameters
+        ----------
+        coord_ext
+            The extended coordinates of atoms. shape: nf x (nallx3)
+        atype_ext
+            The extended aotm types. shape: nf x nall
+        nlist
+            The neighbor list. shape: nf x nloc x nnei
+        mapping
+            The index mapping, not required by this descriptor.
+
+        Returns
+        -------
+        descriptor
+            The descriptor. shape: nf x nloc x (ng x axis_neuron)
+        gr
+            The rotationally equivariant and permutationally invariant single particle
+            representation. shape: nf x nloc x ng x 3
+        g2
+            The rotationally invariant pair-partical representation.
+            this descriptor returns None
+        h2
+            The rotationally equivariant pair-partical representation.
+            this descriptor returns None
+        sw
+            The smooth switch function.
+
+        """
+        return self.sea.forward(nlist, coord_ext, atype_ext, None, mapping)
+
+    def set_stat_mean_and_stddev(
+        self,
+        mean: torch.Tensor,
+        stddev: torch.Tensor,
+    ) -> None:
+        self.sea.mean = mean
+        self.sea.stddev = stddev
+
+    def serialize(self) -> dict:
+        obj = self.sea
+        return {
+            "@class": "Descriptor",
+            "type": "se_e2_a",
+            "@version": 1,
+            "rcut": obj.rcut,
+            "rcut_smth": obj.rcut_smth,
+            "sel": obj.sel,
+            "neuron": obj.neuron,
+            "axis_neuron": obj.axis_neuron,
+            "resnet_dt": obj.resnet_dt,
+            "set_davg_zero": obj.set_davg_zero,
+            "activation_function": obj.activation_function,
+            # make deterministic
+            "precision": RESERVED_PRECISON_DICT[obj.prec],
+            "embeddings": obj.filter_layers.serialize(),
+            "env_mat": DPEnvMat(obj.rcut, obj.rcut_smth).serialize(),
+            "exclude_types": obj.exclude_types,
+            "env_protection": obj.env_protection,
+            "@variables": {
+                "davg": obj["davg"].detach().cpu().numpy(),
+                "dstd": obj["dstd"].detach().cpu().numpy(),
+            },
+            ## to be updated when the options are supported.
+            "trainable": True,
+            "type_one_side": obj.type_one_side,
+            "spin": None,
+        }
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "DescrptSeA":
+        data = data.copy()
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data.pop("@class", None)
+        data.pop("type", None)
+        variables = data.pop("@variables")
+        embeddings = data.pop("embeddings")
+        env_mat = data.pop("env_mat")
+        obj = cls(**data)
+
+        def t_cvt(xx):
+            return torch.tensor(xx, dtype=obj.sea.prec, device=env.DEVICE)
+
+        obj.sea["davg"] = t_cvt(variables["davg"])
+        obj.sea["dstd"] = t_cvt(variables["dstd"])
+        obj.sea.filter_layers = NetworkCollection.deserialize(embeddings)
+        return obj
+
+    @classmethod
+    def update_sel(cls, global_jdata: dict, local_jdata: dict):
+        """Update the selection and perform neighbor statistics.
+
+        Parameters
+        ----------
+        global_jdata : dict
+            The global data, containing the training section
+        local_jdata : dict
+            The local data refer to the current class
+        """
+        local_jdata_cpy = local_jdata.copy()
+        return UpdateSel().update_one_sel(global_jdata, local_jdata_cpy, False)
+
+
+@DescriptorBlock.register("se_e2_a")
+class DescrptBlockSeA(DescriptorBlock):
+    ndescrpt: Final[int]
+    __constants__: ClassVar[list] = ["ndescrpt"]
+
+    def __init__(
+        self,
+        rcut,
+        rcut_smth,
+        sel,
+        neuron=[25, 50, 100],
+        axis_neuron=16,
+        set_davg_zero: bool = False,
+        activation_function: str = "tanh",
+        precision: str = "float64",
+        resnet_dt: bool = False,
+        exclude_types: List[Tuple[int, int]] = [],
+        env_protection: float = 0.0,
+        old_impl: bool = False,
+        type_one_side: bool = True,
+        trainable: bool = True,
+        **kwargs,
+    ):
+        """Construct an embedding net of type `se_a`.
+
+        Args:
+        - rcut: Cut-off radius.
+        - rcut_smth: Smooth hyper-parameter for pair force & energy.
+        - sel: For each element type, how many atoms is selected as neighbors.
+        - filter_neuron: Number of neurons in each hidden layers of the embedding net.
+        - axis_neuron: Number of columns of the sub-matrix of the embedding matrix.
+        """
+        super().__init__()
+        self.rcut = rcut
+        self.rcut_smth = rcut_smth
+        self.neuron = neuron
+        self.filter_neuron = self.neuron
+        self.axis_neuron = axis_neuron
+        self.set_davg_zero = set_davg_zero
+        self.activation_function = activation_function
+        self.precision = precision
+        self.prec = PRECISION_DICT[self.precision]
+        self.resnet_dt = resnet_dt
+        self.old_impl = old_impl
+        self.env_protection = env_protection
+        self.ntypes = len(sel)
+        self.type_one_side = type_one_side
+        # order matters, placed after the assignment of self.ntypes
+        self.reinit_exclude(exclude_types)
+
+        self.sel = sel
+        # should be on CPU to avoid D2H, as it is used as slice index
+        self.sec = [0, *np.cumsum(self.sel).tolist()]
+        self.split_sel = self.sel
+        self.nnei = sum(sel)
+        self.ndescrpt = self.nnei * 4
+
+        wanted_shape = (self.ntypes, self.nnei, 4)
+        mean = torch.zeros(wanted_shape, dtype=self.prec, device=env.DEVICE)
+        stddev = torch.ones(wanted_shape, dtype=self.prec, device=env.DEVICE)
+        self.register_buffer("mean", mean)
+        self.register_buffer("stddev", stddev)
+        self.filter_layers_old = None
+        self.filter_layers = None
+
+        if self.old_impl:
+            if not self.type_one_side:
+                raise ValueError(
+                    "The old implementation does not support type_one_side=False."
+                )
+            filter_layers = []
+            # TODO: remove
+            start_index = 0
+            for type_i in range(self.ntypes):
+                one = TypeFilter(start_index, sel[type_i], self.filter_neuron)
+                filter_layers.append(one)
+                start_index += sel[type_i]
+            self.filter_layers_old = torch.nn.ModuleList(filter_layers)
+        else:
+            ndim = 1 if self.type_one_side else 2
+            filter_layers = NetworkCollection(
+                ndim=ndim, ntypes=len(sel), network_type="embedding_network"
+            )
+            for embedding_idx in itertools.product(range(self.ntypes), repeat=ndim):
+                filter_layers[embedding_idx] = EmbeddingNet(
+                    1,
+                    self.filter_neuron,
+                    activation_function=self.activation_function,
+                    precision=self.precision,
+                    resnet_dt=self.resnet_dt,
+                )
+            self.filter_layers = filter_layers
+        self.stats = None
+        # set trainable
+        for param in self.parameters():
+            param.requires_grad = trainable
+
+    def get_rcut(self) -> float:
+        """Returns the cut-off radius."""
+        return self.rcut
+
+    def get_nsel(self) -> int:
+        """Returns the number of selected atoms in the cut-off radius."""
+        return sum(self.sel)
+
+    def get_sel(self) -> List[int]:
+        """Returns the number of selected atoms for each type."""
+        return self.sel
+
+    def get_ntypes(self) -> int:
+        """Returns the number of element types."""
+        return self.ntypes
+
+    def get_dim_out(self) -> int:
+        """Returns the output dimension."""
+        return self.dim_out
+
+    def get_dim_emb(self) -> int:
+        """Returns the output dimension."""
+        return self.neuron[-1]
+
+    def get_dim_in(self) -> int:
+        """Returns the input dimension."""
+        return self.dim_in
+
+    def mixed_types(self) -> bool:
+        """If true, the discriptor
+        1. assumes total number of atoms aligned across frames;
+        2. requires a neighbor list that does not distinguish different atomic types.
+
+        If false, the discriptor
+        1. assumes total number of atoms of each atom type aligned across frames;
+        2. requires a neighbor list that distinguishes different atomic types.
+
+        """
+        return False
+
+    @property
+    def dim_out(self):
+        """Returns the output dimension of this descriptor."""
+        return self.filter_neuron[-1] * self.axis_neuron
+
+    @property
+    def dim_in(self):
+        """Returns the atomic input dimension of this descriptor."""
+        return 0
+
+    def __setitem__(self, key, value):
+        if key in ("avg", "data_avg", "davg"):
+            self.mean = value
+        elif key in ("std", "data_std", "dstd"):
+            self.stddev = value
+        else:
+            raise KeyError(key)
+
+    def __getitem__(self, key):
+        if key in ("avg", "data_avg", "davg"):
+            return self.mean
+        elif key in ("std", "data_std", "dstd"):
+            return self.stddev
+        else:
+            raise KeyError(key)
+
+    def compute_input_stats(
+        self,
+        merged: Union[Callable[[], List[dict]], List[dict]],
+        path: Optional[DPPath] = None,
+    ):
+        """
+        Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data.
+
+        Parameters
+        ----------
+        merged : Union[Callable[[], List[dict]], List[dict]]
+            - List[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], List[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        path : Optional[DPPath]
+            The path to the stat file.
+
+        """
+        env_mat_stat = EnvMatStatSe(self)
+        if path is not None:
+            path = path / env_mat_stat.get_hash()
+        if path is None or not path.is_dir():
+            if callable(merged):
+                # only get data for once
+                sampled = merged()
+            else:
+                sampled = merged
+        else:
+            sampled = []
+        env_mat_stat.load_or_compute_stats(sampled, path)
+        self.stats = env_mat_stat.stats
+        mean, stddev = env_mat_stat()
+        if not self.set_davg_zero:
+            self.mean.copy_(torch.tensor(mean, device=env.DEVICE))
+        self.stddev.copy_(torch.tensor(stddev, device=env.DEVICE))
+
+    def get_stats(self) -> Dict[str, StatItem]:
+        """Get the statistics of the descriptor."""
+        if self.stats is None:
+            raise RuntimeError(
+                "The statistics of the descriptor has not been computed."
+            )
+        return self.stats
+
+    def reinit_exclude(
+        self,
+        exclude_types: List[Tuple[int, int]] = [],
+    ):
+        self.exclude_types = exclude_types
+        self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types)
+
+    def forward(
+        self,
+        nlist: torch.Tensor,
+        extended_coord: torch.Tensor,
+        extended_atype: torch.Tensor,
+        extended_atype_embd: Optional[torch.Tensor] = None,
+        mapping: Optional[torch.Tensor] = None,
+    ):
+        """Calculate decoded embedding for each atom.
+
+        Args:
+        - coord: Tell atom coordinates with shape [nframes, natoms[1]*3].
+        - atype: Tell atom types with shape [nframes, natoms[1]].
+        - natoms: Tell atom count and element count. Its shape is [2+self.ntypes].
+        - box: Tell simulation box with shape [nframes, 9].
+
+        Returns
+        -------
+        - `torch.Tensor`: descriptor matrix with shape [nframes, natoms[0]*self.filter_neuron[-1]*self.axis_neuron].
+        """
+        del extended_atype_embd, mapping
+        nloc = nlist.shape[1]
+        atype = extended_atype[:, :nloc]
+        dmatrix, diff, sw = prod_env_mat(
+            extended_coord,
+            nlist,
+            atype,
+            self.mean,
+            self.stddev,
+            self.rcut,
+            self.rcut_smth,
+            protection=self.env_protection,
+        )
+
+        if self.old_impl:
+            assert self.filter_layers_old is not None
+            dmatrix = dmatrix.view(
+                -1, self.ndescrpt
+            )  # shape is [nframes*nall, self.ndescrpt]
+            xyz_scatter = torch.empty(
+                1,
+                device=env.DEVICE,
+            )
+            ret = self.filter_layers_old[0](dmatrix)
+            xyz_scatter = ret
+            for ii, transform in enumerate(self.filter_layers_old[1:]):
+                # shape is [nframes*nall, 4, self.filter_neuron[-1]]
+                ret = transform.forward(dmatrix)
+                xyz_scatter = xyz_scatter + ret
+        else:
+            assert self.filter_layers is not None
+            dmatrix = dmatrix.view(-1, self.nnei, 4)
+            dmatrix = dmatrix.to(dtype=self.prec)
+            nfnl = dmatrix.shape[0]
+            # pre-allocate a shape to pass jit
+            xyz_scatter = torch.zeros(
+                [nfnl, 4, self.filter_neuron[-1]],
+                dtype=self.prec,
+                device=extended_coord.device,
+            )
+            # nfnl x nnei
+            exclude_mask = self.emask(nlist, extended_atype).view(nfnl, -1)
+            for embedding_idx, ll in enumerate(self.filter_layers.networks):
+                if self.type_one_side:
+                    ii = embedding_idx
+                    # torch.jit is not happy with slice(None)
+                    # ti_mask = torch.ones(nfnl, dtype=torch.bool, device=dmatrix.device)
+                    # applying a mask seems to cause performance degradation
+                    ti_mask = None
+                else:
+                    # ti: center atom type, ii: neighbor type...
+                    ii = embedding_idx // self.ntypes
+                    ti = embedding_idx % self.ntypes
+                    ti_mask = atype.ravel().eq(ti)
+                # nfnl x nt
+                if ti_mask is not None:
+                    mm = exclude_mask[ti_mask, self.sec[ii] : self.sec[ii + 1]]
+                else:
+                    mm = exclude_mask[:, self.sec[ii] : self.sec[ii + 1]]
+                # nfnl x nt x 4
+                if ti_mask is not None:
+                    rr = dmatrix[ti_mask, self.sec[ii] : self.sec[ii + 1], :]
+                else:
+                    rr = dmatrix[:, self.sec[ii] : self.sec[ii + 1], :]
+                rr = rr * mm[:, :, None]
+                ss = rr[:, :, :1]
+                # nfnl x nt x ng
+                gg = ll.forward(ss)
+                # nfnl x 4 x ng
+                gr = torch.matmul(rr.permute(0, 2, 1), gg)
+                if ti_mask is not None:
+                    xyz_scatter[ti_mask] += gr
+                else:
+                    xyz_scatter += gr
+
+        xyz_scatter /= self.nnei
+        xyz_scatter_1 = xyz_scatter.permute(0, 2, 1)
+        rot_mat = xyz_scatter_1[:, :, 1:4]
+        xyz_scatter_2 = xyz_scatter[:, :, 0 : self.axis_neuron]
+        result = torch.matmul(
+            xyz_scatter_1, xyz_scatter_2
+        )  # shape is [nframes*nall, self.filter_neuron[-1], self.axis_neuron]
+        result = result.view(-1, nloc, self.filter_neuron[-1] * self.axis_neuron)
+        rot_mat = rot_mat.view([-1, nloc] + list(rot_mat.shape[1:]))  # noqa:RUF005
+        return (
+            result.to(dtype=env.GLOBAL_PT_FLOAT_PRECISION),
+            rot_mat.to(dtype=env.GLOBAL_PT_FLOAT_PRECISION),
+            None,
+            None,
+            sw,
+        )
+
+
+def analyze_descrpt(matrix, ndescrpt, natoms):
+    """Collect avg, square avg and count of descriptors in a batch."""
+    ntypes = natoms.shape[1] - 2
+    start_index = 0
+    sysr = []
+    sysa = []
+    sysn = []
+    sysr2 = []
+    sysa2 = []
+    for type_i in range(ntypes):
+        end_index = start_index + natoms[0, 2 + type_i]
+        dd = matrix[:, start_index:end_index]  # all descriptors for this element
+        start_index = end_index
+        dd = np.reshape(
+            dd, [-1, 4]
+        )  # Shape is [nframes*natoms[2+type_id]*self.nnei, 4]
+        ddr = dd[:, :1]
+        dda = dd[:, 1:]
+        sumr = np.sum(ddr)
+        suma = np.sum(dda) / 3.0
+        sumn = dd.shape[0]  # Value is nframes*natoms[2+type_id]*self.nnei
+        sumr2 = np.sum(np.multiply(ddr, ddr))
+        suma2 = np.sum(np.multiply(dda, dda)) / 3.0
+        sysr.append(sumr)
+        sysa.append(suma)
+        sysn.append(sumn)
+        sysr2.append(sumr2)
+        sysa2.append(suma2)
+    return sysr, sysr2, sysa, sysa2, sysn
diff --git a/deepmd/pt/model/descriptor/se_atten.py b/deepmd/pt/model/descriptor/se_atten.py
new file mode 100644
index 0000000000..051c66385c
--- /dev/null
+++ b/deepmd/pt/model/descriptor/se_atten.py
@@ -0,0 +1,412 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Tuple,
+    Union,
+)
+
+import numpy as np
+import torch
+
+from deepmd.pt.model.descriptor.descriptor import (
+    DescriptorBlock,
+)
+from deepmd.pt.model.descriptor.env_mat import (
+    prod_env_mat,
+)
+from deepmd.pt.model.network.network import (
+    NeighborWiseAttention,
+    TypeFilter,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.env_mat_stat import (
+    EnvMatStatSe,
+)
+from deepmd.pt.utils.exclude_mask import (
+    PairExcludeMask,
+)
+from deepmd.utils.env_mat_stat import (
+    StatItem,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+
+
+@DescriptorBlock.register("se_atten")
+class DescrptBlockSeAtten(DescriptorBlock):
+    def __init__(
+        self,
+        rcut,
+        rcut_smth,
+        sel,
+        ntypes: int,
+        neuron: list = [25, 50, 100],
+        axis_neuron: int = 16,
+        tebd_dim: int = 8,
+        tebd_input_mode: str = "concat",
+        # set_davg_zero: bool = False,
+        set_davg_zero: bool = True,  # TODO
+        attn: int = 128,
+        attn_layer: int = 2,
+        attn_dotr: bool = True,
+        attn_mask: bool = False,
+        post_ln=True,
+        ffn=False,
+        ffn_embed_dim=1024,
+        activation_function="tanh",
+        scaling_factor=1.0,
+        head_num=1,
+        normalize=True,
+        temperature=None,
+        return_rot=False,
+        exclude_types: List[Tuple[int, int]] = [],
+        env_protection: float = 0.0,
+        type: Optional[str] = None,
+    ):
+        """Construct an embedding net of type `se_atten`.
+
+        Args:
+        - rcut: Cut-off radius.
+        - rcut_smth: Smooth hyper-parameter for pair force & energy.
+        - sel: For each element type, how many atoms is selected as neighbors.
+        - filter_neuron: Number of neurons in each hidden layers of the embedding net.
+        - axis_neuron: Number of columns of the sub-matrix of the embedding matrix.
+        """
+        super().__init__()
+        del type
+        self.rcut = rcut
+        self.rcut_smth = rcut_smth
+        self.filter_neuron = neuron
+        self.axis_neuron = axis_neuron
+        self.tebd_dim = tebd_dim
+        self.tebd_input_mode = tebd_input_mode
+        self.set_davg_zero = set_davg_zero
+        self.attn_dim = attn
+        self.attn_layer = attn_layer
+        self.attn_dotr = attn_dotr
+        self.attn_mask = attn_mask
+        self.post_ln = post_ln
+        self.ffn = ffn
+        self.ffn_embed_dim = ffn_embed_dim
+        self.activation = activation_function
+        # TODO: To be fixed: precision should be given from inputs
+        self.prec = torch.float64
+        self.scaling_factor = scaling_factor
+        self.head_num = head_num
+        self.normalize = normalize
+        self.temperature = temperature
+        self.return_rot = return_rot
+        self.env_protection = env_protection
+
+        if isinstance(sel, int):
+            sel = [sel]
+
+        self.ntypes = ntypes
+        self.sel = sel
+        self.sec = self.sel
+        self.split_sel = self.sel
+        self.nnei = sum(sel)
+        self.ndescrpt = self.nnei * 4
+        # order matters, placed after the assignment of self.ntypes
+        self.reinit_exclude(exclude_types)
+        self.dpa1_attention = NeighborWiseAttention(
+            self.attn_layer,
+            self.nnei,
+            self.filter_neuron[-1],
+            self.attn_dim,
+            dotr=self.attn_dotr,
+            do_mask=self.attn_mask,
+            post_ln=self.post_ln,
+            ffn=self.ffn,
+            ffn_embed_dim=self.ffn_embed_dim,
+            activation=self.activation,
+            scaling_factor=self.scaling_factor,
+            head_num=self.head_num,
+            normalize=self.normalize,
+            temperature=self.temperature,
+        )
+
+        wanted_shape = (self.ntypes, self.nnei, 4)
+        mean = torch.zeros(
+            wanted_shape, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE
+        )
+        stddev = torch.ones(
+            wanted_shape, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE
+        )
+        self.register_buffer("mean", mean)
+        self.register_buffer("stddev", stddev)
+
+        filter_layers = []
+        one = TypeFilter(
+            0,
+            self.nnei,
+            self.filter_neuron,
+            return_G=True,
+            tebd_dim=self.tebd_dim,
+            use_tebd=True,
+            tebd_mode=self.tebd_input_mode,
+        )
+        filter_layers.append(one)
+        self.filter_layers = torch.nn.ModuleList(filter_layers)
+        self.stats = None
+
+    def get_rcut(self) -> float:
+        """Returns the cut-off radius."""
+        return self.rcut
+
+    def get_nsel(self) -> int:
+        """Returns the number of selected atoms in the cut-off radius."""
+        return sum(self.sel)
+
+    def get_sel(self) -> List[int]:
+        """Returns the number of selected atoms for each type."""
+        return self.sel
+
+    def get_ntypes(self) -> int:
+        """Returns the number of element types."""
+        return self.ntypes
+
+    def get_dim_in(self) -> int:
+        """Returns the output dimension."""
+        return self.dim_in
+
+    def get_dim_out(self) -> int:
+        """Returns the output dimension."""
+        return self.dim_out
+
+    def get_dim_emb(self) -> int:
+        """Returns the output dimension of embedding."""
+        return self.filter_neuron[-1]
+
+    def mixed_types(self) -> bool:
+        """If true, the discriptor
+        1. assumes total number of atoms aligned across frames;
+        2. requires a neighbor list that does not distinguish different atomic types.
+
+        If false, the discriptor
+        1. assumes total number of atoms of each atom type aligned across frames;
+        2. requires a neighbor list that distinguishes different atomic types.
+
+        """
+        return True
+
+    @property
+    def dim_out(self):
+        """Returns the output dimension of this descriptor."""
+        return self.filter_neuron[-1] * self.axis_neuron
+
+    @property
+    def dim_in(self):
+        """Returns the atomic input dimension of this descriptor."""
+        return self.tebd_dim
+
+    @property
+    def dim_emb(self):
+        """Returns the output dimension of embedding."""
+        return self.get_dim_emb()
+
+    def compute_input_stats(
+        self,
+        merged: Union[Callable[[], List[dict]], List[dict]],
+        path: Optional[DPPath] = None,
+    ):
+        """
+        Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data.
+
+        Parameters
+        ----------
+        merged : Union[Callable[[], List[dict]], List[dict]]
+            - List[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], List[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        path : Optional[DPPath]
+            The path to the stat file.
+
+        """
+        env_mat_stat = EnvMatStatSe(self)
+        if path is not None:
+            path = path / env_mat_stat.get_hash()
+        if path is None or not path.is_dir():
+            if callable(merged):
+                # only get data for once
+                sampled = merged()
+            else:
+                sampled = merged
+        else:
+            sampled = []
+        env_mat_stat.load_or_compute_stats(sampled, path)
+        self.stats = env_mat_stat.stats
+        mean, stddev = env_mat_stat()
+        if not self.set_davg_zero:
+            self.mean.copy_(torch.tensor(mean, device=env.DEVICE))
+        self.stddev.copy_(torch.tensor(stddev, device=env.DEVICE))
+
+    def get_stats(self) -> Dict[str, StatItem]:
+        """Get the statistics of the descriptor."""
+        if self.stats is None:
+            raise RuntimeError(
+                "The statistics of the descriptor has not been computed."
+            )
+        return self.stats
+
+    def reinit_exclude(
+        self,
+        exclude_types: List[Tuple[int, int]] = [],
+    ):
+        self.exclude_types = exclude_types
+        self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types)
+
+    def forward(
+        self,
+        nlist: torch.Tensor,
+        extended_coord: torch.Tensor,
+        extended_atype: torch.Tensor,
+        extended_atype_embd: Optional[torch.Tensor] = None,
+        mapping: Optional[torch.Tensor] = None,
+    ) -> List[torch.Tensor]:
+        """Calculate decoded embedding for each atom.
+
+        Args:
+        - coord: Tell atom coordinates with shape [nframes, natoms[1]*3].
+        - atype: Tell atom types with shape [nframes, natoms[1]].
+        - natoms: Tell atom count and element count. Its shape is [2+self.ntypes].
+        - box: Tell simulation box with shape [nframes, 9].
+
+        Returns
+        -------
+        - result: descriptor with shape [nframes, nloc, self.filter_neuron[-1] * self.axis_neuron].
+        - ret: environment matrix with shape [nframes, nloc, self.neei, out_size]
+        """
+        del mapping
+        assert extended_atype_embd is not None
+        nframes, nloc, nnei = nlist.shape
+        atype = extended_atype[:, :nloc]
+        nb = nframes
+        nall = extended_coord.view(nb, -1, 3).shape[1]
+        dmatrix, diff, sw = prod_env_mat(
+            extended_coord,
+            nlist,
+            atype,
+            self.mean,
+            self.stddev,
+            self.rcut,
+            self.rcut_smth,
+            protection=self.env_protection,
+        )
+        # [nfxnlocxnnei, self.ndescrpt]
+        dmatrix = dmatrix.view(-1, self.ndescrpt)
+        nlist_mask = nlist != -1
+        nlist[nlist == -1] = 0
+        sw = torch.squeeze(sw, -1)
+        # beyond the cutoff sw should be 0.0
+        sw = sw.masked_fill(~nlist_mask, 0.0)
+        # nf x nloc x nt -> nf x nloc x nnei x nt
+        atype_tebd = extended_atype_embd[:, :nloc, :]
+        atype_tebd_nnei = atype_tebd.unsqueeze(2).expand(-1, -1, self.nnei, -1)
+        # nf x nall x nt
+        nt = extended_atype_embd.shape[-1]
+        atype_tebd_ext = extended_atype_embd
+        # nb x (nloc x nnei) x nt
+        index = nlist.reshape(nb, nloc * nnei).unsqueeze(-1).expand(-1, -1, nt)
+        # nb x (nloc x nnei) x nt
+        atype_tebd_nlist = torch.gather(atype_tebd_ext, dim=1, index=index)
+        # nb x nloc x nnei x nt
+        atype_tebd_nlist = atype_tebd_nlist.view(nb, nloc, nnei, nt)
+        ret = self.filter_layers[0](
+            dmatrix,
+            atype_tebd=atype_tebd_nnei,
+            nlist_tebd=atype_tebd_nlist,
+        )  # shape is [nframes*nall, self.neei, out_size]
+        input_r = torch.nn.functional.normalize(
+            dmatrix.reshape(-1, self.nnei, 4)[:, :, 1:4], dim=-1
+        )
+        ret = self.dpa1_attention(
+            ret, nlist_mask, input_r=input_r, sw=sw
+        )  # shape is [nframes*nloc, self.neei, out_size]
+        inputs_reshape = dmatrix.view(-1, self.nnei, 4).permute(
+            0, 2, 1
+        )  # shape is [nframes*natoms[0], 4, self.neei]
+        xyz_scatter = torch.matmul(
+            inputs_reshape, ret
+        )  # shape is [nframes*natoms[0], 4, out_size]
+        xyz_scatter = xyz_scatter / self.nnei
+        xyz_scatter_1 = xyz_scatter.permute(0, 2, 1)
+        rot_mat = xyz_scatter_1[:, :, 1:4]
+        xyz_scatter_2 = xyz_scatter[:, :, 0 : self.axis_neuron]
+        result = torch.matmul(
+            xyz_scatter_1, xyz_scatter_2
+        )  # shape is [nframes*nloc, self.filter_neuron[-1], self.axis_neuron]
+        return (
+            result.view(-1, nloc, self.filter_neuron[-1] * self.axis_neuron),
+            ret.view(-1, nloc, self.nnei, self.filter_neuron[-1]),
+            dmatrix.view(-1, nloc, self.nnei, 4)[..., 1:],
+            rot_mat.view(-1, nloc, self.filter_neuron[-1], 3),
+            sw,
+        )
+
+
+def analyze_descrpt(matrix, ndescrpt, natoms, mixed_types=False, real_atype=None):
+    """Collect avg, square avg and count of descriptors in a batch."""
+    ntypes = natoms.shape[1] - 2
+    if not mixed_types:
+        sysr = []
+        sysa = []
+        sysn = []
+        sysr2 = []
+        sysa2 = []
+        start_index = 0
+        for type_i in range(ntypes):
+            end_index = start_index + natoms[0, 2 + type_i]
+            dd = matrix[:, start_index:end_index]
+            start_index = end_index
+            dd = np.reshape(
+                dd, [-1, 4]
+            )  # Shape is [nframes*natoms[2+type_id]*self.nnei, 4]
+            ddr = dd[:, :1]
+            dda = dd[:, 1:]
+            sumr = np.sum(ddr)
+            suma = np.sum(dda) / 3.0
+            sumn = dd.shape[0]  # Value is nframes*natoms[2+type_id]*self.nnei
+            sumr2 = np.sum(np.multiply(ddr, ddr))
+            suma2 = np.sum(np.multiply(dda, dda)) / 3.0
+            sysr.append(sumr)
+            sysa.append(suma)
+            sysn.append(sumn)
+            sysr2.append(sumr2)
+            sysa2.append(suma2)
+    else:
+        sysr = [0.0 for i in range(ntypes)]
+        sysa = [0.0 for i in range(ntypes)]
+        sysn = [0 for i in range(ntypes)]
+        sysr2 = [0.0 for i in range(ntypes)]
+        sysa2 = [0.0 for i in range(ntypes)]
+        for frame_item in range(matrix.shape[0]):
+            dd_ff = matrix[frame_item]
+            atype_frame = real_atype[frame_item]
+            for type_i in range(ntypes):
+                type_idx = atype_frame == type_i
+                dd = dd_ff[type_idx]
+                dd = np.reshape(dd, [-1, 4])  # typen_atoms * nnei, 4
+                ddr = dd[:, :1]
+                dda = dd[:, 1:]
+                sumr = np.sum(ddr)
+                suma = np.sum(dda) / 3.0
+                sumn = dd.shape[0]
+                sumr2 = np.sum(np.multiply(ddr, ddr))
+                suma2 = np.sum(np.multiply(dda, dda)) / 3.0
+                sysr[type_i] += sumr
+                sysa[type_i] += suma
+                sysn[type_i] += sumn
+                sysr2[type_i] += sumr2
+                sysa2[type_i] += suma2
+
+    return sysr, sysr2, sysa, sysa2, sysn
diff --git a/deepmd/pt/model/descriptor/se_r.py b/deepmd/pt/model/descriptor/se_r.py
new file mode 100644
index 0000000000..ff922e0649
--- /dev/null
+++ b/deepmd/pt/model/descriptor/se_r.py
@@ -0,0 +1,416 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Tuple,
+    Union,
+)
+
+import numpy as np
+import torch
+
+from deepmd.dpmodel.utils import EnvMat as DPEnvMat
+from deepmd.pt.model.descriptor import (
+    prod_env_mat,
+)
+from deepmd.pt.model.network.mlp import (
+    EmbeddingNet,
+    NetworkCollection,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.env import (
+    PRECISION_DICT,
+    RESERVED_PRECISON_DICT,
+)
+from deepmd.pt.utils.env_mat_stat import (
+    EnvMatStatSe,
+)
+from deepmd.pt.utils.exclude_mask import (
+    PairExcludeMask,
+)
+from deepmd.pt.utils.update_sel import (
+    UpdateSel,
+)
+from deepmd.utils.env_mat_stat import (
+    StatItem,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+from .base_descriptor import (
+    BaseDescriptor,
+)
+
+
+@BaseDescriptor.register("se_e2_r")
+@BaseDescriptor.register("se_r")
+class DescrptSeR(BaseDescriptor, torch.nn.Module):
+    def __init__(
+        self,
+        rcut,
+        rcut_smth,
+        sel,
+        neuron=[25, 50, 100],
+        set_davg_zero: bool = False,
+        activation_function: str = "tanh",
+        precision: str = "float64",
+        resnet_dt: bool = False,
+        exclude_types: List[Tuple[int, int]] = [],
+        env_protection: float = 0.0,
+        old_impl: bool = False,
+        trainable: bool = True,
+        **kwargs,
+    ):
+        super().__init__()
+        self.rcut = rcut
+        self.rcut_smth = rcut_smth
+        self.neuron = neuron
+        self.filter_neuron = self.neuron
+        self.set_davg_zero = set_davg_zero
+        self.activation_function = activation_function
+        self.precision = precision
+        self.prec = PRECISION_DICT[self.precision]
+        self.resnet_dt = resnet_dt
+        self.old_impl = False  # this does not support old implementation.
+        self.exclude_types = exclude_types
+        self.ntypes = len(sel)
+        # order matters, placed after the assignment of self.ntypes
+        self.reinit_exclude(exclude_types)
+        self.env_protection = env_protection
+
+        self.sel = sel
+        self.sec = torch.tensor(
+            np.append([0], np.cumsum(self.sel)), dtype=int, device=env.DEVICE
+        )
+        self.split_sel = self.sel
+        self.nnei = sum(sel)
+        self.ndescrpt = self.nnei * 1
+
+        wanted_shape = (self.ntypes, self.nnei, 1)
+        mean = torch.zeros(wanted_shape, dtype=self.prec, device=env.DEVICE)
+        stddev = torch.ones(wanted_shape, dtype=self.prec, device=env.DEVICE)
+        self.register_buffer("mean", mean)
+        self.register_buffer("stddev", stddev)
+        self.filter_layers_old = None
+        self.filter_layers = None
+
+        filter_layers = NetworkCollection(
+            ndim=1, ntypes=len(sel), network_type="embedding_network"
+        )
+        # TODO: ndim=2 if type_one_side=False
+        for ii in range(self.ntypes):
+            filter_layers[(ii,)] = EmbeddingNet(
+                1,
+                self.filter_neuron,
+                activation_function=self.activation_function,
+                precision=self.precision,
+                resnet_dt=self.resnet_dt,
+            )
+        self.filter_layers = filter_layers
+        self.stats = None
+        # set trainable
+        for param in self.parameters():
+            param.requires_grad = trainable
+
+    def get_rcut(self) -> float:
+        """Returns the cut-off radius."""
+        return self.rcut
+
+    def get_nsel(self) -> int:
+        """Returns the number of selected atoms in the cut-off radius."""
+        return sum(self.sel)
+
+    def get_sel(self) -> List[int]:
+        """Returns the number of selected atoms for each type."""
+        return self.sel
+
+    def get_ntypes(self) -> int:
+        """Returns the number of element types."""
+        return self.ntypes
+
+    def get_dim_out(self) -> int:
+        """Returns the output dimension."""
+        return self.neuron[-1]
+
+    def get_dim_emb(self) -> int:
+        """Returns the output dimension."""
+        raise NotImplementedError
+
+    def get_dim_in(self) -> int:
+        """Returns the input dimension."""
+        return 0
+
+    def mixed_types(self) -> bool:
+        """If true, the discriptor
+        1. assumes total number of atoms aligned across frames;
+        2. requires a neighbor list that does not distinguish different atomic types.
+
+        If false, the discriptor
+        1. assumes total number of atoms of each atom type aligned across frames;
+        2. requires a neighbor list that distinguishes different atomic types.
+
+        """
+        return False
+
+    def share_params(self, base_class, shared_level, resume=False):
+        """
+        Share the parameters of self to the base_class with shared_level during multitask training.
+        If not start from checkpoint (resume is False),
+        some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes.
+        """
+        assert (
+            self.__class__ == base_class.__class__
+        ), "Only descriptors of the same type can share params!"
+        # For SeR descriptors, the user-defined share-level
+        # shared_level: 0
+        if shared_level == 0:
+            # link buffers
+            if hasattr(self, "mean") and not resume:
+                # in case of change params during resume
+                base_env = EnvMatStatSe(base_class)
+                base_env.stats = base_class.stats
+                for kk in base_class.get_stats():
+                    base_env.stats[kk] += self.get_stats()[kk]
+                mean, stddev = base_env()
+                if not base_class.set_davg_zero:
+                    base_class.mean.copy_(torch.tensor(mean, device=env.DEVICE))
+                base_class.stddev.copy_(torch.tensor(stddev, device=env.DEVICE))
+                self.mean = base_class.mean
+                self.stddev = base_class.stddev
+            # self.load_state_dict(base_class.state_dict()) # this does not work, because it only inits the model
+            # the following will successfully link all the params except buffers
+            for item in self._modules:
+                self._modules[item] = base_class._modules[item]
+        # Other shared levels
+        else:
+            raise NotImplementedError
+
+    def compute_input_stats(
+        self,
+        merged: Union[Callable[[], List[dict]], List[dict]],
+        path: Optional[DPPath] = None,
+    ):
+        """
+        Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data.
+
+        Parameters
+        ----------
+        merged : Union[Callable[[], List[dict]], List[dict]]
+            - List[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], List[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        path : Optional[DPPath]
+            The path to the stat file.
+
+        """
+        env_mat_stat = EnvMatStatSe(self)
+        if path is not None:
+            path = path / env_mat_stat.get_hash()
+        if path is None or not path.is_dir():
+            if callable(merged):
+                # only get data for once
+                sampled = merged()
+            else:
+                sampled = merged
+        else:
+            sampled = []
+        env_mat_stat.load_or_compute_stats(sampled, path)
+        self.stats = env_mat_stat.stats
+        mean, stddev = env_mat_stat()
+        if not self.set_davg_zero:
+            self.mean.copy_(torch.tensor(mean, device=env.DEVICE))
+        self.stddev.copy_(torch.tensor(stddev, device=env.DEVICE))
+
+    def get_stats(self) -> Dict[str, StatItem]:
+        """Get the statistics of the descriptor."""
+        if self.stats is None:
+            raise RuntimeError(
+                "The statistics of the descriptor has not been computed."
+            )
+        return self.stats
+
+    def __setitem__(self, key, value):
+        if key in ("avg", "data_avg", "davg"):
+            self.mean = value
+        elif key in ("std", "data_std", "dstd"):
+            self.stddev = value
+        else:
+            raise KeyError(key)
+
+    def __getitem__(self, key):
+        if key in ("avg", "data_avg", "davg"):
+            return self.mean
+        elif key in ("std", "data_std", "dstd"):
+            return self.stddev
+        else:
+            raise KeyError(key)
+
+    def reinit_exclude(
+        self,
+        exclude_types: List[Tuple[int, int]] = [],
+    ):
+        self.exclude_types = exclude_types
+        self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types)
+
+    def forward(
+        self,
+        coord_ext: torch.Tensor,
+        atype_ext: torch.Tensor,
+        nlist: torch.Tensor,
+        mapping: Optional[torch.Tensor] = None,
+    ):
+        """Compute the descriptor.
+
+        Parameters
+        ----------
+        coord_ext
+            The extended coordinates of atoms. shape: nf x (nallx3)
+        atype_ext
+            The extended aotm types. shape: nf x nall
+        nlist
+            The neighbor list. shape: nf x nloc x nnei
+        mapping
+            The index mapping, not required by this descriptor.
+
+        Returns
+        -------
+        descriptor
+            The descriptor. shape: nf x nloc x (ng x axis_neuron)
+        gr
+            The rotationally equivariant and permutationally invariant single particle
+            representation. shape: nf x nloc x ng x 3
+        g2
+            The rotationally invariant pair-partical representation.
+            this descriptor returns None
+        h2
+            The rotationally equivariant pair-partical representation.
+            this descriptor returns None
+        sw
+            The smooth switch function.
+
+        """
+        del mapping
+        nloc = nlist.shape[1]
+        atype = atype_ext[:, :nloc]
+        dmatrix, diff, sw = prod_env_mat(
+            coord_ext,
+            nlist,
+            atype,
+            self.mean,
+            self.stddev,
+            self.rcut,
+            self.rcut_smth,
+            True,
+            protection=self.env_protection,
+        )
+
+        assert self.filter_layers is not None
+        dmatrix = dmatrix.view(-1, self.nnei, 1)
+        dmatrix = dmatrix.to(dtype=self.prec)
+        nfnl = dmatrix.shape[0]
+        # pre-allocate a shape to pass jit
+        xyz_scatter = torch.zeros(
+            [nfnl, 1, self.filter_neuron[-1]], dtype=self.prec, device=coord_ext.device
+        )
+
+        # nfnl x nnei
+        exclude_mask = self.emask(nlist, atype_ext).view(nfnl, -1)
+        for ii, ll in enumerate(self.filter_layers.networks):
+            # nfnl x nt
+            mm = exclude_mask[:, self.sec[ii] : self.sec[ii + 1]]
+            # nfnl x nt x 1
+            ss = dmatrix[:, self.sec[ii] : self.sec[ii + 1], :]
+            ss = ss * mm[:, :, None]
+            # nfnl x nt x ng
+            gg = ll.forward(ss)
+            gg = torch.mean(gg, dim=1).unsqueeze(1)
+            xyz_scatter += gg * (self.sel[ii] / self.nnei)
+
+        res_rescale = 1.0 / 5.0
+        result = xyz_scatter * res_rescale
+        result = result.view(-1, nloc, self.filter_neuron[-1])
+        return (
+            result.to(dtype=env.GLOBAL_PT_FLOAT_PRECISION),
+            None,
+            None,
+            None,
+            sw,
+        )
+
+    def set_stat_mean_and_stddev(
+        self,
+        mean: torch.Tensor,
+        stddev: torch.Tensor,
+    ) -> None:
+        self.mean = mean
+        self.stddev = stddev
+
+    def serialize(self) -> dict:
+        return {
+            "@class": "Descriptor",
+            "type": "se_r",
+            "@version": 1,
+            "rcut": self.rcut,
+            "rcut_smth": self.rcut_smth,
+            "sel": self.sel,
+            "neuron": self.neuron,
+            "resnet_dt": self.resnet_dt,
+            "set_davg_zero": self.set_davg_zero,
+            "activation_function": self.activation_function,
+            # make deterministic
+            "precision": RESERVED_PRECISON_DICT[self.prec],
+            "embeddings": self.filter_layers.serialize(),
+            "env_mat": DPEnvMat(self.rcut, self.rcut_smth).serialize(),
+            "exclude_types": self.exclude_types,
+            "env_protection": self.env_protection,
+            "@variables": {
+                "davg": self["davg"].detach().cpu().numpy(),
+                "dstd": self["dstd"].detach().cpu().numpy(),
+            },
+            ## to be updated when the options are supported.
+            "trainable": True,
+            "type_one_side": True,
+            "spin": None,
+        }
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "DescrptSeR":
+        data = data.copy()
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        variables = data.pop("@variables")
+        embeddings = data.pop("embeddings")
+        env_mat = data.pop("env_mat")
+        obj = cls(**data)
+
+        def t_cvt(xx):
+            return torch.tensor(xx, dtype=obj.prec, device=env.DEVICE)
+
+        obj["davg"] = t_cvt(variables["davg"])
+        obj["dstd"] = t_cvt(variables["dstd"])
+        obj.filter_layers = NetworkCollection.deserialize(embeddings)
+        return obj
+
+    @classmethod
+    def update_sel(cls, global_jdata: dict, local_jdata: dict):
+        """Update the selection and perform neighbor statistics.
+
+        Parameters
+        ----------
+        global_jdata : dict
+            The global data, containing the training section
+        local_jdata : dict
+            The local data refer to the current class
+        """
+        local_jdata_cpy = local_jdata.copy()
+        return UpdateSel().update_one_sel(global_jdata, local_jdata_cpy, False)
diff --git a/deepmd/pt/model/model/__init__.py b/deepmd/pt/model/model/__init__.py
new file mode 100644
index 0000000000..1675215d7b
--- /dev/null
+++ b/deepmd/pt/model/model/__init__.py
@@ -0,0 +1,194 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""The model that takes the coordinates, cell and atom types as input
+and predicts some property. The models are automatically generated from
+atomic models by the `deepmd.dpmodel.make_model` method.
+
+The `make_model` method does the reduction, auto-differentiation and
+communication of the atomic properties according to output variable
+definition `deepmd.dpmodel.OutputVariableDef`.
+
+All models should be inherited from :class:`deepmd.pt.model.model.model.BaseModel`.
+Models generated by `make_model` have already done it.
+"""
+
+import copy
+import json
+
+import numpy as np
+
+from deepmd.pt.model.atomic_model import (
+    DPAtomicModel,
+    PairTabAtomicModel,
+)
+from deepmd.pt.model.descriptor.base_descriptor import (
+    BaseDescriptor,
+)
+from deepmd.pt.model.task import (
+    BaseFitting,
+)
+from deepmd.utils.spin import (
+    Spin,
+)
+
+from .dp_model import (
+    DPModel,
+)
+from .dp_zbl_model import (
+    DPZBLModel,
+)
+from .ener_model import (
+    EnergyModel,
+)
+from .frozen import (
+    FrozenModel,
+)
+from .make_hessian_model import (
+    make_hessian_model,
+)
+from .make_model import (
+    make_model,
+)
+from .model import (
+    BaseModel,
+)
+from .spin_model import (
+    SpinEnergyModel,
+    SpinModel,
+)
+
+
+def get_spin_model(model_params):
+    model_params = copy.deepcopy(model_params)
+    if not model_params["spin"]["use_spin"] or isinstance(
+        model_params["spin"]["use_spin"][0], int
+    ):
+        use_spin = np.full(len(model_params["type_map"]), False)
+        use_spin[model_params["spin"]["use_spin"]] = True
+        model_params["spin"]["use_spin"] = use_spin.tolist()
+    # include virtual spin and placeholder types
+    model_params["type_map"] += [item + "_spin" for item in model_params["type_map"]]
+    spin = Spin(
+        use_spin=model_params["spin"]["use_spin"],
+        virtual_scale=model_params["spin"]["virtual_scale"],
+    )
+    pair_exclude_types = spin.get_pair_exclude_types(
+        exclude_types=model_params.get("pair_exclude_types", None)
+    )
+    model_params["pair_exclude_types"] = pair_exclude_types
+    # for descriptor data stat
+    model_params["descriptor"]["exclude_types"] = pair_exclude_types
+    atom_exclude_types = spin.get_atom_exclude_types(
+        exclude_types=model_params.get("atom_exclude_types", None)
+    )
+    model_params["atom_exclude_types"] = atom_exclude_types
+    if (
+        "env_protection" not in model_params["descriptor"]
+        or model_params["descriptor"]["env_protection"] == 0.0
+    ):
+        model_params["descriptor"]["env_protection"] = 1e-6
+    if model_params["descriptor"]["type"] in ["se_e2_a"]:
+        # only expand sel for se_e2_a
+        model_params["descriptor"]["sel"] += model_params["descriptor"]["sel"]
+    backbone_model = get_standard_model(model_params)
+    return SpinEnergyModel(backbone_model=backbone_model, spin=spin)
+
+
+def get_zbl_model(model_params):
+    model_params = copy.deepcopy(model_params)
+    ntypes = len(model_params["type_map"])
+    # descriptor
+    model_params["descriptor"]["ntypes"] = ntypes
+    descriptor = BaseDescriptor(**model_params["descriptor"])
+    # fitting
+    fitting_net = model_params.get("fitting_net", None)
+    fitting_net["type"] = fitting_net.get("type", "ener")
+    fitting_net["ntypes"] = descriptor.get_ntypes()
+    fitting_net["mixed_types"] = descriptor.mixed_types()
+    fitting_net["embedding_width"] = descriptor.get_dim_out()
+    fitting_net["dim_descrpt"] = descriptor.get_dim_out()
+    grad_force = "direct" not in fitting_net["type"]
+    if not grad_force:
+        fitting_net["out_dim"] = descriptor.get_dim_emb()
+        if "ener" in fitting_net["type"]:
+            fitting_net["return_energy"] = True
+    fitting = BaseFitting(**fitting_net)
+    dp_model = DPAtomicModel(descriptor, fitting, type_map=model_params["type_map"])
+    # pairtab
+    filepath = model_params["use_srtab"]
+    pt_model = PairTabAtomicModel(
+        filepath,
+        model_params["descriptor"]["rcut"],
+        model_params["descriptor"]["sel"],
+        type_map=model_params["type_map"],
+    )
+
+    rmin = model_params["sw_rmin"]
+    rmax = model_params["sw_rmax"]
+    atom_exclude_types = model_params.get("atom_exclude_types", [])
+    pair_exclude_types = model_params.get("pair_exclude_types", [])
+    return DPZBLModel(
+        dp_model,
+        pt_model,
+        rmin,
+        rmax,
+        type_map=model_params["type_map"],
+        atom_exclude_types=atom_exclude_types,
+        pair_exclude_types=pair_exclude_types,
+    )
+
+
+def get_standard_model(model_params):
+    model_params = copy.deepcopy(model_params)
+    ntypes = len(model_params["type_map"])
+    # descriptor
+    model_params["descriptor"]["ntypes"] = ntypes
+    descriptor = BaseDescriptor(**model_params["descriptor"])
+    # fitting
+    fitting_net = model_params.get("fitting_net", None)
+    fitting_net["type"] = fitting_net.get("type", "ener")
+    fitting_net["ntypes"] = descriptor.get_ntypes()
+    fitting_net["mixed_types"] = descriptor.mixed_types()
+    if fitting_net["type"] in ["dipole", "polar"]:
+        fitting_net["embedding_width"] = descriptor.get_dim_emb()
+    fitting_net["dim_descrpt"] = descriptor.get_dim_out()
+    grad_force = "direct" not in fitting_net["type"]
+    if not grad_force:
+        fitting_net["out_dim"] = descriptor.get_dim_emb()
+        if "ener" in fitting_net["type"]:
+            fitting_net["return_energy"] = True
+    fitting = BaseFitting(**fitting_net)
+    atom_exclude_types = model_params.get("atom_exclude_types", [])
+    pair_exclude_types = model_params.get("pair_exclude_types", [])
+
+    model = DPModel(
+        descriptor=descriptor,
+        fitting=fitting,
+        type_map=model_params["type_map"],
+        atom_exclude_types=atom_exclude_types,
+        pair_exclude_types=pair_exclude_types,
+    )
+    model.model_def_script = json.dumps(model_params)
+    return model
+
+
+def get_model(model_params):
+    if "spin" in model_params:
+        return get_spin_model(model_params)
+    elif "use_srtab" in model_params:
+        return get_zbl_model(model_params)
+    else:
+        return get_standard_model(model_params)
+
+
+__all__ = [
+    "BaseModel",
+    "get_model",
+    "DPModel",
+    "EnergyModel",
+    "FrozenModel",
+    "SpinModel",
+    "SpinEnergyModel",
+    "DPZBLModel",
+    "make_model",
+    "make_hessian_model",
+]
diff --git a/deepmd/pt/model/model/dipole_model.py b/deepmd/pt/model/model/dipole_model.py
new file mode 100644
index 0000000000..45b120771b
--- /dev/null
+++ b/deepmd/pt/model/model/dipole_model.py
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Dict,
+    Optional,
+)
+
+import torch
+
+from .dp_model import (
+    DPModel,
+)
+
+
+class DipoleModel(DPModel):
+    model_type = "dipole"
+
+    def __init__(
+        self,
+        *args,
+        **kwargs,
+    ):
+        super().__init__(*args, **kwargs)
+
+    def forward(
+        self,
+        coord,
+        atype,
+        box: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        do_atomic_virial: bool = False,
+    ) -> Dict[str, torch.Tensor]:
+        model_ret = self.forward_common(
+            coord,
+            atype,
+            box,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+        )
+        if self.get_fitting_net() is not None:
+            model_predict = {}
+            model_predict["dipole"] = model_ret["dipole"]
+            model_predict["global_dipole"] = model_ret["dipole_redu"]
+            if self.do_grad_r("dipole"):
+                model_predict["force"] = model_ret["dipole_derv_r"].squeeze(-2)
+            if self.do_grad_c("dipole"):
+                model_predict["virial"] = model_ret["dipole_derv_c_redu"].squeeze(-2)
+                if do_atomic_virial:
+                    model_predict["atom_virial"] = model_ret["dipole_derv_c"].squeeze(
+                        -3
+                    )
+            if "mask" in model_ret:
+                model_predict["mask"] = model_ret["mask"]
+        else:
+            model_predict = model_ret
+            model_predict["updated_coord"] += coord
+        return model_predict
+
+    @torch.jit.export
+    def forward_lower(
+        self,
+        extended_coord,
+        extended_atype,
+        nlist,
+        mapping: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        do_atomic_virial: bool = False,
+    ):
+        model_ret = self.forward_common_lower(
+            extended_coord,
+            extended_atype,
+            nlist,
+            mapping,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+        )
+        if self.get_fitting_net() is not None:
+            model_predict = {}
+            model_predict["dipole"] = model_ret["dipole"]
+            model_predict["global_dipole"] = model_ret["dipole_redu"]
+            if self.do_grad_r("dipole"):
+                model_predict["force"] = model_ret["dipole_derv_r"].squeeze(-2)
+            if self.do_grad_c("dipole"):
+                model_predict["virial"] = model_ret["dipole_derv_c_redu"].squeeze(-2)
+                if do_atomic_virial:
+                    model_predict["atom_virial"] = model_ret["dipole_derv_c"].squeeze(
+                        -3
+                    )
+        else:
+            model_predict = model_ret
+        return model_predict
diff --git a/deepmd/pt/model/model/dos_model.py b/deepmd/pt/model/model/dos_model.py
new file mode 100644
index 0000000000..e043700bee
--- /dev/null
+++ b/deepmd/pt/model/model/dos_model.py
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Dict,
+    Optional,
+)
+
+import torch
+
+from .dp_model import (
+    DPModel,
+)
+
+
+class DOSModel(DPModel):
+    model_type = "dos"
+
+    def __init__(
+        self,
+        *args,
+        **kwargs,
+    ):
+        super().__init__(*args, **kwargs)
+
+    def forward(
+        self,
+        coord,
+        atype,
+        box: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        do_atomic_virial: bool = False,
+    ) -> Dict[str, torch.Tensor]:
+        model_ret = self.forward_common(
+            coord,
+            atype,
+            box,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+        )
+        if self.get_fitting_net() is not None:
+            model_predict = {}
+            model_predict["atom_dos"] = model_ret["dos"]
+            model_predict["dos"] = model_ret["dos_redu"]
+
+            if "mask" in model_ret:
+                model_predict["mask"] = model_ret["mask"]
+        else:
+            model_predict = model_ret
+            model_predict["updated_coord"] += coord
+        return model_predict
+
+    @torch.jit.export
+    def get_numb_dos(self) -> int:
+        """Get the number of  DOS for DOSFittingNet."""
+        return self.get_fitting_net().dim_out
+
+    @torch.jit.export
+    def forward_lower(
+        self,
+        extended_coord,
+        extended_atype,
+        nlist,
+        mapping: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        do_atomic_virial: bool = False,
+    ):
+        model_ret = self.forward_common_lower(
+            extended_coord,
+            extended_atype,
+            nlist,
+            mapping,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+        )
+        if self.get_fitting_net() is not None:
+            model_predict = {}
+            model_predict["atom_dos"] = model_ret["dos"]
+            model_predict["dos"] = model_ret["dos_redu"]
+
+        else:
+            model_predict = model_ret
+        return model_predict
diff --git a/deepmd/pt/model/model/dp_model.py b/deepmd/pt/model/model/dp_model.py
new file mode 100644
index 0000000000..d7b3c4f4e2
--- /dev/null
+++ b/deepmd/pt/model/model/dp_model.py
@@ -0,0 +1,124 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Dict,
+    Optional,
+)
+
+import torch
+
+from deepmd.pt.model.atomic_model import (
+    DPAtomicModel,
+)
+from deepmd.pt.model.descriptor.base_descriptor import (
+    BaseDescriptor,
+)
+from deepmd.pt.model.model.model import (
+    BaseModel,
+)
+from deepmd.pt.model.task.dipole import (
+    DipoleFittingNet,
+)
+from deepmd.pt.model.task.dos import (
+    DOSFittingNet,
+)
+from deepmd.pt.model.task.ener import (
+    EnergyFittingNet,
+    EnergyFittingNetDirect,
+)
+from deepmd.pt.model.task.polarizability import (
+    PolarFittingNet,
+)
+
+from .make_model import (
+    make_model,
+)
+
+
+@BaseModel.register("standard")
+class DPModel(make_model(DPAtomicModel)):
+    def __new__(
+        cls,
+        descriptor=None,
+        fitting=None,
+        *args,
+        # disallow positional atomic_model_
+        atomic_model_: Optional[DPAtomicModel] = None,
+        **kwargs,
+    ):
+        from deepmd.pt.model.model.dipole_model import (
+            DipoleModel,
+        )
+        from deepmd.pt.model.model.dos_model import (
+            DOSModel,
+        )
+        from deepmd.pt.model.model.ener_model import (
+            EnergyModel,
+        )
+        from deepmd.pt.model.model.polar_model import (
+            PolarModel,
+        )
+
+        if atomic_model_ is not None:
+            fitting = atomic_model_.fitting_net
+        else:
+            assert fitting is not None, "fitting network is not provided"
+
+        # according to the fitting network to decide the type of the model
+        if cls is DPModel:
+            # map fitting to model
+            if isinstance(fitting, EnergyFittingNet) or isinstance(
+                fitting, EnergyFittingNetDirect
+            ):
+                cls = EnergyModel
+            elif isinstance(fitting, DipoleFittingNet):
+                cls = DipoleModel
+            elif isinstance(fitting, PolarFittingNet):
+                cls = PolarModel
+            elif isinstance(fitting, DOSFittingNet):
+                cls = DOSModel
+            # else: unknown fitting type, fall back to DPModel
+        return super().__new__(cls)
+
+    @classmethod
+    def update_sel(cls, global_jdata: dict, local_jdata: dict):
+        """Update the selection and perform neighbor statistics.
+
+        Parameters
+        ----------
+        global_jdata : dict
+            The global data, containing the training section
+        local_jdata : dict
+            The local data refer to the current class
+        """
+        local_jdata_cpy = local_jdata.copy()
+        local_jdata_cpy["descriptor"] = BaseDescriptor.update_sel(
+            global_jdata, local_jdata["descriptor"]
+        )
+        return local_jdata_cpy
+
+    def get_fitting_net(self):
+        """Get the fitting network."""
+        return self.atomic_model.fitting_net
+
+    def get_descriptor(self):
+        """Get the descriptor."""
+        return self.atomic_model.descriptor
+
+    def forward(
+        self,
+        coord,
+        atype,
+        box: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        do_atomic_virial: bool = False,
+    ) -> Dict[str, torch.Tensor]:
+        # directly call the forward_common method when no specific transform rule
+        return self.forward_common(
+            coord,
+            atype,
+            box,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+        )
diff --git a/deepmd/pt/model/model/dp_zbl_model.py b/deepmd/pt/model/model/dp_zbl_model.py
new file mode 100644
index 0000000000..bbc82b8d77
--- /dev/null
+++ b/deepmd/pt/model/model/dp_zbl_model.py
@@ -0,0 +1,122 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Dict,
+    Optional,
+)
+
+import torch
+
+from deepmd.dpmodel.model.dp_model import (
+    DPModel,
+)
+from deepmd.pt.model.atomic_model import (
+    DPZBLLinearEnergyAtomicModel,
+)
+from deepmd.pt.model.model.model import (
+    BaseModel,
+)
+
+from .make_model import (
+    make_model,
+)
+
+DPZBLModel_ = make_model(DPZBLLinearEnergyAtomicModel)
+
+
+@BaseModel.register("zbl")
+class DPZBLModel(DPZBLModel_):
+    model_type = "ener"
+
+    def __init__(
+        self,
+        *args,
+        **kwargs,
+    ):
+        super().__init__(*args, **kwargs)
+
+    def forward(
+        self,
+        coord,
+        atype,
+        box: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        do_atomic_virial: bool = False,
+    ) -> Dict[str, torch.Tensor]:
+        model_ret = self.forward_common(
+            coord,
+            atype,
+            box,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+        )
+
+        model_predict = {}
+        model_predict["atom_energy"] = model_ret["energy"]
+        model_predict["energy"] = model_ret["energy_redu"]
+        if self.do_grad_r("energy"):
+            model_predict["force"] = model_ret["energy_derv_r"].squeeze(-2)
+        if self.do_grad_c("energy"):
+            model_predict["virial"] = model_ret["energy_derv_c_redu"].squeeze(-2)
+            if do_atomic_virial:
+                model_predict["atom_virial"] = model_ret["energy_derv_c"].squeeze(-3)
+        else:
+            model_predict["force"] = model_ret["dforce"]
+        if "mask" in model_ret:
+            model_predict["mask"] = model_ret["mask"]
+        return model_predict
+
+    @torch.jit.export
+    def forward_lower(
+        self,
+        extended_coord,
+        extended_atype,
+        nlist,
+        mapping: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        do_atomic_virial: bool = False,
+    ):
+        model_ret = self.forward_common_lower(
+            extended_coord,
+            extended_atype,
+            nlist,
+            mapping=mapping,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+        )
+
+        model_predict = {}
+        model_predict["atom_energy"] = model_ret["energy"]
+        model_predict["energy"] = model_ret["energy_redu"]
+        if self.do_grad_r("energy"):
+            model_predict["extended_force"] = model_ret["energy_derv_r"].squeeze(-2)
+        if self.do_grad_c("energy"):
+            model_predict["virial"] = model_ret["energy_derv_c_redu"].squeeze(-2)
+            if do_atomic_virial:
+                model_predict["extended_virial"] = model_ret["energy_derv_c"].squeeze(
+                    -3
+                )
+        else:
+            assert model_ret["dforce"] is not None
+            model_predict["dforce"] = model_ret["dforce"]
+        return model_predict
+
+    @classmethod
+    def update_sel(cls, global_jdata: dict, local_jdata: dict):
+        """Update the selection and perform neighbor statistics.
+
+        Parameters
+        ----------
+        global_jdata : dict
+            The global data, containing the training section
+        local_jdata : dict
+            The local data refer to the current class
+        """
+        local_jdata_cpy = local_jdata.copy()
+        local_jdata_cpy["dpmodel"] = DPModel.update_sel(
+            global_jdata, local_jdata["dpmodel"]
+        )
+        return local_jdata_cpy
diff --git a/deepmd/pt/model/model/ener_model.py b/deepmd/pt/model/model/ener_model.py
new file mode 100644
index 0000000000..5217293623
--- /dev/null
+++ b/deepmd/pt/model/model/ener_model.py
@@ -0,0 +1,99 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Dict,
+    Optional,
+)
+
+import torch
+
+from .dp_model import (
+    DPModel,
+)
+
+
+class EnergyModel(DPModel):
+    model_type = "ener"
+
+    def __init__(
+        self,
+        *args,
+        **kwargs,
+    ):
+        super().__init__(*args, **kwargs)
+
+    def forward(
+        self,
+        coord,
+        atype,
+        box: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        do_atomic_virial: bool = False,
+    ) -> Dict[str, torch.Tensor]:
+        model_ret = self.forward_common(
+            coord,
+            atype,
+            box,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+        )
+        if self.get_fitting_net() is not None:
+            model_predict = {}
+            model_predict["atom_energy"] = model_ret["energy"]
+            model_predict["energy"] = model_ret["energy_redu"]
+            if self.do_grad_r("energy"):
+                model_predict["force"] = model_ret["energy_derv_r"].squeeze(-2)
+            if self.do_grad_c("energy"):
+                model_predict["virial"] = model_ret["energy_derv_c_redu"].squeeze(-2)
+                if do_atomic_virial:
+                    model_predict["atom_virial"] = model_ret["energy_derv_c"].squeeze(
+                        -3
+                    )
+            else:
+                model_predict["force"] = model_ret["dforce"]
+            if "mask" in model_ret:
+                model_predict["mask"] = model_ret["mask"]
+        else:
+            model_predict = model_ret
+            model_predict["updated_coord"] += coord
+        return model_predict
+
+    @torch.jit.export
+    def forward_lower(
+        self,
+        extended_coord,
+        extended_atype,
+        nlist,
+        mapping: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        do_atomic_virial: bool = False,
+    ):
+        model_ret = self.forward_common_lower(
+            extended_coord,
+            extended_atype,
+            nlist,
+            mapping,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+        )
+        if self.get_fitting_net() is not None:
+            model_predict = {}
+            model_predict["atom_energy"] = model_ret["energy"]
+            model_predict["energy"] = model_ret["energy_redu"]
+            if self.do_grad_r("energy"):
+                model_predict["extended_force"] = model_ret["energy_derv_r"].squeeze(-2)
+            if self.do_grad_c("energy"):
+                model_predict["virial"] = model_ret["energy_derv_c_redu"].squeeze(-2)
+                if do_atomic_virial:
+                    model_predict["extended_virial"] = model_ret[
+                        "energy_derv_c"
+                    ].squeeze(-3)
+            else:
+                assert model_ret["dforce"] is not None
+                model_predict["dforce"] = model_ret["dforce"]
+        else:
+            model_predict = model_ret
+        return model_predict
diff --git a/deepmd/pt/model/model/frozen.py b/deepmd/pt/model/model/frozen.py
new file mode 100644
index 0000000000..e3dcd389bb
--- /dev/null
+++ b/deepmd/pt/model/model/frozen.py
@@ -0,0 +1,174 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import tempfile
+from typing import (
+    Dict,
+    List,
+    Optional,
+)
+
+import torch
+
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+)
+from deepmd.entrypoints.convert_backend import (
+    convert_backend,
+)
+from deepmd.pt.model.model.model import (
+    BaseModel,
+)
+
+
+@BaseModel.register("frozen")
+class FrozenModel(BaseModel):
+    """Load model from a frozen model, which cannot be trained.
+
+    Parameters
+    ----------
+    model_file : str
+        The path to the frozen model
+    """
+
+    def __init__(self, model_file: str, **kwargs):
+        super().__init__(**kwargs)
+        self.model_file = model_file
+        if model_file.endswith(".pth"):
+            self.model = torch.jit.load(model_file)
+        else:
+            # try to convert from other formats
+            with tempfile.NamedTemporaryFile(suffix=".pth") as f:
+                convert_backend(INPUT=model_file, OUTPUT=f.name)
+                self.model = torch.jit.load(f.name)
+
+    @torch.jit.export
+    def fitting_output_def(self) -> FittingOutputDef:
+        """Get the output def of developer implemented atomic models."""
+        return self.model.fitting_output_def()
+
+    @torch.jit.export
+    def get_rcut(self) -> float:
+        """Get the cut-off radius."""
+        return self.model.get_rcut()
+
+    @torch.jit.export
+    def get_type_map(self) -> List[str]:
+        """Get the type map."""
+        return self.model.get_type_map()
+
+    @torch.jit.export
+    def get_sel(self) -> List[int]:
+        """Returns the number of selected atoms for each type."""
+        return self.model.get_sel()
+
+    @torch.jit.export
+    def get_dim_fparam(self) -> int:
+        """Get the number (dimension) of frame parameters of this atomic model."""
+        return self.model.get_dim_fparam()
+
+    @torch.jit.export
+    def get_dim_aparam(self) -> int:
+        """Get the number (dimension) of atomic parameters of this atomic model."""
+        return self.model.get_dim_aparam()
+
+    @torch.jit.export
+    def get_sel_type(self) -> List[int]:
+        """Get the selected atom types of this model.
+
+        Only atoms with selected atom types have atomic contribution
+        to the result of the model.
+        If returning an empty list, all atom types are selected.
+        """
+        return self.model.get_sel_type()
+
+    @torch.jit.export
+    def is_aparam_nall(self) -> bool:
+        """Check whether the shape of atomic parameters is (nframes, nall, ndim).
+
+        If False, the shape is (nframes, nloc, ndim).
+        """
+        return self.model.is_aparam_nall()
+
+    @torch.jit.export
+    def mixed_types(self) -> bool:
+        """If true, the model
+        1. assumes total number of atoms aligned across frames;
+        2. uses a neighbor list that does not distinguish different atomic types.
+
+        If false, the model
+        1. assumes total number of atoms of each atom type aligned across frames;
+        2. uses a neighbor list that distinguishes different atomic types.
+
+        """
+        return self.model.mixed_types()
+
+    @torch.jit.export
+    def forward(
+        self,
+        coord,
+        atype,
+        box: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        do_atomic_virial: bool = False,
+    ) -> Dict[str, torch.Tensor]:
+        return self.model.forward(
+            coord,
+            atype,
+            box=box,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+        )
+
+    @torch.jit.export
+    def get_model_def_script(self) -> str:
+        """Get the model definition script."""
+        # try to use the original script instead of "frozen model"
+        # Note: this cannot change the script of the parent model
+        # it may still try to load hard-coded filename, which might
+        # be a problem
+        return self.model.get_model_def_script()
+
+    def serialize(self) -> dict:
+        from deepmd.pt.model.model import (
+            get_model,
+        )
+
+        # try to recover the original model
+        model_def_script = json.loads(self.get_model_def_script())
+        model = get_model(model_def_script)
+        model.load_state_dict(self.model.state_dict())
+        return model.serialize()
+
+    @classmethod
+    def deserialize(cls, data: dict):
+        raise RuntimeError("Should not touch here.")
+
+    @torch.jit.export
+    def get_nnei(self) -> int:
+        """Returns the total number of selected neighboring atoms in the cut-off radius."""
+        return self.model.get_nnei()
+
+    @torch.jit.export
+    def get_nsel(self) -> int:
+        """Returns the total number of selected neighboring atoms in the cut-off radius."""
+        return self.model.get_nsel()
+
+    @classmethod
+    def update_sel(cls, global_jdata: dict, local_jdata: dict):
+        """Update the selection and perform neighbor statistics.
+
+        Parameters
+        ----------
+        global_jdata : dict
+            The global data, containing the training section
+        local_jdata : dict
+            The local data refer to the current class
+        """
+        return local_jdata
+
+    @torch.jit.export
+    def model_output_type(self) -> str:
+        """Get the output type for the model."""
+        return self.model.model_output_type()
diff --git a/deepmd/pt/model/model/make_hessian_model.py b/deepmd/pt/model/model/make_hessian_model.py
new file mode 100644
index 0000000000..9588348f53
--- /dev/null
+++ b/deepmd/pt/model/model/make_hessian_model.py
@@ -0,0 +1,216 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import math
+from typing import (
+    Dict,
+    List,
+    Optional,
+    Union,
+)
+
+import torch
+
+from deepmd.dpmodel import (
+    get_hessian_name,
+)
+
+
+def make_hessian_model(T_Model):
+    """Make a model that can compute Hessian.
+
+    LIMITATION: this model is not jitable due to the restrictions of torch jit script.
+
+    LIMITATION: only the hessian of `forward_common` is available.
+
+    Parameters
+    ----------
+    T_Model
+        The model. Should provide the `forward_common` and `atomic_output_def` methods
+
+    Returns
+    -------
+    The model computes hessian.
+
+    """
+
+    class CM(T_Model):
+        def __init__(
+            self,
+            *args,
+            **kwargs,
+        ):
+            super().__init__(
+                *args,
+                **kwargs,
+            )
+            self.hess_fitting_def = copy.deepcopy(super().atomic_output_def())
+
+        def requires_hessian(
+            self,
+            keys: Union[str, List[str]],
+        ):
+            """Set which output variable(s) requires hessian."""
+            if isinstance(keys, str):
+                keys = [keys]
+            for kk in self.hess_fitting_def.keys():
+                if kk in keys:
+                    self.hess_fitting_def[kk].r_hessian = True
+
+        def atomic_output_def(self):
+            """Get the fitting output def."""
+            return self.hess_fitting_def
+
+        def forward_common(
+            self,
+            coord,
+            atype,
+            box: Optional[torch.Tensor] = None,
+            fparam: Optional[torch.Tensor] = None,
+            aparam: Optional[torch.Tensor] = None,
+            do_atomic_virial: bool = False,
+        ) -> Dict[str, torch.Tensor]:
+            """Return model prediction.
+
+            Parameters
+            ----------
+            coord
+                The coordinates of the atoms.
+                shape: nf x (nloc x 3)
+            atype
+                The type of atoms. shape: nf x nloc
+            box
+                The simulation box. shape: nf x 9
+            fparam
+                frame parameter. nf x ndf
+            aparam
+                atomic parameter. nf x nloc x nda
+            do_atomic_virial
+                If calculate the atomic virial.
+
+            Returns
+            -------
+            ret_dict
+                The result dict of type Dict[str,torch.Tensor].
+                The keys are defined by the `ModelOutputDef`.
+
+            """
+            ret = super().forward_common(
+                coord,
+                atype,
+                box=box,
+                fparam=fparam,
+                aparam=aparam,
+                do_atomic_virial=do_atomic_virial,
+            )
+            vdef = self.atomic_output_def()
+            hess_yes = [vdef[kk].r_hessian for kk in vdef.keys()]
+            if any(hess_yes):
+                hess = self._cal_hessian_all(
+                    coord,
+                    atype,
+                    box=box,
+                    fparam=fparam,
+                    aparam=aparam,
+                )
+                ret.update(hess)
+            return ret
+
+        def _cal_hessian_all(
+            self,
+            coord: torch.Tensor,
+            atype: torch.Tensor,
+            box: Optional[torch.Tensor] = None,
+            fparam: Optional[torch.Tensor] = None,
+            aparam: Optional[torch.Tensor] = None,
+        ) -> Dict[str, torch.Tensor]:
+            nf, nloc = atype.shape
+            coord = coord.view([nf, (nloc * 3)])
+            box = box.view([nf, 9]) if box is not None else None
+            fparam = fparam.view([nf, -1]) if fparam is not None else None
+            aparam = aparam.view([nf, nloc, -1]) if aparam is not None else None
+            fdef = self.atomic_output_def()
+            # keys of values that require hessian
+            hess_keys: List[str] = []
+            for kk in fdef.keys():
+                if fdef[kk].r_hessian:
+                    hess_keys.append(kk)
+            # result dict init by empty lists
+            res = {get_hessian_name(kk): [] for kk in hess_keys}
+            # loop over variable
+            for kk in hess_keys:
+                vdef = fdef[kk]
+                vshape = vdef.shape
+                vsize = math.prod(vdef.shape)
+                # loop over frames
+                for ii in range(nf):
+                    icoord = coord[ii]
+                    iatype = atype[ii]
+                    ibox = box[ii] if box is not None else None
+                    ifparam = fparam[ii] if fparam is not None else None
+                    iaparam = aparam[ii] if aparam is not None else None
+                    # loop over all components
+                    for idx in range(vsize):
+                        hess = self._cal_hessian_one_component(
+                            idx, icoord, iatype, ibox, ifparam, iaparam
+                        )
+                        res[get_hessian_name(kk)].append(hess)
+                res[get_hessian_name(kk)] = torch.stack(res[get_hessian_name(kk)]).view(
+                    (nf, *vshape, nloc * 3, nloc * 3)
+                )
+            return res
+
+        def _cal_hessian_one_component(
+            self,
+            ci,
+            coord,
+            atype,
+            box: Optional[torch.Tensor] = None,
+            fparam: Optional[torch.Tensor] = None,
+            aparam: Optional[torch.Tensor] = None,
+        ) -> torch.Tensor:
+            # coord, # (nloc x 3)
+            # atype, # nloc
+            # box: Optional[torch.Tensor] = None,     # 9
+            # fparam: Optional[torch.Tensor] = None,  # nfp
+            # aparam: Optional[torch.Tensor] = None,  # (nloc x nap)
+            wc = wrapper_class_forward_energy(self, ci, atype, box, fparam, aparam)
+
+            hess = torch.autograd.functional.hessian(
+                wc,
+                coord,
+                create_graph=False,
+            )
+            return hess
+
+    class wrapper_class_forward_energy:
+        def __init__(
+            self,
+            obj: CM,
+            ci: int,
+            atype: torch.Tensor,
+            box: Optional[torch.Tensor],
+            fparam: Optional[torch.Tensor],
+            aparam: Optional[torch.Tensor],
+        ):
+            self.atype, self.box, self.fparam, self.aparam = atype, box, fparam, aparam
+            self.ci = ci
+            self.obj = obj
+
+        def __call__(
+            self,
+            xx,
+        ):
+            ci = self.ci
+            atype, box, fparam, aparam = self.atype, self.box, self.fparam, self.aparam
+            res = super(CM, self.obj).forward_common(
+                xx.unsqueeze(0),
+                atype.unsqueeze(0),
+                box.unsqueeze(0) if box is not None else None,
+                fparam.unsqueeze(0) if fparam is not None else None,
+                aparam.unsqueeze(0) if aparam is not None else None,
+                do_atomic_virial=False,
+            )
+            er = res["energy_redu"][0].view([-1])[ci]
+            return er
+
+    return CM
diff --git a/deepmd/pt/model/model/make_model.py b/deepmd/pt/model/model/make_model.py
new file mode 100644
index 0000000000..0e89c05b79
--- /dev/null
+++ b/deepmd/pt/model/model/make_model.py
@@ -0,0 +1,535 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Dict,
+    List,
+    Optional,
+    Tuple,
+    Type,
+)
+
+import torch
+
+from deepmd.dpmodel import (
+    ModelOutputDef,
+)
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+    OutputVariableCategory,
+    OutputVariableOperation,
+    check_operation_applied,
+)
+from deepmd.pt.model.atomic_model.base_atomic_model import (
+    BaseAtomicModel,
+)
+from deepmd.pt.model.model.model import (
+    BaseModel,
+)
+from deepmd.pt.model.model.transform_output import (
+    communicate_extended_output,
+    fit_output_to_model_output,
+)
+from deepmd.pt.utils.env import (
+    GLOBAL_PT_ENER_FLOAT_PRECISION,
+    GLOBAL_PT_FLOAT_PRECISION,
+    PRECISION_DICT,
+    RESERVED_PRECISON_DICT,
+)
+from deepmd.pt.utils.nlist import (
+    extend_input_and_build_neighbor_list,
+    nlist_distinguish_types,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+
+
+def make_model(T_AtomicModel: Type[BaseAtomicModel]):
+    """Make a model as a derived class of an atomic model.
+
+    The model provide two interfaces.
+
+    1. the `forward_common_lower`, that takes extended coordinates, atyps and neighbor list,
+    and outputs the atomic and property and derivatives (if required) on the extended region.
+
+    2. the `forward_common`, that takes coordinates, atypes and cell and predicts
+    the atomic and reduced property, and derivatives (if required) on the local region.
+
+    Parameters
+    ----------
+    T_AtomicModel
+        The atomic model.
+
+    Returns
+    -------
+    CM
+        The model.
+
+    """
+
+    class CM(BaseModel):
+        def __init__(
+            self,
+            *args,
+            # underscore to prevent conflict with normal inputs
+            atomic_model_: Optional[T_AtomicModel] = None,
+            **kwargs,
+        ):
+            super().__init__(*args, **kwargs)
+            if atomic_model_ is not None:
+                self.atomic_model: T_AtomicModel = atomic_model_
+            else:
+                self.atomic_model: T_AtomicModel = T_AtomicModel(*args, **kwargs)
+            self.precision_dict = PRECISION_DICT
+            self.reverse_precision_dict = RESERVED_PRECISON_DICT
+            self.global_pt_float_precision = GLOBAL_PT_FLOAT_PRECISION
+            self.global_pt_ener_float_precision = GLOBAL_PT_ENER_FLOAT_PRECISION
+
+        def model_output_def(self):
+            """Get the output def for the model."""
+            return ModelOutputDef(self.atomic_output_def())
+
+        @torch.jit.export
+        def model_output_type(self) -> List[str]:
+            """Get the output type for the model."""
+            output_def = self.model_output_def()
+            var_defs = output_def.var_defs
+            # jit: Comprehension ifs are not supported yet
+            # type hint is critical for JIT
+            vars: List[str] = []
+            for kk, vv in var_defs.items():
+                # .value is critical for JIT
+                if vv.category == OutputVariableCategory.OUT.value:
+                    vars.append(kk)
+            return vars
+
+        # cannot use the name forward. torch script does not work
+        def forward_common(
+            self,
+            coord,
+            atype,
+            box: Optional[torch.Tensor] = None,
+            fparam: Optional[torch.Tensor] = None,
+            aparam: Optional[torch.Tensor] = None,
+            do_atomic_virial: bool = False,
+        ) -> Dict[str, torch.Tensor]:
+            """Return model prediction.
+
+            Parameters
+            ----------
+            coord
+                The coordinates of the atoms.
+                shape: nf x (nloc x 3)
+            atype
+                The type of atoms. shape: nf x nloc
+            box
+                The simulation box. shape: nf x 9
+            fparam
+                frame parameter. nf x ndf
+            aparam
+                atomic parameter. nf x nloc x nda
+            do_atomic_virial
+                If calculate the atomic virial.
+
+            Returns
+            -------
+            ret_dict
+                The result dict of type Dict[str,torch.Tensor].
+                The keys are defined by the `ModelOutputDef`.
+
+            """
+            cc, bb, fp, ap, input_prec = self.input_type_cast(
+                coord, box=box, fparam=fparam, aparam=aparam
+            )
+            del coord, box, fparam, aparam
+            (
+                extended_coord,
+                extended_atype,
+                mapping,
+                nlist,
+            ) = extend_input_and_build_neighbor_list(
+                cc,
+                atype,
+                self.get_rcut(),
+                self.get_sel(),
+                mixed_types=self.mixed_types(),
+                box=bb,
+            )
+            model_predict_lower = self.forward_common_lower(
+                extended_coord,
+                extended_atype,
+                nlist,
+                mapping,
+                do_atomic_virial=do_atomic_virial,
+                fparam=fp,
+                aparam=ap,
+            )
+            model_predict = communicate_extended_output(
+                model_predict_lower,
+                self.model_output_def(),
+                mapping,
+                do_atomic_virial=do_atomic_virial,
+            )
+            model_predict = self.output_type_cast(model_predict, input_prec)
+            return model_predict
+
+        def change_out_bias(
+            self,
+            merged,
+            origin_type_map,
+            full_type_map,
+            bias_adjust_mode="change-by-statistic",
+        ) -> None:
+            """Change the output bias of atomic model according to the input data and the pretrained model.
+
+            Parameters
+            ----------
+            merged : Union[Callable[[], List[dict]], List[dict]]
+                - List[dict]: A list of data samples from various data systems.
+                    Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                    originating from the `i`-th data system.
+                - Callable[[], List[dict]]: A lazy function that returns data samples in the above format
+                    only when needed. Since the sampling process can be slow and memory-intensive,
+                    the lazy function helps by only sampling once.
+            origin_type_map : List[str]
+                The original type_map in dataset, they are targets to change the output bias.
+            full_type_map : List[str]
+                The full type_map in pre-trained model
+            bias_adjust_mode : str
+                The mode for changing output bias : ['change-by-statistic', 'set-by-statistic']
+                'change-by-statistic' : perform predictions on labels of target dataset,
+                        and do least square on the errors to obtain the target shift as bias.
+                'set-by-statistic' : directly use the statistic output bias in the target dataset.
+            """
+            self.atomic_model.change_out_bias(
+                merged,
+                origin_type_map,
+                full_type_map,
+                bias_adjust_mode=bias_adjust_mode,
+            )
+
+        def forward_common_lower(
+            self,
+            extended_coord,
+            extended_atype,
+            nlist,
+            mapping: Optional[torch.Tensor] = None,
+            fparam: Optional[torch.Tensor] = None,
+            aparam: Optional[torch.Tensor] = None,
+            do_atomic_virial: bool = False,
+        ):
+            """Return model prediction. Lower interface that takes
+            extended atomic coordinates and types, nlist, and mapping
+            as input, and returns the predictions on the extended region.
+            The predictions are not reduced.
+
+            Parameters
+            ----------
+            extended_coord
+                coodinates in extended region. nf x (nall x 3)
+            extended_atype
+                atomic type in extended region. nf x nall
+            nlist
+                neighbor list. nf x nloc x nsel.
+            mapping
+                mapps the extended indices to local indices. nf x nall.
+            fparam
+                frame parameter. nf x ndf
+            aparam
+                atomic parameter. nf x nloc x nda
+            do_atomic_virial
+                whether calculate atomic virial.
+
+            Returns
+            -------
+            result_dict
+                the result dict, defined by the `FittingOutputDef`.
+
+            """
+            nframes, nall = extended_atype.shape[:2]
+            extended_coord = extended_coord.view(nframes, -1, 3)
+            nlist = self.format_nlist(extended_coord, extended_atype, nlist)
+            cc_ext, _, fp, ap, input_prec = self.input_type_cast(
+                extended_coord, fparam=fparam, aparam=aparam
+            )
+            del extended_coord, fparam, aparam
+            atomic_ret = self.atomic_model.forward_common_atomic(
+                cc_ext,
+                extended_atype,
+                nlist,
+                mapping=mapping,
+                fparam=fp,
+                aparam=ap,
+            )
+            model_predict = fit_output_to_model_output(
+                atomic_ret,
+                self.atomic_output_def(),
+                cc_ext,
+                do_atomic_virial=do_atomic_virial,
+            )
+            model_predict = self.output_type_cast(model_predict, input_prec)
+            return model_predict
+
+        def input_type_cast(
+            self,
+            coord: torch.Tensor,
+            box: Optional[torch.Tensor] = None,
+            fparam: Optional[torch.Tensor] = None,
+            aparam: Optional[torch.Tensor] = None,
+        ) -> Tuple[
+            torch.Tensor,
+            Optional[torch.Tensor],
+            Optional[torch.Tensor],
+            Optional[torch.Tensor],
+            str,
+        ]:
+            """Cast the input data to global float type."""
+            input_prec = self.reverse_precision_dict[coord.dtype]
+            ###
+            ### type checking would not pass jit, convert to coord prec anyway
+            ###
+            # for vv, kk in zip([fparam, aparam], ["frame", "atomic"]):
+            #     if vv is not None and self.reverse_precision_dict[vv.dtype] != input_prec:
+            #         log.warning(
+            #           f"type of {kk} parameter {self.reverse_precision_dict[vv.dtype]}"
+            #           " does not match"
+            #           f" that of the coordinate {input_prec}"
+            #         )
+            _lst: List[Optional[torch.Tensor]] = [
+                vv.to(coord.dtype) if vv is not None else None
+                for vv in [box, fparam, aparam]
+            ]
+            box, fparam, aparam = _lst
+            if (
+                input_prec
+                == self.reverse_precision_dict[self.global_pt_float_precision]
+            ):
+                return coord, box, fparam, aparam, input_prec
+            else:
+                pp = self.global_pt_float_precision
+                return (
+                    coord.to(pp),
+                    box.to(pp) if box is not None else None,
+                    fparam.to(pp) if fparam is not None else None,
+                    aparam.to(pp) if aparam is not None else None,
+                    input_prec,
+                )
+
+        def output_type_cast(
+            self,
+            model_ret: Dict[str, torch.Tensor],
+            input_prec: str,
+        ) -> Dict[str, torch.Tensor]:
+            """Convert the model output to the input prec."""
+            do_cast = (
+                input_prec
+                != self.reverse_precision_dict[self.global_pt_float_precision]
+            )
+            pp = self.precision_dict[input_prec]
+            odef = self.model_output_def()
+            for kk in odef.keys():
+                if kk not in model_ret.keys():
+                    # do not return energy_derv_c if not do_atomic_virial
+                    continue
+                if check_operation_applied(odef[kk], OutputVariableOperation.REDU):
+                    model_ret[kk] = (
+                        model_ret[kk].to(self.global_pt_ener_float_precision)
+                        if model_ret[kk] is not None
+                        else None
+                    )
+                elif do_cast:
+                    model_ret[kk] = (
+                        model_ret[kk].to(pp) if model_ret[kk] is not None else None
+                    )
+            return model_ret
+
+        def format_nlist(
+            self,
+            extended_coord: torch.Tensor,
+            extended_atype: torch.Tensor,
+            nlist: torch.Tensor,
+        ):
+            """Format the neighbor list.
+
+            1. If the number of neighbors in the `nlist` is equal to sum(self.sel),
+            it does nothong
+
+            2. If the number of neighbors in the `nlist` is smaller than sum(self.sel),
+            the `nlist` is pad with -1.
+
+            3. If the number of neighbors in the `nlist` is larger than sum(self.sel),
+            the nearest sum(sel) neighbors will be preseved.
+
+            Known limitations:
+
+            In the case of not self.mixed_types, the nlist is always formatted.
+            May have side effact on the efficiency.
+
+            Parameters
+            ----------
+            extended_coord
+                coodinates in extended region. nf x nall x 3
+            extended_atype
+                atomic type in extended region. nf x nall
+            nlist
+                neighbor list. nf x nloc x nsel
+
+            Returns
+            -------
+            formated_nlist
+                the formated nlist.
+
+            """
+            mixed_types = self.mixed_types()
+            nlist = self._format_nlist(extended_coord, nlist, sum(self.get_sel()))
+            if not mixed_types:
+                nlist = nlist_distinguish_types(nlist, extended_atype, self.get_sel())
+            return nlist
+
+        def _format_nlist(
+            self,
+            extended_coord: torch.Tensor,
+            nlist: torch.Tensor,
+            nnei: int,
+        ):
+            n_nf, n_nloc, n_nnei = nlist.shape
+            # nf x nall x 3
+            extended_coord = extended_coord.view([n_nf, -1, 3])
+            rcut = self.get_rcut()
+
+            if n_nnei < nnei:
+                nlist = torch.cat(
+                    [
+                        nlist,
+                        -1
+                        * torch.ones(
+                            [n_nf, n_nloc, nnei - n_nnei],
+                            dtype=nlist.dtype,
+                            device=nlist.device,
+                        ),
+                    ],
+                    dim=-1,
+                )
+            elif n_nnei > nnei:
+                m_real_nei = nlist >= 0
+                nlist = torch.where(m_real_nei, nlist, 0)
+                # nf x nloc x 3
+                coord0 = extended_coord[:, :n_nloc, :]
+                # nf x (nloc x nnei) x 3
+                index = nlist.view(n_nf, n_nloc * n_nnei, 1).expand(-1, -1, 3)
+                coord1 = torch.gather(extended_coord, 1, index)
+                # nf x nloc x nnei x 3
+                coord1 = coord1.view(n_nf, n_nloc, n_nnei, 3)
+                # nf x nloc x nnei
+                rr = torch.linalg.norm(coord0[:, :, None, :] - coord1, dim=-1)
+                rr = torch.where(m_real_nei, rr, float("inf"))
+                rr, nlist_mapping = torch.sort(rr, dim=-1)
+                nlist = torch.gather(nlist, 2, nlist_mapping)
+                nlist = torch.where(rr > rcut, -1, nlist)
+                nlist = nlist[..., :nnei]
+            else:  # n_nnei == nnei:
+                pass  # great!
+            assert nlist.shape[-1] == nnei
+            return nlist
+
+        def do_grad_r(
+            self,
+            var_name: Optional[str] = None,
+        ) -> bool:
+            """Tell if the output variable `var_name` is r_differentiable.
+            if var_name is None, returns if any of the variable is r_differentiable.
+            """
+            return self.atomic_model.do_grad_r(var_name)
+
+        def do_grad_c(
+            self,
+            var_name: Optional[str] = None,
+        ) -> bool:
+            """Tell if the output variable `var_name` is c_differentiable.
+            if var_name is None, returns if any of the variable is c_differentiable.
+            """
+            return self.atomic_model.do_grad_c(var_name)
+
+        def serialize(self) -> dict:
+            return self.atomic_model.serialize()
+
+        @classmethod
+        def deserialize(cls, data) -> "CM":
+            return cls(atomic_model_=T_AtomicModel.deserialize(data))
+
+        @torch.jit.export
+        def get_dim_fparam(self) -> int:
+            """Get the number (dimension) of frame parameters of this atomic model."""
+            return self.atomic_model.get_dim_fparam()
+
+        @torch.jit.export
+        def get_dim_aparam(self) -> int:
+            """Get the number (dimension) of atomic parameters of this atomic model."""
+            return self.atomic_model.get_dim_aparam()
+
+        @torch.jit.export
+        def get_sel_type(self) -> List[int]:
+            """Get the selected atom types of this model.
+
+            Only atoms with selected atom types have atomic contribution
+            to the result of the model.
+            If returning an empty list, all atom types are selected.
+            """
+            return self.atomic_model.get_sel_type()
+
+        @torch.jit.export
+        def is_aparam_nall(self) -> bool:
+            """Check whether the shape of atomic parameters is (nframes, nall, ndim).
+
+            If False, the shape is (nframes, nloc, ndim).
+            """
+            return self.atomic_model.is_aparam_nall()
+
+        @torch.jit.export
+        def get_rcut(self) -> float:
+            """Get the cut-off radius."""
+            return self.atomic_model.get_rcut()
+
+        @torch.jit.export
+        def get_type_map(self) -> List[str]:
+            """Get the type map."""
+            return self.atomic_model.get_type_map()
+
+        @torch.jit.export
+        def get_nsel(self) -> int:
+            """Returns the total number of selected neighboring atoms in the cut-off radius."""
+            return self.atomic_model.get_nsel()
+
+        @torch.jit.export
+        def get_nnei(self) -> int:
+            """Returns the total number of selected neighboring atoms in the cut-off radius."""
+            return self.atomic_model.get_nnei()
+
+        def atomic_output_def(self) -> FittingOutputDef:
+            """Get the output def of the atomic model."""
+            return self.atomic_model.atomic_output_def()
+
+        def compute_or_load_stat(
+            self,
+            sampled_func,
+            stat_file_path: Optional[DPPath] = None,
+        ):
+            """Compute or load the statistics."""
+            return self.atomic_model.compute_or_load_stat(sampled_func, stat_file_path)
+
+        def get_sel(self) -> List[int]:
+            """Returns the number of selected atoms for each type."""
+            return self.atomic_model.get_sel()
+
+        def mixed_types(self) -> bool:
+            """If true, the model
+            1. assumes total number of atoms aligned across frames;
+            2. uses a neighbor list that does not distinguish different atomic types.
+
+            If false, the model
+            1. assumes total number of atoms of each atom type aligned across frames;
+            2. uses a neighbor list that distinguishes different atomic types.
+
+            """
+            return self.atomic_model.mixed_types()
+
+    return CM
diff --git a/deepmd/pt/model/model/model.py b/deepmd/pt/model/model/model.py
new file mode 100644
index 0000000000..bf97472e33
--- /dev/null
+++ b/deepmd/pt/model/model/model.py
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Optional,
+)
+
+import torch
+
+from deepmd.dpmodel.model.base_model import (
+    make_base_model,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+
+
+class BaseModel(torch.nn.Module, make_base_model()):
+    def __init__(self, *args, **kwargs):
+        """Construct a basic model for different tasks."""
+        torch.nn.Module.__init__(self)
+        self.model_def_script = ""
+
+    def compute_or_load_stat(
+        self,
+        sampled_func,
+        stat_file_path: Optional[DPPath] = None,
+    ):
+        """
+        Compute or load the statistics parameters of the model,
+        such as mean and standard deviation of descriptors or the energy bias of the fitting net.
+        When `sampled` is provided, all the statistics parameters will be calculated (or re-calculated for update),
+        and saved in the `stat_file_path`(s).
+        When `sampled` is not provided, it will check the existence of `stat_file_path`(s)
+        and load the calculated statistics parameters.
+
+        Parameters
+        ----------
+        sampled_func
+            The sampled data frames from different data systems.
+        stat_file_path
+            The path to the statistics files.
+        """
+        raise NotImplementedError
+
+    @torch.jit.export
+    def get_model_def_script(self) -> str:
+        """Get the model definition script."""
+        return self.model_def_script
+
+    @torch.jit.export
+    def get_ntypes(self):
+        """Returns the number of element types."""
+        return len(self.get_type_map())
diff --git a/deepmd/pt/model/model/polar_model.py b/deepmd/pt/model/model/polar_model.py
new file mode 100644
index 0000000000..403058aa47
--- /dev/null
+++ b/deepmd/pt/model/model/polar_model.py
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Dict,
+    Optional,
+)
+
+import torch
+
+from .dp_model import (
+    DPModel,
+)
+
+
+class PolarModel(DPModel):
+    model_type = "polar"
+
+    def __init__(
+        self,
+        *args,
+        **kwargs,
+    ):
+        super().__init__(*args, **kwargs)
+
+    def forward(
+        self,
+        coord,
+        atype,
+        box: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        do_atomic_virial: bool = False,
+    ) -> Dict[str, torch.Tensor]:
+        model_ret = self.forward_common(
+            coord,
+            atype,
+            box,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+        )
+        if self.get_fitting_net() is not None:
+            model_predict = {}
+            model_predict["polar"] = model_ret["polar"]
+            model_predict["global_polar"] = model_ret["polar_redu"]
+            if "mask" in model_ret:
+                model_predict["mask"] = model_ret["mask"]
+        else:
+            model_predict = model_ret
+            model_predict["updated_coord"] += coord
+        return model_predict
+
+    @torch.jit.export
+    def forward_lower(
+        self,
+        extended_coord,
+        extended_atype,
+        nlist,
+        mapping: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        do_atomic_virial: bool = False,
+    ):
+        model_ret = self.forward_common_lower(
+            extended_coord,
+            extended_atype,
+            nlist,
+            mapping,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+        )
+        if self.get_fitting_net() is not None:
+            model_predict = {}
+            model_predict["polar"] = model_ret["polar"]
+            model_predict["global_polar"] = model_ret["polar_redu"]
+        else:
+            model_predict = model_ret
+        return model_predict
diff --git a/deepmd/pt/model/model/spin_model.py b/deepmd/pt/model/model/spin_model.py
new file mode 100644
index 0000000000..df2f48e2e4
--- /dev/null
+++ b/deepmd/pt/model/model/spin_model.py
@@ -0,0 +1,560 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import functools
+from typing import (
+    Dict,
+    List,
+    Optional,
+)
+
+import torch
+
+from deepmd.pt.utils.utils import (
+    to_torch_tensor,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.spin import (
+    Spin,
+)
+
+from .dp_model import (
+    DPModel,
+)
+
+
+class SpinModel(torch.nn.Module):
+    """A spin model wrapper, with spin input preprocess and output split."""
+
+    def __init__(
+        self,
+        backbone_model,
+        spin: Spin,
+    ):
+        super().__init__()
+        self.backbone_model = backbone_model
+        self.spin = spin
+        self.ntypes_real = self.spin.ntypes_real
+        self.virtual_scale_mask = to_torch_tensor(self.spin.get_virtual_scale_mask())
+        self.spin_mask = to_torch_tensor(self.spin.get_spin_mask())
+
+    def process_spin_input(self, coord, atype, spin):
+        """Generate virtual coordinates and types, concat into the input."""
+        nframes, nloc = coord.shape[:-1]
+        atype_spin = torch.concat([atype, atype + self.ntypes_real], dim=-1)
+        virtual_coord = coord + spin * self.virtual_scale_mask[atype].reshape(
+            [nframes, nloc, 1]
+        )
+        coord_spin = torch.concat([coord, virtual_coord], dim=-2)
+        return coord_spin, atype_spin
+
+    def process_spin_input_lower(
+        self,
+        extended_coord,
+        extended_atype,
+        extended_spin,
+        nlist,
+        mapping: Optional[torch.Tensor] = None,
+    ):
+        """
+        Add `extended_spin` into `extended_coord` to generate virtual atoms, and extend `nlist` and `mapping`.
+        Note that the final `extended_coord_updated` with shape [nframes, nall + nall, 3] has the following order:
+        - [:, :nloc]: original nloc real atoms.
+        - [:, nloc: nloc + nloc]: virtual atoms corresponding to nloc real atoms.
+        - [:, nloc + nloc: nloc + nall]: ghost real atoms.
+        - [:, nloc + nall: nall + nall]: virtual atoms corresponding to ghost real atoms.
+        """
+        nframes, nall = extended_coord.shape[:2]
+        nloc = nlist.shape[1]
+        virtual_extended_coord = (
+            extended_coord
+            + extended_spin
+            * self.virtual_scale_mask[extended_atype].reshape([nframes, nall, 1])
+        )
+        virtual_extended_atype = extended_atype + self.ntypes_real
+        extended_coord_updated = self.concat_switch_virtual(
+            extended_coord, virtual_extended_coord, nloc
+        )
+        extended_atype_updated = self.concat_switch_virtual(
+            extended_atype, virtual_extended_atype, nloc
+        )
+        if mapping is not None:
+            virtual_mapping = mapping + nloc
+            mapping_updated = self.concat_switch_virtual(mapping, virtual_mapping, nloc)
+        else:
+            mapping_updated = None
+        # extend the nlist
+        nlist_updated = self.extend_nlist(extended_atype, nlist)
+        return (
+            extended_coord_updated,
+            extended_atype_updated,
+            nlist_updated,
+            mapping_updated,
+        )
+
+    def process_spin_output(
+        self, atype, out_tensor, add_mag: bool = True, virtual_scale: bool = True
+    ):
+        """
+        Split the output both real and virtual atoms, and scale the latter.
+        add_mag: whether to add magnetic tensor onto the real tensor.
+            Default: True. e.g. Ture for forces and False for atomic virials on real atoms.
+        virtual_scale: whether to scale the magnetic tensor with virtual scale factor.
+            Default: True. e.g. Ture for forces and False for atomic virials on virtual atoms.
+        """
+        nframes, nloc_double = out_tensor.shape[:2]
+        nloc = nloc_double // 2
+        if virtual_scale:
+            virtual_scale_mask = self.virtual_scale_mask
+        else:
+            virtual_scale_mask = self.spin_mask
+        atomic_mask = virtual_scale_mask[atype].reshape([nframes, nloc, 1])
+        out_real, out_mag = torch.split(out_tensor, [nloc, nloc], dim=1)
+        if add_mag:
+            out_real = out_real + out_mag
+        out_mag = (out_mag.view([nframes, nloc, -1]) * atomic_mask).view(out_mag.shape)
+        return out_real, out_mag, atomic_mask > 0.0
+
+    def process_spin_output_lower(
+        self,
+        extended_atype,
+        extended_out_tensor,
+        nloc: int,
+        add_mag: bool = True,
+        virtual_scale: bool = True,
+    ):
+        """
+        Split the extended output of both real and virtual atoms with switch, and scale the latter.
+        add_mag: whether to add magnetic tensor onto the real tensor.
+            Default: True. e.g. Ture for forces and False for atomic virials on real atoms.
+        virtual_scale: whether to scale the magnetic tensor with virtual scale factor.
+            Default: True. e.g. Ture for forces and False for atomic virials on virtual atoms.
+        """
+        nframes, nall_double = extended_out_tensor.shape[:2]
+        nall = nall_double // 2
+        if virtual_scale:
+            virtual_scale_mask = self.virtual_scale_mask
+        else:
+            virtual_scale_mask = self.spin_mask
+        atomic_mask = virtual_scale_mask[extended_atype].reshape([nframes, nall, 1])
+        extended_out_real = torch.cat(
+            [
+                extended_out_tensor[:, :nloc],
+                extended_out_tensor[:, nloc + nloc : nloc + nall],
+            ],
+            dim=1,
+        )
+        extended_out_mag = torch.cat(
+            [
+                extended_out_tensor[:, nloc : nloc + nloc],
+                extended_out_tensor[:, nloc + nall :],
+            ],
+            dim=1,
+        )
+        if add_mag:
+            extended_out_real = extended_out_real + extended_out_mag
+        extended_out_mag = (
+            extended_out_mag.view([nframes, nall, -1]) * atomic_mask
+        ).view(extended_out_mag.shape)
+        return extended_out_real, extended_out_mag, atomic_mask > 0.0
+
+    @staticmethod
+    def extend_nlist(extended_atype, nlist):
+        nframes, nloc, nnei = nlist.shape
+        nall = extended_atype.shape[1]
+        nlist_mask = nlist != -1
+        nlist[nlist == -1] = 0
+        nlist_shift = nlist + nall
+        nlist[~nlist_mask] = -1
+        nlist_shift[~nlist_mask] = -1
+        self_spin = torch.arange(0, nloc, dtype=nlist.dtype, device=nlist.device) + nall
+        self_spin = self_spin.view(1, -1, 1).expand(nframes, -1, -1)
+        # self spin + real neighbor + virtual neighbor
+        # nf x nloc x (1 + nnei + nnei)
+        extended_nlist = torch.cat([self_spin, nlist, nlist_shift], dim=-1)
+        # nf x (nloc + nloc) x (1 + nnei + nnei)
+        extended_nlist = torch.cat(
+            [extended_nlist, -1 * torch.ones_like(extended_nlist)], dim=-2
+        )
+        # update the index for switch
+        first_part_index = (nloc <= extended_nlist) & (extended_nlist < nall)
+        second_part_index = (nall <= extended_nlist) & (extended_nlist < (nall + nloc))
+        extended_nlist[first_part_index] += nloc
+        extended_nlist[second_part_index] -= nall - nloc
+        return extended_nlist
+
+    @staticmethod
+    def concat_switch_virtual(extended_tensor, extended_tensor_virtual, nloc: int):
+        """
+        Concat real and virtual extended tensors, and switch all the local ones to the first nloc * 2 atoms.
+        - [:, :nloc]: original nloc real atoms.
+        - [:, nloc: nloc + nloc]: virtual atoms corresponding to nloc real atoms.
+        - [:, nloc + nloc: nloc + nall]: ghost real atoms.
+        - [:, nloc + nall: nall + nall]: virtual atoms corresponding to ghost real atoms.
+        """
+        nframes, nall = extended_tensor.shape[:2]
+        out_shape = list(extended_tensor.shape)
+        out_shape[1] *= 2
+        extended_tensor_updated = torch.zeros(
+            out_shape,
+            dtype=extended_tensor.dtype,
+            device=extended_tensor.device,
+        )
+        extended_tensor_updated[:, :nloc] = extended_tensor[:, :nloc]
+        extended_tensor_updated[:, nloc : nloc + nloc] = extended_tensor_virtual[
+            :, :nloc
+        ]
+        extended_tensor_updated[:, nloc + nloc : nloc + nall] = extended_tensor[
+            :, nloc:
+        ]
+        extended_tensor_updated[:, nloc + nall :] = extended_tensor_virtual[:, nloc:]
+        return extended_tensor_updated.view(out_shape)
+
+    @staticmethod
+    def expand_aparam(aparam, nloc: int):
+        """Expand the atom parameters for virtual atoms if necessary."""
+        nframes, natom, numb_aparam = aparam.shape[1:]
+        if natom == nloc:  # good
+            pass
+        elif natom < nloc:  # for spin with virtual atoms
+            aparam = torch.concat(
+                [
+                    aparam,
+                    torch.zeros(
+                        [nframes, nloc - natom, numb_aparam],
+                        device=aparam.device,
+                        dtype=aparam.dtype,
+                    ),
+                ],
+                dim=1,
+            )
+        else:
+            raise ValueError(
+                f"get an input aparam with {aparam.shape[1]} inputs, ",
+                f"which is larger than {nloc} atoms.",
+            )
+        return aparam
+
+    @torch.jit.export
+    def get_type_map(self) -> List[str]:
+        """Get the type map."""
+        tmap = self.backbone_model.get_type_map()
+        ntypes = len(tmap) // 2  # ignore the virtual type
+        return tmap[:ntypes]
+
+    @torch.jit.export
+    def get_rcut(self):
+        """Get the cut-off radius."""
+        return self.backbone_model.get_rcut()
+
+    @torch.jit.export
+    def get_dim_fparam(self):
+        """Get the number (dimension) of frame parameters of this atomic model."""
+        return self.backbone_model.get_dim_fparam()
+
+    @torch.jit.export
+    def get_dim_aparam(self):
+        """Get the number (dimension) of atomic parameters of this atomic model."""
+        return self.backbone_model.get_dim_aparam()
+
+    @torch.jit.export
+    def get_sel_type(self) -> List[int]:
+        """Get the selected atom types of this model.
+        Only atoms with selected atom types have atomic contribution
+        to the result of the model.
+        If returning an empty list, all atom types are selected.
+        """
+        return self.backbone_model.get_sel_type()
+
+    @torch.jit.export
+    def is_aparam_nall(self) -> bool:
+        """Check whether the shape of atomic parameters is (nframes, nall, ndim).
+        If False, the shape is (nframes, nloc, ndim).
+        """
+        return self.backbone_model.is_aparam_nall()
+
+    @torch.jit.export
+    def model_output_type(self) -> List[str]:
+        """Get the output type for the model."""
+        return self.backbone_model.model_output_type()
+
+    @torch.jit.export
+    def get_model_def_script(self) -> str:
+        """Get the model definition script."""
+        return self.backbone_model.get_model_def_script()
+
+    @torch.jit.export
+    def get_nnei(self) -> int:
+        """Returns the total number of selected neighboring atoms in the cut-off radius."""
+        # for C++ interface
+        if not self.backbone_model.mixed_types():
+            return self.backbone_model.get_nnei() // 2  # ignore the virtual selected
+        else:
+            return self.backbone_model.get_nnei()
+
+    @torch.jit.export
+    def get_nsel(self) -> int:
+        """Returns the total number of selected neighboring atoms in the cut-off radius."""
+        if not self.backbone_model.mixed_types():
+            return self.backbone_model.get_nsel() // 2  # ignore the virtual selected
+        else:
+            return self.backbone_model.get_nsel()
+
+    @torch.jit.export
+    def has_spin(self) -> bool:
+        """Returns whether it has spin input and output."""
+        return True
+
+    def __getattr__(self, name):
+        """Get attribute from the wrapped model."""
+        if (
+            name == "backbone_model"
+        ):  # torch.nn.Module will exclude modules to self.__dict__["_modules"]
+            return self.__dict__["_modules"]["backbone_model"]
+        elif name in self.__dict__:
+            return self.__dict__[name]
+        else:
+            return getattr(self.backbone_model, name)
+
+    def compute_or_load_stat(
+        self,
+        sampled_func,
+        stat_file_path: Optional[DPPath] = None,
+    ):
+        """
+        Compute or load the statistics parameters of the model,
+        such as mean and standard deviation of descriptors or the energy bias of the fitting net.
+        When `sampled` is provided, all the statistics parameters will be calculated (or re-calculated for update),
+        and saved in the `stat_file_path`(s).
+        When `sampled` is not provided, it will check the existence of `stat_file_path`(s)
+        and load the calculated statistics parameters.
+
+        Parameters
+        ----------
+        sampled_func
+            The lazy sampled function to get data frames from different data systems.
+        stat_file_path
+            The dictionary of paths to the statistics files.
+        """
+
+        @functools.lru_cache
+        def spin_sampled_func():
+            sampled = sampled_func()
+            spin_sampled = []
+            for sys in sampled:
+                coord_updated, atype_updated = self.process_spin_input(
+                    sys["coord"], sys["atype"], sys["spin"]
+                )
+                tmp_dict = {
+                    "coord": coord_updated,
+                    "atype": atype_updated,
+                }
+                if "natoms" in sys:
+                    natoms = sys["natoms"]
+                    tmp_dict["natoms"] = torch.cat(
+                        [2 * natoms[:, :2], natoms[:, 2:], natoms[:, 2:]], dim=-1
+                    )
+                for item_key in sys.keys():
+                    if item_key not in ["coord", "atype", "spin", "natoms"]:
+                        tmp_dict[item_key] = sys[item_key]
+                spin_sampled.append(tmp_dict)
+            return spin_sampled
+
+        self.backbone_model.compute_or_load_stat(spin_sampled_func, stat_file_path)
+
+    def forward_common(
+        self,
+        coord,
+        atype,
+        spin,
+        box: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        do_atomic_virial: bool = False,
+    ) -> Dict[str, torch.Tensor]:
+        nframes, nloc = coord.shape[:2]
+        coord_updated, atype_updated = self.process_spin_input(coord, atype, spin)
+        model_ret = self.backbone_model.forward_common(
+            coord_updated,
+            atype_updated,
+            box,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+        )
+        model_output_type = self.backbone_model.model_output_type()
+        if "mask" in model_output_type:
+            model_output_type.pop(model_output_type.index("mask"))
+        var_name = model_output_type[0]
+        model_ret[f"{var_name}"] = torch.split(
+            model_ret[f"{var_name}"], [nloc, nloc], dim=1
+        )[0]
+        if self.backbone_model.do_grad_r(var_name):
+            (
+                model_ret[f"{var_name}_derv_r"],
+                model_ret[f"{var_name}_derv_r_mag"],
+                model_ret["mask_mag"],
+            ) = self.process_spin_output(atype, model_ret[f"{var_name}_derv_r"])
+        if self.backbone_model.do_grad_c(var_name) and do_atomic_virial:
+            (
+                model_ret[f"{var_name}_derv_c"],
+                model_ret[f"{var_name}_derv_c_mag"],
+                model_ret["mask_mag"],
+            ) = self.process_spin_output(
+                atype,
+                model_ret[f"{var_name}_derv_c"],
+                add_mag=False,
+                virtual_scale=False,
+            )
+        return model_ret
+
+    def forward_common_lower(
+        self,
+        extended_coord,
+        extended_atype,
+        extended_spin,
+        nlist,
+        mapping: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        do_atomic_virial: bool = False,
+    ):
+        nframes, nloc = nlist.shape[:2]
+        (
+            extended_coord_updated,
+            extended_atype_updated,
+            nlist_updated,
+            mapping_updated,
+        ) = self.process_spin_input_lower(
+            extended_coord, extended_atype, extended_spin, nlist, mapping=mapping
+        )
+        model_ret = self.backbone_model.forward_common_lower(
+            extended_coord_updated,
+            extended_atype_updated,
+            nlist_updated,
+            mapping=mapping_updated,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+        )
+        model_output_type = self.backbone_model.model_output_type()
+        if "mask" in model_output_type:
+            model_output_type.pop(model_output_type.index("mask"))
+        var_name = model_output_type[0]
+        model_ret[f"{var_name}"] = torch.split(
+            model_ret[f"{var_name}"], [nloc, nloc], dim=1
+        )[0]
+        if self.backbone_model.do_grad_r(var_name):
+            (
+                model_ret[f"{var_name}_derv_r"],
+                model_ret[f"{var_name}_derv_r_mag"],
+                model_ret["mask_mag"],
+            ) = self.process_spin_output_lower(
+                extended_atype, model_ret[f"{var_name}_derv_r"], nloc
+            )
+        if self.backbone_model.do_grad_c(var_name) and do_atomic_virial:
+            (
+                model_ret[f"{var_name}_derv_c"],
+                model_ret[f"{var_name}_derv_c_mag"],
+                model_ret["mask_mag"],
+            ) = self.process_spin_output_lower(
+                extended_atype,
+                model_ret[f"{var_name}_derv_c"],
+                nloc,
+                add_mag=False,
+                virtual_scale=False,
+            )
+        return model_ret
+
+    def serialize(self) -> dict:
+        return {
+            "backbone_model": self.backbone_model.serialize(),
+            "spin": self.spin.serialize(),
+        }
+
+    @classmethod
+    def deserialize(cls, data) -> "SpinModel":
+        backbone_model_obj = DPModel.deserialize(data["backbone_model"])
+        spin = Spin.deserialize(data["spin"])
+        return cls(
+            backbone_model=backbone_model_obj,
+            spin=spin,
+        )
+
+
+class SpinEnergyModel(SpinModel):
+    """A spin model for energy."""
+
+    model_type = "ener"
+
+    def __init__(
+        self,
+        backbone_model,
+        spin: Spin,
+    ):
+        super().__init__(backbone_model, spin)
+
+    def forward(
+        self,
+        coord,
+        atype,
+        spin,
+        box: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        do_atomic_virial: bool = False,
+    ) -> Dict[str, torch.Tensor]:
+        if aparam is not None:
+            aparam = self.expand_aparam(aparam, coord.shape[1])
+        model_ret = self.forward_common(
+            coord,
+            atype,
+            spin,
+            box,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+        )
+        model_predict = {}
+        model_predict["atom_energy"] = model_ret["energy"]
+        model_predict["energy"] = model_ret["energy_redu"]
+        model_predict["mask_mag"] = model_ret["mask_mag"]
+        if self.backbone_model.do_grad_r("energy"):
+            model_predict["force"] = model_ret["energy_derv_r"].squeeze(-2)
+            model_predict["force_mag"] = model_ret["energy_derv_r_mag"].squeeze(-2)
+        # not support virial by far
+        return model_predict
+
+    @torch.jit.export
+    def forward_lower(
+        self,
+        extended_coord,
+        extended_atype,
+        extended_spin,
+        nlist,
+        mapping: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        do_atomic_virial: bool = False,
+    ):
+        model_ret = self.forward_common_lower(
+            extended_coord,
+            extended_atype,
+            extended_spin,
+            nlist,
+            mapping=mapping,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+        )
+        model_predict = {}
+        model_predict["atom_energy"] = model_ret["energy"]
+        model_predict["energy"] = model_ret["energy_redu"]
+        model_predict["mask_mag"] = model_ret["mask_mag"]
+        if self.backbone_model.do_grad_r("energy"):
+            model_predict["extended_force"] = model_ret["energy_derv_r"].squeeze(-2)
+            model_predict["extended_force_mag"] = model_ret[
+                "energy_derv_r_mag"
+            ].squeeze(-2)
+        # not support virial by far
+        return model_predict
diff --git a/deepmd/pt/model/model/transform_output.py b/deepmd/pt/model/model/transform_output.py
new file mode 100644
index 0000000000..730e6b29d0
--- /dev/null
+++ b/deepmd/pt/model/model/transform_output.py
@@ -0,0 +1,249 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Dict,
+    List,
+    Optional,
+)
+
+import torch
+
+from deepmd.dpmodel import (
+    FittingOutputDef,
+    ModelOutputDef,
+    OutputVariableDef,
+    get_deriv_name,
+    get_reduce_name,
+)
+from deepmd.pt.utils import (
+    env,
+)
+
+
+def atomic_virial_corr(
+    extended_coord: torch.Tensor,
+    atom_energy: torch.Tensor,
+):
+    nall = extended_coord.shape[1]
+    nloc = atom_energy.shape[1]
+    coord, _ = torch.split(extended_coord, [nloc, nall - nloc], dim=1)
+    # no derivative with respect to the loc coord.
+    coord = coord.detach()
+    ce = coord * atom_energy
+    sumce0, sumce1, sumce2 = torch.split(torch.sum(ce, dim=1), [1, 1, 1], dim=-1)
+    faked_grad = torch.ones_like(sumce0)
+    lst = torch.jit.annotate(List[Optional[torch.Tensor]], [faked_grad])
+    extended_virial_corr0 = torch.autograd.grad(
+        [sumce0], [extended_coord], grad_outputs=lst, create_graph=True
+    )[0]
+    assert extended_virial_corr0 is not None
+    extended_virial_corr1 = torch.autograd.grad(
+        [sumce1], [extended_coord], grad_outputs=lst, create_graph=True
+    )[0]
+    assert extended_virial_corr1 is not None
+    extended_virial_corr2 = torch.autograd.grad(
+        [sumce2], [extended_coord], grad_outputs=lst, create_graph=True
+    )[0]
+    assert extended_virial_corr2 is not None
+    extended_virial_corr = torch.concat(
+        [
+            extended_virial_corr0.unsqueeze(-1),
+            extended_virial_corr1.unsqueeze(-1),
+            extended_virial_corr2.unsqueeze(-1),
+        ],
+        dim=-1,
+    )
+    return extended_virial_corr
+
+
+def task_deriv_one(
+    atom_energy: torch.Tensor,
+    energy: torch.Tensor,
+    extended_coord: torch.Tensor,
+    do_virial: bool = True,
+    do_atomic_virial: bool = False,
+):
+    faked_grad = torch.ones_like(energy)
+    lst = torch.jit.annotate(List[Optional[torch.Tensor]], [faked_grad])
+    extended_force = torch.autograd.grad(
+        [energy], [extended_coord], grad_outputs=lst, create_graph=True
+    )[0]
+    assert extended_force is not None
+    extended_force = -extended_force
+    if do_virial:
+        extended_virial = extended_force.unsqueeze(-1) @ extended_coord.unsqueeze(-2)
+        # the correction sums to zero, which does not contribute to global virial
+        if do_atomic_virial:
+            extended_virial_corr = atomic_virial_corr(extended_coord, atom_energy)
+            extended_virial = extended_virial + extended_virial_corr
+        # to [...,3,3] -> [...,9]
+        extended_virial = extended_virial.view(list(extended_virial.shape[:-2]) + [9])  # noqa:RUF005
+    else:
+        extended_virial = None
+    return extended_force, extended_virial
+
+
+def get_leading_dims(
+    vv: torch.Tensor,
+    vdef: OutputVariableDef,
+):
+    """Get the dimensions of nf x nloc."""
+    vshape = vv.shape
+    return list(vshape[: (len(vshape) - len(vdef.shape))])
+
+
+def get_atom_axis(
+    vdef: torch.Tensor,
+):
+    """Get the axis of atoms."""
+    atom_axis = -(len(vdef.shape) + 1)
+    return atom_axis
+
+
+def take_deriv(
+    vv: torch.Tensor,
+    svv: torch.Tensor,
+    vdef: OutputVariableDef,
+    coord_ext: torch.Tensor,
+    do_virial: bool = False,
+    do_atomic_virial: bool = False,
+):
+    size = 1
+    for ii in vdef.shape:
+        size *= ii
+    vv1 = vv.view(list(get_leading_dims(vv, vdef)) + [size])  # noqa: RUF005
+    svv1 = svv.view(list(get_leading_dims(svv, vdef)) + [size])  # noqa: RUF005
+    split_vv1 = torch.split(vv1, [1] * size, dim=-1)
+    split_svv1 = torch.split(svv1, [1] * size, dim=-1)
+    split_ff, split_avir = [], []
+    for vvi, svvi in zip(split_vv1, split_svv1):
+        # nf x nloc x 3, nf x nloc x 9
+        ffi, aviri = task_deriv_one(
+            vvi,
+            svvi,
+            coord_ext,
+            do_virial=do_virial,
+            do_atomic_virial=do_atomic_virial,
+        )
+        # nf x nloc x 1 x 3, nf x nloc x 1 x 9
+        ffi = ffi.unsqueeze(-2)
+        split_ff.append(ffi)
+        if do_virial:
+            assert aviri is not None
+            aviri = aviri.unsqueeze(-2)
+            split_avir.append(aviri)
+    # nf x nall x v_dim x 3, nf x nall x v_dim x 9
+    out_lead_shape = list(coord_ext.shape[:-1]) + vdef.shape
+    ff = torch.concat(split_ff, dim=-2).view(out_lead_shape + [3])  # noqa: RUF005
+    if do_virial:
+        avir = torch.concat(split_avir, dim=-2).view(out_lead_shape + [9])  # noqa: RUF005
+    else:
+        avir = None
+    return ff, avir
+
+
+def fit_output_to_model_output(
+    fit_ret: Dict[str, torch.Tensor],
+    fit_output_def: FittingOutputDef,
+    coord_ext: torch.Tensor,
+    do_atomic_virial: bool = False,
+) -> Dict[str, torch.Tensor]:
+    """Transform the output of the fitting network to
+    the model output.
+
+    """
+    redu_prec = env.GLOBAL_PT_ENER_FLOAT_PRECISION
+    model_ret = dict(fit_ret.items())
+    for kk, vv in fit_ret.items():
+        vdef = fit_output_def[kk]
+        shap = vdef.shape
+        atom_axis = -(len(shap) + 1)
+        if vdef.reduciable:
+            kk_redu = get_reduce_name(kk)
+            model_ret[kk_redu] = torch.sum(vv.to(redu_prec), dim=atom_axis)
+            if vdef.r_differentiable:
+                kk_derv_r, kk_derv_c = get_deriv_name(kk)
+                dr, dc = take_deriv(
+                    vv,
+                    model_ret[kk_redu],
+                    vdef,
+                    coord_ext,
+                    do_virial=vdef.c_differentiable,
+                    do_atomic_virial=do_atomic_virial,
+                )
+                model_ret[kk_derv_r] = dr
+                if vdef.c_differentiable:
+                    assert dc is not None
+                    model_ret[kk_derv_c] = dc
+                    model_ret[kk_derv_c + "_redu"] = torch.sum(
+                        model_ret[kk_derv_c].to(redu_prec), dim=1
+                    )
+    return model_ret
+
+
+def communicate_extended_output(
+    model_ret: Dict[str, torch.Tensor],
+    model_output_def: ModelOutputDef,
+    mapping: torch.Tensor,  # nf x nloc
+    do_atomic_virial: bool = False,
+) -> Dict[str, torch.Tensor]:
+    """Transform the output of the model network defined on
+    local and ghost (extended) atoms to local atoms.
+
+    """
+    redu_prec = env.GLOBAL_PT_ENER_FLOAT_PRECISION
+    new_ret = {}
+    for kk in model_output_def.keys_outp():
+        vv = model_ret[kk]
+        vdef = model_output_def[kk]
+        new_ret[kk] = vv
+        if vdef.reduciable:
+            kk_redu = get_reduce_name(kk)
+            new_ret[kk_redu] = model_ret[kk_redu]
+            # nf x nloc
+            vldims = get_leading_dims(vv, vdef)
+            # nf x nall
+            mldims = list(mapping.shape)
+            kk_derv_r, kk_derv_c = get_deriv_name(kk)
+            if vdef.r_differentiable:
+                # vdim x 3
+                derv_r_ext_dims = list(vdef.shape) + [3]  # noqa:RUF005
+                mapping = mapping.view(mldims + [1] * len(derv_r_ext_dims)).expand(
+                    [-1] * len(mldims) + derv_r_ext_dims
+                )
+                force = torch.zeros(
+                    vldims + derv_r_ext_dims, dtype=vv.dtype, device=vv.device
+                )
+                # nf x nloc x nvar x 3
+                new_ret[kk_derv_r] = torch.scatter_reduce(
+                    force,
+                    1,
+                    index=mapping,
+                    src=model_ret[kk_derv_r],
+                    reduce="sum",
+                )
+            if vdef.c_differentiable:
+                assert vdef.r_differentiable
+                derv_c_ext_dims = list(vdef.shape) + [9]  # noqa:RUF005
+                # nf x nloc x nvar x 3 -> nf x nloc x nvar x 9
+                mapping = torch.tile(
+                    mapping,
+                    [1] * (len(mldims) + len(vdef.shape)) + [3],
+                )
+                virial = torch.zeros(
+                    vldims + derv_c_ext_dims, dtype=vv.dtype, device=vv.device
+                )
+                # nf x nloc x nvar x 9
+                new_ret[kk_derv_c] = torch.scatter_reduce(
+                    virial,
+                    1,
+                    index=mapping,
+                    src=model_ret[kk_derv_c],
+                    reduce="sum",
+                )
+                new_ret[kk_derv_c + "_redu"] = torch.sum(
+                    new_ret[kk_derv_c].to(redu_prec), dim=1
+                )
+                if not do_atomic_virial:
+                    # pop atomic virial, because it is not correctly calculated.
+                    new_ret.pop(kk_derv_c)
+    return new_ret
diff --git a/deepmd/pt/model/network/__init__.py b/deepmd/pt/model/network/__init__.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/deepmd/pt/model/network/__init__.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/deepmd/pt/model/network/mlp.py b/deepmd/pt/model/network/mlp.py
new file mode 100644
index 0000000000..762461111e
--- /dev/null
+++ b/deepmd/pt/model/network/mlp.py
@@ -0,0 +1,225 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    ClassVar,
+    Dict,
+    Optional,
+)
+
+import numpy as np
+import torch
+import torch.nn as nn
+
+from deepmd.pt.utils import (
+    env,
+)
+
+device = env.DEVICE
+
+from deepmd.dpmodel.utils import (
+    NativeLayer,
+)
+from deepmd.dpmodel.utils import NetworkCollection as DPNetworkCollection
+from deepmd.dpmodel.utils import (
+    make_embedding_network,
+    make_fitting_network,
+    make_multilayer_network,
+)
+from deepmd.pt.utils.env import (
+    DEFAULT_PRECISION,
+    PRECISION_DICT,
+)
+from deepmd.pt.utils.utils import (
+    ActivationFn,
+    to_numpy_array,
+    to_torch_tensor,
+)
+
+try:
+    from deepmd._version import version as __version__
+except ImportError:
+    __version__ = "unknown"
+
+
+def empty_t(shape, precision):
+    return torch.empty(shape, dtype=precision, device=device)
+
+
+class MLPLayer(nn.Module):
+    def __init__(
+        self,
+        num_in,
+        num_out,
+        bias: bool = True,
+        use_timestep: bool = False,
+        activation_function: Optional[str] = None,
+        resnet: bool = False,
+        bavg: float = 0.0,
+        stddev: float = 1.0,
+        precision: str = DEFAULT_PRECISION,
+    ):
+        super().__init__()
+        # only use_timestep when skip connection is established.
+        self.use_timestep = use_timestep and (
+            num_out == num_in or num_out == num_in * 2
+        )
+        self.activate_name = activation_function
+        self.activate = ActivationFn(self.activate_name)
+        self.precision = precision
+        self.prec = PRECISION_DICT[self.precision]
+        self.matrix = nn.Parameter(data=empty_t((num_in, num_out), self.prec))
+        nn.init.normal_(self.matrix.data, std=stddev / np.sqrt(num_out + num_in))
+        if bias:
+            self.bias = nn.Parameter(
+                data=empty_t([num_out], self.prec),
+            )
+            nn.init.normal_(self.bias.data, mean=bavg, std=stddev)
+        else:
+            self.bias = None
+        if self.use_timestep:
+            self.idt = nn.Parameter(data=empty_t([num_out], self.prec))
+            nn.init.normal_(self.idt.data, mean=0.1, std=0.001)
+        else:
+            self.idt = None
+        self.resnet = resnet
+
+    def check_type_consistency(self):
+        precision = self.precision
+
+        def check_var(var):
+            if var is not None:
+                # assertion "float64" == "double" would fail
+                assert PRECISION_DICT[var.dtype.name] is PRECISION_DICT[precision]
+
+        check_var(self.w)
+        check_var(self.b)
+        check_var(self.idt)
+
+    def dim_in(self) -> int:
+        return self.matrix.shape[0]
+
+    def dim_out(self) -> int:
+        return self.matrix.shape[1]
+
+    def forward(
+        self,
+        xx: torch.Tensor,
+    ) -> torch.Tensor:
+        """One MLP layer used by DP model.
+
+        Parameters
+        ----------
+        xx : torch.Tensor
+            The input.
+
+        Returns
+        -------
+        yy: torch.Tensor
+            The output.
+        """
+        ori_prec = xx.dtype
+        xx = xx.to(self.prec)
+        yy = (
+            torch.matmul(xx, self.matrix) + self.bias
+            if self.bias is not None
+            else torch.matmul(xx, self.matrix)
+        )
+        yy = self.activate(yy).clone()
+        yy = yy * self.idt if self.idt is not None else yy
+        if self.resnet:
+            if xx.shape[-1] == yy.shape[-1]:
+                yy += xx
+            elif 2 * xx.shape[-1] == yy.shape[-1]:
+                yy += torch.concat([xx, xx], dim=-1)
+            else:
+                yy = yy
+        yy = yy.to(ori_prec)
+        return yy
+
+    def serialize(self) -> dict:
+        """Serialize the layer to a dict.
+
+        Returns
+        -------
+        dict
+            The serialized layer.
+        """
+        nl = NativeLayer(
+            self.matrix.shape[0],
+            self.matrix.shape[1],
+            bias=self.bias is not None,
+            use_timestep=self.idt is not None,
+            activation_function=self.activate_name,
+            resnet=self.resnet,
+            precision=self.precision,
+        )
+        nl.w, nl.b, nl.idt = (
+            to_numpy_array(self.matrix),
+            to_numpy_array(self.bias),
+            to_numpy_array(self.idt),
+        )
+        return nl.serialize()
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "MLPLayer":
+        """Deserialize the layer from a dict.
+
+        Parameters
+        ----------
+        data : dict
+            The dict to deserialize from.
+        """
+        nl = NativeLayer.deserialize(data)
+        obj = cls(
+            nl["matrix"].shape[0],
+            nl["matrix"].shape[1],
+            bias=nl["bias"] is not None,
+            use_timestep=nl["idt"] is not None,
+            activation_function=nl["activation_function"],
+            resnet=nl["resnet"],
+            precision=nl["precision"],
+        )
+        prec = PRECISION_DICT[obj.precision]
+
+        def check_load_param(ss):
+            return (
+                nn.Parameter(data=to_torch_tensor(nl[ss]))
+                if nl[ss] is not None
+                else None
+            )
+
+        obj.matrix = check_load_param("matrix")
+        obj.bias = check_load_param("bias")
+        obj.idt = check_load_param("idt")
+        return obj
+
+
+MLP_ = make_multilayer_network(MLPLayer, nn.Module)
+
+
+class MLP(MLP_):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.layers = torch.nn.ModuleList(self.layers)
+
+    forward = MLP_.call
+
+
+EmbeddingNet = make_embedding_network(MLP, MLPLayer)
+
+FittingNet = make_fitting_network(EmbeddingNet, MLP, MLPLayer)
+
+
+class NetworkCollection(DPNetworkCollection, nn.Module):
+    """PyTorch implementation of NetworkCollection."""
+
+    NETWORK_TYPE_MAP: ClassVar[Dict[str, type]] = {
+        "network": MLP,
+        "embedding_network": EmbeddingNet,
+        "fitting_network": FittingNet,
+    }
+
+    def __init__(self, *args, **kwargs):
+        # init both two base classes
+        DPNetworkCollection.__init__(self, *args, **kwargs)
+        nn.Module.__init__(self)
+        self.networks = self._networks = torch.nn.ModuleList(self._networks)
diff --git a/deepmd/pt/model/network/network.py b/deepmd/pt/model/network/network.py
new file mode 100644
index 0000000000..c895f642e1
--- /dev/null
+++ b/deepmd/pt/model/network/network.py
@@ -0,0 +1,2035 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    List,
+    Optional,
+)
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from deepmd.pt.model.network.mlp import (
+    EmbeddingNet,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+try:
+    from typing import (
+        Final,
+    )
+except ImportError:
+    from torch.jit import Final
+
+from functools import (
+    partial,
+)
+
+import torch.utils.checkpoint
+
+from deepmd.pt.utils.utils import (
+    ActivationFn,
+)
+
+
+def Tensor(*shape):
+    return torch.empty(shape, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE)
+
+
+class Dropout(nn.Module):
+    def __init__(self, p):
+        super().__init__()
+        self.p = p
+
+    def forward(self, x, inplace: bool = False):
+        if self.p > 0 and self.training:
+            return F.dropout(x, p=self.p, training=True, inplace=inplace)
+        else:
+            return x
+
+
+class Identity(nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, x):
+        return x
+
+
+class DropPath(torch.nn.Module):
+    """Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks)."""
+
+    def __init__(self, prob=None):
+        super().__init__()
+        self.drop_prob = prob
+
+    def forward(self, x):
+        if self.drop_prob == 0.0 or not self.training:
+            return x
+        keep_prob = 1 - self.drop_prob
+        shape = (x.shape[0],) + (1,) * (
+            x.ndim - 1
+        )  # work with diff dim tensors, not just 2D ConvNets
+        random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
+        random_tensor.floor_()  # binarize
+        output = x.div(keep_prob) * random_tensor
+        return output
+
+    def extra_repr(self) -> str:
+        return f"prob={self.drop_prob}"
+
+
+def softmax_dropout(
+    input_x, dropout_prob, is_training=True, mask=None, bias=None, inplace=True
+):
+    input_x = input_x.contiguous()
+    if not inplace:
+        input_x = input_x.clone()
+    if mask is not None:
+        input_x += mask
+    if bias is not None:
+        input_x += bias
+    return F.dropout(F.softmax(input_x, dim=-1), p=dropout_prob, training=is_training)
+
+
+def checkpoint_sequential(
+    functions,
+    input_x,
+    enabled=True,
+):
+    def wrap_tuple(a):
+        return (a,) if type(a) is not tuple else a
+
+    def exec(func, a):
+        return wrap_tuple(func(*a))
+
+    def get_wrap_exec(func):
+        def wrap_exec(*a):
+            return exec(func, a)
+
+        return wrap_exec
+
+    input_x = wrap_tuple(input_x)
+
+    is_grad_enabled = torch.is_grad_enabled()
+
+    if enabled and is_grad_enabled:
+        for func in functions:
+            input_x = torch.utils.checkpoint.checkpoint(get_wrap_exec(func), *input_x)
+    else:
+        for func in functions:
+            input_x = exec(func, input_x)
+    return input_x
+
+
+class ResidualLinear(nn.Module):
+    resnet: Final[int]
+
+    def __init__(self, num_in, num_out, bavg=0.0, stddev=1.0, resnet_dt=False):
+        """Construct a residual linear layer.
+
+        Args:
+        - num_in: Width of input tensor.
+        - num_out: Width of output tensor.
+        - resnet_dt: Using time-step in the ResNet construction.
+        """
+        super().__init__()
+        self.num_in = num_in
+        self.num_out = num_out
+        self.resnet = resnet_dt
+
+        self.matrix = nn.Parameter(data=Tensor(num_in, num_out))
+        nn.init.normal_(self.matrix.data, std=stddev / np.sqrt(num_out + num_in))
+        self.bias = nn.Parameter(data=Tensor(1, num_out))
+        nn.init.normal_(self.bias.data, mean=bavg, std=stddev)
+        if self.resnet:
+            self.idt = nn.Parameter(data=Tensor(1, num_out))
+            nn.init.normal_(self.idt.data, mean=1.0, std=0.001)
+
+    def forward(self, inputs):
+        """Return X ?+ X*W+b."""
+        xw_plus_b = torch.matmul(inputs, self.matrix) + self.bias
+        hidden = torch.tanh(xw_plus_b)
+        if self.resnet:
+            hidden = hidden * self.idt
+        if self.num_in == self.num_out:
+            return inputs + hidden
+        elif self.num_in * 2 == self.num_out:
+            return torch.cat([inputs, inputs], dim=1) + hidden
+        else:
+            return hidden
+
+
+class TypeFilter(nn.Module):
+    use_tebd: Final[bool]
+    tebd_mode: Final[str]
+
+    def __init__(
+        self,
+        offset,
+        length,
+        neuron,
+        return_G=False,
+        tebd_dim=0,
+        use_tebd=False,
+        tebd_mode="concat",
+    ):
+        """Construct a filter on the given element as neighbor.
+
+        Args:
+        - offset: Element offset in the descriptor matrix.
+        - length: Atom count of this element.
+        - neuron: Number of neurons in each hidden layers of the embedding net.
+        """
+        super().__init__()
+        self.offset = offset
+        self.length = length
+        self.tebd_dim = tebd_dim
+        self.use_tebd = use_tebd
+        self.tebd_mode = tebd_mode
+        supported_tebd_mode = ["concat", "dot", "dot_residual_s", "dot_residual_t"]
+        assert (
+            tebd_mode in supported_tebd_mode
+        ), f"Unknown tebd_mode {tebd_mode}! Supported are {supported_tebd_mode}."
+        if use_tebd and tebd_mode == "concat":
+            self.neuron = [1 + tebd_dim * 2, *neuron]
+        else:
+            self.neuron = [1, *neuron]
+
+        deep_layers = []
+        for ii in range(1, len(self.neuron)):
+            one = ResidualLinear(self.neuron[ii - 1], self.neuron[ii])
+            deep_layers.append(one)
+        self.deep_layers = nn.ModuleList(deep_layers)
+
+        deep_layers_t = []
+        if use_tebd and tebd_mode in ["dot", "dot_residual_s", "dot_residual_t"]:
+            self.neuron_t = [tebd_dim * 2, *neuron]
+            for ii in range(1, len(self.neuron_t)):
+                one = ResidualLinear(self.neuron_t[ii - 1], self.neuron_t[ii])
+                deep_layers_t.append(one)
+        self.deep_layers_t = nn.ModuleList(deep_layers_t)
+
+        self.return_G = return_G
+
+    def forward(
+        self,
+        inputs,
+        atype_tebd: Optional[torch.Tensor] = None,
+        nlist_tebd: Optional[torch.Tensor] = None,
+    ):
+        """Calculate decoded embedding for each atom.
+
+        Args:
+        - inputs: Descriptor matrix. Its shape is [nframes*natoms[0], len_descriptor].
+
+        Returns
+        -------
+        - `torch.Tensor`: Embedding contributed by me. Its shape is [nframes*natoms[0], 4, self.neuron[-1]].
+        """
+        inputs_i = inputs[:, self.offset * 4 : (self.offset + self.length) * 4]
+        inputs_reshape = inputs_i.reshape(
+            -1, 4
+        )  # shape is [nframes*natoms[0]*self.length, 4]
+        xyz_scatter = inputs_reshape[:, 0:1]
+
+        # concat the tebd as input
+        if self.use_tebd and self.tebd_mode == "concat":
+            assert nlist_tebd is not None and atype_tebd is not None
+            nlist_tebd = nlist_tebd.reshape(-1, self.tebd_dim)
+            atype_tebd = atype_tebd.reshape(-1, self.tebd_dim)
+            # [nframes * nloc * nnei, 1 + tebd_dim * 2]
+            xyz_scatter = torch.concat([xyz_scatter, nlist_tebd, atype_tebd], dim=1)
+
+        for linear in self.deep_layers:
+            xyz_scatter = linear(xyz_scatter)
+            # [nframes * nloc * nnei, out_size]
+
+        # dot the tebd output
+        if self.use_tebd and self.tebd_mode in [
+            "dot",
+            "dot_residual_s",
+            "dot_residual_t",
+        ]:
+            assert nlist_tebd is not None and atype_tebd is not None
+            nlist_tebd = nlist_tebd.reshape(-1, self.tebd_dim)
+            atype_tebd = atype_tebd.reshape(-1, self.tebd_dim)
+            # [nframes * nloc * nnei, tebd_dim * 2]
+            two_side_tebd = torch.concat([nlist_tebd, atype_tebd], dim=1)
+            for linear in self.deep_layers_t:
+                two_side_tebd = linear(two_side_tebd)
+                # [nframes * nloc * nnei, out_size]
+            if self.tebd_mode == "dot":
+                xyz_scatter = xyz_scatter * two_side_tebd
+            elif self.tebd_mode == "dot_residual_s":
+                xyz_scatter = xyz_scatter * two_side_tebd + xyz_scatter
+            elif self.tebd_mode == "dot_residual_t":
+                xyz_scatter = xyz_scatter * two_side_tebd + two_side_tebd
+
+        xyz_scatter = xyz_scatter.view(
+            -1, self.length, self.neuron[-1]
+        )  # shape is [nframes*natoms[0], self.length, self.neuron[-1]]
+        if self.return_G:
+            return xyz_scatter
+        else:
+            # shape is [nframes*natoms[0], 4, self.length]
+            inputs_reshape = inputs_i.view(-1, self.length, 4).permute(0, 2, 1)
+            return torch.matmul(inputs_reshape, xyz_scatter)
+
+
+class SimpleLinear(nn.Module):
+    use_timestep: Final[bool]
+
+    def __init__(
+        self,
+        num_in,
+        num_out,
+        bavg=0.0,
+        stddev=1.0,
+        use_timestep=False,
+        activate=None,
+        bias: bool = True,
+    ):
+        """Construct a linear layer.
+
+        Args:
+        - num_in: Width of input tensor.
+        - num_out: Width of output tensor.
+        - use_timestep: Apply time-step to weight.
+        - activate: type of activate func.
+        """
+        super().__init__()
+        self.num_in = num_in
+        self.num_out = num_out
+        self.use_timestep = use_timestep
+        self.activate = ActivationFn(activate)
+
+        self.matrix = nn.Parameter(data=Tensor(num_in, num_out))
+        nn.init.normal_(self.matrix.data, std=stddev / np.sqrt(num_out + num_in))
+        if bias:
+            self.bias = nn.Parameter(data=Tensor(1, num_out))
+            nn.init.normal_(self.bias.data, mean=bavg, std=stddev)
+        else:
+            self.bias = None
+        if self.use_timestep:
+            self.idt = nn.Parameter(data=Tensor(1, num_out))
+            nn.init.normal_(self.idt.data, mean=0.1, std=0.001)
+
+    def forward(self, inputs):
+        """Return X*W+b."""
+        xw = torch.matmul(inputs, self.matrix)
+        hidden = xw + self.bias if self.bias is not None else xw
+        hidden = self.activate(hidden)
+        if self.use_timestep:
+            hidden = hidden * self.idt
+        return hidden
+
+
+class Linear(nn.Linear):
+    def __init__(
+        self,
+        d_in: int,
+        d_out: int,
+        bias: bool = True,
+        init: str = "default",
+    ):
+        super().__init__(
+            d_in,
+            d_out,
+            bias=bias,
+            dtype=env.GLOBAL_PT_FLOAT_PRECISION,
+            device=env.DEVICE,
+        )
+
+        self.use_bias = bias
+
+        if self.use_bias:
+            with torch.no_grad():
+                self.bias.fill_(0)
+
+        if init == "default":
+            self._trunc_normal_init(1.0)
+        elif init == "relu":
+            self._trunc_normal_init(2.0)
+        elif init == "glorot":
+            self._glorot_uniform_init()
+        elif init == "gating":
+            self._zero_init(self.use_bias)
+        elif init == "normal":
+            self._normal_init()
+        elif init == "final":
+            self._zero_init(False)
+        else:
+            raise ValueError("Invalid init method.")
+
+    def _trunc_normal_init(self, scale=1.0):
+        # Constant from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
+        TRUNCATED_NORMAL_STDDEV_FACTOR = 0.87962566103423978
+        _, fan_in = self.weight.shape
+        scale = scale / max(1, fan_in)
+        std = (scale**0.5) / TRUNCATED_NORMAL_STDDEV_FACTOR
+        nn.init.trunc_normal_(self.weight, mean=0.0, std=std)
+
+    def _glorot_uniform_init(self):
+        nn.init.xavier_uniform_(self.weight, gain=1)
+
+    def _zero_init(self, use_bias=True):
+        with torch.no_grad():
+            self.weight.fill_(0.0)
+            if use_bias:
+                with torch.no_grad():
+                    self.bias.fill_(1.0)
+
+    def _normal_init(self):
+        nn.init.kaiming_normal_(self.weight, nonlinearity="linear")
+
+
+class Transition(nn.Module):
+    def __init__(self, d_in, n, dropout=0.0):
+        super().__init__()
+
+        self.d_in = d_in
+        self.n = n
+
+        self.linear_1 = Linear(self.d_in, self.n * self.d_in, init="relu")
+        self.act = nn.GELU()
+        self.linear_2 = Linear(self.n * self.d_in, d_in, init="final")
+        self.dropout = dropout
+
+    def _transition(self, x):
+        x = self.linear_1(x)
+        x = self.act(x)
+        x = F.dropout(x, p=self.dropout, training=self.training)
+        x = self.linear_2(x)
+        return x
+
+    def forward(
+        self,
+        x: torch.Tensor,
+    ) -> torch.Tensor:
+        x = self._transition(x=x)
+        return x
+
+
+class Embedding(nn.Embedding):
+    def __init__(
+        self,
+        num_embeddings: int,
+        embedding_dim: int,
+        padding_idx: Optional[int] = None,
+        dtype=torch.float64,
+    ):
+        super().__init__(
+            num_embeddings, embedding_dim, padding_idx=padding_idx, dtype=dtype
+        )
+        self._normal_init()
+
+        if padding_idx is not None:
+            self.weight.data[self.padding_idx].zero_()
+
+    def _normal_init(self, std=0.02):
+        nn.init.normal_(self.weight, mean=0.0, std=std)
+
+
+class NonLinearHead(nn.Module):
+    def __init__(self, input_dim, out_dim, activation_fn, hidden=None):
+        super().__init__()
+        hidden = input_dim if not hidden else hidden
+        self.linear1 = SimpleLinear(input_dim, hidden, activate=activation_fn)
+        self.linear2 = SimpleLinear(hidden, out_dim)
+
+    def forward(self, x):
+        x = self.linear1(x)
+        x = self.linear2(x)
+        return x
+
+
+class NonLinear(nn.Module):
+    def __init__(self, input, output_size, hidden=None):
+        super().__init__()
+
+        if hidden is None:
+            hidden = input
+        self.layer1 = Linear(input, hidden, init="relu")
+        self.layer2 = Linear(hidden, output_size, init="final")
+
+    def forward(self, x):
+        x = F.linear(x, self.layer1.weight)
+        # x = fused_ops.bias_torch_gelu(x, self.layer1.bias)
+        x = nn.GELU()(x) + self.layer1.bias
+        x = self.layer2(x)
+        return x
+
+    def zero_init(self):
+        nn.init.zeros_(self.layer2.weight)
+        nn.init.zeros_(self.layer2.bias)
+
+
+class MaskLMHead(nn.Module):
+    """Head for masked language modeling."""
+
+    def __init__(self, embed_dim, output_dim, activation_fn, weight=None):
+        super().__init__()
+        self.dense = SimpleLinear(embed_dim, embed_dim)
+        self.activation_fn = ActivationFn(activation_fn)
+        self.layer_norm = nn.LayerNorm(embed_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION)
+
+        if weight is None:
+            weight = nn.Linear(
+                embed_dim, output_dim, bias=False, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+            ).weight
+        self.weight = weight
+        self.bias = nn.Parameter(
+            torch.zeros(output_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION)
+        )
+
+    def forward(self, features, masked_tokens: Optional[torch.Tensor] = None, **kwargs):
+        # Only project the masked tokens while training,
+        # saves both memory and computation
+        if masked_tokens is not None:
+            features = features[masked_tokens, :]
+
+        x = self.dense(features)
+        x = self.activation_fn(x)
+        x = self.layer_norm(x)
+        # project back to size of vocabulary with bias
+        x = F.linear(x, self.weight) + self.bias
+        return x
+
+
+class ResidualDeep(nn.Module):
+    def __init__(
+        self, type_id, embedding_width, neuron, bias_atom_e, out_dim=1, resnet_dt=False
+    ):
+        """Construct a filter on the given element as neighbor.
+
+        Args:
+        - typei: Element ID.
+        - embedding_width: Embedding width per atom.
+        - neuron: Number of neurons in each hidden layers of the embedding net.
+        - resnet_dt: Using time-step in the ResNet construction.
+        """
+        super().__init__()
+        self.type_id = type_id
+        self.neuron = [embedding_width, *neuron]
+        self.out_dim = out_dim
+
+        deep_layers = []
+        for ii in range(1, len(self.neuron)):
+            one = SimpleLinear(
+                num_in=self.neuron[ii - 1],
+                num_out=self.neuron[ii],
+                use_timestep=(
+                    resnet_dt and ii > 1 and self.neuron[ii - 1] == self.neuron[ii]
+                ),
+                activate="tanh",
+            )
+            deep_layers.append(one)
+        self.deep_layers = nn.ModuleList(deep_layers)
+        if not env.ENERGY_BIAS_TRAINABLE:
+            bias_atom_e = 0
+        self.final_layer = SimpleLinear(self.neuron[-1], self.out_dim, bias_atom_e)
+
+    def forward(self, inputs):
+        """Calculate decoded embedding for each atom.
+
+        Args:
+        - inputs: Embedding net output per atom. Its shape is [nframes*nloc, self.embedding_width].
+
+        Returns
+        -------
+        - `torch.Tensor`: Output layer with shape [nframes*nloc, self.neuron[-1]].
+        """
+        outputs = inputs
+        for idx, linear in enumerate(self.deep_layers):
+            if idx > 0 and linear.num_in == linear.num_out:
+                outputs = outputs + linear(outputs)
+            else:
+                outputs = linear(outputs)
+        outputs = self.final_layer(outputs)
+        return outputs
+
+
+class TypeEmbedNet(nn.Module):
+    def __init__(self, type_nums, embed_dim, bavg=0.0, stddev=1.0):
+        """Construct a type embedding net."""
+        super().__init__()
+        self.embedding = TypeEmbedNetConsistent(
+            ntypes=type_nums,
+            neuron=[embed_dim],
+            padding=True,
+            activation_function="Linear",
+            precision="default",
+        )
+        # nn.init.normal_(self.embedding.weight[:-1], mean=bavg, std=stddev)
+
+    def forward(self, atype):
+        """
+        Args:
+            atype: Type of each input, [nframes, nloc] or [nframes, nloc, nnei].
+
+        Returns
+        -------
+        type_embedding:
+
+        """
+        return self.embedding(atype.device)[atype]
+
+    def share_params(self, base_class, shared_level, resume=False):
+        """
+        Share the parameters of self to the base_class with shared_level during multitask training.
+        If not start from checkpoint (resume is False),
+        some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes.
+        """
+        assert (
+            self.__class__ == base_class.__class__
+        ), "Only TypeEmbedNet of the same type can share params!"
+        if shared_level == 0:
+            # the following will successfully link all the params except buffers, which need manually link.
+            for item in self._modules:
+                self._modules[item] = base_class._modules[item]
+        else:
+            raise NotImplementedError
+
+
+class TypeEmbedNetConsistent(nn.Module):
+    r"""Type embedding network that is consistent with other backends.
+
+    Parameters
+    ----------
+    ntypes : int
+        Number of atom types
+    neuron : list[int]
+        Number of neurons in each hidden layers of the embedding net
+    resnet_dt
+        Time-step `dt` in the resnet construction: y = x + dt * \phi (Wx + b)
+    activation_function
+        The activation function in the embedding net. Supported options are |ACTIVATION_FN|
+    precision
+        The precision of the embedding net parameters. Supported options are |PRECISION|
+    trainable
+        If the weights of embedding net are trainable.
+    seed
+        Random seed for initializing the network parameters.
+    padding
+        Concat the zero padding to the output, as the default embedding of empty type.
+    """
+
+    def __init__(
+        self,
+        *,
+        ntypes: int,
+        neuron: List[int],
+        resnet_dt: bool = False,
+        activation_function: str = "tanh",
+        precision: str = "default",
+        trainable: bool = True,
+        seed: Optional[int] = None,
+        padding: bool = False,
+    ):
+        """Construct a type embedding net."""
+        super().__init__()
+        self.ntypes = ntypes
+        self.neuron = neuron
+        self.seed = seed
+        self.resnet_dt = resnet_dt
+        self.precision = precision
+        self.prec = env.PRECISION_DICT[self.precision]
+        self.activation_function = str(activation_function)
+        self.trainable = trainable
+        self.padding = padding
+        # no way to pass seed?
+        self.embedding_net = EmbeddingNet(
+            ntypes,
+            self.neuron,
+            self.activation_function,
+            self.resnet_dt,
+            self.precision,
+        )
+        for param in self.parameters():
+            param.requires_grad = trainable
+
+    def forward(self, device: torch.device):
+        """Caulate type embedding network.
+
+        Returns
+        -------
+        type_embedding: torch.Tensor
+            Type embedding network.
+        """
+        embed = self.embedding_net(
+            torch.eye(self.ntypes, dtype=self.prec, device=device)
+        )
+        if self.padding:
+            embed = torch.cat(
+                [embed, torch.zeros(1, embed.shape[1], dtype=self.prec, device=device)]
+            )
+        return embed
+
+    @classmethod
+    def deserialize(cls, data: dict):
+        """Deserialize the model.
+
+        Parameters
+        ----------
+        data : dict
+            The serialized data
+
+        Returns
+        -------
+        TypeEmbedNetConsistent
+            The deserialized model
+        """
+        data = data.copy()
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data_cls = data.pop("@class")
+        assert data_cls == "TypeEmbedNet", f"Invalid class {data_cls}"
+
+        embedding_net = EmbeddingNet.deserialize(data.pop("embedding"))
+        type_embedding_net = cls(**data)
+        type_embedding_net.embedding_net = embedding_net
+        return type_embedding_net
+
+    def serialize(self) -> dict:
+        """Serialize the model.
+
+        Returns
+        -------
+        dict
+            The serialized data
+        """
+        return {
+            "@class": "TypeEmbedNet",
+            "@version": 1,
+            "ntypes": self.ntypes,
+            "neuron": self.neuron,
+            "resnet_dt": self.resnet_dt,
+            "precision": self.precision,
+            "activation_function": self.activation_function,
+            "trainable": self.trainable,
+            "padding": self.padding,
+            "embedding": self.embedding_net.serialize(),
+        }
+
+
+@torch.jit.script
+def gaussian(x, mean, std: float):
+    pi = 3.14159
+    a = (2 * pi) ** 0.5
+    return torch.exp(-0.5 * (((x - mean) / std) ** 2)) / (a * std)
+
+
+class GaussianKernel(nn.Module):
+    def __init__(self, K=128, num_pair=512, std_width=1.0, start=0.0, stop=9.0):
+        super().__init__()
+        self.K = K
+        std_width = std_width
+        start = start
+        stop = stop
+        mean = torch.linspace(start, stop, K, dtype=env.GLOBAL_PT_FLOAT_PRECISION)
+        self.std = (std_width * (mean[1] - mean[0])).item()
+        self.register_buffer("mean", mean)
+        self.mul = Embedding(
+            num_pair + 1, 1, padding_idx=num_pair, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+        )
+        self.bias = Embedding(
+            num_pair + 1, 1, padding_idx=num_pair, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+        )
+        nn.init.constant_(self.bias.weight, 0)
+        nn.init.constant_(self.mul.weight, 1.0)
+
+    def forward(self, x, atom_pair):
+        mul = self.mul(atom_pair).abs().sum(dim=-2)
+        bias = self.bias(atom_pair).sum(dim=-2)
+        x = mul * x.unsqueeze(-1) + bias
+        # [nframes, nloc, nnei, K]
+        x = x.expand(-1, -1, -1, self.K)
+        mean = self.mean.view(-1)
+        return gaussian(x, mean, self.std)
+
+
+class GaussianEmbedding(nn.Module):
+    def __init__(
+        self,
+        rcut,
+        kernel_num,
+        num_pair,
+        embed_dim,
+        pair_embed_dim,
+        sel,
+        ntypes,
+        atomic_sum_gbf,
+    ):
+        """Construct a gaussian kernel based embedding of pair representation.
+
+        Args:
+            rcut: Radial cutoff.
+            kernel_num: Number of gaussian kernels.
+            num_pair: Number of different pairs.
+            embed_dim: Dimension of atomic representation.
+            pair_embed_dim: Dimension of pair representation.
+            sel: Number of neighbors.
+            ntypes: Number of atom types.
+        """
+        super().__init__()
+        self.gbf = GaussianKernel(K=kernel_num, num_pair=num_pair, stop=rcut)
+        self.gbf_proj = NonLinear(kernel_num, pair_embed_dim)
+        self.embed_dim = embed_dim
+        self.pair_embed_dim = pair_embed_dim
+        self.atomic_sum_gbf = atomic_sum_gbf
+        if self.atomic_sum_gbf:
+            if kernel_num != self.embed_dim:
+                self.edge_proj = torch.nn.Linear(
+                    kernel_num, self.embed_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+                )
+            else:
+                self.edge_proj = None
+        self.ntypes = ntypes
+        self.nnei = sel
+
+    def forward(self, coord_selected, atom_feature, edge_type_2dim, edge_feature):
+        ## local cluster forward
+        """Calculate decoded embedding for each atom.
+        Args:
+            coord_selected: Clustered atom coordinates with shape [nframes*nloc, natoms, 3].
+            atom_feature: Previous calculated atomic features with shape [nframes*nloc, natoms, embed_dim].
+            edge_type_2dim: Edge index for gbf calculation with shape [nframes*nloc, natoms, natoms, 2].
+            edge_feature: Previous calculated edge features with shape [nframes*nloc, natoms, natoms, pair_dim].
+
+        Returns
+        -------
+        atom_feature: Updated atomic features with shape [nframes*nloc, natoms, embed_dim].
+        attn_bias: Updated edge features as attention bias with shape [nframes*nloc, natoms, natoms, pair_dim].
+        delta_pos: Delta position for force/vector prediction with shape [nframes*nloc, natoms, natoms, 3].
+        """
+        ncluster, natoms, _ = coord_selected.shape
+        # ncluster x natoms x natoms x 3
+        delta_pos = coord_selected.unsqueeze(1) - coord_selected.unsqueeze(2)
+        # (ncluster x natoms x natoms
+        dist = delta_pos.norm(dim=-1).view(-1, natoms, natoms)
+        # [ncluster, natoms, natoms, K]
+        gbf_feature = self.gbf(dist, edge_type_2dim)
+        if self.atomic_sum_gbf:
+            edge_features = gbf_feature
+            # [ncluster, natoms, K]
+            sum_edge_features = edge_features.sum(dim=-2)
+            if self.edge_proj is not None:
+                sum_edge_features = self.edge_proj(sum_edge_features)
+            # [ncluster, natoms, embed_dim]
+            atom_feature = atom_feature + sum_edge_features
+
+        # [ncluster, natoms, natoms, pair_dim]
+        gbf_result = self.gbf_proj(gbf_feature)
+
+        attn_bias = gbf_result + edge_feature
+        return atom_feature, attn_bias, delta_pos
+
+
+class NeighborWiseAttention(nn.Module):
+    def __init__(
+        self,
+        layer_num,
+        nnei,
+        embed_dim,
+        hidden_dim,
+        dotr=False,
+        do_mask=False,
+        post_ln=True,
+        ffn=False,
+        ffn_embed_dim=1024,
+        activation="tanh",
+        scaling_factor=1.0,
+        head_num=1,
+        normalize=True,
+        temperature=None,
+    ):
+        """Construct a neighbor-wise attention net."""
+        super().__init__()
+        self.layer_num = layer_num
+        attention_layers = []
+        for i in range(self.layer_num):
+            attention_layers.append(
+                NeighborWiseAttentionLayer(
+                    nnei,
+                    embed_dim,
+                    hidden_dim,
+                    dotr=dotr,
+                    do_mask=do_mask,
+                    post_ln=post_ln,
+                    ffn=ffn,
+                    ffn_embed_dim=ffn_embed_dim,
+                    activation=activation,
+                    scaling_factor=scaling_factor,
+                    head_num=head_num,
+                    normalize=normalize,
+                    temperature=temperature,
+                )
+            )
+        self.attention_layers = nn.ModuleList(attention_layers)
+
+    def forward(
+        self,
+        input_G,
+        nei_mask,
+        input_r: Optional[torch.Tensor] = None,
+        sw: Optional[torch.Tensor] = None,
+    ):
+        """
+        Args:
+            input_G: Input G, [nframes * nloc, nnei, embed_dim].
+            nei_mask: neighbor mask, [nframes * nloc, nnei].
+            input_r: normalized radial, [nframes, nloc, nei, 3].
+
+        Returns
+        -------
+        out: Output G, [nframes * nloc, nnei, embed_dim]
+
+        """
+        out = input_G
+        # https://github.com/pytorch/pytorch/issues/39165#issuecomment-635472592
+        for layer in self.attention_layers:
+            out = layer(out, nei_mask, input_r=input_r, sw=sw)
+        return out
+
+
+class NeighborWiseAttentionLayer(nn.Module):
+    ffn: Final[bool]
+
+    def __init__(
+        self,
+        nnei,
+        embed_dim,
+        hidden_dim,
+        dotr=False,
+        do_mask=False,
+        post_ln=True,
+        ffn=False,
+        ffn_embed_dim=1024,
+        activation="tanh",
+        scaling_factor=1.0,
+        head_num=1,
+        normalize=True,
+        temperature=None,
+    ):
+        """Construct a neighbor-wise attention layer."""
+        super().__init__()
+        self.nnei = nnei
+        self.embed_dim = embed_dim
+        self.hidden_dim = hidden_dim
+        self.dotr = dotr
+        self.do_mask = do_mask
+        self.post_ln = post_ln
+        self.ffn = ffn
+        self.attention_layer = GatedSelfAttetion(
+            nnei,
+            embed_dim,
+            hidden_dim,
+            dotr=dotr,
+            do_mask=do_mask,
+            scaling_factor=scaling_factor,
+            head_num=head_num,
+            normalize=normalize,
+            temperature=temperature,
+        )
+        self.attn_layer_norm = nn.LayerNorm(
+            self.embed_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE
+        )
+        if self.ffn:
+            self.ffn_embed_dim = ffn_embed_dim
+            self.fc1 = nn.Linear(
+                self.embed_dim, self.ffn_embed_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+            )
+            self.activation_fn = ActivationFn(activation)
+            self.fc2 = nn.Linear(
+                self.ffn_embed_dim, self.embed_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+            )
+            self.final_layer_norm = nn.LayerNorm(
+                self.embed_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+            )
+
+    def forward(
+        self,
+        x,
+        nei_mask,
+        input_r: Optional[torch.Tensor] = None,
+        sw: Optional[torch.Tensor] = None,
+    ):
+        residual = x
+        if not self.post_ln:
+            x = self.attn_layer_norm(x)
+        x = self.attention_layer(x, nei_mask, input_r=input_r, sw=sw)
+        x = residual + x
+        if self.post_ln:
+            x = self.attn_layer_norm(x)
+        if self.ffn:
+            residual = x
+            if not self.post_ln:
+                x = self.final_layer_norm(x)
+            x = self.fc1(x)
+            x = self.activation_fn(x)
+            x = self.fc2(x)
+            x = residual + x
+            if self.post_ln:
+                x = self.final_layer_norm(x)
+        return x
+
+
+class GatedSelfAttetion(nn.Module):
+    def __init__(
+        self,
+        nnei,
+        embed_dim,
+        hidden_dim,
+        dotr=False,
+        do_mask=False,
+        scaling_factor=1.0,
+        head_num=1,
+        normalize=True,
+        temperature=None,
+        bias=True,
+        smooth=True,
+    ):
+        """Construct a neighbor-wise attention net."""
+        super().__init__()
+        self.nnei = nnei
+        self.embed_dim = embed_dim
+        self.hidden_dim = hidden_dim
+        self.head_num = head_num
+        self.dotr = dotr
+        self.do_mask = do_mask
+        if temperature is None:
+            self.scaling = (self.hidden_dim * scaling_factor) ** -0.5
+        else:
+            self.scaling = temperature
+        self.normalize = normalize
+        self.in_proj = SimpleLinear(
+            embed_dim,
+            hidden_dim * 3,
+            bavg=0.0,
+            stddev=1.0,
+            use_timestep=False,
+            bias=bias,
+        )
+        self.out_proj = SimpleLinear(
+            hidden_dim, embed_dim, bavg=0.0, stddev=1.0, use_timestep=False, bias=bias
+        )
+        self.smooth = smooth
+
+    def forward(
+        self,
+        query,
+        nei_mask,
+        input_r: Optional[torch.Tensor] = None,
+        sw: Optional[torch.Tensor] = None,
+        attnw_shift: float = 20.0,
+    ):
+        """
+        Args:
+            query: input G, [nframes * nloc, nnei, embed_dim].
+            nei_mask: neighbor mask, [nframes * nloc, nnei].
+            input_r: normalized radial, [nframes, nloc, nei, 3].
+
+        Returns
+        -------
+        type_embedding:
+
+        """
+        q, k, v = self.in_proj(query).chunk(3, dim=-1)
+        #  [nframes * nloc, nnei, hidden_dim]
+        q = q.view(-1, self.nnei, self.hidden_dim)
+        k = k.view(-1, self.nnei, self.hidden_dim)
+        v = v.view(-1, self.nnei, self.hidden_dim)
+        if self.normalize:
+            q = F.normalize(q, dim=-1)
+            k = F.normalize(k, dim=-1)
+            v = F.normalize(v, dim=-1)
+        q = q * self.scaling
+        k = k.transpose(1, 2)
+        #  [nframes * nloc, nnei, nnei]
+        attn_weights = torch.bmm(q, k)
+        #  [nframes * nloc, nnei]
+        nei_mask = nei_mask.view(-1, self.nnei)
+        if self.smooth:
+            # [nframes * nloc, nnei]
+            assert sw is not None
+            sw = sw.view([-1, self.nnei])
+            attn_weights = (attn_weights + attnw_shift) * sw[:, :, None] * sw[
+                :, None, :
+            ] - attnw_shift
+        else:
+            attn_weights = attn_weights.masked_fill(
+                ~nei_mask.unsqueeze(1), float("-inf")
+            )
+        attn_weights = F.softmax(attn_weights, dim=-1)
+        attn_weights = attn_weights.masked_fill(~nei_mask.unsqueeze(-1), 0.0)
+        if self.smooth:
+            assert sw is not None
+            attn_weights = attn_weights * sw[:, :, None] * sw[:, None, :]
+        if self.dotr:
+            assert input_r is not None, "input_r must be provided when dotr is True!"
+            angular_weight = torch.bmm(input_r, input_r.transpose(1, 2))
+            attn_weights = attn_weights * angular_weight
+        o = torch.bmm(attn_weights, v)
+        output = self.out_proj(o)
+        return output
+
+
+class LocalSelfMultiheadAttention(nn.Module):
+    def __init__(self, feature_dim, attn_head, scaling_factor=1.0):
+        super().__init__()
+        self.feature_dim = feature_dim
+        self.attn_head = attn_head
+        self.head_dim = feature_dim // attn_head
+        assert (
+            feature_dim % attn_head == 0
+        ), f"feature_dim {feature_dim} must be divided by attn_head {attn_head}!"
+        self.scaling = (self.head_dim * scaling_factor) ** -0.5
+        self.in_proj = SimpleLinear(self.feature_dim, self.feature_dim * 3)
+        # TODO debug
+        # self.out_proj = SimpleLinear(self.feature_dim, self.feature_dim)
+
+    def forward(
+        self,
+        query,
+        attn_bias: Optional[torch.Tensor] = None,
+        nlist_mask: Optional[torch.Tensor] = None,
+        nlist: Optional[torch.Tensor] = None,
+        return_attn=True,
+    ):
+        nframes, nloc, feature_dim = query.size()
+        _, _, nnei = nlist.size()
+        assert feature_dim == self.feature_dim
+        # [nframes, nloc, feature_dim]
+        q, k, v = self.in_proj(query).chunk(3, dim=-1)
+        # [nframes * attn_head * nloc, 1, head_dim]
+        q = (
+            q.view(nframes, nloc, self.attn_head, self.head_dim)
+            .transpose(1, 2)
+            .contiguous()
+            .view(nframes * self.attn_head * nloc, 1, self.head_dim)
+            * self.scaling
+        )
+        # [nframes, nloc, feature_dim] --> [nframes, nloc + 1, feature_dim]
+        # with nlist [nframes, nloc, nnei] --> [nframes, nloc, nnei, feature_dim]
+        # padding = torch.zeros(feature_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION).to(k.device)
+        # k = torch.concat([k, padding.unsqueeze(0).unsqueeze(1)], dim=1)
+        # v = torch.concat([v, padding.unsqueeze(0).unsqueeze(1)], dim=1)
+
+        # [nframes, nloc * nnei, feature_dim]
+        index = nlist.view(nframes, -1).unsqueeze(-1).expand(-1, -1, feature_dim)
+        k = torch.gather(k, dim=1, index=index)
+        # [nframes, nloc * nnei, feature_dim]
+        v = torch.gather(v, dim=1, index=index)
+        # [nframes * attn_head * nloc, nnei, head_dim]
+        k = (
+            k.view(nframes, nloc, nnei, self.attn_head, self.head_dim)
+            .permute(0, 3, 1, 2, 4)
+            .contiguous()
+            .view(nframes * self.attn_head * nloc, nnei, self.head_dim)
+        )
+        v = (
+            v.view(nframes, nloc, nnei, self.attn_head, self.head_dim)
+            .permute(0, 3, 1, 2, 4)
+            .contiguous()
+            .view(nframes * self.attn_head * nloc, nnei, self.head_dim)
+        )
+        # [nframes * attn_head * nloc, 1, nnei]
+        attn_weights = torch.bmm(q, k.transpose(1, 2))
+        # maskfill
+        # [nframes, attn_head, nloc, nnei]
+        attn_weights = attn_weights.view(
+            nframes, self.attn_head, nloc, nnei
+        ).masked_fill(~nlist_mask.unsqueeze(1), float("-inf"))
+        # add bias
+        if return_attn:
+            attn_weights = attn_weights + attn_bias
+        # softmax
+        # [nframes * attn_head * nloc, 1, nnei]
+        attn = F.softmax(attn_weights, dim=-1).view(
+            nframes * self.attn_head * nloc, 1, nnei
+        )
+        # bmm
+        # [nframes * attn_head * nloc, 1, head_dim]
+        o = torch.bmm(attn, v)
+        assert list(o.size()) == [nframes * self.attn_head * nloc, 1, self.head_dim]
+        # [nframes, nloc, feature_dim]
+        o = (
+            o.view(nframes, self.attn_head, nloc, self.head_dim)
+            .transpose(1, 2)
+            .contiguous()
+            .view(nframes, nloc, self.feature_dim)
+        )
+        # out
+        ## TODO debug:
+        # o = self.out_proj(o)
+        if not return_attn:
+            return o
+        else:
+            return o, attn_weights, attn
+
+
+class NodeTaskHead(nn.Module):
+    def __init__(
+        self,
+        embed_dim: int,
+        pair_dim: int,
+        num_head: int,
+    ):
+        super().__init__()
+        self.layer_norm = nn.LayerNorm(embed_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION)
+        self.pair_norm = nn.LayerNorm(pair_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION)
+        self.embed_dim = embed_dim
+        self.q_proj = Linear(embed_dim, embed_dim, bias=False, init="glorot")
+        self.k_proj = Linear(embed_dim, embed_dim, bias=False, init="glorot")
+        self.v_proj = Linear(embed_dim, embed_dim, bias=False, init="glorot")
+        self.num_heads = num_head
+        self.head_dim = embed_dim // num_head
+        self.scaling = self.head_dim**-0.5
+        self.force_proj = Linear(embed_dim, 1, init="final", bias=False)
+        self.linear_bias = Linear(pair_dim, num_head)
+        self.dropout = 0.1
+
+    def zero_init(self):
+        nn.init.zeros_(self.force_proj.weight)
+
+    def forward(
+        self,
+        query: Tensor,
+        pair: Tensor,
+        delta_pos: Tensor,
+        attn_mask: Tensor = None,
+    ) -> Tensor:
+        ncluster, natoms, _ = query.size()
+        query = self.layer_norm(query)
+        # [ncluster, natoms, natoms, pair_dim]
+        pair = self.pair_norm(pair)
+
+        # [ncluster, attn_head, natoms, head_dim]
+        q = (
+            self.q_proj(query)
+            .view(ncluster, natoms, self.num_heads, -1)
+            .transpose(1, 2)
+            * self.scaling
+        )
+        # [ncluster, attn_head, natoms, head_dim]
+        k = (
+            self.k_proj(query)
+            .view(ncluster, natoms, self.num_heads, -1)
+            .transpose(1, 2)
+        )
+        v = (
+            self.v_proj(query)
+            .view(ncluster, natoms, self.num_heads, -1)
+            .transpose(1, 2)
+        )
+        # [ncluster, attn_head, natoms, natoms]
+        attn = q @ k.transpose(-1, -2)
+        del q, k
+        # [ncluster, attn_head, natoms, natoms]
+        bias = self.linear_bias(pair).permute(0, 3, 1, 2).contiguous()
+
+        # [ncluster, attn_head, natoms, natoms]
+        attn_probs = softmax_dropout(
+            attn,
+            self.dropout,
+            self.training,
+            mask=attn_mask,
+            bias=bias.contiguous(),
+        ).view(ncluster, self.num_heads, natoms, natoms)
+
+        # delta_pos: [ncluster, natoms, natoms, 3]
+        # [ncluster, attn_head, natoms, natoms, 3]
+        rot_attn_probs = attn_probs.unsqueeze(-1) * delta_pos.unsqueeze(1).type_as(
+            attn_probs
+        )
+        # [ncluster, attn_head, 3, natoms, natoms]
+        rot_attn_probs = rot_attn_probs.permute(0, 1, 4, 2, 3)
+        # [ncluster, attn_head, 3, natoms, head_dim]
+        x = rot_attn_probs @ v.unsqueeze(2)
+        # [ncluster, natoms, 3, embed_dim]
+        x = x.permute(0, 3, 2, 1, 4).contiguous().view(ncluster, natoms, 3, -1)
+        cur_force = self.force_proj(x).view(ncluster, natoms, 3)
+        return cur_force
+
+
+class EnergyHead(nn.Module):
+    def __init__(
+        self,
+        input_dim,
+        output_dim,
+    ):
+        super().__init__()
+        self.layer_norm = nn.LayerNorm(input_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION)
+        self.linear_in = Linear(input_dim, input_dim, init="relu")
+
+        self.linear_out = Linear(input_dim, output_dim, bias=True, init="final")
+
+    def forward(self, x):
+        x = x.type(self.linear_in.weight.dtype)
+        x = F.gelu(self.layer_norm(self.linear_in(x)))
+        x = self.linear_out(x)
+        return x
+
+
+class OuterProduct(nn.Module):
+    def __init__(self, d_atom, d_pair, d_hid=32):
+        super().__init__()
+
+        self.d_atom = d_atom
+        self.d_pair = d_pair
+        self.d_hid = d_hid
+
+        self.linear_in = nn.Linear(
+            d_atom, d_hid * 2, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+        )
+        self.linear_out = nn.Linear(
+            d_hid**2, d_pair, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+        )
+        self.act = nn.GELU()
+
+    def _opm(self, a, b):
+        # [nframes, nloc, d]
+        nframes, nloc, d = a.shape
+        a = a.view(nframes, nloc, 1, d, 1)
+        b = b.view(nframes, 1, nloc, 1, d)
+        # [nframes, nloc, nloc, d, d]
+        outer = a * b
+        outer = outer.view(outer.shape[:-2] + (-1,))
+        outer = self.linear_out(outer)
+        return outer
+
+    def forward(
+        self,
+        m: torch.Tensor,
+        nlist: torch.Tensor,
+        op_mask: float,
+        op_norm: float,
+    ) -> torch.Tensor:
+        ab = self.linear_in(m)
+        ab = ab * op_mask
+        a, b = ab.chunk(2, dim=-1)
+        # [ncluster, natoms, natoms, d_pair]
+        z = self._opm(a, b)
+        z *= op_norm
+        return z
+
+
+class Attention(nn.Module):
+    def __init__(
+        self,
+        q_dim: int,
+        k_dim: int,
+        v_dim: int,
+        head_dim: int,
+        num_heads: int,
+        gating: bool = False,
+        dropout: float = 0.0,
+    ):
+        super().__init__()
+
+        self.num_heads = num_heads
+        self.head_dim = head_dim
+        total_dim = head_dim * self.num_heads
+        self.total_dim = total_dim
+        self.q_dim = q_dim
+        self.gating = gating
+        self.linear_q = Linear(q_dim, total_dim, bias=False, init="glorot")
+        self.linear_k = Linear(k_dim, total_dim, bias=False, init="glorot")
+        self.linear_v = Linear(v_dim, total_dim, bias=False, init="glorot")
+        self.linear_o = Linear(total_dim, q_dim, init="final")
+        self.linear_g = None
+        if self.gating:
+            self.linear_g = Linear(q_dim, total_dim, init="gating")
+        # precompute the 1/sqrt(head_dim)
+        self.norm = head_dim**-0.5
+        self.dropout = dropout
+
+    def forward(
+        self,
+        q: torch.Tensor,
+        k: torch.Tensor,
+        v: torch.Tensor,
+        bias: torch.Tensor,
+        mask: torch.Tensor = None,
+    ) -> torch.Tensor:
+        nframes, nloc, embed_dim = q.size()
+        g = None
+        if self.linear_g is not None:
+            # gating, use raw query input
+            # [nframes, nloc, total_dim]
+            g = self.linear_g(q)
+        # [nframes, nloc, total_dim]
+        q = self.linear_q(q)
+        q *= self.norm
+        # [nframes, nloc, total_dim]
+        k = self.linear_k(k)
+        # [nframes, nloc, total_dim]
+        v = self.linear_v(v)
+        # global
+        # q [nframes, h, nloc, d]
+        # k [nframes, h, nloc, d]
+        # v [nframes, h, nloc, d]
+        # attn [nframes, h, nloc, nloc]
+        # o [nframes, h, nloc, d]
+
+        # [nframes, h, nloc, d]
+        q = q.view(q.shape[:-1] + (self.num_heads, -1)).transpose(-2, -3).contiguous()
+        k = k.view(k.shape[:-1] + (self.num_heads, -1)).transpose(-2, -3).contiguous()
+        v = v.view(v.shape[:-1] + (self.num_heads, -1)).transpose(-2, -3)
+        # [nframes, h, nloc, nloc]
+        attn = torch.matmul(q, k.transpose(-1, -2))
+        del q, k
+        # [nframes, h, nloc, nloc]
+        attn = softmax_dropout(attn, self.dropout, self.training, mask=mask, bias=bias)
+        # [nframes, h, nloc, d]
+        o = torch.matmul(attn, v)
+        del attn, v
+
+        # local
+        # q [nframes, h, nloc, 1, d]
+        # k [nframes, h, nloc, nnei, d]
+        # v [nframes, h, nloc, nnei, d]
+        # attn [nframes, h, nloc, nnei]
+        # o [nframes, h, nloc, d]
+
+        assert list(o.size()) == [nframes, self.num_heads, nloc, self.head_dim]
+        # [nframes, nloc, total_dim]
+        o = o.transpose(-2, -3).contiguous()
+        o = o.view(*o.shape[:-2], -1)
+
+        if g is not None:
+            o = torch.sigmoid(g) * o
+
+        # merge heads
+        o = self.linear_o(o)
+        return o
+
+
+class AtomAttention(nn.Module):
+    def __init__(
+        self,
+        q_dim: int,
+        k_dim: int,
+        v_dim: int,
+        pair_dim: int,
+        head_dim: int,
+        num_heads: int,
+        gating: bool = False,
+        dropout: float = 0.0,
+    ):
+        super().__init__()
+
+        self.mha = Attention(
+            q_dim, k_dim, v_dim, head_dim, num_heads, gating=gating, dropout=dropout
+        )
+        self.layer_norm = nn.LayerNorm(pair_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION)
+        self.linear_bias = Linear(pair_dim, num_heads)
+
+    def forward(
+        self,
+        q: torch.Tensor,
+        k: torch.Tensor,
+        v: torch.Tensor,
+        nlist: torch.Tensor,
+        pair: torch.Tensor,
+        mask: torch.Tensor = None,
+    ) -> torch.Tensor:
+        pair = self.layer_norm(pair)
+        bias = self.linear_bias(pair).permute(0, 3, 1, 2).contiguous()
+        return self.mha(q, k, v, bias=bias, mask=mask)
+
+
+class TriangleMultiplication(nn.Module):
+    def __init__(self, d_pair, d_hid):
+        super().__init__()
+
+        self.linear_ab_p = Linear(d_pair, d_hid * 2)
+        self.linear_ab_g = Linear(d_pair, d_hid * 2, init="gating")
+
+        self.linear_g = Linear(d_pair, d_pair, init="gating")
+        self.linear_z = Linear(d_hid, d_pair, init="final")
+
+        self.layer_norm_out = nn.LayerNorm(d_hid, dtype=env.GLOBAL_PT_FLOAT_PRECISION)
+
+    def forward(
+        self,
+        z: torch.Tensor,
+        mask: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
+        # z : [nframes, nloc, nloc, pair_dim]
+
+        # [nframes, nloc, nloc, pair_dim]
+        g = self.linear_g(z)
+        if self.training:
+            ab = self.linear_ab_p(z) * torch.sigmoid(self.linear_ab_g(z))
+        else:
+            ab = self.linear_ab_p(z)
+            ab *= torch.sigmoid(self.linear_ab_g(z))
+        # [nframes, nloc, nloc, d]
+        a, b = torch.chunk(ab, 2, dim=-1)
+        del z, ab
+
+        # [nframes, d, nloc_i, nloc_k] row not trans
+        a1 = a.permute(0, 3, 1, 2)
+        # [nframes, d, nloc_k, nloc_j(i)]  trans
+        b1 = b.transpose(-1, -3)
+        # [nframes, d, nloc_i, nloc_j]
+        x = torch.matmul(a1, b1)
+        del a1, b1
+
+        # [nframes, d, nloc_k, nloc_j(i)] not trans
+        b2 = b.permute(0, 3, 1, 2)
+        # [nframes, d, nloc_i, nloc_k]  col trans # check TODO
+        a2 = a.transpose(-1, -3)
+
+        # [nframes, d, nloc_i, nloc_j]
+        x = x + torch.matmul(a2, b2)
+        del a, b, a2, b2
+
+        # [nframes, nloc_i, nloc_j, d]
+        x = x.permute(0, 2, 3, 1)
+
+        x = self.layer_norm_out(x)
+        x = self.linear_z(x)
+        return g * x
+
+
+class EvoformerEncoderLayer(nn.Module):
+    def __init__(
+        self,
+        feature_dim: int = 768,
+        ffn_dim: int = 2048,
+        attn_head: int = 8,
+        activation_fn: str = "gelu",
+        post_ln: bool = False,
+    ):
+        super().__init__()
+        self.feature_dim = feature_dim
+        self.ffn_dim = ffn_dim
+        self.attn_head = attn_head
+        self.activation_fn = (
+            ActivationFn(activation_fn) if activation_fn is not None else None
+        )
+        self.post_ln = post_ln
+        self.self_attn_layer_norm = nn.LayerNorm(
+            self.feature_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+        )
+
+        self.self_attn = LocalSelfMultiheadAttention(
+            self.feature_dim,
+            self.attn_head,
+        )
+        self.final_layer_norm = nn.LayerNorm(
+            self.feature_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+        )
+        self.fc1 = SimpleLinear(self.feature_dim, self.ffn_dim)
+        self.fc2 = SimpleLinear(self.ffn_dim, self.feature_dim)
+
+    def forward(
+        self,
+        x,
+        attn_bias: Optional[torch.Tensor] = None,
+        nlist_mask: Optional[torch.Tensor] = None,
+        nlist: Optional[torch.Tensor] = None,
+        return_attn=True,
+    ):
+        residual = x
+        if not self.post_ln:
+            x = self.self_attn_layer_norm(x)
+        x = self.self_attn(
+            query=x,
+            attn_bias=attn_bias,
+            nlist_mask=nlist_mask,
+            nlist=nlist,
+            return_attn=return_attn,
+        )
+        if return_attn:
+            x, attn_weights, attn_probs = x
+        x = residual + x
+        if self.post_ln:
+            x = self.self_attn_layer_norm(x)
+
+        residual = x
+        if not self.post_ln:
+            x = self.final_layer_norm(x)
+        x = self.fc1(x)
+        x = self.activation_fn(x)
+        x = self.fc2(x)
+        x = residual + x
+        if self.post_ln:
+            x = self.final_layer_norm(x)
+        if not return_attn:
+            return x
+        else:
+            return x, attn_weights, attn_probs
+
+
+# output: atomic_rep, transformed_atomic_rep, pair_rep, delta_pair_rep, norm_x, norm_delta_pair_rep,
+class Evoformer2bEncoder(nn.Module):
+    def __init__(
+        self,
+        nnei: int,
+        layer_num: int = 6,
+        attn_head: int = 8,
+        atomic_dim: int = 1024,
+        pair_dim: int = 100,
+        feature_dim: int = 1024,
+        ffn_dim: int = 2048,
+        post_ln: bool = False,
+        final_layer_norm: bool = True,
+        final_head_layer_norm: bool = False,
+        emb_layer_norm: bool = False,
+        atomic_residual: bool = False,
+        evo_residual: bool = False,
+        residual_factor: float = 1.0,
+        activation_function: str = "gelu",
+    ):
+        super().__init__()
+        self.nnei = nnei
+        self.layer_num = layer_num
+        self.attn_head = attn_head
+        self.atomic_dim = atomic_dim
+        self.pair_dim = pair_dim
+        self.feature_dim = feature_dim
+        self.ffn_dim = ffn_dim
+        self.post_ln = post_ln
+        self._final_layer_norm = final_layer_norm
+        self._final_head_layer_norm = final_head_layer_norm
+        self._emb_layer_norm = emb_layer_norm
+        self.activation_function = activation_function
+        self.evo_residual = evo_residual
+        self.residual_factor = residual_factor
+        if atomic_residual and atomic_dim == feature_dim:
+            self.atomic_residual = True
+        else:
+            self.atomic_residual = False
+        self.in_proj = SimpleLinear(
+            self.atomic_dim,
+            self.feature_dim,
+            bavg=0.0,
+            stddev=1.0,
+            use_timestep=False,
+            activate="tanh",
+        )  # TODO
+        self.out_proj = SimpleLinear(
+            self.feature_dim,
+            self.atomic_dim,
+            bavg=0.0,
+            stddev=1.0,
+            use_timestep=False,
+            activate="tanh",
+        )
+        if self._emb_layer_norm:
+            self.emb_layer_norm = nn.LayerNorm(
+                self.feature_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+            )
+
+        ## TODO debug : self.in_proj_pair = NonLinearHead(self.pair_dim, self.attn_head, activation_fn=None)
+        self.in_proj_pair = SimpleLinear(self.pair_dim, self.attn_head, activate=None)
+        evoformer_encoder_layers = []
+        for i in range(self.layer_num):
+            evoformer_encoder_layers.append(
+                EvoformerEncoderLayer(
+                    feature_dim=self.feature_dim,
+                    ffn_dim=self.ffn_dim,
+                    attn_head=self.attn_head,
+                    activation_fn=self.activation_function,
+                    post_ln=self.post_ln,
+                )
+            )
+        self.evoformer_encoder_layers = nn.ModuleList(evoformer_encoder_layers)
+        if self._final_layer_norm:
+            self.final_layer_norm = nn.LayerNorm(
+                self.feature_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+            )
+        if self._final_head_layer_norm:
+            self.final_head_layer_norm = nn.LayerNorm(
+                self.attn_head, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+            )
+
+    def forward(self, atomic_rep, pair_rep, nlist, nlist_type, nlist_mask):
+        """Encoder the atomic and pair representations.
+
+        Args:
+        - atomic_rep: Atomic representation with shape [nframes, nloc, atomic_dim].
+        - pair_rep: Pair representation with shape [nframes, nloc, nnei, pair_dim].
+        - nlist: Neighbor list with shape [nframes, nloc, nnei].
+        - nlist_type: Neighbor types with shape [nframes, nloc, nnei].
+        - nlist_mask: Neighbor mask with shape [nframes, nloc, nnei], `False` if blank.
+
+        Returns
+        -------
+        - atomic_rep: Atomic representation after encoder with shape [nframes, nloc, feature_dim].
+        - transformed_atomic_rep: Transformed atomic representation after encoder with shape [nframes, nloc, atomic_dim].
+        - pair_rep: Pair representation after encoder with shape [nframes, nloc, nnei, attn_head].
+        - delta_pair_rep: Delta pair representation after encoder with shape [nframes, nloc, nnei, attn_head].
+        - norm_x: Normalization loss of atomic_rep.
+        - norm_delta_pair_rep: Normalization loss of delta_pair_rep.
+        """
+        # Global branch
+        nframes, nloc, _ = atomic_rep.size()
+        nnei = pair_rep.shape[2]
+        input_atomic_rep = atomic_rep
+        # [nframes, nloc, feature_dim]
+        if self.atomic_residual:
+            atomic_rep = atomic_rep + self.in_proj(atomic_rep)
+        else:
+            atomic_rep = self.in_proj(atomic_rep)
+
+        if self._emb_layer_norm:
+            atomic_rep = self.emb_layer_norm(atomic_rep)
+
+        # Local branch
+        # [nframes, nloc, nnei, attn_head]
+        pair_rep = self.in_proj_pair(pair_rep)
+        # [nframes, attn_head, nloc, nnei]
+        pair_rep = pair_rep.permute(0, 3, 1, 2).contiguous()
+        input_pair_rep = pair_rep
+        pair_rep = pair_rep.masked_fill(~nlist_mask.unsqueeze(1), float("-inf"))
+
+        for i in range(self.layer_num):
+            atomic_rep, pair_rep, _ = self.evoformer_encoder_layers[i](
+                atomic_rep,
+                attn_bias=pair_rep,
+                nlist_mask=nlist_mask,
+                nlist=nlist,
+                return_attn=True,
+            )
+
+        def norm_loss(x, eps=1e-10, tolerance=1.0):
+            # x = x.float()
+            max_norm = x.shape[-1] ** 0.5
+            norm = torch.sqrt(torch.sum(x**2, dim=-1) + eps)
+            error = F.relu((norm - max_norm).abs() - tolerance)
+            return error
+
+        def masked_mean(mask, value, dim=-1, eps=1e-10):
+            return (
+                torch.sum(mask * value, dim=dim) / (eps + torch.sum(mask, dim=dim))
+            ).mean()
+
+        # atomic_rep shape: [nframes, nloc, feature_dim]
+        # pair_rep shape: [nframes, attn_head, nloc, nnei]
+
+        norm_x = torch.mean(norm_loss(atomic_rep))
+        if self._final_layer_norm:
+            atomic_rep = self.final_layer_norm(atomic_rep)
+
+        delta_pair_rep = pair_rep - input_pair_rep
+        delta_pair_rep = delta_pair_rep.masked_fill(~nlist_mask.unsqueeze(1), 0)
+        # [nframes, nloc, nnei, attn_head]
+        delta_pair_rep = (
+            delta_pair_rep.view(nframes, self.attn_head, nloc, nnei)
+            .permute(0, 2, 3, 1)
+            .contiguous()
+        )
+
+        # [nframes, nloc, nnei]
+        norm_delta_pair_rep = norm_loss(delta_pair_rep)
+        norm_delta_pair_rep = masked_mean(mask=nlist_mask, value=norm_delta_pair_rep)
+        if self._final_head_layer_norm:
+            delta_pair_rep = self.final_head_layer_norm(delta_pair_rep)
+
+        if self.atomic_residual:
+            transformed_atomic_rep = atomic_rep + self.out_proj(atomic_rep)
+        else:
+            transformed_atomic_rep = self.out_proj(atomic_rep)
+
+        if self.evo_residual:
+            transformed_atomic_rep = (
+                self.residual_factor * transformed_atomic_rep + input_atomic_rep
+            ) * (1 / np.sqrt(2))
+
+        return (
+            atomic_rep,
+            transformed_atomic_rep,
+            pair_rep,
+            delta_pair_rep,
+            norm_x,
+            norm_delta_pair_rep,
+        )
+
+
+class Evoformer3bEncoderLayer(nn.Module):
+    def __init__(
+        self,
+        nnei,
+        embedding_dim: int = 768,
+        pair_dim: int = 64,
+        pair_hidden_dim: int = 32,
+        ffn_embedding_dim: int = 3072,
+        num_attention_heads: int = 8,
+        dropout: float = 0.1,
+        droppath_prob: float = 0.0,
+        pair_dropout: float = 0.25,
+        attention_dropout: float = 0.1,
+        activation_dropout: float = 0.1,
+        pre_ln: bool = True,
+        tri_update: bool = True,
+    ):
+        super().__init__()
+        # Initialize parameters
+        self.nnei = nnei
+        self.embedding_dim = embedding_dim
+        self.num_attention_heads = num_attention_heads
+        self.attention_dropout = attention_dropout
+
+        # self.dropout = dropout
+        self.activation_dropout = activation_dropout
+
+        if droppath_prob > 0.0:
+            self.dropout_module = DropPath(droppath_prob)
+        else:
+            self.dropout_module = Dropout(dropout)
+
+        # self.self_attn = AtomAttentionLocal(embedding_dim, embedding_dim, embedding_dim, pair_dim,
+        #                                     embedding_dim // num_attention_heads, num_attention_heads,
+        #                                     gating=False, dropout=attention_dropout)
+        self.self_attn = AtomAttention(
+            embedding_dim,
+            embedding_dim,
+            embedding_dim,
+            pair_dim,
+            embedding_dim // num_attention_heads,
+            num_attention_heads,
+            gating=False,
+            dropout=attention_dropout,
+        )
+        # layer norm associated with the self attention layer
+        self.pre_ln = pre_ln
+        self.self_attn_layer_norm = nn.LayerNorm(
+            self.embedding_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+        )
+        self.fc1 = nn.Linear(
+            self.embedding_dim, ffn_embedding_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+        )
+        self.fc2 = nn.Linear(
+            ffn_embedding_dim, self.embedding_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+        )
+        self.final_layer_norm = nn.LayerNorm(
+            self.embedding_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+        )
+
+        self.x_layer_norm_opm = nn.LayerNorm(
+            self.embedding_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+        )
+        # self.opm = OuterProductLocal(self.embedding_dim, pair_dim, d_hid=pair_hidden_dim)
+        self.opm = OuterProduct(self.embedding_dim, pair_dim, d_hid=pair_hidden_dim)
+        # self.pair_layer_norm_opm = nn.LayerNorm(pair_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION)
+        self.pair_layer_norm_ffn = nn.LayerNorm(
+            pair_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+        )
+        self.pair_ffn = Transition(
+            pair_dim,
+            1,
+            dropout=activation_dropout,
+        )
+        self.pair_dropout = pair_dropout
+        self.tri_update = tri_update
+        if self.tri_update:
+            self.pair_layer_norm_trimul = nn.LayerNorm(
+                pair_dim, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+            )
+            self.pair_tri_mul = TriangleMultiplication(pair_dim, pair_hidden_dim)
+
+    def update_pair(
+        self,
+        x,
+        pair,
+        nlist,
+        op_mask,
+        op_norm,
+    ):
+        # local:
+        # [nframes, nloc, nnei, pair_dim]
+        # global:
+        # [nframes, nloc, nloc, pair_dim]
+        pair = pair + self.dropout_module(
+            self.opm(self.x_layer_norm_opm(x), nlist, op_mask, op_norm)
+        )
+        if not self.pre_ln:
+            pair = self.pair_layer_norm_opm(pair)
+        return x, pair
+
+    def shared_dropout(self, x, shared_dim, dropout):
+        shape = list(x.shape)
+        shape[shared_dim] = 1
+        with torch.no_grad():
+            mask = x.new_ones(shape)
+        return F.dropout(mask, p=dropout, training=self.training) * x
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        pair: torch.Tensor,
+        nlist: torch.Tensor = None,
+        attn_mask: Optional[torch.Tensor] = None,
+        pair_mask: Optional[torch.Tensor] = None,
+        op_mask: float = 1.0,
+        op_norm: float = 1.0,
+    ):
+        """Encoder the atomic and pair representations.
+
+        Args:
+        - x: Atomic representation with shape [ncluster, natoms, embed_dim].
+        - pair: Pair representation with shape [ncluster, natoms, natoms, pair_dim].
+        - attn_mask: Attention mask with shape [ncluster, head, natoms, natoms].
+        - pair_mask: Neighbor mask with shape [ncluster, natoms, natoms].
+
+        """
+        # [ncluster, natoms, embed_dim]
+        residual = x
+        if self.pre_ln:
+            x = self.self_attn_layer_norm(x)
+        x = self.self_attn(
+            x,
+            x,
+            x,
+            nlist=nlist,
+            pair=pair,
+            mask=attn_mask,
+        )
+        # x = F.dropout(x, p=self.dropout, training=self.training)
+        x = self.dropout_module(x)
+        x = residual + x
+        if not self.pre_ln:
+            x = self.self_attn_layer_norm(x)
+
+        residual = x
+        if self.pre_ln:
+            x = self.final_layer_norm(x)
+        x = F.linear(x, self.fc1.weight)
+        # x = fused_ops.bias_torch_gelu(x, self.fc1.bias)
+        x = nn.GELU()(x) + self.fc1.bias
+        x = F.dropout(x, p=self.activation_dropout, training=self.training)
+        x = self.fc2(x)
+        # x = F.dropout(x, p=self.dropout, training=self.training)
+        x = self.dropout_module(x)
+
+        x = residual + x
+        if not self.pre_ln:
+            x = self.final_layer_norm(x)
+
+        block = [
+            partial(
+                self.update_pair,
+                nlist=nlist,
+                op_mask=op_mask,
+                op_norm=op_norm,
+            )
+        ]
+
+        x, pair = checkpoint_sequential(
+            block,
+            input_x=(x, pair),
+        )
+
+        if self.tri_update:
+            residual_pair = pair
+            if self.pre_ln:
+                pair = self.pair_layer_norm_trimul(pair)
+
+            pair = self.shared_dropout(
+                self.pair_tri_mul(pair, pair_mask), -3, self.pair_dropout
+            )
+            pair = residual_pair + pair
+            if not self.pre_ln:
+                pair = self.pair_layer_norm_trimul(pair)
+
+        residual_pair = pair
+        if self.pre_ln:
+            pair = self.pair_layer_norm_ffn(pair)
+        pair = self.dropout_module(self.pair_ffn(pair))
+        pair = residual_pair + pair
+        if not self.pre_ln:
+            pair = self.pair_layer_norm_ffn(pair)
+        return x, pair
+
+
+class Evoformer3bEncoder(nn.Module):
+    def __init__(
+        self,
+        nnei,
+        layer_num=6,
+        attn_head=8,
+        atomic_dim=768,
+        pair_dim=64,
+        pair_hidden_dim=32,
+        ffn_embedding_dim=3072,
+        dropout: float = 0.1,
+        droppath_prob: float = 0.0,
+        pair_dropout: float = 0.25,
+        attention_dropout: float = 0.1,
+        activation_dropout: float = 0.1,
+        pre_ln: bool = True,
+        tri_update: bool = True,
+        **kwargs,
+    ):
+        super().__init__()
+        self.nnei = nnei
+        if droppath_prob > 0:
+            droppath_probs = [
+                x.item() for x in torch.linspace(0, droppath_prob, layer_num)
+            ]
+        else:
+            droppath_probs = None
+
+        self.layers = nn.ModuleList(
+            [
+                Evoformer3bEncoderLayer(
+                    nnei,
+                    atomic_dim,
+                    pair_dim,
+                    pair_hidden_dim,
+                    ffn_embedding_dim,
+                    num_attention_heads=attn_head,
+                    dropout=dropout,
+                    droppath_prob=droppath_probs[_],
+                    pair_dropout=pair_dropout,
+                    attention_dropout=attention_dropout,
+                    activation_dropout=activation_dropout,
+                    pre_ln=pre_ln,
+                    tri_update=tri_update,
+                )
+                for _ in range(layer_num)
+            ]
+        )
+
+    def forward(self, x, pair, attn_mask=None, pair_mask=None, atom_mask=None):
+        """Encoder the atomic and pair representations.
+
+        Args:
+            x: Atomic representation with shape [ncluster, natoms, atomic_dim].
+            pair: Pair representation with shape [ncluster, natoms, natoms, pair_dim].
+            attn_mask: Attention mask (with -inf for softmax) with shape [ncluster, head, natoms, natoms].
+            pair_mask: Pair mask (with 1 for real atom pair and 0 for padding) with shape [ncluster, natoms, natoms].
+            atom_mask: Atom mask (with 1 for real atom and 0 for padding) with shape [ncluster, natoms].
+
+        Returns
+        -------
+        x: Atomic representation with shape [ncluster, natoms, atomic_dim].
+        pair: Pair representation with shape [ncluster, natoms, natoms, pair_dim].
+
+        """
+        # [ncluster, natoms, 1]
+        op_mask = atom_mask.unsqueeze(-1)
+        op_mask = op_mask * (op_mask.size(-2) ** -0.5)
+        eps = 1e-3
+        # [ncluster, natoms, natoms, 1]
+        op_norm = 1.0 / (eps + torch.einsum("...bc,...dc->...bdc", op_mask, op_mask))
+        for layer in self.layers:
+            x, pair = layer(
+                x,
+                pair,
+                nlist=None,
+                attn_mask=attn_mask,
+                pair_mask=pair_mask,
+                op_mask=op_mask,
+                op_norm=op_norm,
+            )
+        return x, pair
diff --git a/deepmd/pt/model/task/__init__.py b/deepmd/pt/model/task/__init__.py
new file mode 100644
index 0000000000..9430ede766
--- /dev/null
+++ b/deepmd/pt/model/task/__init__.py
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from .atten_lcc import (
+    FittingNetAttenLcc,
+)
+from .base_fitting import (
+    BaseFitting,
+)
+from .denoise import (
+    DenoiseNet,
+)
+from .dipole import (
+    DipoleFittingNet,
+)
+from .ener import (
+    EnergyFittingNet,
+    EnergyFittingNetDirect,
+)
+from .fitting import (
+    Fitting,
+)
+from .polarizability import (
+    PolarFittingNet,
+)
+from .type_predict import (
+    TypePredictNet,
+)
+
+__all__ = [
+    "FittingNetAttenLcc",
+    "DenoiseNet",
+    "DipoleFittingNet",
+    "EnergyFittingNet",
+    "EnergyFittingNetDirect",
+    "Fitting",
+    "BaseFitting",
+    "TypePredictNet",
+    "PolarFittingNet",
+]
diff --git a/deepmd/pt/model/task/atten_lcc.py b/deepmd/pt/model/task/atten_lcc.py
new file mode 100644
index 0000000000..e5961335ec
--- /dev/null
+++ b/deepmd/pt/model/task/atten_lcc.py
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import torch
+import torch.nn as nn
+
+from deepmd.pt.model.network.network import (
+    EnergyHead,
+    NodeTaskHead,
+)
+from deepmd.pt.model.task.fitting import (
+    Fitting,
+)
+from deepmd.pt.utils import (
+    env,
+)
+
+
+class FittingNetAttenLcc(Fitting):
+    def __init__(
+        self, embedding_width, bias_atom_e, pair_embed_dim, attention_heads, **kwargs
+    ):
+        super().__init__()
+        self.embedding_width = embedding_width
+        self.engergy_proj = EnergyHead(self.embedding_width, 1)
+        self.energe_agg_factor = nn.Embedding(4, 1, dtype=env.GLOBAL_PT_FLOAT_PRECISION)
+        nn.init.normal_(self.energe_agg_factor.weight, 0, 0.01)
+        bias_atom_e = torch.tensor(bias_atom_e)
+        self.register_buffer("bias_atom_e", bias_atom_e)
+        self.pair_embed_dim = pair_embed_dim
+        self.attention_heads = attention_heads
+        self.node_proc = NodeTaskHead(
+            self.embedding_width, self.pair_embed_dim, self.attention_heads
+        )
+        self.node_proc.zero_init()
+
+    def forward(self, output, pair, delta_pos, atype, nframes, nloc):
+        # [nframes x nloc x tebd_dim]
+        output_nloc = (output[:, 0, :]).reshape(nframes, nloc, self.embedding_width)
+        # Optional: GRRG or mean of gbf TODO
+
+        # energy outut
+        # [nframes, nloc]
+        energy_out = self.engergy_proj(output_nloc).view(nframes, nloc)
+        # [nframes, nloc]
+        energy_factor = self.energe_agg_factor(torch.zeros_like(atype)).view(
+            nframes, nloc
+        )
+        energy_out = (energy_out * energy_factor) + self.bias_atom_e[atype]
+        energy_out = energy_out.sum(dim=-1)
+
+        # vector output
+        # predict_force: [(nframes x nloc) x (1 + nnei2) x 3]
+        predict_force = self.node_proc(output, pair, delta_pos=delta_pos)
+        # predict_force_nloc: [nframes x nloc x 3]
+        predict_force_nloc = (predict_force[:, 0, :]).reshape(nframes, nloc, 3)
+        return energy_out, predict_force_nloc
diff --git a/deepmd/pt/model/task/base_fitting.py b/deepmd/pt/model/task/base_fitting.py
new file mode 100644
index 0000000000..884a1bfe57
--- /dev/null
+++ b/deepmd/pt/model/task/base_fitting.py
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import torch
+
+from deepmd.dpmodel.fitting import (
+    make_base_fitting,
+)
+
+BaseFitting = make_base_fitting(torch.Tensor, fwd_method_name="forward")
diff --git a/deepmd/pt/model/task/denoise.py b/deepmd/pt/model/task/denoise.py
new file mode 100644
index 0000000000..5f1e780de3
--- /dev/null
+++ b/deepmd/pt/model/task/denoise.py
@@ -0,0 +1,137 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Optional,
+)
+
+import torch
+
+from deepmd.dpmodel import (
+    FittingOutputDef,
+    OutputVariableDef,
+    fitting_check_output,
+)
+from deepmd.pt.model.network.network import (
+    MaskLMHead,
+    NonLinearHead,
+)
+from deepmd.pt.model.task.fitting import (
+    Fitting,
+)
+from deepmd.pt.utils import (
+    env,
+)
+
+
+@fitting_check_output
+class DenoiseNet(Fitting):
+    def __init__(
+        self,
+        feature_dim,
+        ntypes,
+        attn_head=8,
+        prefactor=[0.5, 0.5],
+        activation_function="gelu",
+        **kwargs,
+    ):
+        """Construct a denoise net.
+
+        Args:
+        - ntypes: Element count.
+        - embedding_width: Embedding width per atom.
+        - neuron: Number of neurons in each hidden layers of the fitting net.
+        - bias_atom_e: Average enery per atom for each element.
+        - resnet_dt: Using time-step in the ResNet construction.
+        """
+        super().__init__()
+        self.feature_dim = feature_dim
+        self.ntypes = ntypes
+        self.attn_head = attn_head
+        self.prefactor = torch.tensor(
+            prefactor, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE
+        )
+
+        self.lm_head = MaskLMHead(
+            embed_dim=self.feature_dim,
+            output_dim=ntypes,
+            activation_fn=activation_function,
+            weight=None,
+        )
+
+        if not isinstance(self.attn_head, list):
+            self.pair2coord_proj = NonLinearHead(
+                self.attn_head, 1, activation_fn=activation_function
+            )
+        else:
+            self.pair2coord_proj = []
+            self.ndescriptor = len(self.attn_head)
+            for ii in range(self.ndescriptor):
+                _pair2coord_proj = NonLinearHead(
+                    self.attn_head[ii], 1, activation_fn=activation_function
+                )
+                self.pair2coord_proj.append(_pair2coord_proj)
+            self.pair2coord_proj = torch.nn.ModuleList(self.pair2coord_proj)
+
+    def output_def(self):
+        return FittingOutputDef(
+            [
+                OutputVariableDef(
+                    "updated_coord",
+                    [3],
+                    reduciable=False,
+                    r_differentiable=False,
+                    c_differentiable=False,
+                ),
+                OutputVariableDef(
+                    "logits",
+                    [-1],
+                    reduciable=False,
+                    r_differentiable=False,
+                    c_differentiable=False,
+                ),
+            ]
+        )
+
+    def forward(
+        self,
+        pair_weights,
+        diff,
+        nlist_mask,
+        features,
+        sw,
+        masked_tokens: Optional[torch.Tensor] = None,
+    ):
+        """Calculate the updated coord.
+        Args:
+        - coord: Input noisy coord with shape [nframes, nloc, 3].
+        - pair_weights: Input pair weights with shape [nframes, nloc, nnei, head].
+        - diff: Input pair relative coord list with shape [nframes, nloc, nnei, 3].
+        - nlist_mask: Input nlist mask with shape [nframes, nloc, nnei].
+
+        Returns
+        -------
+        - denoised_coord: Denoised updated coord with shape [nframes, nloc, 3].
+        """
+        # [nframes, nloc, nnei, 1]
+        logits = self.lm_head(features, masked_tokens=masked_tokens)
+        if not isinstance(self.attn_head, list):
+            attn_probs = self.pair2coord_proj(pair_weights)
+            out_coord = (attn_probs * diff).sum(dim=-2) / (
+                sw.sum(dim=-1).unsqueeze(-1) + 1e-6
+            )
+        else:
+            assert len(self.prefactor) == self.ndescriptor
+            all_coord_update = []
+            assert len(pair_weights) == len(diff) == len(nlist_mask) == self.ndescriptor
+            for ii in range(self.ndescriptor):
+                _attn_probs = self.pair2coord_proj[ii](pair_weights[ii])
+                _coord_update = (_attn_probs * diff[ii]).sum(dim=-2) / (
+                    nlist_mask[ii].sum(dim=-1).unsqueeze(-1) + 1e-6
+                )
+                all_coord_update.append(_coord_update)
+            out_coord = self.prefactor[0] * all_coord_update[0]
+            for ii in range(self.ndescriptor - 1):
+                out_coord += self.prefactor[ii + 1] * all_coord_update[ii + 1]
+        return {
+            "updated_coord": out_coord,
+            "logits": logits,
+        }
diff --git a/deepmd/pt/model/task/dipole.py b/deepmd/pt/model/task/dipole.py
new file mode 100644
index 0000000000..ca445c8588
--- /dev/null
+++ b/deepmd/pt/model/task/dipole.py
@@ -0,0 +1,197 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import logging
+from typing import (
+    Callable,
+    List,
+    Optional,
+    Union,
+)
+
+import torch
+
+from deepmd.dpmodel import (
+    FittingOutputDef,
+    OutputVariableDef,
+)
+from deepmd.pt.model.task.fitting import (
+    GeneralFitting,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.env import (
+    DEFAULT_PRECISION,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+log = logging.getLogger(__name__)
+
+
+@GeneralFitting.register("dipole")
+class DipoleFittingNet(GeneralFitting):
+    """Construct a dipole fitting net.
+
+    Parameters
+    ----------
+    var_name : str
+        The atomic property to fit, 'dipole'.
+    ntypes : int
+        Element count.
+    dim_descrpt : int
+        Embedding width per atom.
+    embedding_width : int
+        The dimension of rotation matrix, m1.
+    neuron : List[int]
+        Number of neurons in each hidden layers of the fitting net.
+    resnet_dt : bool
+        Using time-step in the ResNet construction.
+    numb_fparam : int
+        Number of frame parameters.
+    numb_aparam : int
+        Number of atomic parameters.
+    activation_function : str
+        Activation function.
+    precision : str
+        Numerical precision.
+    mixed_types : bool
+        If true, use a uniform fitting net for all atom types, otherwise use
+        different fitting nets for different atom types.
+    rcond : float, optional
+        The condition number for the regression of atomic energy.
+    seed : int, optional
+        Random seed.
+    r_differentiable
+        If the variable is differentiated with respect to coordinates of atoms.
+        Only reduciable variable are differentiable.
+    c_differentiable
+        If the variable is differentiated with respect to the cell tensor (pbc case).
+        Only reduciable variable are differentiable.
+    """
+
+    def __init__(
+        self,
+        ntypes: int,
+        dim_descrpt: int,
+        embedding_width: int,
+        neuron: List[int] = [128, 128, 128],
+        resnet_dt: bool = True,
+        numb_fparam: int = 0,
+        numb_aparam: int = 0,
+        activation_function: str = "tanh",
+        precision: str = DEFAULT_PRECISION,
+        mixed_types: bool = True,
+        rcond: Optional[float] = None,
+        seed: Optional[int] = None,
+        exclude_types: List[int] = [],
+        r_differentiable: bool = True,
+        c_differentiable: bool = True,
+        **kwargs,
+    ):
+        self.embedding_width = embedding_width
+        self.r_differentiable = r_differentiable
+        self.c_differentiable = c_differentiable
+        super().__init__(
+            var_name=kwargs.pop("var_name", "dipole"),
+            ntypes=ntypes,
+            dim_descrpt=dim_descrpt,
+            neuron=neuron,
+            resnet_dt=resnet_dt,
+            numb_fparam=numb_fparam,
+            numb_aparam=numb_aparam,
+            activation_function=activation_function,
+            precision=precision,
+            mixed_types=mixed_types,
+            rcond=rcond,
+            seed=seed,
+            exclude_types=exclude_types,
+            **kwargs,
+        )
+        self.old_impl = False  # this only supports the new implementation.
+
+    def _net_out_dim(self):
+        """Set the FittingNet output dim."""
+        return self.embedding_width
+
+    def serialize(self) -> dict:
+        data = super().serialize()
+        data["type"] = "dipole"
+        data["embedding_width"] = self.embedding_width
+        data["old_impl"] = self.old_impl
+        data["r_differentiable"] = self.r_differentiable
+        data["c_differentiable"] = self.c_differentiable
+        return data
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "GeneralFitting":
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        return super().deserialize(data)
+
+    def output_def(self) -> FittingOutputDef:
+        return FittingOutputDef(
+            [
+                OutputVariableDef(
+                    self.var_name,
+                    [3],
+                    reduciable=True,
+                    r_differentiable=self.r_differentiable,
+                    c_differentiable=self.c_differentiable,
+                ),
+            ]
+        )
+
+    def compute_output_stats(
+        self,
+        merged: Union[Callable[[], List[dict]], List[dict]],
+        stat_file_path: Optional[DPPath] = None,
+    ):
+        """
+        Compute the output statistics (e.g. energy bias) for the fitting net from packed data.
+
+        Parameters
+        ----------
+        merged : Union[Callable[[], List[dict]], List[dict]]
+            - List[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], List[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        stat_file_path : Optional[DPPath]
+            The path to the stat file.
+
+        """
+        pass
+
+    def forward(
+        self,
+        descriptor: torch.Tensor,
+        atype: torch.Tensor,
+        gr: Optional[torch.Tensor] = None,
+        g2: Optional[torch.Tensor] = None,
+        h2: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+    ):
+        nframes, nloc, _ = descriptor.shape
+        assert gr is not None, "Must provide the rotation matrix for dipole fitting."
+        # (nframes, nloc, m1)
+        out = self._forward_common(descriptor, atype, gr, g2, h2, fparam, aparam)[
+            self.var_name
+        ]
+        # (nframes * nloc, 1, m1)
+        out = out.view(-1, 1, self.embedding_width)
+        # (nframes * nloc, m1, 3)
+        gr = gr.view(nframes * nloc, -1, 3)
+        # (nframes, nloc, 3)
+        out = torch.bmm(out, gr).squeeze(-2).view(nframes, nloc, 3)
+        return {self.var_name: out.to(env.GLOBAL_PT_FLOAT_PRECISION)}
+
+    # make jit happy with torch 2.0.0
+    exclude_types: List[int]
diff --git a/deepmd/pt/model/task/dos.py b/deepmd/pt/model/task/dos.py
new file mode 100644
index 0000000000..196872d17c
--- /dev/null
+++ b/deepmd/pt/model/task/dos.py
@@ -0,0 +1,194 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import logging
+from typing import (
+    Callable,
+    List,
+    Optional,
+    Union,
+)
+
+import numpy as np
+import torch
+
+from deepmd.dpmodel import (
+    FittingOutputDef,
+    OutputVariableDef,
+)
+from deepmd.pt.model.task.ener import (
+    InvarFitting,
+)
+from deepmd.pt.model.task.fitting import (
+    Fitting,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.env import (
+    DEFAULT_PRECISION,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+)
+from deepmd.utils.out_stat import (
+    compute_stats_from_atomic,
+    compute_stats_from_redu,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+dtype = env.GLOBAL_PT_FLOAT_PRECISION
+device = env.DEVICE
+
+log = logging.getLogger(__name__)
+
+
+@Fitting.register("dos")
+class DOSFittingNet(InvarFitting):
+    def __init__(
+        self,
+        ntypes: int,
+        dim_descrpt: int,
+        numb_dos: int = 300,
+        neuron: List[int] = [128, 128, 128],
+        resnet_dt: bool = True,
+        numb_fparam: int = 0,
+        numb_aparam: int = 0,
+        rcond: Optional[float] = None,
+        bias_dos: Optional[torch.Tensor] = None,
+        trainable: Union[bool, List[bool]] = True,
+        seed: Optional[int] = None,
+        activation_function: str = "tanh",
+        precision: str = DEFAULT_PRECISION,
+        exclude_types: List[int] = [],
+        mixed_types: bool = True,
+    ):
+        if bias_dos is not None:
+            self.bias_dos = bias_dos
+        else:
+            self.bias_dos = torch.zeros(
+                (ntypes, numb_dos), dtype=dtype, device=env.DEVICE
+            )
+        super().__init__(
+            var_name="dos",
+            ntypes=ntypes,
+            dim_descrpt=dim_descrpt,
+            dim_out=numb_dos,
+            neuron=neuron,
+            bias_atom_e=bias_dos,
+            resnet_dt=resnet_dt,
+            numb_fparam=numb_fparam,
+            numb_aparam=numb_aparam,
+            activation_function=activation_function,
+            precision=precision,
+            mixed_types=mixed_types,
+            rcond=rcond,
+            seed=seed,
+            exclude_types=exclude_types,
+            trainable=trainable,
+        )
+
+    def output_def(self) -> FittingOutputDef:
+        return FittingOutputDef(
+            [
+                OutputVariableDef(
+                    self.var_name,
+                    [self.dim_out],
+                    reduciable=True,
+                    r_differentiable=False,
+                    c_differentiable=False,
+                ),
+            ]
+        )
+
+    def compute_output_stats(
+        self,
+        merged: Union[Callable[[], List[dict]], List[dict]],
+        stat_file_path: Optional[DPPath] = None,
+    ) -> None:
+        """
+        Compute the output statistics (e.g. dos bias) for the fitting net from packed data.
+
+        Parameters
+        ----------
+        merged : Union[Callable[[], List[dict]], List[dict]]
+            - List[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], List[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        stat_file_path : Optional[DPPath]
+            The path to the stat file.
+
+        """
+        if stat_file_path is not None:
+            stat_file_path = stat_file_path / "bias_dos"
+        if stat_file_path is not None and stat_file_path.is_file():
+            bias_dos = stat_file_path.load_numpy()
+        else:
+            if callable(merged):
+                # only get data for once
+                sampled = merged()
+            else:
+                sampled = merged
+            for sys in range(len(sampled)):
+                nframs = sampled[sys]["atype"].shape[0]
+
+                if "atom_dos" in sampled[sys]:
+                    bias_dos = compute_stats_from_atomic(
+                        sampled[sys]["atom_dos"].numpy(force=True),
+                        sampled[sys]["atype"].numpy(force=True),
+                    )[0]
+                else:
+                    sys_type_count = np.zeros(
+                        (nframs, self.ntypes), dtype=env.GLOBAL_NP_FLOAT_PRECISION
+                    )
+                    for itype in range(self.ntypes):
+                        type_mask = sampled[sys]["atype"] == itype
+                        sys_type_count[:, itype] = type_mask.sum(dim=1).numpy(
+                            force=True
+                        )
+                    sys_bias_redu = sampled[sys]["dos"].numpy(force=True)
+
+                    bias_dos = compute_stats_from_redu(
+                        sys_bias_redu, sys_type_count, rcond=self.rcond
+                    )[0]
+                if stat_file_path is not None:
+                    stat_file_path.save_numpy(bias_dos)
+        self.bias_dos = torch.tensor(bias_dos, device=env.DEVICE)
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "DOSFittingNet":
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data.pop("@class", None)
+        data.pop("var_name", None)
+        data.pop("tot_ener_zero", None)
+        data.pop("layer_name", None)
+        data.pop("use_aparam_as_mask", None)
+        data.pop("spin", None)
+        data.pop("atom_ener", None)
+        data["numb_dos"] = data.pop("dim_out")
+        obj = super().deserialize(data)
+
+        return obj
+
+    def serialize(self) -> dict:
+        """Serialize the fitting to dict."""
+        # dd = super(InvarFitting, self).serialize()
+        dd = {
+            **InvarFitting.serialize(self),
+            "type": "dos",
+            "dim_out": self.dim_out,
+        }
+        dd["@variables"]["bias_atom_e"] = to_numpy_array(self.bias_atom_e)
+
+        return dd
+
+    # make jit happy with torch 2.0.0
+    exclude_types: List[int]
diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py
new file mode 100644
index 0000000000..12c0917dd2
--- /dev/null
+++ b/deepmd/pt/model/task/ener.py
@@ -0,0 +1,247 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import logging
+from typing import (
+    List,
+    Optional,
+    Tuple,
+)
+
+import numpy as np
+import torch
+
+from deepmd.dpmodel import (
+    FittingOutputDef,
+    OutputVariableDef,
+    fitting_check_output,
+)
+from deepmd.pt.model.network.network import (
+    ResidualDeep,
+)
+from deepmd.pt.model.task.fitting import (
+    Fitting,
+    GeneralFitting,
+)
+from deepmd.pt.model.task.invar_fitting import (
+    InvarFitting,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.env import (
+    DEFAULT_PRECISION,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+dtype = env.GLOBAL_PT_FLOAT_PRECISION
+device = env.DEVICE
+
+log = logging.getLogger(__name__)
+
+
+@Fitting.register("ener")
+class EnergyFittingNet(InvarFitting):
+    def __init__(
+        self,
+        ntypes: int,
+        dim_descrpt: int,
+        neuron: List[int] = [128, 128, 128],
+        bias_atom_e: Optional[torch.Tensor] = None,
+        resnet_dt: bool = True,
+        numb_fparam: int = 0,
+        numb_aparam: int = 0,
+        activation_function: str = "tanh",
+        precision: str = DEFAULT_PRECISION,
+        mixed_types: bool = True,
+        **kwargs,
+    ):
+        super().__init__(
+            "energy",
+            ntypes,
+            dim_descrpt,
+            1,
+            neuron=neuron,
+            bias_atom_e=bias_atom_e,
+            resnet_dt=resnet_dt,
+            numb_fparam=numb_fparam,
+            numb_aparam=numb_aparam,
+            activation_function=activation_function,
+            precision=precision,
+            mixed_types=mixed_types,
+            **kwargs,
+        )
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "GeneralFitting":
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data.pop("var_name")
+        data.pop("dim_out")
+        return super().deserialize(data)
+
+    def serialize(self) -> dict:
+        """Serialize the fitting to dict."""
+        return {
+            **super().serialize(),
+            "type": "ener",
+        }
+
+    # make jit happy with torch 2.0.0
+    exclude_types: List[int]
+
+
+@Fitting.register("direct_force")
+@Fitting.register("direct_force_ener")
+@fitting_check_output
+class EnergyFittingNetDirect(Fitting):
+    def __init__(
+        self,
+        ntypes,
+        dim_descrpt,
+        neuron,
+        bias_atom_e=None,
+        out_dim=1,
+        resnet_dt=True,
+        use_tebd=True,
+        return_energy=False,
+        **kwargs,
+    ):
+        """Construct a fitting net for energy.
+
+        Args:
+        - ntypes: Element count.
+        - embedding_width: Embedding width per atom.
+        - neuron: Number of neurons in each hidden layers of the fitting net.
+        - bias_atom_e: Average enery per atom for each element.
+        - resnet_dt: Using time-step in the ResNet construction.
+        """
+        super().__init__()
+        self.ntypes = ntypes
+        self.dim_descrpt = dim_descrpt
+        self.use_tebd = use_tebd
+        self.out_dim = out_dim
+        if bias_atom_e is None:
+            bias_atom_e = np.zeros([self.ntypes])
+        if not use_tebd:
+            assert self.ntypes == len(bias_atom_e), "Element count mismatches!"
+        bias_atom_e = torch.tensor(bias_atom_e, device=env.DEVICE)
+        self.register_buffer("bias_atom_e", bias_atom_e)
+
+        filter_layers_dipole = []
+        for type_i in range(self.ntypes):
+            one = ResidualDeep(
+                type_i,
+                dim_descrpt,
+                neuron,
+                0.0,
+                out_dim=out_dim,
+                resnet_dt=resnet_dt,
+            )
+            filter_layers_dipole.append(one)
+        self.filter_layers_dipole = torch.nn.ModuleList(filter_layers_dipole)
+
+        self.return_energy = return_energy
+        filter_layers = []
+        if self.return_energy:
+            for type_i in range(self.ntypes):
+                bias_type = 0.0 if self.use_tebd else bias_atom_e[type_i]
+                one = ResidualDeep(
+                    type_i, dim_descrpt, neuron, bias_type, resnet_dt=resnet_dt
+                )
+                filter_layers.append(one)
+        self.filter_layers = torch.nn.ModuleList(filter_layers)
+
+        if "seed" in kwargs:
+            torch.manual_seed(kwargs["seed"])
+
+    def output_def(self):
+        return FittingOutputDef(
+            [
+                OutputVariableDef(
+                    "energy",
+                    [1],
+                    reduciable=True,
+                    r_differentiable=False,
+                    c_differentiable=False,
+                ),
+                OutputVariableDef(
+                    "dforce",
+                    [3],
+                    reduciable=False,
+                    r_differentiable=False,
+                    c_differentiable=False,
+                ),
+            ]
+        )
+
+    def serialize(self) -> dict:
+        raise NotImplementedError
+
+    def deserialize(cls) -> "EnergyFittingNetDirect":
+        raise NotImplementedError
+
+    def forward(
+        self,
+        inputs: torch.Tensor,
+        atype: torch.Tensor,
+        gr: Optional[torch.Tensor] = None,
+        g2: Optional[torch.Tensor] = None,
+        h2: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+    ) -> Tuple[torch.Tensor, None]:
+        """Based on embedding net output, alculate total energy.
+
+        Args:
+        - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.dim_descrpt].
+        - natoms: Tell atom count and element count. Its shape is [2+self.ntypes].
+
+        Returns
+        -------
+        - `torch.Tensor`: Total energy with shape [nframes, natoms[0]].
+        """
+        nframes, nloc, _ = inputs.size()
+        if self.use_tebd:
+            # if atype_tebd is not None:
+            #     inputs = torch.concat([inputs, atype_tebd], dim=-1)
+            vec_out = self.filter_layers_dipole[0](
+                inputs
+            )  # Shape is [nframes, nloc, m1]
+            assert list(vec_out.size()) == [nframes, nloc, self.out_dim]
+            # (nf x nloc) x 1 x od
+            vec_out = vec_out.view(-1, 1, self.out_dim)
+            assert gr is not None
+            # (nf x nloc) x od x 3
+            gr = gr.view(-1, self.out_dim, 3)
+            vec_out = (
+                torch.bmm(vec_out, gr).squeeze(-2).view(nframes, nloc, 3)
+            )  # Shape is [nframes, nloc, 3]
+        else:
+            vec_out = torch.zeros_like(atype).unsqueeze(-1)  # jit assertion
+            for type_i, filter_layer in enumerate(self.filter_layers_dipole):
+                mask = atype == type_i
+                vec_out_type = filter_layer(inputs)  # Shape is [nframes, nloc, m1]
+                vec_out_type = vec_out_type * mask.unsqueeze(-1)
+                vec_out = vec_out + vec_out_type  # Shape is [nframes, natoms[0], 1]
+
+        outs = torch.zeros_like(atype).unsqueeze(-1)  # jit assertion
+        if self.return_energy:
+            if self.use_tebd:
+                atom_energy = self.filter_layers[0](inputs) + self.bias_atom_e[
+                    atype
+                ].unsqueeze(-1)
+                outs = outs + atom_energy  # Shape is [nframes, natoms[0], 1]
+            else:
+                for type_i, filter_layer in enumerate(self.filter_layers):
+                    mask = atype == type_i
+                    atom_energy = filter_layer(inputs)
+                    if not env.ENERGY_BIAS_TRAINABLE:
+                        atom_energy = atom_energy + self.bias_atom_e[type_i]
+                    atom_energy = atom_energy * mask.unsqueeze(-1)
+                    outs = outs + atom_energy  # Shape is [nframes, natoms[0], 1]
+        return {
+            "energy": outs.to(env.GLOBAL_PT_FLOAT_PRECISION),
+            "dforce": vec_out,
+        }
diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
new file mode 100644
index 0000000000..00579b957f
--- /dev/null
+++ b/deepmd/pt/model/task/fitting.py
@@ -0,0 +1,498 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import logging
+from abc import (
+    abstractmethod,
+)
+from typing import (
+    List,
+    Optional,
+    Union,
+)
+
+import numpy as np
+import torch
+
+from deepmd.pt.model.network.mlp import (
+    FittingNet,
+    NetworkCollection,
+)
+from deepmd.pt.model.network.network import (
+    ResidualDeep,
+)
+from deepmd.pt.model.task.base_fitting import (
+    BaseFitting,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.env import (
+    DEFAULT_PRECISION,
+    PRECISION_DICT,
+)
+from deepmd.pt.utils.exclude_mask import (
+    AtomExcludeMask,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+    to_torch_tensor,
+)
+
+dtype = env.GLOBAL_PT_FLOAT_PRECISION
+device = env.DEVICE
+
+log = logging.getLogger(__name__)
+
+
+class Fitting(torch.nn.Module, BaseFitting):
+    # plugin moved to BaseFitting
+
+    def __new__(cls, *args, **kwargs):
+        if cls is Fitting:
+            return BaseFitting.__new__(BaseFitting, *args, **kwargs)
+        return super().__new__(cls)
+
+    def share_params(self, base_class, shared_level, resume=False):
+        """
+        Share the parameters of self to the base_class with shared_level during multitask training.
+        If not start from checkpoint (resume is False),
+        some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes.
+        """
+        assert (
+            self.__class__ == base_class.__class__
+        ), "Only fitting nets of the same type can share params!"
+        if shared_level == 0:
+            # link buffers
+            if hasattr(self, "bias_atom_e"):
+                self.bias_atom_e = base_class.bias_atom_e
+            # the following will successfully link all the params except buffers, which need manually link.
+            for item in self._modules:
+                self._modules[item] = base_class._modules[item]
+        elif shared_level == 1:
+            # only not share the bias_atom_e
+            # the following will successfully link all the params except buffers, which need manually link.
+            for item in self._modules:
+                self._modules[item] = base_class._modules[item]
+        else:
+            raise NotImplementedError
+
+
+class GeneralFitting(Fitting):
+    """Construct a general fitting net.
+
+    Parameters
+    ----------
+    var_name : str
+        The atomic property to fit, 'energy', 'dipole', and 'polar'.
+    ntypes : int
+        Element count.
+    dim_descrpt : int
+        Embedding width per atom.
+    dim_out : int
+        The output dimension of the fitting net.
+    neuron : List[int]
+        Number of neurons in each hidden layers of the fitting net.
+    bias_atom_e : torch.Tensor, optional
+        Average enery per atom for each element.
+    resnet_dt : bool
+        Using time-step in the ResNet construction.
+    numb_fparam : int
+        Number of frame parameters.
+    numb_aparam : int
+        Number of atomic parameters.
+    activation_function : str
+        Activation function.
+    precision : str
+        Numerical precision.
+    mixed_types : bool
+        If true, use a uniform fitting net for all atom types, otherwise use
+        different fitting nets for different atom types.
+    rcond : float, optional
+        The condition number for the regression of atomic energy.
+    seed : int, optional
+        Random seed.
+    exclude_types: List[int]
+        Atomic contributions of the excluded atom types are set zero.
+    trainable : Union[List[bool], bool]
+        If the parameters in the fitting net are trainable.
+        Now this only supports setting all the parameters in the fitting net at one state.
+        When in List[bool], the trainable will be True only if all the boolean parameters are True.
+    remove_vaccum_contribution: List[bool], optional
+        Remove vaccum contribution before the bias is added. The list assigned each
+        type. For `mixed_types` provide `[True]`, otherwise it should be a list of the same
+        length as `ntypes` signaling if or not removing the vaccum contribution for the atom types in the list.
+    """
+
+    def __init__(
+        self,
+        var_name: str,
+        ntypes: int,
+        dim_descrpt: int,
+        neuron: List[int] = [128, 128, 128],
+        bias_atom_e: Optional[torch.Tensor] = None,
+        resnet_dt: bool = True,
+        numb_fparam: int = 0,
+        numb_aparam: int = 0,
+        activation_function: str = "tanh",
+        precision: str = DEFAULT_PRECISION,
+        mixed_types: bool = True,
+        rcond: Optional[float] = None,
+        seed: Optional[int] = None,
+        exclude_types: List[int] = [],
+        trainable: Union[bool, List[bool]] = True,
+        remove_vaccum_contribution: Optional[List[bool]] = None,
+        **kwargs,
+    ):
+        super().__init__()
+        self.var_name = var_name
+        self.ntypes = ntypes
+        self.dim_descrpt = dim_descrpt
+        self.neuron = neuron
+        self.mixed_types = mixed_types
+        self.resnet_dt = resnet_dt
+        self.numb_fparam = numb_fparam
+        self.numb_aparam = numb_aparam
+        self.activation_function = activation_function
+        self.precision = precision
+        self.prec = PRECISION_DICT[self.precision]
+        self.rcond = rcond
+        # order matters, should be place after the assignment of ntypes
+        self.reinit_exclude(exclude_types)
+        self.trainable = trainable
+        # need support for each layer settings
+        self.trainable = (
+            all(self.trainable) if isinstance(self.trainable, list) else self.trainable
+        )
+        self.remove_vaccum_contribution = remove_vaccum_contribution
+
+        net_dim_out = self._net_out_dim()
+        # init constants
+        if bias_atom_e is None:
+            bias_atom_e = np.zeros([self.ntypes, net_dim_out], dtype=np.float64)
+        bias_atom_e = torch.tensor(bias_atom_e, dtype=self.prec, device=device)
+        bias_atom_e = bias_atom_e.view([self.ntypes, net_dim_out])
+        if not self.mixed_types:
+            assert self.ntypes == bias_atom_e.shape[0], "Element count mismatches!"
+        self.register_buffer("bias_atom_e", bias_atom_e)
+
+        if self.numb_fparam > 0:
+            self.register_buffer(
+                "fparam_avg",
+                torch.zeros(self.numb_fparam, dtype=self.prec, device=device),
+            )
+            self.register_buffer(
+                "fparam_inv_std",
+                torch.ones(self.numb_fparam, dtype=self.prec, device=device),
+            )
+        else:
+            self.fparam_avg, self.fparam_inv_std = None, None
+        if self.numb_aparam > 0:
+            self.register_buffer(
+                "aparam_avg",
+                torch.zeros(self.numb_aparam, dtype=self.prec, device=device),
+            )
+            self.register_buffer(
+                "aparam_inv_std",
+                torch.ones(self.numb_aparam, dtype=self.prec, device=device),
+            )
+        else:
+            self.aparam_avg, self.aparam_inv_std = None, None
+
+        in_dim = self.dim_descrpt + self.numb_fparam + self.numb_aparam
+
+        self.old_impl = kwargs.get("old_impl", False)
+        if self.old_impl:
+            filter_layers = []
+            for type_i in range(self.ntypes if not self.mixed_types else 1):
+                bias_type = 0.0
+                one = ResidualDeep(
+                    type_i,
+                    self.dim_descrpt,
+                    self.neuron,
+                    bias_type,
+                    resnet_dt=self.resnet_dt,
+                )
+                filter_layers.append(one)
+            self.filter_layers_old = torch.nn.ModuleList(filter_layers)
+            self.filter_layers = None
+        else:
+            self.filter_layers = NetworkCollection(
+                1 if not self.mixed_types else 0,
+                self.ntypes,
+                network_type="fitting_network",
+                networks=[
+                    FittingNet(
+                        in_dim,
+                        net_dim_out,
+                        self.neuron,
+                        self.activation_function,
+                        self.resnet_dt,
+                        self.precision,
+                        bias_out=True,
+                    )
+                    for ii in range(self.ntypes if not self.mixed_types else 1)
+                ],
+            )
+            self.filter_layers_old = None
+
+        if seed is not None:
+            torch.manual_seed(seed)
+        # set trainable
+        for param in self.parameters():
+            param.requires_grad = self.trainable
+
+    def reinit_exclude(
+        self,
+        exclude_types: List[int] = [],
+    ):
+        self.exclude_types = exclude_types
+        self.emask = AtomExcludeMask(self.ntypes, self.exclude_types)
+
+    def serialize(self) -> dict:
+        """Serialize the fitting to dict."""
+        return {
+            "@class": "Fitting",
+            "@version": 1,
+            "var_name": self.var_name,
+            "ntypes": self.ntypes,
+            "dim_descrpt": self.dim_descrpt,
+            "neuron": self.neuron,
+            "resnet_dt": self.resnet_dt,
+            "numb_fparam": self.numb_fparam,
+            "numb_aparam": self.numb_aparam,
+            "activation_function": self.activation_function,
+            "precision": self.precision,
+            "mixed_types": self.mixed_types,
+            "nets": self.filter_layers.serialize(),
+            "rcond": self.rcond,
+            "exclude_types": self.exclude_types,
+            "@variables": {
+                "bias_atom_e": to_numpy_array(self.bias_atom_e),
+                "fparam_avg": to_numpy_array(self.fparam_avg),
+                "fparam_inv_std": to_numpy_array(self.fparam_inv_std),
+                "aparam_avg": to_numpy_array(self.aparam_avg),
+                "aparam_inv_std": to_numpy_array(self.aparam_inv_std),
+            },
+            # "tot_ener_zero": self.tot_ener_zero ,
+            # "trainable": self.trainable ,
+            # "atom_ener": self.atom_ener ,
+            # "layer_name": self.layer_name ,
+            # "use_aparam_as_mask": self.use_aparam_as_mask ,
+            # "spin": self.spin ,
+            ## NOTICE:  not supported by far
+            "tot_ener_zero": False,
+            "trainable": [self.trainable] * (len(self.neuron) + 1),
+            "layer_name": None,
+            "use_aparam_as_mask": False,
+            "spin": None,
+        }
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "GeneralFitting":
+        data = copy.deepcopy(data)
+        variables = data.pop("@variables")
+        nets = data.pop("nets")
+        obj = cls(**data)
+        for kk in variables.keys():
+            obj[kk] = to_torch_tensor(variables[kk])
+        obj.filter_layers = NetworkCollection.deserialize(nets)
+        return obj
+
+    def get_dim_fparam(self) -> int:
+        """Get the number (dimension) of frame parameters of this atomic model."""
+        return self.numb_fparam
+
+    def get_dim_aparam(self) -> int:
+        """Get the number (dimension) of atomic parameters of this atomic model."""
+        return self.numb_aparam
+
+    # make jit happy
+    exclude_types: List[int]
+
+    def get_sel_type(self) -> List[int]:
+        """Get the selected atom types of this model.
+
+        Only atoms with selected atom types have atomic contribution
+        to the result of the model.
+        If returning an empty list, all atom types are selected.
+        """
+        # make jit happy
+        sel_type: List[int] = []
+        for ii in range(self.ntypes):
+            if ii not in self.exclude_types:
+                sel_type.append(ii)
+        return sel_type
+
+    def __setitem__(self, key, value):
+        if key in ["bias_atom_e"]:
+            value = value.view([self.ntypes, self._net_out_dim()])
+            self.bias_atom_e = value
+        elif key in ["fparam_avg"]:
+            self.fparam_avg = value
+        elif key in ["fparam_inv_std"]:
+            self.fparam_inv_std = value
+        elif key in ["aparam_avg"]:
+            self.aparam_avg = value
+        elif key in ["aparam_inv_std"]:
+            self.aparam_inv_std = value
+        elif key in ["scale"]:
+            self.scale = value
+        else:
+            raise KeyError(key)
+
+    def __getitem__(self, key):
+        if key in ["bias_atom_e"]:
+            return self.bias_atom_e
+        elif key in ["fparam_avg"]:
+            return self.fparam_avg
+        elif key in ["fparam_inv_std"]:
+            return self.fparam_inv_std
+        elif key in ["aparam_avg"]:
+            return self.aparam_avg
+        elif key in ["aparam_inv_std"]:
+            return self.aparam_inv_std
+        elif key in ["scale"]:
+            return self.scale
+        else:
+            raise KeyError(key)
+
+    @abstractmethod
+    def _net_out_dim(self):
+        """Set the FittingNet output dim."""
+        pass
+
+    def _extend_f_avg_std(self, xx: torch.Tensor, nb: int) -> torch.Tensor:
+        return torch.tile(xx.view([1, self.numb_fparam]), [nb, 1])
+
+    def _extend_a_avg_std(self, xx: torch.Tensor, nb: int, nloc: int) -> torch.Tensor:
+        return torch.tile(xx.view([1, 1, self.numb_aparam]), [nb, nloc, 1])
+
+    def _forward_common(
+        self,
+        descriptor: torch.Tensor,
+        atype: torch.Tensor,
+        gr: Optional[torch.Tensor] = None,
+        g2: Optional[torch.Tensor] = None,
+        h2: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+    ):
+        xx = descriptor
+        if self.remove_vaccum_contribution is not None:
+            # TODO: compute the input for vaccm when remove_vaccum_contribution is set
+            # Idealy, the input for vaccum should be computed;
+            # we consider it as always zero for convenience.
+            # Needs a compute_input_stats for vaccum passed from the
+            # descriptor.
+            xx_zeros = torch.zeros_like(xx)
+        else:
+            xx_zeros = None
+        nf, nloc, nd = xx.shape
+        net_dim_out = self._net_out_dim()
+
+        if nd != self.dim_descrpt:
+            raise ValueError(
+                "get an input descriptor of dim {nd},"
+                "which is not consistent with {self.dim_descrpt}."
+            )
+        # check fparam dim, concate to input descriptor
+        if self.numb_fparam > 0:
+            assert fparam is not None, "fparam should not be None"
+            assert self.fparam_avg is not None
+            assert self.fparam_inv_std is not None
+            if fparam.shape[-1] != self.numb_fparam:
+                raise ValueError(
+                    "get an input fparam of dim {fparam.shape[-1]}, ",
+                    "which is not consistent with {self.numb_fparam}.",
+                )
+            fparam = fparam.view([nf, self.numb_fparam])
+            nb, _ = fparam.shape
+            t_fparam_avg = self._extend_f_avg_std(self.fparam_avg, nb)
+            t_fparam_inv_std = self._extend_f_avg_std(self.fparam_inv_std, nb)
+            fparam = (fparam - t_fparam_avg) * t_fparam_inv_std
+            fparam = torch.tile(fparam.reshape([nf, 1, -1]), [1, nloc, 1])
+            xx = torch.cat(
+                [xx, fparam],
+                dim=-1,
+            )
+            if xx_zeros is not None:
+                xx_zeros = torch.cat(
+                    [xx_zeros, fparam],
+                    dim=-1,
+                )
+        # check aparam dim, concate to input descriptor
+        if self.numb_aparam > 0:
+            assert aparam is not None, "aparam should not be None"
+            assert self.aparam_avg is not None
+            assert self.aparam_inv_std is not None
+            if aparam.shape[-1] != self.numb_aparam:
+                raise ValueError(
+                    f"get an input aparam of dim {aparam.shape[-1]}, ",
+                    f"which is not consistent with {self.numb_aparam}.",
+                )
+            aparam = aparam.view([nf, -1, self.numb_aparam])
+            nb, nloc, _ = aparam.shape
+            t_aparam_avg = self._extend_a_avg_std(self.aparam_avg, nb, nloc)
+            t_aparam_inv_std = self._extend_a_avg_std(self.aparam_inv_std, nb, nloc)
+            aparam = (aparam - t_aparam_avg) * t_aparam_inv_std
+            xx = torch.cat(
+                [xx, aparam],
+                dim=-1,
+            )
+            if xx_zeros is not None:
+                xx_zeros = torch.cat(
+                    [xx_zeros, aparam],
+                    dim=-1,
+                )
+
+        outs = torch.zeros(
+            (nf, nloc, net_dim_out),
+            dtype=env.GLOBAL_PT_FLOAT_PRECISION,
+            device=descriptor.device,
+        )  # jit assertion
+        if self.old_impl:
+            assert self.filter_layers_old is not None
+            assert xx_zeros is None
+            if self.mixed_types:
+                atom_property = self.filter_layers_old[0](xx) + self.bias_atom_e[atype]
+                outs = outs + atom_property  # Shape is [nframes, natoms[0], 1]
+            else:
+                for type_i, filter_layer in enumerate(self.filter_layers_old):
+                    mask = atype == type_i
+                    atom_property = filter_layer(xx)
+                    atom_property = atom_property + self.bias_atom_e[type_i]
+                    atom_property = atom_property * mask.unsqueeze(-1)
+                    outs = outs + atom_property  # Shape is [nframes, natoms[0], 1]
+        else:
+            if self.mixed_types:
+                atom_property = (
+                    self.filter_layers.networks[0](xx) + self.bias_atom_e[atype]
+                )
+                if xx_zeros is not None:
+                    atom_property -= self.filter_layers.networks[0](xx_zeros)
+                outs = (
+                    outs + atom_property
+                )  # Shape is [nframes, natoms[0], net_dim_out]
+            else:
+                for type_i, ll in enumerate(self.filter_layers.networks):
+                    mask = (atype == type_i).unsqueeze(-1)
+                    mask = torch.tile(mask, (1, 1, net_dim_out))
+                    atom_property = ll(xx)
+                    if xx_zeros is not None:
+                        # must assert, otherwise jit is not happy
+                        assert self.remove_vaccum_contribution is not None
+                        if not (
+                            len(self.remove_vaccum_contribution) > type_i
+                            and not self.remove_vaccum_contribution[type_i]
+                        ):
+                            atom_property -= ll(xx_zeros)
+                    atom_property = atom_property + self.bias_atom_e[type_i]
+                    atom_property = atom_property * mask
+                    outs = (
+                        outs + atom_property
+                    )  # Shape is [nframes, natoms[0], net_dim_out]
+        # nf x nloc
+        mask = self.emask(atype)
+        # nf x nloc x nod
+        outs = outs * mask[:, :, None]
+        return {self.var_name: outs.to(env.GLOBAL_PT_FLOAT_PRECISION)}
diff --git a/deepmd/pt/model/task/invar_fitting.py b/deepmd/pt/model/task/invar_fitting.py
new file mode 100644
index 0000000000..585f697193
--- /dev/null
+++ b/deepmd/pt/model/task/invar_fitting.py
@@ -0,0 +1,213 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import logging
+from typing import (
+    Callable,
+    List,
+    Optional,
+    Union,
+)
+
+import torch
+
+from deepmd.dpmodel import (
+    FittingOutputDef,
+    OutputVariableDef,
+    fitting_check_output,
+)
+from deepmd.pt.model.task.fitting import (
+    GeneralFitting,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.env import (
+    DEFAULT_PRECISION,
+)
+from deepmd.pt.utils.stat import (
+    compute_output_stats,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+dtype = env.GLOBAL_PT_FLOAT_PRECISION
+device = env.DEVICE
+
+log = logging.getLogger(__name__)
+
+
+@GeneralFitting.register("invar")
+@fitting_check_output
+class InvarFitting(GeneralFitting):
+    """Construct a fitting net for energy.
+
+    Parameters
+    ----------
+    var_name : str
+        The atomic property to fit, 'energy', 'dipole', and 'polar'.
+    ntypes : int
+        Element count.
+    dim_descrpt : int
+        Embedding width per atom.
+    dim_out : int
+        The output dimension of the fitting net.
+    neuron : List[int]
+        Number of neurons in each hidden layers of the fitting net.
+    bias_atom_e : torch.Tensor, optional
+        Average enery per atom for each element.
+    resnet_dt : bool
+        Using time-step in the ResNet construction.
+    numb_fparam : int
+        Number of frame parameters.
+    numb_aparam : int
+        Number of atomic parameters.
+    activation_function : str
+        Activation function.
+    precision : str
+        Numerical precision.
+    mixed_types : bool
+        If true, use a uniform fitting net for all atom types, otherwise use
+        different fitting nets for different atom types.
+    rcond : float, optional
+        The condition number for the regression of atomic energy.
+    seed : int, optional
+        Random seed.
+    exclude_types: List[int]
+        Atomic contributions of the excluded atom types are set zero.
+    atom_ener: List[float], optional
+        Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descrptor should be set.
+
+    """
+
+    def __init__(
+        self,
+        var_name: str,
+        ntypes: int,
+        dim_descrpt: int,
+        dim_out: int,
+        neuron: List[int] = [128, 128, 128],
+        bias_atom_e: Optional[torch.Tensor] = None,
+        resnet_dt: bool = True,
+        numb_fparam: int = 0,
+        numb_aparam: int = 0,
+        activation_function: str = "tanh",
+        precision: str = DEFAULT_PRECISION,
+        mixed_types: bool = True,
+        rcond: Optional[float] = None,
+        seed: Optional[int] = None,
+        exclude_types: List[int] = [],
+        atom_ener: Optional[List[float]] = None,
+        **kwargs,
+    ):
+        self.dim_out = dim_out
+        self.atom_ener = atom_ener
+        super().__init__(
+            var_name=var_name,
+            ntypes=ntypes,
+            dim_descrpt=dim_descrpt,
+            neuron=neuron,
+            bias_atom_e=bias_atom_e,
+            resnet_dt=resnet_dt,
+            numb_fparam=numb_fparam,
+            numb_aparam=numb_aparam,
+            activation_function=activation_function,
+            precision=precision,
+            mixed_types=mixed_types,
+            rcond=rcond,
+            seed=seed,
+            exclude_types=exclude_types,
+            remove_vaccum_contribution=None
+            if atom_ener is None or len([x for x in atom_ener if x is not None]) == 0
+            else [x is not None for x in atom_ener],
+            **kwargs,
+        )
+
+    def _net_out_dim(self):
+        """Set the FittingNet output dim."""
+        return self.dim_out
+
+    def serialize(self) -> dict:
+        data = super().serialize()
+        data["type"] = "invar"
+        data["dim_out"] = self.dim_out
+        data["atom_ener"] = self.atom_ener
+        return data
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "GeneralFitting":
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        return super().deserialize(data)
+
+    def compute_output_stats(
+        self,
+        merged: Union[Callable[[], List[dict]], List[dict]],
+        stat_file_path: Optional[DPPath] = None,
+    ):
+        """
+        Compute the output statistics (e.g. energy bias) for the fitting net from packed data.
+
+        Parameters
+        ----------
+        merged : Union[Callable[[], List[dict]], List[dict]]
+            - List[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], List[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        stat_file_path : Optional[DPPath]
+            The path to the stat file.
+
+        """
+        bias_atom_e = compute_output_stats(
+            merged,
+            self.ntypes,
+            keys=["energy"],
+            stat_file_path=stat_file_path,
+            rcond=self.rcond,
+            atom_ener=self.atom_ener,
+        )["energy"]
+        self.bias_atom_e.copy_(bias_atom_e.view([self.ntypes, self.dim_out]))
+
+    def output_def(self) -> FittingOutputDef:
+        return FittingOutputDef(
+            [
+                OutputVariableDef(
+                    self.var_name,
+                    [self.dim_out],
+                    reduciable=True,
+                    r_differentiable=True,
+                    c_differentiable=True,
+                ),
+            ]
+        )
+
+    def forward(
+        self,
+        descriptor: torch.Tensor,
+        atype: torch.Tensor,
+        gr: Optional[torch.Tensor] = None,
+        g2: Optional[torch.Tensor] = None,
+        h2: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+    ):
+        """Based on embedding net output, alculate total energy.
+
+        Args:
+        - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.dim_descrpt].
+        - natoms: Tell atom count and element count. Its shape is [2+self.ntypes].
+
+        Returns
+        -------
+        - `torch.Tensor`: Total energy with shape [nframes, natoms[0]].
+        """
+        return self._forward_common(descriptor, atype, gr, g2, h2, fparam, aparam)
+
+    # make jit happy with torch 2.0.0
+    exclude_types: List[int]
diff --git a/deepmd/pt/model/task/polarizability.py b/deepmd/pt/model/task/polarizability.py
new file mode 100644
index 0000000000..544d23555c
--- /dev/null
+++ b/deepmd/pt/model/task/polarizability.py
@@ -0,0 +1,321 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import logging
+from typing import (
+    Callable,
+    List,
+    Optional,
+    Union,
+)
+
+import numpy as np
+import torch
+
+from deepmd.dpmodel import (
+    FittingOutputDef,
+    OutputVariableDef,
+)
+from deepmd.pt.model.task.fitting import (
+    GeneralFitting,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.env import (
+    DEFAULT_PRECISION,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+)
+from deepmd.utils.out_stat import (
+    compute_stats_from_atomic,
+    compute_stats_from_redu,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+log = logging.getLogger(__name__)
+
+
+@GeneralFitting.register("polar")
+class PolarFittingNet(GeneralFitting):
+    """Construct a polar fitting net.
+
+    Parameters
+    ----------
+    var_name : str
+        The atomic property to fit, 'polar'.
+    ntypes : int
+        Element count.
+    dim_descrpt : int
+        Embedding width per atom.
+    embedding_width : int
+        The dimension of rotation matrix, m1.
+    neuron : List[int]
+        Number of neurons in each hidden layers of the fitting net.
+    resnet_dt : bool
+        Using time-step in the ResNet construction.
+    numb_fparam : int
+        Number of frame parameters.
+    numb_aparam : int
+        Number of atomic parameters.
+    activation_function : str
+        Activation function.
+    precision : str
+        Numerical precision.
+    mixed_types : bool
+        If true, use a uniform fitting net for all atom types, otherwise use
+        different fitting nets for different atom types.
+    rcond : float, optional
+        The condition number for the regression of atomic energy.
+    seed : int, optional
+        Random seed.
+    fit_diag : bool
+        Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to
+        normal polarizability matrix by contracting with the rotation matrix.
+    scale : List[float]
+        The output of the fitting net (polarizability matrix) for type i atom will be scaled by scale[i]
+    shift_diag : bool
+        Whether to shift the diagonal part of the polarizability matrix. The shift operation is carried out after scale.
+    """
+
+    def __init__(
+        self,
+        ntypes: int,
+        dim_descrpt: int,
+        embedding_width: int,
+        neuron: List[int] = [128, 128, 128],
+        resnet_dt: bool = True,
+        numb_fparam: int = 0,
+        numb_aparam: int = 0,
+        activation_function: str = "tanh",
+        precision: str = DEFAULT_PRECISION,
+        mixed_types: bool = True,
+        rcond: Optional[float] = None,
+        seed: Optional[int] = None,
+        exclude_types: List[int] = [],
+        fit_diag: bool = True,
+        scale: Optional[Union[List[float], float]] = None,
+        shift_diag: bool = True,
+        **kwargs,
+    ):
+        self.embedding_width = embedding_width
+        self.fit_diag = fit_diag
+        self.scale = scale
+        if self.scale is None:
+            self.scale = [1.0 for _ in range(ntypes)]
+        else:
+            if isinstance(self.scale, list):
+                assert (
+                    len(self.scale) == ntypes
+                ), "Scale should be a list of length ntypes."
+            elif isinstance(self.scale, float):
+                self.scale = [self.scale for _ in range(ntypes)]
+            else:
+                raise ValueError(
+                    "Scale must be a list of float of length ntypes or a float."
+                )
+        self.scale = torch.tensor(
+            self.scale, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE
+        ).view(ntypes, 1)
+        self.shift_diag = shift_diag
+        self.constant_matrix = torch.zeros(
+            ntypes, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE
+        )
+        super().__init__(
+            var_name=kwargs.pop("var_name", "polar"),
+            ntypes=ntypes,
+            dim_descrpt=dim_descrpt,
+            neuron=neuron,
+            resnet_dt=resnet_dt,
+            numb_fparam=numb_fparam,
+            numb_aparam=numb_aparam,
+            activation_function=activation_function,
+            precision=precision,
+            mixed_types=mixed_types,
+            rcond=rcond,
+            seed=seed,
+            exclude_types=exclude_types,
+            **kwargs,
+        )
+        self.old_impl = False  # this only supports the new implementation.
+
+    def _net_out_dim(self):
+        """Set the FittingNet output dim."""
+        return (
+            self.embedding_width
+            if self.fit_diag
+            else self.embedding_width * self.embedding_width
+        )
+
+    def __setitem__(self, key, value):
+        if key in ["constant_matrix"]:
+            self.constant_matrix = value
+        else:
+            super().__setitem__(key, value)
+
+    def __getitem__(self, key):
+        if key in ["constant_matrix"]:
+            return self.constant_matrix
+        else:
+            return super().__getitem__(key)
+
+    def serialize(self) -> dict:
+        data = super().serialize()
+        data["type"] = "polar"
+        data["@version"] = 2
+        data["embedding_width"] = self.embedding_width
+        data["old_impl"] = self.old_impl
+        data["fit_diag"] = self.fit_diag
+        data["shift_diag"] = self.shift_diag
+        data["@variables"]["scale"] = to_numpy_array(self.scale)
+        data["@variables"]["constant_matrix"] = to_numpy_array(self.constant_matrix)
+        return data
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "GeneralFitting":
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 2, 1)
+        return super().deserialize(data)
+
+    def output_def(self) -> FittingOutputDef:
+        return FittingOutputDef(
+            [
+                OutputVariableDef(
+                    self.var_name,
+                    [3, 3],
+                    reduciable=True,
+                    r_differentiable=False,
+                    c_differentiable=False,
+                ),
+            ]
+        )
+
+    def compute_output_stats(
+        self,
+        merged: Union[Callable[[], List[dict]], List[dict]],
+        stat_file_path: Optional[DPPath] = None,
+    ) -> None:
+        """
+        Compute the output statistics (e.g. energy bias) for the fitting net from packed data.
+
+        Parameters
+        ----------
+        merged : Union[Callable[[], List[dict]], List[dict]]
+            - List[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], List[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        stat_file_path : Optional[DPPath]
+            The path to the stat file.
+
+        """
+        if self.shift_diag:
+            if stat_file_path is not None:
+                stat_file_path = stat_file_path / "constant_matrix"
+            if stat_file_path is not None and stat_file_path.is_file():
+                constant_matrix = stat_file_path.load_numpy()
+            else:
+                if callable(merged):
+                    # only get data for once
+                    sampled = merged()
+                else:
+                    sampled = merged
+
+                sys_constant_matrix = []
+                for sys in range(len(sampled)):
+                    nframs = sampled[sys]["atype"].shape[0]
+
+                    if sampled[sys]["find_atomic_polarizability"] > 0.0:
+                        sys_atom_polar = compute_stats_from_atomic(
+                            sampled[sys]["atomic_polarizability"].numpy(force=True),
+                            sampled[sys]["atype"].numpy(force=True),
+                        )[0]
+                    else:
+                        if not sampled[sys]["find_polarizability"] > 0.0:
+                            continue
+                        sys_type_count = np.zeros(
+                            (nframs, self.ntypes), dtype=env.GLOBAL_NP_FLOAT_PRECISION
+                        )
+                        for itype in range(self.ntypes):
+                            type_mask = sampled[sys]["atype"] == itype
+                            sys_type_count[:, itype] = type_mask.sum(dim=1).numpy(
+                                force=True
+                            )
+
+                        sys_bias_redu = sampled[sys]["polarizability"].numpy(force=True)
+
+                        sys_atom_polar = compute_stats_from_redu(
+                            sys_bias_redu, sys_type_count, rcond=self.rcond
+                        )[0]
+                    cur_constant_matrix = np.zeros(
+                        self.ntypes, dtype=env.GLOBAL_NP_FLOAT_PRECISION
+                    )
+
+                    for itype in range(self.ntypes):
+                        cur_constant_matrix[itype] = np.mean(
+                            np.diagonal(sys_atom_polar[itype].reshape(3, 3))
+                        )
+                    sys_constant_matrix.append(cur_constant_matrix)
+                constant_matrix = np.stack(sys_constant_matrix).mean(axis=0)
+
+                # handle nan values.
+                constant_matrix = np.nan_to_num(constant_matrix)
+            if stat_file_path is not None:
+                stat_file_path.save_numpy(constant_matrix)
+            self.constant_matrix = torch.tensor(constant_matrix, device=env.DEVICE)
+
+    def forward(
+        self,
+        descriptor: torch.Tensor,
+        atype: torch.Tensor,
+        gr: Optional[torch.Tensor] = None,
+        g2: Optional[torch.Tensor] = None,
+        h2: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+    ):
+        nframes, nloc, _ = descriptor.shape
+        assert (
+            gr is not None
+        ), "Must provide the rotation matrix for polarizability fitting."
+        # (nframes, nloc, _net_out_dim)
+        out = self._forward_common(descriptor, atype, gr, g2, h2, fparam, aparam)[
+            self.var_name
+        ]
+        out = out * self.scale[atype]
+        gr = gr.view(nframes * nloc, -1, 3)  # (nframes * nloc, m1, 3)
+
+        if self.fit_diag:
+            out = out.reshape(-1, self.embedding_width)
+            out = torch.einsum("ij,ijk->ijk", out, gr)
+        else:
+            out = out.reshape(-1, self.embedding_width, self.embedding_width)
+            out = (out + out.transpose(1, 2)) / 2
+            out = torch.einsum("bim,bmj->bij", out, gr)  # (nframes * nloc, m1, 3)
+        out = torch.einsum(
+            "bim,bmj->bij", gr.transpose(1, 2), out
+        )  # (nframes * nloc, 3, 3)
+        out = out.view(nframes, nloc, 3, 3)
+        if self.shift_diag:
+            bias = self.constant_matrix[atype]
+
+            # (nframes, nloc, 1)
+            bias = bias.unsqueeze(-1) * self.scale[atype]
+
+            eye = torch.eye(3, device=env.DEVICE)
+            eye = eye.repeat(nframes, nloc, 1, 1)
+            # (nframes, nloc, 3, 3)
+            bias = bias.unsqueeze(-1) * eye
+            out = out + bias
+
+        return {self.var_name: out.to(env.GLOBAL_PT_FLOAT_PRECISION)}
+
+    # make jit happy with torch 2.0.0
+    exclude_types: List[int]
diff --git a/deepmd/pt/model/task/task.py b/deepmd/pt/model/task/task.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/deepmd/pt/model/task/task.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/deepmd/pt/model/task/type_predict.py b/deepmd/pt/model/task/type_predict.py
new file mode 100644
index 0000000000..c696590043
--- /dev/null
+++ b/deepmd/pt/model/task/type_predict.py
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Optional,
+)
+
+import torch
+
+from deepmd.pt.model.network.network import (
+    MaskLMHead,
+)
+from deepmd.pt.model.task import (
+    Fitting,
+)
+
+
+class TypePredictNet(Fitting):
+    def __init__(self, feature_dim, ntypes, activation_function="gelu", **kwargs):
+        """Construct a type predict net.
+
+        Args:
+        - feature_dim: Input dm.
+        - ntypes: Numer of types to predict.
+        - activation_function: Activate function.
+        """
+        super().__init__()
+        self.feature_dim = feature_dim
+        self.ntypes = ntypes
+        self.lm_head = MaskLMHead(
+            embed_dim=self.feature_dim,
+            output_dim=ntypes,
+            activation_fn=activation_function,
+            weight=None,
+        )
+
+    def forward(self, features, masked_tokens: Optional[torch.Tensor] = None):
+        """Calculate the predicted logits.
+        Args:
+        - features: Input features with shape [nframes, nloc, feature_dim].
+        - masked_tokens: Input masked tokens with shape [nframes, nloc].
+
+        Returns
+        -------
+        - logits: Predicted probs with shape [nframes, nloc, ntypes].
+        """
+        # [nframes, nloc, ntypes]
+        logits = self.lm_head(features, masked_tokens=masked_tokens)
+        return logits
diff --git a/deepmd/pt/optimizer/KFWrapper.py b/deepmd/pt/optimizer/KFWrapper.py
new file mode 100644
index 0000000000..3ab7ffe7a9
--- /dev/null
+++ b/deepmd/pt/optimizer/KFWrapper.py
@@ -0,0 +1,145 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import math
+
+import numpy as np
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+from torch.optim.optimizer import (
+    Optimizer,
+)
+
+
+class KFOptimizerWrapper:
+    def __init__(
+        self,
+        model: nn.Module,
+        optimizer: Optimizer,
+        atoms_selected: int,
+        atoms_per_group: int,
+        is_distributed: bool = False,
+    ) -> None:
+        self.model = model
+        self.optimizer = optimizer
+        self.atoms_selected = atoms_selected  # 24
+        self.atoms_per_group = atoms_per_group  # 6
+        self.is_distributed = is_distributed
+
+    def update_energy(
+        self, inputs: dict, Etot_label: torch.Tensor, update_prefactor: float = 1
+    ) -> None:
+        model_pred, _, _ = self.model(**inputs, inference_only=True)
+        Etot_predict = model_pred["energy"]
+        natoms_sum = int(inputs["atype"].shape[-1])
+        self.optimizer.set_grad_prefactor(natoms_sum)
+
+        self.optimizer.zero_grad()
+        bs = Etot_label.shape[0]
+        error = Etot_label - Etot_predict
+        error = error / natoms_sum
+        mask = error < 0
+
+        error = error * update_prefactor
+        error[mask] = -1 * error[mask]
+        error = error.mean()
+
+        if self.is_distributed:
+            dist.all_reduce(error)
+            error /= dist.get_world_size()
+
+        Etot_predict = update_prefactor * Etot_predict
+        Etot_predict[mask] = -Etot_predict[mask]
+
+        Etot_predict.sum().backward()
+        error = error * math.sqrt(bs)
+        self.optimizer.step(error)
+        return Etot_predict
+
+    def update_force(
+        self, inputs: dict, Force_label: torch.Tensor, update_prefactor: float = 1
+    ) -> None:
+        natoms_sum = int(inputs["atype"].shape[-1])
+        bs = Force_label.shape[0]
+        self.optimizer.set_grad_prefactor(natoms_sum * self.atoms_per_group * 3)
+
+        index = self.__sample(self.atoms_selected, self.atoms_per_group, natoms_sum)
+
+        for i in range(index.shape[0]):
+            self.optimizer.zero_grad()
+            model_pred, _, _ = self.model(**inputs, inference_only=True)
+            Etot_predict = model_pred["energy"]
+            natoms_sum = int(inputs["atype"].shape[-1])
+            force_predict = model_pred["force"]
+            error_tmp = Force_label[:, index[i]] - force_predict[:, index[i]]
+            error_tmp = update_prefactor * error_tmp
+            mask = error_tmp < 0
+            error_tmp[mask] = -1 * error_tmp[mask]
+            error = error_tmp.mean() / natoms_sum
+
+            if self.is_distributed:
+                dist.all_reduce(error)
+                error /= dist.get_world_size()
+
+            tmp_force_predict = force_predict[:, index[i]] * update_prefactor
+            tmp_force_predict[mask] = -tmp_force_predict[mask]
+
+            # In order to solve a pytorch bug, reference: https://github.com/pytorch/pytorch/issues/43259
+            (tmp_force_predict.sum() + Etot_predict.sum() * 0).backward()
+            error = error * math.sqrt(bs)
+            self.optimizer.step(error)
+        return Etot_predict, force_predict
+
+    def update_denoise_coord(
+        self,
+        inputs: dict,
+        clean_coord: torch.Tensor,
+        update_prefactor: float = 1,
+        mask_loss_coord: bool = True,
+        coord_mask: torch.Tensor = None,
+    ) -> None:
+        natoms_sum = int(inputs["atype"].shape[-1])
+        bs = clean_coord.shape[0]
+        self.optimizer.set_grad_prefactor(natoms_sum * self.atoms_per_group * 3)
+
+        index = self.__sample(self.atoms_selected, self.atoms_per_group, natoms_sum)
+
+        for i in range(index.shape[0]):
+            self.optimizer.zero_grad()
+            model_pred, _, _ = self.model(**inputs, inference_only=True)
+            updated_coord = model_pred["updated_coord"]
+            natoms_sum = int(inputs["atype"].shape[-1])
+            error_tmp = clean_coord[:, index[i]] - updated_coord[:, index[i]]
+            error_tmp = update_prefactor * error_tmp
+            if mask_loss_coord:
+                error_tmp[~coord_mask[:, index[i]]] = 0
+            mask = error_tmp < 0
+            error_tmp[mask] = -1 * error_tmp[mask]
+            error = error_tmp.mean() / natoms_sum
+
+            if self.is_distributed:
+                dist.all_reduce(error)
+                error /= dist.get_world_size()
+
+            tmp_coord_predict = updated_coord[:, index[i]] * update_prefactor
+            tmp_coord_predict[mask] = -update_prefactor * tmp_coord_predict[mask]
+
+            # In order to solve a pytorch bug, reference: https://github.com/pytorch/pytorch/issues/43259
+            (tmp_coord_predict.sum() + updated_coord.sum() * 0).backward()
+            error = error * math.sqrt(bs)
+            self.optimizer.step(error)
+        return model_pred
+
+    def __sample(
+        self, atoms_selected: int, atoms_per_group: int, natoms: int
+    ) -> np.ndarray:
+        if atoms_selected % atoms_per_group:
+            raise Exception("divider")
+        index = range(natoms)
+        rng = np.random.default_rng()
+        res = rng.choice(index, atoms_selected).reshape(-1, atoms_per_group)
+        return res
+
+
+# with torch.autograd.profiler.profile(enabled=True, use_cuda=True, record_shapes=False) as prof:
+#     the code u wanna profile
+# print(prof.key_averages().table(sort_by="self_cpu_time_total"))
diff --git a/deepmd/pt/optimizer/LKF.py b/deepmd/pt/optimizer/LKF.py
new file mode 100644
index 0000000000..06b341d987
--- /dev/null
+++ b/deepmd/pt/optimizer/LKF.py
@@ -0,0 +1,320 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
+import math
+
+import torch
+import torch.distributed as dist
+from torch.optim.optimizer import (
+    Optimizer,
+)
+
+
+def distribute_indices(total_length, num_workers):
+    indices_per_worker = total_length // num_workers
+    remainder = total_length % num_workers
+
+    indices = []
+    start = 0
+
+    for i in range(num_workers):
+        end = start + indices_per_worker + (1 if i < remainder else 0)
+        indices.append((start, end))
+        start = end
+
+    return indices, remainder
+
+
+class LKFOptimizer(Optimizer):
+    def __init__(
+        self,
+        params,
+        kalman_lambda=0.98,
+        kalman_nue=0.9987,
+        block_size=5120,
+    ):
+        defaults = {"lr": 0.1, "kalman_nue": kalman_nue, "block_size": block_size}
+
+        super().__init__(params, defaults)
+
+        self._params = self.param_groups[0]["params"]
+
+        if len(self.param_groups) != 1 or len(self._params) == 0:
+            raise ValueError(
+                "LKF doesn't support per-parameter options " "(parameter groups)"
+            )
+
+        # NOTE: LKF has only global state, but we register it as state for
+        # the first param, because this helps with casting in load_state_dict
+        self._state = self.state[self._params[0]]
+        self._state.setdefault("kalman_lambda", kalman_lambda)
+        self.dist_init = dist.is_initialized()
+        self.rank = dist.get_rank() if self.dist_init else 0
+        self.dindex = []
+        self.remainder = 0
+        self.__init_P()
+
+    def __init_P(self):
+        param_nums = []
+        param_sum = 0
+        block_size = self.__get_blocksize()
+        data_type = self._params[0].dtype
+        device = self._params[0].device
+
+        for param_group in self.param_groups:
+            params = param_group["params"]
+            for param in params:
+                param_num = param.data.nelement()
+                if param_sum + param_num > block_size:
+                    if param_sum > 0:
+                        param_nums.append(param_sum)
+                    param_sum = param_num
+                else:
+                    param_sum += param_num
+
+        param_nums.append(param_sum)
+
+        P = []
+        params_packed_index = []
+        logging.info("LKF parameter nums: %s" % param_nums)
+        if self.dist_init:
+            block_num = 0
+            for param_num in param_nums:
+                if param_num >= block_size:
+                    block_num += math.ceil(param_num / block_size)
+                else:
+                    block_num += 1
+            num_workers = dist.get_world_size()
+            self.dindex, self.remainder = distribute_indices(block_num, num_workers)
+            index = 0
+            for param_num in param_nums:
+                if param_num >= block_size:
+                    block_num = math.ceil(param_num / block_size)
+                    for i in range(block_num):
+                        device_id = self.get_device_id(index)
+                        index += 1
+                        dist_device = torch.device("cuda:" + str(device_id))
+                        if i != block_num - 1:
+                            params_packed_index.append(block_size)
+                            if self.rank == device_id:
+                                P.append(
+                                    torch.eye(
+                                        block_size,
+                                        dtype=data_type,
+                                        device=dist_device,
+                                    )
+                                )
+                            else:
+                                continue
+                        else:
+                            params_packed_index.append(param_num - block_size * i)
+                            if self.rank == device_id:
+                                P.append(
+                                    torch.eye(
+                                        param_num - block_size * i,
+                                        dtype=data_type,
+                                        device=dist_device,
+                                    )
+                                )
+                            else:
+                                continue
+
+                else:
+                    device_id = self.get_device_id(index)
+                    index += 1
+                    params_packed_index.append(param_num)
+                    if self.rank == device_id:
+                        dist_device = torch.device("cuda:" + str(device_id))
+                        P.append(
+                            torch.eye(param_num, dtype=data_type, device=dist_device)
+                        )
+        else:
+            for param_num in param_nums:
+                if param_num >= block_size:
+                    block_num = math.ceil(param_num / block_size)
+                    for i in range(block_num):
+                        if i != block_num - 1:
+                            P.append(
+                                torch.eye(
+                                    block_size,
+                                    dtype=data_type,
+                                    device=device,
+                                )
+                            )
+                            params_packed_index.append(block_size)
+                        else:
+                            P.append(
+                                torch.eye(
+                                    param_num - block_size * i,
+                                    dtype=data_type,
+                                    device=device,
+                                )
+                            )
+                            params_packed_index.append(param_num - block_size * i)
+                else:
+                    P.append(torch.eye(param_num, dtype=data_type, device=device))
+                    params_packed_index.append(param_num)
+
+        self._state.setdefault("P", P)
+        self._state.setdefault("weights_num", len(P))
+        self._state.setdefault("params_packed_index", params_packed_index)
+
+    def __get_blocksize(self):
+        return self.param_groups[0]["block_size"]
+
+    def __get_nue(self):
+        return self.param_groups[0]["kalman_nue"]
+
+    def __split_weights(self, weight):
+        block_size = self.__get_blocksize()
+        param_num = weight.nelement()
+        res = []
+        if param_num < block_size:
+            res.append(weight)
+        else:
+            block_num = math.ceil(param_num / block_size)
+            for i in range(block_num):
+                if i != block_num - 1:
+                    res.append(weight[i * block_size : (i + 1) * block_size])
+                else:
+                    res.append(weight[i * block_size :])
+        return res
+
+    def __update(self, H, error, weights):
+        P = self._state.get("P")
+        kalman_lambda = self._state.get("kalman_lambda")
+        weights_num = self._state.get("weights_num")
+        params_packed_index = self._state.get("params_packed_index")
+
+        block_size = self.__get_blocksize()
+        kalman_nue = self.__get_nue()
+
+        tmp = 0
+        for i in range(weights_num):
+            tmp = tmp + (kalman_lambda + torch.matmul(torch.matmul(H[i].T, P[i]), H[i]))
+        if self.dist_init:
+            dist.all_reduce(tmp, op=dist.ReduceOp.SUM)
+        A = 1 / tmp
+        for i in range(weights_num):
+            K = torch.matmul(P[i], H[i])
+
+            weights[i] = weights[i] + A * error * K
+
+            P[i] = (1 / kalman_lambda) * (P[i] - A * torch.matmul(K, K.T))
+        if self.dist_init:
+            device = torch.device("cuda:" + str(self.rank))
+            local_shape = [tensor.shape[0] for tensor in weights]
+            shape_list = [
+                torch.zeros_like(torch.empty(1), dtype=torch.float64, device=device)
+                for _ in range(dist.get_world_size())
+            ]
+            dist.all_gather_object(shape_list, local_shape)
+            weight_tensor = torch.cat(weights)
+            world_shape = [sum(inner_list) for inner_list in shape_list]
+            weight_list = [None] * len(world_shape)
+            for i in range(len(world_shape)):
+                weight_list[i] = torch.zeros(
+                    world_shape[i], dtype=torch.float64, device=device
+                )
+            dist.all_gather(weight_list, weight_tensor)
+            result = []
+            for i in range(dist.get_world_size()):
+                result = result + list(torch.split(weight_list[i], shape_list[i]))
+            weights = result
+        kalman_lambda = kalman_nue * kalman_lambda + 1 - kalman_nue
+        self._state.update({"kalman_lambda": kalman_lambda})
+
+        i = 0
+        param_sum = 0
+        for param_group in self.param_groups:
+            params = param_group["params"]
+            for param in params:
+                param_num = param.nelement()
+                weight_tmp = weights[i][param_sum : param_sum + param_num]
+                if param_num < block_size:
+                    if param.ndim > 1:
+                        param.data = weight_tmp.reshape(
+                            param.data.T.shape
+                        ).T.contiguous()
+                    else:
+                        param.data = weight_tmp.reshape(param.data.shape)
+
+                    param_sum += param_num
+
+                    if param_sum == params_packed_index[i]:
+                        i += 1
+                        param_sum = 0
+                else:
+                    block_num = math.ceil(param_num / block_size)
+                    for j in range(block_num):
+                        if j == 0:
+                            tmp_weight = weights[i]
+                        else:
+                            tmp_weight = torch.concat([tmp_weight, weights[i]], dim=0)
+                        i += 1
+                    param.data = tmp_weight.reshape(param.data.T.shape).T.contiguous()
+
+    def set_grad_prefactor(self, grad_prefactor):
+        self.grad_prefactor = grad_prefactor
+
+    def step(self, error):
+        params_packed_index = self._state.get("params_packed_index")
+
+        weights = []
+        H = []
+        param_index = 0
+        param_sum = 0
+
+        for param in self._params:
+            if param.ndim > 1:
+                tmp = param.data.T.contiguous().reshape(param.data.nelement(), 1)
+                if param.grad is None:
+                    tmp_grad = torch.zeros_like(tmp)
+                else:
+                    tmp_grad = (
+                        (param.grad / self.grad_prefactor)
+                        .T.contiguous()
+                        .reshape(param.grad.nelement(), 1)
+                    )
+            else:
+                tmp = param.data.reshape(param.data.nelement(), 1)
+                if param.grad is None:
+                    tmp_grad = torch.zeros_like(tmp)
+                else:
+                    tmp_grad = (param.grad / self.grad_prefactor).reshape(
+                        param.grad.nelement(), 1
+                    )
+
+            tmp = self.__split_weights(tmp)
+            tmp_grad = self.__split_weights(tmp_grad)
+
+            for split_grad, split_weight in zip(tmp_grad, tmp):
+                nelement = split_grad.nelement()
+
+                if param_sum == 0:
+                    res_grad = split_grad
+                    res = split_weight
+                else:
+                    res_grad = torch.concat((res_grad, split_grad), dim=0)
+                    res = torch.concat((res, split_weight), dim=0)
+
+                param_sum += nelement
+
+                if param_sum == params_packed_index[param_index]:
+                    param_sum = 0
+                    if self.dist_init:
+                        device_id = self.get_device_id(param_index)
+                        if self.rank == device_id:
+                            weights.append(res)
+                            H.append(res_grad)
+                    else:
+                        weights.append(res)
+                        H.append(res_grad)
+                    param_index += 1
+
+        self.__update(H, error, weights)
+
+    def get_device_id(self, index):
+        for i, (start, end) in enumerate(self.dindex):
+            if start <= index < end:
+                return i
+        return None
diff --git a/deepmd/pt/optimizer/__init__.py b/deepmd/pt/optimizer/__init__.py
new file mode 100644
index 0000000000..db340b3bb9
--- /dev/null
+++ b/deepmd/pt/optimizer/__init__.py
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from .KFWrapper import (
+    KFOptimizerWrapper,
+)
+from .LKF import (
+    LKFOptimizer,
+)
+
+__all__ = ["KFOptimizerWrapper", "LKFOptimizer"]
diff --git a/deepmd/pt/train/__init__.py b/deepmd/pt/train/__init__.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/deepmd/pt/train/__init__.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
new file mode 100644
index 0000000000..ff1c350f47
--- /dev/null
+++ b/deepmd/pt/train/training.py
@@ -0,0 +1,1150 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import functools
+import logging
+import time
+from copy import (
+    deepcopy,
+)
+from pathlib import (
+    Path,
+)
+from typing import (
+    Any,
+    Dict,
+)
+
+import numpy as np
+import torch
+
+from deepmd.common import (
+    symlink_prefix_files,
+)
+from deepmd.loggers.training import (
+    format_training_message,
+    format_training_message_per_task,
+)
+from deepmd.pt.loss import (
+    DenoiseLoss,
+    DOSLoss,
+    EnergySpinLoss,
+    EnergyStdLoss,
+    TensorLoss,
+)
+from deepmd.pt.model.model import (
+    EnergyModel,
+    get_model,
+    get_zbl_model,
+)
+from deepmd.pt.optimizer import (
+    KFOptimizerWrapper,
+    LKFOptimizer,
+)
+from deepmd.pt.train.wrapper import (
+    ModelWrapper,
+)
+from deepmd.pt.utils import (
+    dp_random,
+)
+from deepmd.pt.utils.dataloader import (
+    BufferedIterator,
+    get_weighted_sampler,
+)
+from deepmd.pt.utils.env import (
+    DEVICE,
+    JIT,
+    LOCAL_RANK,
+    NUM_WORKERS,
+    SAMPLER_RECORD,
+)
+from deepmd.pt.utils.learning_rate import (
+    LearningRateExp,
+)
+from deepmd.pt.utils.stat import (
+    make_stat_input,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+)
+from deepmd.utils.data import (
+    DataRequirementItem,
+)
+
+if torch.__version__.startswith("2"):
+    import torch._dynamo
+
+import torch.distributed as dist
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.utils.data import (
+    DataLoader,
+)
+
+from deepmd.utils.path import (
+    DPH5Path,
+)
+
+log = logging.getLogger(__name__)
+
+
+class Trainer:
+    def __init__(
+        self,
+        config: Dict[str, Any],
+        training_data,
+        stat_file_path=None,
+        validation_data=None,
+        init_model=None,
+        restart_model=None,
+        finetune_model=None,
+        force_load=False,
+        shared_links=None,
+        finetune_links=None,
+        init_frz_model=None,
+    ):
+        """Construct a DeePMD trainer.
+
+        Args:
+        - config: The Dict-like configuration with training options.
+        """
+        if init_model is not None:
+            resume_model = init_model
+        elif restart_model is not None:
+            resume_model = restart_model
+        elif finetune_model is not None:
+            resume_model = finetune_model
+        else:
+            resume_model = None
+        resuming = resume_model is not None
+        self.restart_training = restart_model is not None
+        model_params = config["model"]
+        training_params = config["training"]
+        self.multi_task = "model_dict" in model_params
+        self.finetune_links = finetune_links
+        self.model_keys = (
+            list(model_params["model_dict"]) if self.multi_task else ["Default"]
+        )
+        self.rank = dist.get_rank() if dist.is_initialized() else 0
+        self.world_size = dist.get_world_size() if dist.is_initialized() else 1
+        self.num_model = len(self.model_keys)
+
+        # Iteration config
+        self.num_steps = training_params["numb_steps"]
+        self.disp_file = training_params.get("disp_file", "lcurve.out")
+        self.disp_freq = training_params.get("disp_freq", 1000)
+        self.save_ckpt = training_params.get("save_ckpt", "model.ckpt")
+        self.save_freq = training_params.get("save_freq", 1000)
+        self.max_ckpt_keep = training_params.get("max_ckpt_keep", 5)
+        self.lcurve_should_print_header = True
+
+        def get_opt_param(params):
+            opt_type = params.get("opt_type", "Adam")
+            opt_param = {
+                "kf_blocksize": params.get("kf_blocksize", 5120),
+                "kf_start_pref_e": params.get("kf_start_pref_e", 1),
+                "kf_limit_pref_e": params.get("kf_limit_pref_e", 1),
+                "kf_start_pref_f": params.get("kf_start_pref_f", 1),
+                "kf_limit_pref_f": params.get("kf_limit_pref_f", 1),
+            }
+            return opt_type, opt_param
+
+        def get_data_loader(_training_data, _validation_data, _training_params):
+            def get_dataloader_and_buffer(_data, _params):
+                if "auto_prob" in _training_params["training_data"]:
+                    _sampler = get_weighted_sampler(
+                        _data, _params["training_data"]["auto_prob"]
+                    )
+                elif "sys_probs" in _training_params["training_data"]:
+                    _sampler = get_weighted_sampler(
+                        _data,
+                        _params["training_data"]["sys_probs"],
+                        sys_prob=True,
+                    )
+                else:
+                    _sampler = get_weighted_sampler(_data, "prob_sys_size")
+
+                if _sampler is None:
+                    log.warning(
+                        "Sampler not specified!"
+                    )  # None sampler will lead to a premature stop iteration. Replacement should be True in attribute of the sampler to produce expected number of items in one iteration.
+                _dataloader = DataLoader(
+                    _data,
+                    sampler=_sampler,
+                    batch_size=None,
+                    num_workers=NUM_WORKERS,  # setting to 0 diverges the behavior of its iterator; should be >=1
+                    drop_last=False,
+                    pin_memory=True,
+                )
+                with torch.device("cpu"):
+                    _data_buffered = BufferedIterator(iter(_dataloader))
+                return _dataloader, _data_buffered
+
+            training_dataloader, training_data_buffered = get_dataloader_and_buffer(
+                _training_data, _training_params
+            )
+
+            if _validation_data is not None:
+                (
+                    validation_dataloader,
+                    validation_data_buffered,
+                ) = get_dataloader_and_buffer(_validation_data, _training_params)
+                valid_numb_batch = _training_params["validation_data"].get(
+                    "numb_btch", 1
+                )
+            else:
+                validation_dataloader = None
+                validation_data_buffered = None
+                valid_numb_batch = 1
+            return (
+                training_dataloader,
+                training_data_buffered,
+                validation_dataloader,
+                validation_data_buffered,
+                valid_numb_batch,
+            )
+
+        def single_model_stat(
+            _model,
+            _data_stat_nbatch,
+            _training_data,
+            _validation_data,
+            _stat_file_path,
+            _data_requirement,
+        ):
+            if _model.get_dim_fparam() > 0:
+                fparam_requirement_items = [
+                    DataRequirementItem(
+                        "fparam", _model.get_dim_fparam(), atomic=False, must=True
+                    )
+                ]
+                _data_requirement += fparam_requirement_items
+            if _model.get_dim_aparam() > 0:
+                aparam_requirement_items = [
+                    DataRequirementItem(
+                        "aparam", _model.get_dim_aparam(), atomic=True, must=True
+                    )
+                ]
+                _data_requirement += aparam_requirement_items
+            has_spin = getattr(_model, "has_spin", False)
+            if callable(has_spin):
+                has_spin = has_spin()
+            if has_spin:
+                spin_requirement_items = [
+                    DataRequirementItem("spin", ndof=3, atomic=True, must=True)
+                ]
+                _data_requirement += spin_requirement_items
+            _training_data.add_data_requirement(_data_requirement)
+            if _validation_data is not None:
+                _validation_data.add_data_requirement(_data_requirement)
+
+            @functools.lru_cache
+            def get_sample():
+                sampled = make_stat_input(
+                    _training_data.systems,
+                    _training_data.dataloaders,
+                    _data_stat_nbatch,
+                )
+                return sampled
+
+            if not resuming and self.rank == 0:
+                _model.compute_or_load_stat(
+                    sampled_func=get_sample,
+                    stat_file_path=_stat_file_path,
+                )
+                if isinstance(_stat_file_path, DPH5Path):
+                    _stat_file_path.root.close()
+            return get_sample
+
+        def get_single_model(
+            _model_params,
+        ):
+            if "use_srtab" in _model_params:
+                model = get_zbl_model(deepcopy(_model_params)).to(DEVICE)
+            else:
+                model = get_model(deepcopy(_model_params)).to(DEVICE)
+            return model
+
+        def get_lr(lr_params):
+            assert (
+                lr_params.get("type", "exp") == "exp"
+            ), "Only learning rate `exp` is supported!"
+            lr_params["stop_steps"] = self.num_steps - self.warmup_steps
+            lr_exp = LearningRateExp(**lr_params)
+            return lr_exp
+
+        def get_loss(loss_params, start_lr, _ntypes, _model):
+            loss_type = loss_params.get("type", "ener")
+            if loss_type == "ener":
+                loss_params["starter_learning_rate"] = start_lr
+                return EnergyStdLoss(**loss_params)
+            elif loss_type == "dos":
+                loss_params["starter_learning_rate"] = start_lr
+                loss_params["numb_dos"] = _model.model_output_def()["dos"].output_size
+                return DOSLoss(**loss_params)
+            elif loss_type == "ener_spin":
+                loss_params["starter_learning_rate"] = start_lr
+                return EnergySpinLoss(**loss_params)
+            elif loss_type == "denoise":
+                loss_params["ntypes"] = _ntypes
+                return DenoiseLoss(**loss_params)
+            elif loss_type == "tensor":
+                model_output_type = _model.model_output_type()
+                if "mask" in model_output_type:
+                    model_output_type.pop(model_output_type.index("mask"))
+                tensor_name = model_output_type[0]
+                loss_params["tensor_name"] = tensor_name
+                loss_params["tensor_size"] = _model.model_output_def()[
+                    tensor_name
+                ].output_size
+                label_name = tensor_name
+                if label_name == "polar":
+                    label_name = "polarizability"
+                loss_params["label_name"] = label_name
+                return TensorLoss(**loss_params)
+            else:
+                raise NotImplementedError
+
+        # Optimizer
+        if self.multi_task and training_params.get("optim_dict", None) is not None:
+            self.optim_dict = training_params.get("optim_dict")
+            missing_keys = [
+                key for key in self.model_keys if key not in self.optim_dict
+            ]
+            assert (
+                not missing_keys
+            ), f"These keys are not in optim_dict: {missing_keys}!"
+            self.opt_type = {}
+            self.opt_param = {}
+            for model_key in self.model_keys:
+                self.opt_type[model_key], self.opt_param[model_key] = get_opt_param(
+                    self.optim_dict[model_key]
+                )
+        else:
+            self.opt_type, self.opt_param = get_opt_param(training_params)
+
+        # Model
+        dp_random.seed(training_params["seed"])
+        if not self.multi_task:
+            self.model = get_single_model(
+                model_params,
+            )
+        else:
+            self.model = {}
+            for model_key in self.model_keys:
+                self.model[model_key] = get_single_model(
+                    model_params["model_dict"][model_key],
+                )
+
+        # Loss
+        if not self.multi_task:
+            self.loss = get_loss(
+                config["loss"],
+                config["learning_rate"]["start_lr"],
+                len(model_params["type_map"]),
+                self.model,
+            )
+        else:
+            self.loss = {}
+            for model_key in self.model_keys:
+                loss_param = config["loss_dict"][model_key]
+                if config.get("learning_rate_dict", None) is not None:
+                    lr_param = config["learning_rate_dict"][model_key]["start_lr"]
+                else:
+                    lr_param = config["learning_rate"]["start_lr"]
+                ntypes = len(model_params["model_dict"][model_key]["type_map"])
+                self.loss[model_key] = get_loss(
+                    loss_param, lr_param, ntypes, self.model[model_key]
+                )
+
+        # Data
+        dp_random.seed(training_params["seed"])
+        if not self.multi_task:
+            self.get_sample_func = single_model_stat(
+                self.model,
+                model_params.get("data_stat_nbatch", 10),
+                training_data,
+                validation_data,
+                stat_file_path,
+                self.loss.label_requirement,
+            )
+            (
+                self.training_dataloader,
+                self.training_data,
+                self.validation_dataloader,
+                self.validation_data,
+                self.valid_numb_batch,
+            ) = get_data_loader(training_data, validation_data, training_params)
+            training_data.print_summary(
+                "training", to_numpy_array(self.training_dataloader.sampler.weights)
+            )
+            if validation_data is not None:
+                validation_data.print_summary(
+                    "validation",
+                    to_numpy_array(self.validation_dataloader.sampler.weights),
+                )
+        else:
+            (
+                self.training_dataloader,
+                self.training_data,
+                self.validation_dataloader,
+                self.validation_data,
+                self.valid_numb_batch,
+                self.get_sample_func,
+            ) = {}, {}, {}, {}, {}, {}
+            for model_key in self.model_keys:
+                self.get_sample_func[model_key] = single_model_stat(
+                    self.model[model_key],
+                    model_params["model_dict"][model_key].get("data_stat_nbatch", 10),
+                    training_data[model_key],
+                    validation_data[model_key],
+                    stat_file_path[model_key],
+                    self.loss[model_key].label_requirement,
+                )
+                (
+                    self.training_dataloader[model_key],
+                    self.training_data[model_key],
+                    self.validation_dataloader[model_key],
+                    self.validation_data[model_key],
+                    self.valid_numb_batch[model_key],
+                ) = get_data_loader(
+                    training_data[model_key],
+                    validation_data[model_key],
+                    training_params["data_dict"][model_key],
+                )
+
+                training_data[model_key].print_summary(
+                    f"training in {model_key}",
+                    to_numpy_array(self.training_dataloader[model_key].sampler.weights),
+                )
+                if (
+                    validation_data is not None
+                    and validation_data[model_key] is not None
+                ):
+                    validation_data[model_key].print_summary(
+                        f"validation in {model_key}",
+                        to_numpy_array(
+                            self.validation_dataloader[model_key].sampler.weights
+                        ),
+                    )
+
+        # Learning rate
+        self.warmup_steps = training_params.get("warmup_steps", 0)
+        self.gradient_max_norm = training_params.get("gradient_max_norm", 0.0)
+        assert (
+            self.num_steps - self.warmup_steps > 0 or self.warmup_steps == 0
+        ), "Warm up steps must be less than total training steps!"
+        if self.multi_task and config.get("learning_rate_dict", None) is not None:
+            self.lr_exp = {}
+            for model_key in self.model_keys:
+                self.lr_exp[model_key] = get_lr(config["learning_rate_dict"][model_key])
+        else:
+            self.lr_exp = get_lr(config["learning_rate"])
+
+        # JIT
+        if JIT:
+            self.model = torch.jit.script(self.model)
+
+        # Model Wrapper
+        self.wrapper = ModelWrapper(self.model, self.loss, model_params=model_params)
+        self.start_step = 0
+
+        # resuming and finetune
+        optimizer_state_dict = None
+        if resuming:
+            ntest = model_params.get("data_bias_nsample", 1)
+            origin_model = (
+                finetune_model if finetune_model is not None else resume_model
+            )
+            log.info(f"Resuming from {origin_model}.")
+            state_dict = torch.load(origin_model, map_location=DEVICE)
+            if "model" in state_dict:
+                optimizer_state_dict = (
+                    state_dict["optimizer"] if finetune_model is None else None
+                )
+                state_dict = state_dict["model"]
+            self.start_step = (
+                state_dict["_extra_state"]["train_infos"]["step"]
+                if self.restart_training
+                else 0
+            )
+            if self.rank == 0:
+                if force_load:
+                    input_keys = list(state_dict.keys())
+                    target_keys = list(self.wrapper.state_dict().keys())
+                    missing_keys = [
+                        item for item in target_keys if item not in input_keys
+                    ]
+                    if missing_keys:
+                        target_state_dict = self.wrapper.state_dict()
+                        slim_keys = []
+                        for item in missing_keys:
+                            state_dict[item] = target_state_dict[item].clone().detach()
+                            new_key = True
+                            for slim_key in slim_keys:
+                                if slim_key in item:
+                                    new_key = False
+                                    break
+                            if new_key:
+                                tmp_keys = ".".join(item.split(".")[:3])
+                                slim_keys.append(tmp_keys)
+                        slim_keys = [i + ".*" for i in slim_keys]
+                        log.warning(
+                            f"Force load mode allowed! These keys are not in ckpt and will re-init: {slim_keys}"
+                        )
+
+                if finetune_model is not None:
+                    new_state_dict = {}
+                    target_state_dict = self.wrapper.state_dict()
+
+                    def update_single_finetune_params(
+                        _model_key,
+                        _model_key_from,
+                        _new_state_dict,
+                        _origin_state_dict,
+                        _random_state_dict,
+                        _new_fitting=False,
+                    ):
+                        target_keys = [
+                            i
+                            for i in _random_state_dict.keys()
+                            if i != "_extra_state" and f".{_model_key}." in i
+                        ]
+                        for item_key in target_keys:
+                            if _new_fitting and ".fitting_net." in item_key:
+                                # print(f'Keep {item_key} in old model!')
+                                _new_state_dict[item_key] = (
+                                    _random_state_dict[item_key].clone().detach()
+                                )
+                            else:
+                                new_key = item_key.replace(
+                                    f".{_model_key}.", f".{_model_key_from}."
+                                )
+                                # print(f'Replace {item_key} with {new_key} in pretrained_model!')
+                                _new_state_dict[item_key] = (
+                                    _origin_state_dict[new_key].clone().detach()
+                                )
+
+                    if not self.multi_task:
+                        model_key = "Default"
+                        model_key_from = self.finetune_links[model_key]
+                        new_fitting = model_params.pop("new_fitting", False)
+                        update_single_finetune_params(
+                            model_key,
+                            model_key_from,
+                            new_state_dict,
+                            state_dict,
+                            target_state_dict,
+                            _new_fitting=new_fitting,
+                        )
+                    else:
+                        for model_key in self.model_keys:
+                            if model_key in self.finetune_links:
+                                model_key_from = self.finetune_links[model_key]
+                                new_fitting = model_params["model_dict"][model_key].pop(
+                                    "new_fitting", False
+                                )
+                            else:
+                                model_key_from = model_key
+                                new_fitting = False
+                            update_single_finetune_params(
+                                model_key,
+                                model_key_from,
+                                new_state_dict,
+                                state_dict,
+                                target_state_dict,
+                                _new_fitting=new_fitting,
+                            )
+                    state_dict = new_state_dict
+                    state_dict["_extra_state"] = self.wrapper.state_dict()[
+                        "_extra_state"
+                    ]
+                self.wrapper.load_state_dict(state_dict)
+
+                if finetune_model is not None:
+
+                    def single_model_finetune(
+                        _model,
+                        _model_params,
+                        _sample_func,
+                    ):
+                        old_type_map, new_type_map = (
+                            _model_params["type_map"],
+                            _model_params["new_type_map"],
+                        )
+                        if isinstance(_model, EnergyModel):
+                            _model.change_out_bias(
+                                _sample_func,
+                                bias_adjust_mode=_model_params.get(
+                                    "bias_adjust_mode", "change-by-statistic"
+                                ),
+                                origin_type_map=new_type_map,
+                                full_type_map=old_type_map,
+                            )
+                        else:
+                            # need to updated
+                            pass
+
+                    # finetune
+                    if not self.multi_task:
+                        single_model_finetune(
+                            self.model, model_params, self.get_sample_func
+                        )
+                    else:
+                        for model_key in self.model_keys:
+                            if model_key in self.finetune_links:
+                                log.info(
+                                    f"Model branch {model_key} will be fine-tuned. This may take a long time..."
+                                )
+                                single_model_finetune(
+                                    self.model[model_key],
+                                    model_params["model_dict"][model_key],
+                                    self.get_sample_func[model_key],
+                                )
+                            else:
+                                log.info(
+                                    f"Model branch {model_key} will resume training."
+                                )
+
+        if init_frz_model is not None:
+            frz_model = torch.jit.load(init_frz_model, map_location=DEVICE)
+            self.model.load_state_dict(frz_model.state_dict())
+
+        # Multi-task share params
+        if shared_links is not None:
+            self.wrapper.share_params(shared_links, resume=resuming or self.rank != 0)
+
+        if dist.is_initialized():
+            torch.cuda.set_device(LOCAL_RANK)
+            # DDP will guarantee the model parameters are identical across all processes
+            self.wrapper = DDP(
+                self.wrapper,
+                device_ids=[LOCAL_RANK],
+                find_unused_parameters=True,
+                output_device=LOCAL_RANK,
+            )
+
+        # TODO add lr warmups for multitask
+        # author: iProzd
+        def warm_up_linear(step, warmup_steps):
+            if step < warmup_steps:
+                return step / warmup_steps
+            else:
+                return self.lr_exp.value(step - warmup_steps) / self.lr_exp.start_lr
+
+        # TODO add optimizers for multitask
+        # author: iProzd
+        if self.opt_type == "Adam":
+            self.optimizer = torch.optim.Adam(
+                self.wrapper.parameters(), lr=self.lr_exp.start_lr
+            )
+            if optimizer_state_dict is not None and self.restart_training:
+                self.optimizer.load_state_dict(optimizer_state_dict)
+            self.scheduler = torch.optim.lr_scheduler.LambdaLR(
+                self.optimizer,
+                lambda step: warm_up_linear(step + self.start_step, self.warmup_steps),
+            )
+        elif self.opt_type == "LKF":
+            self.optimizer = LKFOptimizer(
+                self.wrapper.parameters(), 0.98, 0.99870, self.opt_param["kf_blocksize"]
+            )
+        else:
+            raise ValueError("Not supported optimizer type '%s'" % self.opt_type)
+
+        # Get model prob for multi-task
+        if self.multi_task:
+            self.model_prob = np.array([0.0 for key in self.model_keys])
+            if training_params.get("model_prob", None) is not None:
+                model_prob = training_params["model_prob"]
+                for ii, model_key in enumerate(self.model_keys):
+                    if model_key in model_prob:
+                        self.model_prob[ii] += float(model_prob[model_key])
+            else:
+                for ii, model_key in enumerate(self.model_keys):
+                    self.model_prob[ii] += float(len(self.training_data[model_key]))
+            sum_prob = np.sum(self.model_prob)
+            assert sum_prob > 0.0, "Sum of model prob must be larger than 0!"
+            self.model_prob = self.model_prob / sum_prob
+
+        # Tensorboard
+        self.enable_tensorboard = training_params.get("tensorboard", False)
+        self.tensorboard_log_dir = training_params.get("tensorboard_log_dir", "log")
+        self.tensorboard_freq = training_params.get("tensorboard_freq", 1)
+        self.enable_profiler = training_params.get("enable_profiler", False)
+
+    def run(self):
+        fout = (
+            open(self.disp_file, mode="w", buffering=1) if self.rank == 0 else None
+        )  # line buffered
+        if SAMPLER_RECORD:
+            record_file = f"Sample_rank_{self.rank}.txt"
+            fout1 = open(record_file, mode="w", buffering=1)
+        log.info("Start to train %d steps.", self.num_steps)
+        if dist.is_initialized():
+            log.info(f"Rank: {dist.get_rank()}/{dist.get_world_size()}")
+        if self.enable_tensorboard:
+            from torch.utils.tensorboard import (
+                SummaryWriter,
+            )
+
+            writer = SummaryWriter(log_dir=self.tensorboard_log_dir)
+        if self.enable_profiler:
+            prof = torch.profiler.profile(
+                schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=1),
+                on_trace_ready=torch.profiler.tensorboard_trace_handler(
+                    self.tensorboard_log_dir
+                ),
+                record_shapes=True,
+                with_stack=True,
+            )
+            prof.start()
+
+        def step(_step_id, task_key="Default"):
+            # PyTorch Profiler
+            if self.enable_profiler:
+                prof.step()
+            self.wrapper.train()
+            if isinstance(self.lr_exp, dict):
+                _lr = self.lr_exp[task_key]
+            else:
+                _lr = self.lr_exp
+            cur_lr = _lr.value(_step_id)
+            pref_lr = cur_lr
+            self.optimizer.zero_grad(set_to_none=True)
+            input_dict, label_dict, log_dict = self.get_data(
+                is_train=True, task_key=task_key
+            )
+            if SAMPLER_RECORD:
+                print_str = f"Step {_step_id}: sample system{log_dict['sid']}  frame{log_dict['fid']}\n"
+                fout1.write(print_str)
+                fout1.flush()
+            if self.opt_type == "Adam":
+                cur_lr = self.scheduler.get_last_lr()[0]
+                if _step_id < self.warmup_steps:
+                    pref_lr = _lr.start_lr
+                else:
+                    pref_lr = cur_lr
+                model_pred, loss, more_loss = self.wrapper(
+                    **input_dict, cur_lr=pref_lr, label=label_dict, task_key=task_key
+                )
+                loss.backward()
+                if self.gradient_max_norm > 0.0:
+                    grad_norm = torch.nn.utils.clip_grad_norm_(
+                        self.wrapper.parameters(), self.gradient_max_norm
+                    )
+                    if not torch.isfinite(grad_norm).all():
+                        # check local gradnorm single GPU case, trigger NanDetector
+                        raise FloatingPointError("gradients are Nan/Inf")
+                with torch.device("cpu"):
+                    self.optimizer.step()
+                self.scheduler.step()
+            elif self.opt_type == "LKF":
+                if isinstance(self.loss, EnergyStdLoss):
+                    KFOptWrapper = KFOptimizerWrapper(
+                        self.wrapper, self.optimizer, 24, 6, dist.is_initialized()
+                    )
+                    pref_e = self.opt_param["kf_start_pref_e"] * (
+                        self.opt_param["kf_limit_pref_e"]
+                        / self.opt_param["kf_start_pref_e"]
+                    ) ** (_step_id / self.num_steps)
+                    _ = KFOptWrapper.update_energy(
+                        input_dict, label_dict["energy"], pref_e
+                    )
+                    pref_f = self.opt_param["kf_start_pref_f"] * (
+                        self.opt_param["kf_limit_pref_f"]
+                        / self.opt_param["kf_start_pref_f"]
+                    ) ** (_step_id / self.num_steps)
+                    p_energy, p_force = KFOptWrapper.update_force(
+                        input_dict, label_dict["force"], pref_f
+                    )
+                    # [coord, atype, natoms, mapping, shift, nlist, box]
+                    model_pred = {"energy": p_energy, "force": p_force}
+                    module = (
+                        self.wrapper.module if dist.is_initialized() else self.wrapper
+                    )
+
+                    def fake_model():
+                        return model_pred
+
+                    _, loss, more_loss = module.loss[task_key](
+                        {},
+                        fake_model,
+                        label_dict,
+                        int(input_dict["atype"].shape[-1]),
+                        learning_rate=pref_lr,
+                    )
+                elif isinstance(self.loss, DenoiseLoss):
+                    KFOptWrapper = KFOptimizerWrapper(
+                        self.wrapper, self.optimizer, 24, 6, dist.is_initialized()
+                    )
+                    module = (
+                        self.wrapper.module if dist.is_initialized() else self.wrapper
+                    )
+                    model_pred = KFOptWrapper.update_denoise_coord(
+                        input_dict,
+                        label_dict["clean_coord"],
+                        1,
+                        module.loss[task_key].mask_loss_coord,
+                        label_dict["coord_mask"],
+                    )
+                    loss, more_loss = module.loss[task_key](
+                        model_pred,
+                        label_dict,
+                        input_dict["natoms"],
+                        learning_rate=pref_lr,
+                    )
+            else:
+                raise ValueError("Not supported optimizer type '%s'" % self.opt_type)
+
+            # Log and persist
+            if _step_id % self.disp_freq == 0:
+                self.wrapper.eval()
+
+                def log_loss_train(_loss, _more_loss, _task_key="Default"):
+                    results = {}
+                    rmse_val = {
+                        item: _more_loss[item]
+                        for item in _more_loss
+                        if "l2_" not in item
+                    }
+                    for item in sorted(rmse_val.keys()):
+                        results[item] = rmse_val[item]
+                    return results
+
+                def log_loss_valid(_task_key="Default"):
+                    single_results = {}
+                    sum_natoms = 0
+                    if not self.multi_task:
+                        valid_numb_batch = self.valid_numb_batch
+                    else:
+                        valid_numb_batch = self.valid_numb_batch[_task_key]
+                    for ii in range(valid_numb_batch):
+                        self.optimizer.zero_grad()
+                        input_dict, label_dict, _ = self.get_data(
+                            is_train=False, task_key=_task_key
+                        )
+                        if input_dict == {}:
+                            # no validation data
+                            return {}
+                        _, loss, more_loss = self.wrapper(
+                            **input_dict,
+                            cur_lr=pref_lr,
+                            label=label_dict,
+                            task_key=_task_key,
+                        )
+                        # more_loss.update({"rmse": math.sqrt(loss)})
+                        natoms = int(input_dict["atype"].shape[-1])
+                        sum_natoms += natoms
+                        for k, v in more_loss.items():
+                            if "l2_" not in k:
+                                single_results[k] = (
+                                    single_results.get(k, 0.0) + v * natoms
+                                )
+                    results = {k: v / sum_natoms for k, v in single_results.items()}
+                    return results
+
+                if not self.multi_task:
+                    train_results = log_loss_train(loss, more_loss)
+                    valid_results = log_loss_valid()
+                    if self.rank == 0:
+                        log.info(
+                            format_training_message_per_task(
+                                batch=_step_id,
+                                task_name="trn",
+                                rmse=train_results,
+                                learning_rate=cur_lr,
+                            )
+                        )
+                        if valid_results:
+                            log.info(
+                                format_training_message_per_task(
+                                    batch=_step_id,
+                                    task_name="val",
+                                    rmse=valid_results,
+                                    learning_rate=None,
+                                )
+                            )
+                else:
+                    train_results = {_key: {} for _key in self.model_keys}
+                    valid_results = {_key: {} for _key in self.model_keys}
+                    train_results[task_key] = log_loss_train(
+                        loss, more_loss, _task_key=task_key
+                    )
+                    for _key in self.model_keys:
+                        if _key != task_key:
+                            self.optimizer.zero_grad()
+                            input_dict, label_dict, _ = self.get_data(
+                                is_train=True, task_key=_key
+                            )
+                            _, loss, more_loss = self.wrapper(
+                                **input_dict,
+                                cur_lr=pref_lr,
+                                label=label_dict,
+                                task_key=_key,
+                            )
+                            train_results[_key] = log_loss_train(
+                                loss, more_loss, _task_key=_key
+                            )
+                        valid_results[_key] = log_loss_valid(_task_key=_key)
+                        if self.rank == 0:
+                            log.info(
+                                format_training_message_per_task(
+                                    batch=_step_id,
+                                    task_name=_key + "_trn",
+                                    rmse=train_results[_key],
+                                    learning_rate=cur_lr,
+                                )
+                            )
+                            if valid_results is not None and valid_results[_key]:
+                                log.info(
+                                    format_training_message_per_task(
+                                        batch=_step_id,
+                                        task_name=_key + "_val",
+                                        rmse=valid_results[_key],
+                                        learning_rate=None,
+                                    )
+                                )
+
+                current_time = time.time()
+                train_time = current_time - self.t0
+                self.t0 = current_time
+                if self.rank == 0:
+                    log.info(
+                        format_training_message(
+                            batch=_step_id,
+                            wall_time=train_time,
+                        )
+                    )
+
+                if fout:
+                    if self.lcurve_should_print_header:
+                        self.print_header(fout, train_results, valid_results)
+                        self.lcurve_should_print_header = False
+                    self.print_on_training(
+                        fout, _step_id, cur_lr, train_results, valid_results
+                    )
+
+            if (
+                ((_step_id + 1) % self.save_freq == 0 and _step_id != self.start_step)
+                or (_step_id + 1) == self.num_steps
+            ) and (self.rank == 0 or dist.get_rank() == 0):
+                # Handle the case if rank 0 aborted and re-assigned
+                self.latest_model = Path(self.save_ckpt + f"-{_step_id + 1}.pt")
+
+                module = self.wrapper.module if dist.is_initialized() else self.wrapper
+                self.save_model(self.latest_model, lr=cur_lr, step=_step_id)
+                log.info(f"Saved model to {self.latest_model}")
+                symlink_prefix_files(self.latest_model.stem, self.save_ckpt)
+                with open("checkpoint", "w") as f:
+                    f.write(str(self.latest_model))
+
+            # tensorboard
+            if self.enable_tensorboard and _step_id % self.tensorboard_freq == 0:
+                writer.add_scalar(f"{task_key}/lr", cur_lr, _step_id)
+                writer.add_scalar(f"{task_key}/loss", loss, _step_id)
+                for item in more_loss:
+                    writer.add_scalar(f"{task_key}/{item}", more_loss[item], _step_id)
+
+        self.t0 = time.time()
+        for step_id in range(self.num_steps):
+            if step_id < self.start_step:
+                continue
+            if self.multi_task:
+                chosen_index_list = dp_random.choice(
+                    np.arange(self.num_model),
+                    p=np.array(self.model_prob),
+                    size=self.world_size,
+                    replace=True,
+                )
+                assert chosen_index_list.size == self.world_size
+                model_index = chosen_index_list[self.rank]
+                model_key = self.model_keys[model_index]
+            else:
+                model_key = "Default"
+            step(step_id, model_key)
+            if JIT:
+                break
+
+        if (
+            self.rank == 0 or dist.get_rank() == 0
+        ):  # Handle the case if rank 0 aborted and re-assigned
+            if self.num_steps == 0:
+                # when num_steps is 0, the checkpoint is never not saved
+                self.latest_model = Path(self.save_ckpt + "-0.pt")
+                self.save_model(self.latest_model, lr=0, step=0)
+                log.info(f"Saved model to {self.latest_model}")
+                symlink_prefix_files(self.latest_model.stem, self.save_ckpt)
+                with open("checkpoint", "w") as f:
+                    f.write(str(self.latest_model))
+
+            if JIT:
+                pth_model_path = (
+                    "frozen_model.pth"  # We use .pth to denote the frozen model
+                )
+                self.model.save(pth_model_path)
+                log.info(
+                    f"Frozen model for inferencing has been saved to {pth_model_path}"
+                )
+            log.info(f"Trained model has been saved to: {self.save_ckpt}")
+
+        if fout:
+            fout.close()
+        if SAMPLER_RECORD:
+            fout1.close()
+        if self.enable_tensorboard:
+            writer.close()
+        if self.enable_profiler:
+            prof.stop()
+
+    def save_model(self, save_path, lr=0.0, step=0):
+        module = self.wrapper.module if dist.is_initialized() else self.wrapper
+        module.train_infos["lr"] = lr
+        module.train_infos["step"] = step
+        torch.save(
+            {"model": module.state_dict(), "optimizer": self.optimizer.state_dict()},
+            save_path,
+        )
+        checkpoint_dir = save_path.parent
+        checkpoint_files = [
+            f
+            for f in checkpoint_dir.glob("*.pt")
+            if not f.is_symlink() and f.name.startswith(self.save_ckpt)
+        ]
+        if len(checkpoint_files) > self.max_ckpt_keep:
+            checkpoint_files.sort(key=lambda x: x.stat().st_mtime)
+            checkpoint_files[0].unlink()
+
+    def get_data(self, is_train=True, task_key="Default"):
+        if not self.multi_task:
+            if is_train:
+                try:
+                    batch_data = next(iter(self.training_data))
+                except StopIteration:
+                    # Refresh the status of the dataloader to start from a new epoch
+                    with torch.device("cpu"):
+                        self.training_data = BufferedIterator(
+                            iter(self.training_dataloader)
+                        )
+                    batch_data = next(iter(self.training_data))
+            else:
+                if self.validation_data is None:
+                    return {}, {}, {}
+                try:
+                    batch_data = next(iter(self.validation_data))
+                except StopIteration:
+                    self.validation_data = BufferedIterator(
+                        iter(self.validation_dataloader)
+                    )
+                    batch_data = next(iter(self.validation_data))
+        else:
+            if is_train:
+                try:
+                    batch_data = next(iter(self.training_data[task_key]))
+                except StopIteration:
+                    # Refresh the status of the dataloader to start from a new epoch
+                    self.training_data[task_key] = BufferedIterator(
+                        iter(self.training_dataloader[task_key])
+                    )
+                    batch_data = next(iter(self.training_data[task_key]))
+            else:
+                if self.validation_data[task_key] is None:
+                    return {}, {}, {}
+                try:
+                    batch_data = next(iter(self.validation_data[task_key]))
+                except StopIteration:
+                    self.validation_data[task_key] = BufferedIterator(
+                        iter(self.validation_dataloader[task_key])
+                    )
+                    batch_data = next(iter(self.validation_data[task_key]))
+
+        for key in batch_data.keys():
+            if key == "sid" or key == "fid" or key == "box":
+                continue
+            elif not isinstance(batch_data[key], list):
+                if batch_data[key] is not None:
+                    batch_data[key] = batch_data[key].to(DEVICE, non_blocking=True)
+            else:
+                batch_data[key] = [
+                    item.to(DEVICE, non_blocking=True) for item in batch_data[key]
+                ]
+        # we may need a better way to classify which are inputs and which are labels
+        # now wrapper only supports the following inputs:
+        input_keys = [
+            "coord",
+            "atype",
+            "spin",
+            "box",
+            "fparam",
+            "aparam",
+        ]
+        input_dict = {item_key: None for item_key in input_keys}
+        label_dict = {}
+        for item_key in batch_data:
+            if item_key in input_keys:
+                input_dict[item_key] = batch_data[item_key]
+            else:
+                if item_key not in ["sid", "fid"]:
+                    label_dict[item_key] = batch_data[item_key]
+        log_dict = {}
+        if "fid" in batch_data:
+            log_dict["fid"] = batch_data["fid"]
+        log_dict["sid"] = batch_data["sid"]
+        return input_dict, label_dict, log_dict
+
+    def print_header(self, fout, train_results, valid_results):
+        train_keys = sorted(train_results.keys())
+        print_str = ""
+        print_str += "# %5s" % "step"
+        if not self.multi_task:
+            if valid_results is not None:
+                prop_fmt = "   %11s %11s"
+                for k in train_keys:
+                    print_str += prop_fmt % (k + "_val", k + "_trn")
+            else:
+                prop_fmt = "   %11s"
+                for k in train_keys:
+                    print_str += prop_fmt % (k + "_trn")
+        else:
+            for model_key in self.model_keys:
+                if valid_results[model_key] is not None:
+                    prop_fmt = "   %11s %11s"
+                    for k in sorted(train_results[model_key].keys()):
+                        print_str += prop_fmt % (
+                            k + f"_val_{model_key}",
+                            k + f"_trn_{model_key}",
+                        )
+                else:
+                    prop_fmt = "   %11s"
+                    for k in sorted(train_results[model_key].keys()):
+                        print_str += prop_fmt % (k + f"_trn_{model_key}")
+        print_str += "   %8s\n" % "lr"
+        print_str += "# If there is no available reference data, rmse_*_{val,trn} will print nan\n"
+        fout.write(print_str)
+        fout.flush()
+
+    def print_on_training(self, fout, step_id, cur_lr, train_results, valid_results):
+        train_keys = sorted(train_results.keys())
+        print_str = ""
+        print_str += "%7d" % step_id
+        if not self.multi_task:
+            if valid_results:
+                prop_fmt = "   %11.2e %11.2e"
+                for k in train_keys:
+                    print_str += prop_fmt % (valid_results[k], train_results[k])
+            else:
+                prop_fmt = "   %11.2e"
+                for k in train_keys:
+                    print_str += prop_fmt % (train_results[k])
+        else:
+            for model_key in self.model_keys:
+                if valid_results[model_key]:
+                    prop_fmt = "   %11.2e %11.2e"
+                    for k in sorted(valid_results[model_key].keys()):
+                        print_str += prop_fmt % (
+                            valid_results[model_key][k],
+                            train_results[model_key][k],
+                        )
+                else:
+                    prop_fmt = "   %11.2e"
+                    for k in sorted(train_results[model_key].keys()):
+                        print_str += prop_fmt % (train_results[model_key][k])
+        print_str += "   %8.1e\n" % cur_lr
+        fout.write(print_str)
+        fout.flush()
diff --git a/deepmd/pt/train/wrapper.py b/deepmd/pt/train/wrapper.py
new file mode 100644
index 0000000000..6bc7cdc87a
--- /dev/null
+++ b/deepmd/pt/train/wrapper.py
@@ -0,0 +1,196 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
+from typing import (
+    Dict,
+    Optional,
+    Union,
+)
+
+import torch
+
+if torch.__version__.startswith("2"):
+    import torch._dynamo
+
+
+log = logging.getLogger(__name__)
+
+
+class ModelWrapper(torch.nn.Module):
+    def __init__(
+        self,
+        model: Union[torch.nn.Module, Dict],
+        loss: Union[torch.nn.Module, Dict] = None,
+        model_params=None,
+        shared_links=None,
+    ):
+        """Construct a DeePMD model wrapper.
+
+        Args:
+        - config: The Dict-like configuration with training options.
+        """
+        super().__init__()
+        self.model_params = model_params if model_params is not None else {}
+        self.train_infos = {
+            "lr": 0,
+            "step": 0,
+        }
+        self.multi_task = False
+        self.model = torch.nn.ModuleDict()
+        # Model
+        if isinstance(model, torch.nn.Module):
+            self.model["Default"] = model
+        elif isinstance(model, dict):
+            self.multi_task = True
+            for task_key in model:
+                assert isinstance(
+                    model[task_key], torch.nn.Module
+                ), f"{task_key} in model_dict is not a torch.nn.Module!"
+                self.model[task_key] = model[task_key]
+        # Loss
+        self.loss = None
+        if loss is not None:
+            self.loss = torch.nn.ModuleDict()
+            if isinstance(loss, torch.nn.Module):
+                self.loss["Default"] = loss
+            elif isinstance(loss, dict):
+                for task_key in loss:
+                    assert isinstance(
+                        loss[task_key], torch.nn.Module
+                    ), f"{task_key} in loss_dict is not a torch.nn.Module!"
+                    self.loss[task_key] = loss[task_key]
+        self.inference_only = self.loss is None
+
+    def share_params(self, shared_links, resume=False):
+        """
+        Share the parameters of classes following rules defined in shared_links during multitask training.
+        If not start from checkpoint (resume is False),
+        some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes.
+        """
+        supported_types = ["descriptor", "fitting_net"]
+        for shared_item in shared_links:
+            class_name = shared_links[shared_item]["type"]
+            shared_base = shared_links[shared_item]["links"][0]
+            class_type_base = shared_base["shared_type"]
+            model_key_base = shared_base["model_key"]
+            shared_level_base = shared_base["shared_level"]
+            if "descriptor" in class_type_base:
+                if class_type_base == "descriptor":
+                    base_class = self.model[model_key_base].get_descriptor()
+                elif "hybrid" in class_type_base:
+                    hybrid_index = int(class_type_base.split("_")[-1])
+                    base_class = (
+                        self.model[model_key_base]
+                        .get_descriptor()
+                        .descriptor_list[hybrid_index]
+                    )
+                else:
+                    raise RuntimeError(f"Unknown class_type {class_type_base}!")
+                for link_item in shared_links[shared_item]["links"][1:]:
+                    class_type_link = link_item["shared_type"]
+                    model_key_link = link_item["model_key"]
+                    shared_level_link = int(link_item["shared_level"])
+                    assert (
+                        shared_level_link >= shared_level_base
+                    ), "The shared_links must be sorted by shared_level!"
+                    assert (
+                        "descriptor" in class_type_link
+                    ), f"Class type mismatched: {class_type_base} vs {class_type_link}!"
+                    if class_type_link == "descriptor":
+                        link_class = self.model[model_key_link].get_descriptor()
+                    elif "hybrid" in class_type_link:
+                        hybrid_index = int(class_type_link.split("_")[-1])
+                        link_class = (
+                            self.model[model_key_link]
+                            .get_descriptor()
+                            .descriptor_list[hybrid_index]
+                        )
+                    else:
+                        raise RuntimeError(f"Unknown class_type {class_type_link}!")
+                    link_class.share_params(
+                        base_class, shared_level_link, resume=resume
+                    )
+                    log.warning(
+                        f"Shared params of {model_key_base}.{class_type_base} and {model_key_link}.{class_type_link}!"
+                    )
+            else:
+                if hasattr(self.model[model_key_base], class_type_base):
+                    base_class = self.model[model_key_base].__getattr__(class_type_base)
+                    for link_item in shared_links[shared_item]["links"][1:]:
+                        class_type_link = link_item["shared_type"]
+                        model_key_link = link_item["model_key"]
+                        shared_level_link = int(link_item["shared_level"])
+                        assert (
+                            shared_level_link >= shared_level_base
+                        ), "The shared_links must be sorted by shared_level!"
+                        assert (
+                            class_type_base == class_type_link
+                        ), f"Class type mismatched: {class_type_base} vs {class_type_link}!"
+                        link_class = self.model[model_key_link].__getattr__(
+                            class_type_link
+                        )
+                        link_class.share_params(
+                            base_class, shared_level_link, resume=resume
+                        )
+                        log.warning(
+                            f"Shared params of {model_key_base}.{class_type_base} and {model_key_link}.{class_type_link}!"
+                        )
+
+    def forward(
+        self,
+        coord,
+        atype,
+        spin: Optional[torch.Tensor] = None,
+        box: Optional[torch.Tensor] = None,
+        cur_lr: Optional[torch.Tensor] = None,
+        label: Optional[torch.Tensor] = None,
+        task_key: Optional[torch.Tensor] = None,
+        inference_only=False,
+        do_atomic_virial=False,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+    ):
+        if not self.multi_task:
+            task_key = "Default"
+        else:
+            assert (
+                task_key is not None
+            ), f"Multitask model must specify the inference task! Supported tasks are {list(self.model.keys())}."
+        input_dict = {
+            "coord": coord,
+            "atype": atype,
+            "box": box,
+            "do_atomic_virial": do_atomic_virial,
+            "fparam": fparam,
+            "aparam": aparam,
+        }
+        has_spin = getattr(self.model[task_key], "has_spin", False)
+        if callable(has_spin):
+            has_spin = has_spin()
+        if has_spin:
+            input_dict["spin"] = spin
+
+        if self.inference_only or inference_only:
+            model_pred = self.model[task_key](**input_dict)
+            return model_pred, None, None
+        else:
+            natoms = atype.shape[-1]
+            model_pred, loss, more_loss = self.loss[task_key](
+                input_dict,
+                self.model[task_key],
+                label,
+                natoms=natoms,
+                learning_rate=cur_lr,
+            )
+            return model_pred, loss, more_loss
+
+    def set_extra_state(self, state: Dict):
+        self.model_params = state["model_params"]
+        self.train_infos = state["train_infos"]
+        return None
+
+    def get_extra_state(self) -> Dict:
+        state = {
+            "model_params": self.model_params,
+            "train_infos": self.train_infos,
+        }
+        return state
diff --git a/deepmd/pt/utils/__init__.py b/deepmd/pt/utils/__init__.py
new file mode 100644
index 0000000000..7e1043eda4
--- /dev/null
+++ b/deepmd/pt/utils/__init__.py
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+
+from .exclude_mask import (
+    AtomExcludeMask,
+    PairExcludeMask,
+)
+
+__all__ = [
+    "PairExcludeMask",
+    "AtomExcludeMask",
+]
diff --git a/deepmd/pt/utils/ase_calc.py b/deepmd/pt/utils/ase_calc.py
new file mode 100644
index 0000000000..6bcb9cdc5e
--- /dev/null
+++ b/deepmd/pt/utils/ase_calc.py
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.calculator import DP as DPCalculator
+
+__all__ = [
+    "DPCalculator",
+]
diff --git a/deepmd/pt/utils/auto_batch_size.py b/deepmd/pt/utils/auto_batch_size.py
new file mode 100644
index 0000000000..13264a336c
--- /dev/null
+++ b/deepmd/pt/utils/auto_batch_size.py
@@ -0,0 +1,149 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Callable,
+    Tuple,
+    Union,
+)
+
+import numpy as np
+import torch
+
+from deepmd.utils.batch_size import AutoBatchSize as AutoBatchSizeBase
+
+
+class AutoBatchSize(AutoBatchSizeBase):
+    """Auto batch size.
+
+    Parameters
+    ----------
+    initial_batch_size : int, default: 1024
+        initial batch size (number of total atoms) when DP_INFER_BATCH_SIZE
+        is not set
+    factor : float, default: 2.
+        increased factor
+
+    """
+
+    def __init__(
+        self,
+        initial_batch_size: int = 1024,
+        factor: float = 2.0,
+    ):
+        super().__init__(
+            initial_batch_size=initial_batch_size,
+            factor=factor,
+        )
+
+    def is_gpu_available(self) -> bool:
+        """Check if GPU is available.
+
+        Returns
+        -------
+        bool
+            True if GPU is available
+        """
+        return torch.cuda.is_available()
+
+    def is_oom_error(self, e: Exception) -> bool:
+        """Check if the exception is an OOM error.
+
+        Parameters
+        ----------
+        e : Exception
+            Exception
+        """
+        return isinstance(e, RuntimeError) and "CUDA out of memory." in e.args[0]
+
+    def execute_all(
+        self, callable: Callable, total_size: int, natoms: int, *args, **kwargs
+    ) -> Tuple[Union[np.ndarray, torch.Tensor]]:
+        """Excuate a method with all given data.
+
+        Parameters
+        ----------
+        callable : Callable
+            The method should accept *args and **kwargs as input and return the similiar array.
+        total_size : int
+            Total size
+        natoms : int
+            The number of atoms
+        *args
+            Variable length argument list.
+        **kwargs
+            If 2D np.ndarray or torch.Tensor, assume the first axis is batch; otherwise do nothing.
+        """
+
+        def execute_with_batch_size(
+            batch_size: int, start_index: int
+        ) -> Tuple[int, Tuple[torch.Tensor]]:
+            end_index = start_index + batch_size
+            end_index = min(end_index, total_size)
+            return (end_index - start_index), callable(
+                *[
+                    (
+                        vv[start_index:end_index]
+                        if (isinstance(vv, np.ndarray) or isinstance(vv, torch.Tensor))
+                        and vv.ndim > 1
+                        else vv
+                    )
+                    for vv in args
+                ],
+                **{
+                    kk: (
+                        vv[start_index:end_index]
+                        if (isinstance(vv, np.ndarray) or isinstance(vv, torch.Tensor))
+                        and vv.ndim > 1
+                        else vv
+                    )
+                    for kk, vv in kwargs.items()
+                },
+            )
+
+        index = 0
+        results = None
+        returned_dict = None
+        while index < total_size:
+            n_batch, result = self.execute(execute_with_batch_size, index, natoms)
+            returned_dict = (
+                isinstance(result, dict) if returned_dict is None else returned_dict
+            )
+            if not returned_dict:
+                result = (result,) if not isinstance(result, tuple) else result
+            index += n_batch
+
+            def append_to_list(res_list, res):
+                if n_batch:
+                    res_list.append(res)
+                return res_list
+
+            if not returned_dict:
+                results = [] if results is None else results
+                results = append_to_list(results, result)
+            else:
+                results = (
+                    {kk: [] for kk in result.keys()} if results is None else results
+                )
+                results = {
+                    kk: append_to_list(results[kk], result[kk]) for kk in result.keys()
+                }
+        assert results is not None
+        assert returned_dict is not None
+
+        def concate_result(r):
+            if isinstance(r[0], np.ndarray):
+                ret = np.concatenate(r, axis=0)
+            elif isinstance(r[0], torch.Tensor):
+                ret = torch.cat(r, dim=0)
+            else:
+                raise RuntimeError(f"Unexpected result type {type(r[0])}")
+            return ret
+
+        if not returned_dict:
+            r_list = [concate_result(r) for r in zip(*results)]
+            r = tuple(r_list)
+            if len(r) == 1:
+                # avoid returning tuple if callable doesn't return tuple
+                r = r[0]
+        else:
+            r = {kk: concate_result(vv) for kk, vv in results.items()}
+        return r
diff --git a/deepmd/pt/utils/cache.py b/deepmd/pt/utils/cache.py
new file mode 100644
index 0000000000..c40c4050b7
--- /dev/null
+++ b/deepmd/pt/utils/cache.py
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy as copy_lib
+import functools
+
+
+def lru_cache(maxsize=16, typed=False, copy=False, deepcopy=False):
+    if deepcopy:
+
+        def decorator(f):
+            cached_func = functools.lru_cache(maxsize, typed)(f)
+
+            @functools.wraps(f)
+            def wrapper(*args, **kwargs):
+                return copy_lib.deepcopy(cached_func(*args, **kwargs))
+
+            return wrapper
+
+    elif copy:
+
+        def decorator(f):
+            cached_func = functools.lru_cache(maxsize, typed)(f)
+
+            @functools.wraps(f)
+            def wrapper(*args, **kwargs):
+                return copy_lib.copy(cached_func(*args, **kwargs))
+
+            return wrapper
+
+    else:
+        decorator = functools.lru_cache(maxsize, typed)
+    return decorator
diff --git a/deepmd/pt/utils/dataloader.py b/deepmd/pt/utils/dataloader.py
new file mode 100644
index 0000000000..361bc4b0b6
--- /dev/null
+++ b/deepmd/pt/utils/dataloader.py
@@ -0,0 +1,305 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
+import os
+import queue
+import time
+from multiprocessing.dummy import (
+    Pool,
+)
+from threading import (
+    Thread,
+)
+from typing import (
+    List,
+)
+
+import h5py
+import numpy as np
+import torch
+import torch.distributed as dist
+import torch.multiprocessing
+from torch.utils.data import (
+    DataLoader,
+    Dataset,
+    WeightedRandomSampler,
+)
+from torch.utils.data._utils.collate import (
+    collate_tensor_fn,
+)
+from torch.utils.data.distributed import (
+    DistributedSampler,
+)
+
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.dataset import (
+    DeepmdDataSetForLoader,
+)
+from deepmd.utils.data import (
+    DataRequirementItem,
+)
+from deepmd.utils.data_system import (
+    print_summary,
+    prob_sys_size_ext,
+    process_sys_probs,
+)
+
+log = logging.getLogger(__name__)
+torch.multiprocessing.set_sharing_strategy("file_system")
+
+
+def setup_seed(seed):
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+
+
+class DpLoaderSet(Dataset):
+    """A dataset for storing DataLoaders to multiple Systems.
+
+    Parameters
+    ----------
+    sys_path
+            Path to the data system
+    batch_size
+            Max frame count in a batch.
+    type_map
+            Gives the name of different atom types
+    seed
+            Random seed for dataloader
+    shuffle
+            If the data are shuffled (Only effective in serial mode. Always shuffle in distributed data parallelism)
+    """
+
+    def __init__(
+        self,
+        systems,
+        batch_size,
+        type_map,
+        seed=10,
+        shuffle=True,
+    ):
+        setup_seed(seed)
+        if isinstance(systems, str):
+            with h5py.File(systems) as file:
+                systems = [os.path.join(systems, item) for item in file.keys()]
+
+        self.systems: List[DeepmdDataSetForLoader] = []
+        if len(systems) >= 100:
+            log.info(f"Constructing DataLoaders from {len(systems)} systems")
+
+        def construct_dataset(system):
+            return DeepmdDataSetForLoader(
+                system=system,
+                type_map=type_map,
+            )
+
+        with Pool(
+            os.cpu_count()
+            // (int(os.environ["LOCAL_WORLD_SIZE"]) if dist.is_initialized() else 1)
+        ) as pool:
+            self.systems = pool.map(construct_dataset, systems)
+
+        self.sampler_list: List[DistributedSampler] = []
+        self.index = []
+        self.total_batch = 0
+
+        self.dataloaders = []
+        self.batch_sizes = []
+        if isinstance(batch_size, str):
+            if batch_size == "auto":
+                rule = 32
+            elif batch_size.startswith("auto:"):
+                rule = int(batch_size.split(":")[1])
+            else:
+                rule = None
+                log.error("Unsupported batch size type")
+            for ii in self.systems:
+                ni = ii._natoms
+                bsi = rule // ni
+                if bsi * ni < rule:
+                    bsi += 1
+                self.batch_sizes.append(bsi)
+        elif isinstance(batch_size, list):
+            self.batch_sizes = batch_size
+        else:
+            self.batch_sizes = batch_size * np.ones(len(systems), dtype=int)
+        assert len(self.systems) == len(self.batch_sizes)
+        for system, batch_size in zip(self.systems, self.batch_sizes):
+            if dist.is_initialized():
+                system_sampler = DistributedSampler(system)
+                self.sampler_list.append(system_sampler)
+            else:
+                system_sampler = None
+            system_dataloader = DataLoader(
+                dataset=system,
+                batch_size=int(batch_size),
+                num_workers=0,  # Should be 0 to avoid too many threads forked
+                sampler=system_sampler,
+                collate_fn=collate_batch,
+                shuffle=(not dist.is_initialized()) and shuffle,
+            )
+            self.dataloaders.append(system_dataloader)
+            self.index.append(len(system_dataloader))
+            self.total_batch += len(system_dataloader)
+        # Initialize iterator instances for DataLoader
+        self.iters = []
+        with torch.device("cpu"):
+            for item in self.dataloaders:
+                self.iters.append(iter(item))
+
+    def set_noise(self, noise_settings):
+        # noise_settings['noise_type'] # "trunc_normal", "normal", "uniform"
+        # noise_settings['noise'] # float, default 1.0
+        # noise_settings['noise_mode'] # "prob", "fix_num"
+        # noise_settings['mask_num'] # if "fix_num", int
+        # noise_settings['mask_prob'] # if "prob", float
+        # noise_settings['same_mask'] # coord and type same mask?
+        for system in self.systems:
+            system.set_noise(noise_settings)
+
+    def __len__(self):
+        return len(self.dataloaders)
+
+    def __getitem__(self, idx):
+        # log.warning(str(torch.distributed.get_rank())+" idx: "+str(idx)+" index: "+str(self.index[idx]))
+        try:
+            batch = next(self.iters[idx])
+        except StopIteration:
+            self.iters[idx] = iter(self.dataloaders[idx])
+            batch = next(self.iters[idx])
+        batch["sid"] = idx
+        return batch
+
+    def add_data_requirement(self, data_requirement: List[DataRequirementItem]):
+        """Add data requirement for each system in multiple systems."""
+        for system in self.systems:
+            system.add_data_requirement(data_requirement)
+
+    def print_summary(
+        self,
+        name: str,
+        prob: List[float],
+    ):
+        print_summary(
+            name,
+            len(self.systems),
+            [ss.system for ss in self.systems],
+            [ss._natoms for ss in self.systems],
+            self.batch_sizes,
+            [
+                ss._data_system.get_sys_numb_batch(self.batch_sizes[ii])
+                for ii, ss in enumerate(self.systems)
+            ],
+            prob,
+            [ss._data_system.pbc for ss in self.systems],
+        )
+
+
+_sentinel = object()
+QUEUESIZE = 32
+
+
+class BackgroundConsumer(Thread):
+    def __init__(self, queue, source, max_len):
+        Thread.__init__(self)
+        self._queue = queue
+        self._source = source  # Main DL iterator
+        self._max_len = max_len  #
+
+    def run(self):
+        for item in self._source:
+            self._queue.put(item)  # Blocking if the queue is full
+
+        # Signal the consumer we are done.
+        self._queue.put(_sentinel)
+
+
+class BufferedIterator:
+    def __init__(self, iterable):
+        self._queue = queue.Queue(QUEUESIZE)
+        self._iterable = iterable
+        self._consumer = None
+
+        self.start_time = time.time()
+        self.warning_time = None
+        self.total = len(iterable)
+
+    def _create_consumer(self):
+        self._consumer = BackgroundConsumer(self._queue, self._iterable, self.total)
+        self._consumer.daemon = True
+        self._consumer.start()
+
+    def __iter__(self):
+        return self
+
+    def __len__(self):
+        return self.total
+
+    def __next__(self):
+        # Create consumer if not created yet
+        if self._consumer is None:
+            self._create_consumer()
+        # Notify the user if there is a data loading bottleneck
+        if self._queue.qsize() < min(2, max(1, self._queue.maxsize // 2)):
+            if time.time() - self.start_time > 5 * 60:
+                if (
+                    self.warning_time is None
+                    or time.time() - self.warning_time > 15 * 60
+                ):
+                    log.warning(
+                        "Data loading buffer is empty or nearly empty. This may "
+                        "indicate a data loading bottleneck, and increasing the "
+                        "number of workers (--num-workers) may help."
+                    )
+                    self.warning_time = time.time()
+
+        # Get next example
+        item = self._queue.get()
+        if isinstance(item, Exception):
+            raise item
+        if item is _sentinel:
+            raise StopIteration
+        return item
+
+
+def collate_batch(batch):
+    example = batch[0]
+    result = {}
+    for key in example.keys():
+        if "find_" in key:
+            result[key] = batch[0][key]
+        else:
+            if batch[0][key] is None:
+                result[key] = None
+            elif key == "fid":
+                result[key] = [d[key] for d in batch]
+            elif key == "type":
+                continue
+            else:
+                result[key] = collate_tensor_fn(
+                    [torch.as_tensor(d[key]) for d in batch]
+                )
+    return result
+
+
+def get_weighted_sampler(training_data, prob_style, sys_prob=False):
+    if sys_prob is False:
+        if prob_style == "prob_uniform":
+            prob_v = 1.0 / float(training_data.__len__())
+            probs = [prob_v for ii in range(training_data.__len__())]
+        else:  # prob_sys_size;A:B:p1;C:D:p2 or prob_sys_size = prob_sys_size;0:nsys:1.0
+            if prob_style == "prob_sys_size":
+                style = f"prob_sys_size;0:{len(training_data)}:1.0"
+            else:
+                style = prob_style
+            probs = prob_sys_size_ext(style, len(training_data), training_data.index)
+    else:
+        probs = process_sys_probs(prob_style, training_data.index)
+    log.debug("Generated weighted sampler with prob array: " + str(probs))
+    # training_data.total_batch is the size of one epoch, you can increase it to avoid too many  rebuilding of iteraters
+    len_sampler = training_data.total_batch * max(env.NUM_WORKERS, 1)
+    with torch.device("cpu"):
+        sampler = WeightedRandomSampler(probs, len_sampler, replacement=True)
+    return sampler
diff --git a/deepmd/pt/utils/dataset.py b/deepmd/pt/utils/dataset.py
new file mode 100644
index 0000000000..dbe4d92a0f
--- /dev/null
+++ b/deepmd/pt/utils/dataset.py
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+
+
+from typing import (
+    List,
+    Optional,
+)
+
+from torch.utils.data import (
+    Dataset,
+)
+
+from deepmd.utils.data import (
+    DataRequirementItem,
+    DeepmdData,
+)
+
+
+class DeepmdDataSetForLoader(Dataset):
+    def __init__(self, system: str, type_map: Optional[List[str]] = None):
+        """Construct DeePMD-style dataset containing frames cross different systems.
+
+        Args:
+        - systems: Paths to systems.
+        - type_map: Atom types.
+        """
+        self.system = system
+        self._type_map = type_map
+        self._data_system = DeepmdData(sys_path=system, type_map=self._type_map)
+        self.mixed_type = self._data_system.mixed_type
+        self._ntypes = self._data_system.get_ntypes()
+        self._natoms = self._data_system.get_natoms()
+        self._natoms_vec = self._data_system.get_natoms_vec(self._ntypes)
+
+    def __len__(self):
+        return self._data_system.nframes
+
+    def __getitem__(self, index):
+        """Get a frame from the selected system."""
+        b_data = self._data_system.get_item_torch(index)
+        b_data["natoms"] = self._natoms_vec
+        return b_data
+
+    def add_data_requirement(self, data_requirement: List[DataRequirementItem]):
+        """Add data requirement for this data system."""
+        for data_item in data_requirement:
+            self._data_system.add(
+                data_item["key"],
+                data_item["ndof"],
+                atomic=data_item["atomic"],
+                must=data_item["must"],
+                high_prec=data_item["high_prec"],
+                type_sel=data_item["type_sel"],
+                repeat=data_item["repeat"],
+                default=data_item["default"],
+                dtype=data_item["dtype"],
+                output_natoms_for_type_sel=data_item["output_natoms_for_type_sel"],
+            )
diff --git a/deepmd/pt/utils/dp_random.py b/deepmd/pt/utils/dp_random.py
new file mode 100644
index 0000000000..e81488c506
--- /dev/null
+++ b/deepmd/pt/utils/dp_random.py
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.utils.random import (
+    choice,
+    random,
+    seed,
+    shuffle,
+)
+
+__all__ = [
+    "choice",
+    "random",
+    "seed",
+    "shuffle",
+]
diff --git a/deepmd/pt/utils/env.py b/deepmd/pt/utils/env.py
new file mode 100644
index 0000000000..d841a9b73c
--- /dev/null
+++ b/deepmd/pt/utils/env.py
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import os
+
+import numpy as np
+import torch
+
+from deepmd.common import (
+    VALID_PRECISION,
+)
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    get_default_nthreads,
+    set_default_nthreads,
+)
+
+SAMPLER_RECORD = os.environ.get("SAMPLER_RECORD", False)
+try:
+    # only linux
+    ncpus = len(os.sched_getaffinity(0))
+except AttributeError:
+    ncpus = os.cpu_count()
+NUM_WORKERS = int(os.environ.get("NUM_WORKERS", min(8, ncpus)))
+# Make sure DDP uses correct device if applicable
+LOCAL_RANK = os.environ.get("LOCAL_RANK")
+LOCAL_RANK = int(0 if LOCAL_RANK is None else LOCAL_RANK)
+
+if os.environ.get("DEVICE") == "cpu" or torch.cuda.is_available() is False:
+    DEVICE = torch.device("cpu")
+else:
+    DEVICE = torch.device(f"cuda:{LOCAL_RANK}")
+
+JIT = False
+CACHE_PER_SYS = 5  # keep at most so many sets per sys in memory
+ENERGY_BIAS_TRAINABLE = True
+
+PRECISION_DICT = {
+    "float16": torch.float16,
+    "float32": torch.float32,
+    "float64": torch.float64,
+    "half": torch.float16,
+    "single": torch.float32,
+    "double": torch.float64,
+    "int32": torch.int32,
+    "int64": torch.int64,
+    "bfloat16": torch.bfloat16,
+}
+GLOBAL_PT_FLOAT_PRECISION = PRECISION_DICT[np.dtype(GLOBAL_NP_FLOAT_PRECISION).name]
+GLOBAL_PT_ENER_FLOAT_PRECISION = PRECISION_DICT[
+    np.dtype(GLOBAL_ENER_FLOAT_PRECISION).name
+]
+PRECISION_DICT["default"] = GLOBAL_PT_FLOAT_PRECISION
+assert VALID_PRECISION.issubset(PRECISION_DICT.keys())
+# cannot automatically generated
+RESERVED_PRECISON_DICT = {
+    torch.float16: "float16",
+    torch.float32: "float32",
+    torch.float64: "float64",
+    torch.int32: "int32",
+    torch.int64: "int64",
+    torch.bfloat16: "bfloat16",
+}
+assert set(PRECISION_DICT.values()) == set(RESERVED_PRECISON_DICT.keys())
+DEFAULT_PRECISION = "float64"
+
+# throw warnings if threads not set
+set_default_nthreads()
+inter_nthreads, intra_nthreads = get_default_nthreads()
+if inter_nthreads > 0:  # the behavior of 0 is not documented
+    torch.set_num_interop_threads(inter_nthreads)
+if intra_nthreads > 0:
+    torch.set_num_threads(intra_nthreads)
+
+__all__ = [
+    "GLOBAL_ENER_FLOAT_PRECISION",
+    "GLOBAL_NP_FLOAT_PRECISION",
+    "GLOBAL_PT_FLOAT_PRECISION",
+    "GLOBAL_PT_ENER_FLOAT_PRECISION",
+    "DEFAULT_PRECISION",
+    "PRECISION_DICT",
+    "RESERVED_PRECISON_DICT",
+    "SAMPLER_RECORD",
+    "NUM_WORKERS",
+    "DEVICE",
+    "JIT",
+    "CACHE_PER_SYS",
+    "ENERGY_BIAS_TRAINABLE",
+    "LOCAL_RANK",
+]
diff --git a/deepmd/pt/utils/env_mat_stat.py b/deepmd/pt/utils/env_mat_stat.py
new file mode 100644
index 0000000000..47e17e9eaa
--- /dev/null
+++ b/deepmd/pt/utils/env_mat_stat.py
@@ -0,0 +1,234 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    TYPE_CHECKING,
+    Dict,
+    Iterator,
+    List,
+    Tuple,
+    Union,
+)
+
+import numpy as np
+import torch
+
+from deepmd.common import (
+    get_hash,
+)
+from deepmd.pt.model.descriptor.env_mat import (
+    prod_env_mat,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.exclude_mask import (
+    PairExcludeMask,
+)
+from deepmd.pt.utils.nlist import (
+    extend_input_and_build_neighbor_list,
+)
+from deepmd.utils.env_mat_stat import EnvMatStat as BaseEnvMatStat
+from deepmd.utils.env_mat_stat import (
+    StatItem,
+)
+
+if TYPE_CHECKING:
+    from deepmd.pt.model.descriptor import (
+        DescriptorBlock,
+    )
+
+
+class EnvMatStat(BaseEnvMatStat):
+    def compute_stat(self, env_mat: Dict[str, torch.Tensor]) -> Dict[str, StatItem]:
+        """Compute the statistics of the environment matrix for a single system.
+
+        Parameters
+        ----------
+        env_mat : torch.Tensor
+            The environment matrix.
+
+        Returns
+        -------
+        Dict[str, StatItem]
+            The statistics of the environment matrix.
+        """
+        stats = {}
+        for kk, vv in env_mat.items():
+            stats[kk] = StatItem(
+                number=vv.numel(),
+                sum=vv.sum().item(),
+                squared_sum=torch.square(vv).sum().item(),
+            )
+        return stats
+
+
+class EnvMatStatSe(EnvMatStat):
+    """Environmental matrix statistics for the se_a/se_r environemntal matrix.
+
+    Parameters
+    ----------
+    descriptor : DescriptorBlock
+        The descriptor of the model.
+    """
+
+    def __init__(self, descriptor: "DescriptorBlock"):
+        super().__init__()
+        self.descriptor = descriptor
+        self.last_dim = (
+            self.descriptor.ndescrpt // self.descriptor.nnei
+        )  # se_r=1, se_a=4
+
+    def iter(
+        self, data: List[Dict[str, Union[torch.Tensor, List[Tuple[int, int]]]]]
+    ) -> Iterator[Dict[str, StatItem]]:
+        """Get the iterator of the environment matrix.
+
+        Parameters
+        ----------
+        data : List[Dict[str, Union[torch.Tensor, List[Tuple[int, int]]]]]
+            The data.
+
+        Yields
+        ------
+        Dict[str, StatItem]
+            The statistics of the environment matrix.
+        """
+        zero_mean = torch.zeros(
+            self.descriptor.get_ntypes(),
+            self.descriptor.get_nsel(),
+            self.last_dim,
+            dtype=env.GLOBAL_PT_FLOAT_PRECISION,
+            device=env.DEVICE,
+        )
+        one_stddev = torch.ones(
+            self.descriptor.get_ntypes(),
+            self.descriptor.get_nsel(),
+            self.last_dim,
+            dtype=env.GLOBAL_PT_FLOAT_PRECISION,
+            device=env.DEVICE,
+        )
+        if self.last_dim == 4:
+            radial_only = False
+        elif self.last_dim == 1:
+            radial_only = True
+        else:
+            raise ValueError(
+                "last_dim should be 1 for raial-only or 4 for full descriptor."
+            )
+        for system in data:
+            coord, atype, box, natoms = (
+                system["coord"],
+                system["atype"],
+                system["box"],
+                system["natoms"],
+            )
+            (
+                extended_coord,
+                extended_atype,
+                mapping,
+                nlist,
+            ) = extend_input_and_build_neighbor_list(
+                coord,
+                atype,
+                self.descriptor.get_rcut(),
+                self.descriptor.get_sel(),
+                mixed_types=self.descriptor.mixed_types(),
+                box=box,
+            )
+            env_mat, _, _ = prod_env_mat(
+                extended_coord,
+                nlist,
+                atype,
+                zero_mean,
+                one_stddev,
+                self.descriptor.get_rcut(),
+                # TODO: export rcut_smth from DescriptorBlock
+                self.descriptor.rcut_smth,
+                radial_only,
+                protection=self.descriptor.env_protection,
+            )
+            # reshape to nframes * nloc at the atom level,
+            # so nframes/mixed_type do not matter
+            env_mat = env_mat.view(
+                coord.shape[0] * coord.shape[1],
+                self.descriptor.get_nsel(),
+                self.last_dim,
+            )
+            atype = atype.view(coord.shape[0] * coord.shape[1])
+            # (1, nloc) eq (ntypes, 1), so broadcast is possible
+            # shape: (ntypes, nloc)
+            type_idx = torch.eq(
+                atype.view(1, -1),
+                torch.arange(
+                    self.descriptor.get_ntypes(), device=env.DEVICE, dtype=torch.int32
+                ).view(-1, 1),
+            )
+            if "pair_exclude_types" in system:
+                # shape: (1, nloc, nnei)
+                exclude_mask = PairExcludeMask(
+                    self.descriptor.get_ntypes(), system["pair_exclude_types"]
+                )(nlist, extended_atype).view(1, coord.shape[0] * coord.shape[1], -1)
+                # shape: (ntypes, nloc, nnei)
+                type_idx = torch.logical_and(type_idx.unsqueeze(-1), exclude_mask)
+            for type_i in range(self.descriptor.get_ntypes()):
+                dd = env_mat[type_idx[type_i]]
+                dd = dd.reshape([-1, self.last_dim])  # typen_atoms * unmasked_nnei, 4
+                env_mats = {}
+                env_mats[f"r_{type_i}"] = dd[:, :1]
+                if self.last_dim == 4:
+                    env_mats[f"a_{type_i}"] = dd[:, 1:]
+                yield self.compute_stat(env_mats)
+
+    def get_hash(self) -> str:
+        """Get the hash of the environment matrix.
+
+        Returns
+        -------
+        str
+            The hash of the environment matrix.
+        """
+        dscpt_type = "se_a" if self.last_dim == 4 else "se_r"
+        return get_hash(
+            {
+                "type": dscpt_type,
+                "ntypes": self.descriptor.get_ntypes(),
+                "rcut": round(self.descriptor.get_rcut(), 2),
+                "rcut_smth": round(self.descriptor.rcut_smth, 2),
+                "nsel": self.descriptor.get_nsel(),
+                "sel": self.descriptor.get_sel(),
+                "mixed_types": self.descriptor.mixed_types(),
+            }
+        )
+
+    def __call__(self):
+        avgs = self.get_avg()
+        stds = self.get_std()
+
+        all_davg = []
+        all_dstd = []
+
+        for type_i in range(self.descriptor.get_ntypes()):
+            if self.last_dim == 4:
+                davgunit = [[avgs[f"r_{type_i}"], 0, 0, 0]]
+                dstdunit = [
+                    [
+                        stds[f"r_{type_i}"],
+                        stds[f"a_{type_i}"],
+                        stds[f"a_{type_i}"],
+                        stds[f"a_{type_i}"],
+                    ]
+                ]
+            elif self.last_dim == 1:
+                davgunit = [[avgs[f"r_{type_i}"]]]
+                dstdunit = [
+                    [
+                        stds[f"r_{type_i}"],
+                    ]
+                ]
+            davg = np.tile(davgunit, [self.descriptor.get_nsel(), 1])
+            dstd = np.tile(dstdunit, [self.descriptor.get_nsel(), 1])
+            all_davg.append(davg)
+            all_dstd.append(dstd)
+
+        mean = np.stack(all_davg)
+        stddev = np.stack(all_dstd)
+        return mean, stddev
diff --git a/deepmd/pt/utils/exclude_mask.py b/deepmd/pt/utils/exclude_mask.py
new file mode 100644
index 0000000000..9ddae3a416
--- /dev/null
+++ b/deepmd/pt/utils/exclude_mask.py
@@ -0,0 +1,154 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    List,
+    Set,
+    Tuple,
+)
+
+import numpy as np
+import torch
+
+from deepmd.pt.utils.utils import (
+    to_torch_tensor,
+)
+
+
+class AtomExcludeMask(torch.nn.Module):
+    """Computes the type exclusion mask for atoms."""
+
+    def __init__(
+        self,
+        ntypes: int,
+        exclude_types: List[int] = [],
+    ):
+        super().__init__()
+        self.reinit(ntypes, exclude_types)
+
+    def reinit(
+        self,
+        ntypes: int,
+        exclude_types: List[int] = [],
+    ):
+        self.ntypes = ntypes
+        self.exclude_types = exclude_types
+        self.type_mask = np.array(
+            [1 if tt_i not in self.exclude_types else 0 for tt_i in range(ntypes)],
+            dtype=np.int32,
+        )
+        self.type_mask = to_torch_tensor(self.type_mask).view([-1])
+
+    def get_exclude_types(self):
+        return self.exclude_types
+
+    def get_type_mask(self):
+        return self.type_mask
+
+    def forward(
+        self,
+        atype: torch.Tensor,
+    ) -> torch.Tensor:
+        """Compute type exclusion mask for atoms.
+
+        Parameters
+        ----------
+        atype
+            The extended atom types. shape: nf x natom
+
+        Returns
+        -------
+        mask
+            The type exclusion mask for atoms. shape: nf x natom
+            Element [ff,ii] being 0 if type(ii) is excluded,
+            otherwise being 1.
+
+        """
+        nf, natom = atype.shape
+        return self.type_mask[atype].view(nf, natom)
+
+
+class PairExcludeMask(torch.nn.Module):
+    """Computes the type exclusion mask for atom pairs."""
+
+    def __init__(
+        self,
+        ntypes: int,
+        exclude_types: List[Tuple[int, int]] = [],
+    ):
+        super().__init__()
+        self.reinit(ntypes, exclude_types)
+
+    def reinit(
+        self,
+        ntypes: int,
+        exclude_types: List[Tuple[int, int]] = [],
+    ):
+        self.ntypes = ntypes
+        self._exclude_types: Set[Tuple[int, int]] = set()
+        for tt in exclude_types:
+            assert len(tt) == 2
+            self._exclude_types.add((tt[0], tt[1]))
+            self._exclude_types.add((tt[1], tt[0]))
+        # ntypes + 1 for nlist masks
+        self.type_mask = np.array(
+            [
+                [
+                    1 if (tt_i, tt_j) not in self._exclude_types else 0
+                    for tt_i in range(ntypes + 1)
+                ]
+                for tt_j in range(ntypes + 1)
+            ],
+            dtype=np.int32,
+        )
+        # (ntypes+1 x ntypes+1)
+        self.type_mask = to_torch_tensor(self.type_mask).view([-1])
+        self.no_exclusion = len(self._exclude_types) == 0
+
+    def get_exclude_types(self):
+        return self._exclude_types
+
+    # may have a better place for this method...
+    def forward(
+        self,
+        nlist: torch.Tensor,
+        atype_ext: torch.Tensor,
+    ) -> torch.Tensor:
+        """Compute type exclusion mask.
+
+        Parameters
+        ----------
+        nlist
+            The neighbor list. shape: nf x nloc x nnei
+        atype_ext
+            The extended aotm types. shape: nf x nall
+
+        Returns
+        -------
+        mask
+            The type exclusion mask of shape: nf x nloc x nnei.
+            Element [ff,ii,jj] being 0 if type(ii), type(nlist[ff,ii,jj]) is excluded,
+            otherwise being 1.
+
+        """
+        if self.no_exclusion:
+            # safely return 1 if nothing is excluded.
+            return torch.ones_like(nlist, dtype=torch.int32, device=nlist.device)
+        nf, nloc, nnei = nlist.shape
+        nall = atype_ext.shape[1]
+        # add virtual atom of type ntypes. nf x nall+1
+        ae = torch.cat(
+            [
+                atype_ext,
+                self.ntypes
+                * torch.ones([nf, 1], dtype=atype_ext.dtype, device=atype_ext.device),
+            ],
+            dim=-1,
+        )
+        type_i = atype_ext[:, :nloc].view(nf, nloc) * (self.ntypes + 1)
+        # nf x nloc x nnei
+        index = torch.where(nlist == -1, nall, nlist).view(nf, nloc * nnei)
+        type_j = torch.gather(ae, 1, index).view(nf, nloc, nnei)
+        type_ij = type_i[:, :, None] + type_j
+        # nf x (nloc x nnei)
+        type_ij = type_ij.view(nf, nloc * nnei)
+        mask = self.type_mask[type_ij].view(nf, nloc, nnei)
+        return mask
diff --git a/deepmd/pt/utils/finetune.py b/deepmd/pt/utils/finetune.py
new file mode 100644
index 0000000000..2de4214070
--- /dev/null
+++ b/deepmd/pt/utils/finetune.py
@@ -0,0 +1,187 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
+from copy import (
+    deepcopy,
+)
+
+import torch
+
+from deepmd.pt.utils import (
+    env,
+)
+
+log = logging.getLogger(__name__)
+
+
+def change_finetune_model_params_single(
+    _single_param_target,
+    _model_param_pretrained,
+    from_multitask=False,
+    model_branch="Default",
+    model_branch_from="",
+):
+    single_config = deepcopy(_single_param_target)
+    trainable_param = {
+        "descriptor": True,
+        "fitting_net": True,
+    }
+    for net_type in trainable_param:
+        if net_type in single_config:
+            trainable_param[net_type] = single_config[net_type].get("trainable", True)
+    if not from_multitask:
+        old_type_map, new_type_map = (
+            _model_param_pretrained["type_map"],
+            single_config["type_map"],
+        )
+        assert set(new_type_map).issubset(
+            old_type_map
+        ), "Only support for smaller type map when finetuning or resuming."
+        single_config = deepcopy(_model_param_pretrained)
+        log.info(
+            f"Change the '{model_branch}' model configurations according to the pretrained one..."
+        )
+        single_config["new_type_map"] = new_type_map
+    else:
+        model_dict_params = _model_param_pretrained["model_dict"]
+        new_fitting = False
+        if model_branch_from == "":
+            model_branch_chosen = next(iter(model_dict_params.keys()))
+            new_fitting = True
+            single_config["bias_adjust_mode"] = (
+                "set-by-statistic"  # fitting net re-init
+            )
+            log.warning(
+                "The fitting net will be re-init instead of using that in the pretrained model! "
+                "The bias_adjust_mode will be set-by-statistic!"
+            )
+        else:
+            model_branch_chosen = model_branch_from
+        assert model_branch_chosen in model_dict_params, (
+            f"No model branch named '{model_branch_chosen}'! "
+            f"Available ones are {list(model_dict_params.keys())}."
+        )
+        single_config_chosen = deepcopy(model_dict_params[model_branch_chosen])
+        old_type_map, new_type_map = (
+            single_config_chosen["type_map"],
+            single_config["type_map"],
+        )
+        assert set(new_type_map).issubset(
+            old_type_map
+        ), "Only support for smaller type map when finetuning or resuming."
+        for key_item in ["type_map", "descriptor"]:
+            if key_item in single_config_chosen:
+                single_config[key_item] = single_config_chosen[key_item]
+        if not new_fitting:
+            single_config["fitting_net"] = single_config_chosen["fitting_net"]
+        log.info(
+            f"Change the '{model_branch}' model configurations according to the model branch "
+            f"'{model_branch_chosen}' in the pretrained one..."
+        )
+        single_config["new_type_map"] = new_type_map
+        single_config["model_branch_chosen"] = model_branch_chosen
+        single_config["new_fitting"] = new_fitting
+    for net_type in trainable_param:
+        if net_type in single_config:
+            single_config[net_type]["trainable"] = trainable_param[net_type]
+        else:
+            single_config[net_type] = {"trainable": trainable_param[net_type]}
+    return single_config
+
+
+def change_finetune_model_params(finetune_model, model_config, model_branch=""):
+    """
+    Load model_params according to the pretrained one.
+    This function modifies the fine-tuning input in different modes as follows:
+    1. Single-task fine-tuning from a single-task pretrained model:
+        - Updates the model parameters based on the pretrained model.
+    2. Single-task fine-tuning from a multi-task pretrained model:
+        - Updates the model parameters based on the selected branch in the pretrained model.
+        - The chosen branch can be defined from the command-line or `finetune_head` input parameter.
+        - If not defined, model parameters in the fitting network will be randomly initialized.
+    3. Multi-task fine-tuning from a single-task pretrained model:
+        - Updates model parameters in each branch based on the single branch ('Default') in the pretrained model.
+        - If `finetune_head` is not set to 'Default',
+          model parameters in the fitting network of the branch will be randomly initialized.
+    4. Multi-task fine-tuning from a multi-task pretrained model:
+        - Updates model parameters in each branch based on the selected branch in the pretrained model.
+        - The chosen branches can be defined from the `finetune_head` input parameter of each model.
+        - If `finetune_head` is not defined and the model_key is the same as in the pretrained model,
+          it will resume from the model_key branch without fine-tuning.
+        - If `finetune_head` is not defined and a new model_key is used,
+          model parameters in the fitting network of the branch will be randomly initialized.
+
+    Parameters
+    ----------
+    finetune_model
+        The pretrained model.
+    model_config
+        The fine-tuning input parameters.
+    model_branch
+        The model branch chosen in command-line mode, only for single-task fine-tuning.
+
+    Returns
+    -------
+    model_config:
+        Updated model parameters.
+    finetune_links:
+        Fine-tuning rules in a dict format, with `model_branch`: `model_branch_from` pairs.
+        If `model_key` is not in this dict, it will do just resuming instead of fine-tuning.
+    """
+    multi_task = "model_dict" in model_config
+    state_dict = torch.load(finetune_model, map_location=env.DEVICE)
+    if "model" in state_dict:
+        state_dict = state_dict["model"]
+    last_model_params = state_dict["_extra_state"]["model_params"]
+    finetune_from_multi_task = "model_dict" in last_model_params
+    finetune_links = {}
+    if not multi_task:
+        # use command-line first
+        if model_branch == "" and "finetune_head" in model_config:
+            model_branch = model_config["finetune_head"]
+        model_config = change_finetune_model_params_single(
+            model_config,
+            last_model_params,
+            from_multitask=finetune_from_multi_task,
+            model_branch="Default",
+            model_branch_from=model_branch,
+        )
+        finetune_links["Default"] = (
+            model_config["model_branch_chosen"]
+            if finetune_from_multi_task
+            else "Default"
+        )
+    else:
+        assert model_branch == "", (
+            "Multi-task fine-tuning does not support command-line branches chosen!"
+            "Please define the 'finetune_head' in each model params!"
+        )
+        target_keys = model_config["model_dict"].keys()
+        if not finetune_from_multi_task:
+            pretrained_keys = ["Default"]
+        else:
+            pretrained_keys = last_model_params["model_dict"].keys()
+        for model_key in target_keys:
+            if "finetune_head" in model_config["model_dict"][model_key]:
+                pretrained_key = model_config["model_dict"][model_key]["finetune_head"]
+                assert pretrained_key in pretrained_keys, (
+                    f"'{pretrained_key}' head chosen to finetune not exist in the pretrained model!"
+                    f"Available heads are: {list(pretrained_keys)}"
+                )
+                model_branch_from = pretrained_key
+                finetune_links[model_key] = model_branch_from
+            elif model_key in pretrained_keys:
+                # not do anything if not defined "finetune_head" in heads that exist in the pretrained model
+                # this will just do resuming
+                model_branch_from = model_key
+            else:
+                # if not defined "finetune_head" in new heads, the fitting net will bre randomly initialized
+                model_branch_from = ""
+                finetune_links[model_key] = next(iter(pretrained_keys))
+            model_config["model_dict"][model_key] = change_finetune_model_params_single(
+                model_config["model_dict"][model_key],
+                last_model_params,
+                from_multitask=finetune_from_multi_task,
+                model_branch=model_key,
+                model_branch_from=model_branch_from,
+            )
+    return model_config, finetune_links
diff --git a/deepmd/pt/utils/learning_rate.py b/deepmd/pt/utils/learning_rate.py
new file mode 100644
index 0000000000..94c657abd4
--- /dev/null
+++ b/deepmd/pt/utils/learning_rate.py
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import numpy as np
+
+
+class LearningRateExp:
+    def __init__(
+        self,
+        start_lr,
+        stop_lr,
+        decay_steps,
+        stop_steps,
+        decay_rate=None,
+        **kwargs,
+    ):
+        """
+        Construct an exponential-decayed learning rate.
+
+        Parameters
+        ----------
+        start_lr
+            The learning rate at the start of the training.
+        stop_lr
+            The desired learning rate at the end of the training.
+            When decay_rate is explicitly set, this value will serve as
+            the minimum learning rate during training. In other words,
+            if the learning rate decays below stop_lr, stop_lr will be applied instead.
+        decay_steps
+            The learning rate is decaying every this number of training steps.
+        stop_steps
+            The total training steps for learning rate scheduler.
+        decay_rate
+            The decay rate for the learning rate.
+            If provided, the decay rate will be set instead of
+            calculating it through interpolation between start_lr and stop_lr.
+        """
+        self.start_lr = start_lr
+        default_ds = 100 if stop_steps // 10 > 100 else stop_steps // 100 + 1
+        self.decay_steps = decay_steps
+        if self.decay_steps >= stop_steps:
+            self.decay_steps = default_ds
+        self.decay_rate = np.exp(
+            np.log(stop_lr / self.start_lr) / (stop_steps / self.decay_steps)
+        )
+        if decay_rate is not None:
+            self.decay_rate = decay_rate
+        self.min_lr = stop_lr
+
+    def value(self, step):
+        """Get the learning rate at the given step."""
+        step_lr = self.start_lr * np.power(self.decay_rate, step // self.decay_steps)
+        if step_lr < self.min_lr:
+            step_lr = self.min_lr
+        return step_lr
diff --git a/deepmd/pt/utils/multi_task.py b/deepmd/pt/utils/multi_task.py
new file mode 100644
index 0000000000..e2076b3b2b
--- /dev/null
+++ b/deepmd/pt/utils/multi_task.py
@@ -0,0 +1,162 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from copy import (
+    deepcopy,
+)
+
+from deepmd.pt.model.descriptor import (
+    BaseDescriptor,
+)
+from deepmd.pt.model.task import (
+    BaseFitting,
+)
+
+
+def preprocess_shared_params(model_config):
+    """Preprocess the model params for multitask model, and generate the links dict for further sharing.
+
+    Args:
+        model_config: Model params of multitask model.
+
+    Returns
+    -------
+    model_config: Preprocessed model params of multitask model.
+        Those string names are replaced with real params in `shared_dict` of model params.
+    shared_links: Dict of link infos for further sharing.
+        Each item, whose key must be in `shared_dict`, is a dict with following keys:
+        - "type": The real class type of this item.
+        - "links": List of shared settings, each sub-item is a dict with following keys:
+            - "model_key": Model key in the `model_dict` to share this item.
+            - "shared_type": Type of this shard item.
+            - "shared_level": Shared level (int) of this item in this model.
+                Lower for more params to share, 0 means to share all params in this item.
+            This list are sorted by "shared_level".
+    For example, if one has `model_config` like this:
+    "model": {
+        "shared_dict": {
+            "my_type_map": ["foo", "bar"],
+            "my_des1": {
+                "type": "se_e2_a",
+                "neuron": [10, 20, 40]
+                },
+        },
+        "model_dict": {
+            "model_1": {
+                "type_map": "my_type_map",
+                "descriptor": "my_des1",
+                "fitting_net": {
+                    "neuron": [100, 100, 100]
+                }
+            },
+            "model_2": {
+                "type_map": "my_type_map",
+                "descriptor": "my_des1",
+                "fitting_net": {
+                    "neuron": [100, 100, 100]
+                }
+            }
+            "model_3": {
+                "type_map": "my_type_map",
+                "descriptor": "my_des1:1",
+                "fitting_net": {
+                    "neuron": [100, 100, 100]
+                }
+            }
+        }
+    }
+    The above config will init three model branches named `model_1` and `model_2` and `model_3`,
+    in which:
+        - `model_2` and `model_3` will have the same `type_map` as that in `model_1`.
+        - `model_2` will share all the parameters of `descriptor` with `model_1`,
+        while `model_3` will share part of parameters of `descriptor` with `model_1`
+        on human-defined share-level `1` (default is `0`, meaning share all the parameters).
+        - `model_1`, `model_2` and `model_3` have three different `fitting_net`s.
+    The returned `model_config` will automatically fulfill the input `model_config` as if there's no sharing,
+    and the `shared_links` will keep all the sharing information with looking:
+    {
+    'my_des1': {
+        'type': 'DescrptSeA',
+        'links': [
+            {'model_key': 'model_1',
+            'shared_type': 'descriptor',
+            'shared_level': 0},
+            {'model_key': 'model_2',
+            'shared_type': 'descriptor',
+            'shared_level': 0},
+            {'model_key': 'model_3',
+            'shared_type': 'descriptor',
+            'shared_level': 1}
+            ]
+        }
+    }
+
+    """
+    assert "model_dict" in model_config, "only multi-task model can use this method!"
+    supported_types = ["type_map", "descriptor", "fitting_net"]
+    shared_dict = model_config.get("shared_dict", {})
+    shared_links = {}
+    type_map_keys = []
+
+    def replace_one_item(params_dict, key_type, key_in_dict, suffix="", index=None):
+        shared_type = key_type
+        shared_key = key_in_dict
+        shared_level = 0
+        if ":" in key_in_dict:
+            shared_key = key_in_dict.split(":")[0]
+            shared_level = int(key_in_dict.split(":")[1])
+        assert (
+            shared_key in shared_dict
+        ), f"Appointed {shared_type} {shared_key} are not in the shared_dict! Please check the input params."
+        if index is None:
+            params_dict[shared_type] = deepcopy(shared_dict[shared_key])
+        else:
+            params_dict[index] = deepcopy(shared_dict[shared_key])
+        if shared_type == "type_map":
+            if key_in_dict not in type_map_keys:
+                type_map_keys.append(key_in_dict)
+        else:
+            if shared_key not in shared_links:
+                class_name = get_class_name(shared_type, shared_dict[shared_key])
+                shared_links[shared_key] = {"type": class_name, "links": []}
+            link_item = {
+                "model_key": model_key,
+                "shared_type": shared_type + suffix,
+                "shared_level": shared_level,
+            }
+            shared_links[shared_key]["links"].append(link_item)
+
+    for model_key in model_config["model_dict"]:
+        model_params_item = model_config["model_dict"][model_key]
+        for item_key in model_params_item:
+            if item_key in supported_types:
+                item_params = model_params_item[item_key]
+                if isinstance(item_params, str):
+                    replace_one_item(model_params_item, item_key, item_params)
+                elif item_params.get("type", "") == "hybrid":
+                    for ii, hybrid_item in enumerate(item_params["list"]):
+                        if isinstance(hybrid_item, str):
+                            replace_one_item(
+                                model_params_item[item_key]["list"],
+                                item_key,
+                                hybrid_item,
+                                suffix=f"_hybrid_{ii}",
+                                index=ii,
+                            )
+    for shared_key in shared_links:
+        shared_links[shared_key]["links"] = sorted(
+            shared_links[shared_key]["links"],
+            key=lambda x: x["shared_level"]
+            - ("spin" in model_config["model_dict"][x["model_key"]]) * 100,
+        )
+        # little trick to make spin models in the front to be the base models,
+        # because its type embeddings are more general.
+    assert len(type_map_keys) == 1, "Multitask model must have only one type_map!"
+    return model_config, shared_links
+
+
+def get_class_name(item_key, item_params):
+    if item_key == "descriptor":
+        return BaseDescriptor.get_class_by_type(item_params.get("type", "se_e2_a"))
+    elif item_key == "fitting_net":
+        return BaseFitting.get_class_by_type(item_params.get("type", "ener"))
+    else:
+        raise RuntimeError(f"Unknown class_name type {item_key}")
diff --git a/deepmd/pt/utils/neighbor_stat.py b/deepmd/pt/utils/neighbor_stat.py
new file mode 100644
index 0000000000..d5b5c74bdc
--- /dev/null
+++ b/deepmd/pt/utils/neighbor_stat.py
@@ -0,0 +1,192 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Iterator,
+    Optional,
+    Tuple,
+)
+
+import numpy as np
+import torch
+
+from deepmd.pt.utils.auto_batch_size import (
+    AutoBatchSize,
+)
+from deepmd.pt.utils.env import (
+    DEVICE,
+)
+from deepmd.pt.utils.nlist import (
+    extend_coord_with_ghosts,
+)
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+)
+from deepmd.utils.neighbor_stat import NeighborStat as BaseNeighborStat
+
+
+class NeighborStatOP(torch.nn.Module):
+    """Class for getting neighbor statics data information.
+
+    Parameters
+    ----------
+    ntypes
+        The num of atom types
+    rcut
+        The cut-off radius
+    mixed_types : bool, optional
+        If True, treat neighbors of all types as a single type.
+    """
+
+    def __init__(
+        self,
+        ntypes: int,
+        rcut: float,
+        mixed_types: bool,
+    ) -> None:
+        super().__init__()
+        self.rcut = rcut
+        self.ntypes = ntypes
+        self.mixed_types = mixed_types
+
+    def forward(
+        self,
+        coord: torch.Tensor,
+        atype: torch.Tensor,
+        cell: Optional[torch.Tensor],
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        """Calculate the neareest neighbor distance between atoms, maximum nbor size of
+        atoms and the output data range of the environment matrix.
+
+        Parameters
+        ----------
+        coord
+            The coordinates of atoms.
+        atype
+            The atom types.
+        cell
+            The cell.
+
+        Returns
+        -------
+        torch.Tensor
+            The minimal squared distance between two atoms, in the shape of (nframes,)
+        torch.Tensor
+            The maximal number of neighbors
+        """
+        nframes = coord.shape[0]
+        coord = coord.view(nframes, -1, 3)
+        nloc = coord.shape[1]
+        coord = coord.view(nframes, nloc * 3)
+        extend_coord, extend_atype, _ = extend_coord_with_ghosts(
+            coord, atype, cell, self.rcut
+        )
+
+        coord1 = extend_coord.reshape(nframes, -1)
+        nall = coord1.shape[1] // 3
+        coord0 = coord1[:, : nloc * 3]
+        diff = (
+            coord1.reshape([nframes, -1, 3])[:, None, :, :]
+            - coord0.reshape([nframes, -1, 3])[:, :, None, :]
+        )
+        assert list(diff.shape) == [nframes, nloc, nall, 3]
+        # remove the diagonal elements
+        mask = torch.eye(nloc, nall, dtype=torch.bool, device=diff.device)
+        diff[:, mask] = torch.inf
+        rr2 = torch.sum(torch.square(diff), dim=-1)
+        min_rr2, _ = torch.min(rr2, dim=-1)
+        # count the number of neighbors
+        if not self.mixed_types:
+            mask = rr2 < self.rcut**2
+            nnei = torch.zeros(
+                (nframes, nloc, self.ntypes), dtype=torch.int32, device=mask.device
+            )
+            for ii in range(self.ntypes):
+                nnei[:, :, ii] = torch.sum(
+                    mask & extend_atype.eq(ii)[:, None, :], dim=-1
+                )
+        else:
+            mask = rr2 < self.rcut**2
+            # virtual types (<0) are not counted
+            nnei = torch.sum(mask & extend_atype.ge(0)[:, None, :], dim=-1).view(
+                nframes, nloc, 1
+            )
+        max_nnei, _ = torch.max(nnei, dim=1)
+        return min_rr2, max_nnei
+
+
+class NeighborStat(BaseNeighborStat):
+    """Neighbor statistics using pure NumPy.
+
+    Parameters
+    ----------
+    ntypes : int
+        The num of atom types
+    rcut : float
+        The cut-off radius
+    mixed_type : bool, optional, default=False
+        Treat all types as a single type.
+    """
+
+    def __init__(
+        self,
+        ntypes: int,
+        rcut: float,
+        mixed_type: bool = False,
+    ) -> None:
+        super().__init__(ntypes, rcut, mixed_type)
+        op = NeighborStatOP(ntypes, rcut, mixed_type)
+        self.op = torch.jit.script(op)
+        self.auto_batch_size = AutoBatchSize()
+
+    def iterator(
+        self, data: DeepmdDataSystem
+    ) -> Iterator[Tuple[np.ndarray, float, str]]:
+        """Abstract method for producing data.
+
+        Yields
+        ------
+        np.ndarray
+            The maximal number of neighbors
+        float
+            The squared minimal distance between two atoms
+        str
+            The directory of the data system
+        """
+        for ii in range(len(data.system_dirs)):
+            for jj in data.data_systems[ii].dirs:
+                data_set = data.data_systems[ii]
+                data_set_data = data_set._load_set(jj)
+                minrr2, max_nnei = self.auto_batch_size.execute_all(
+                    self._execute,
+                    data_set_data["coord"].shape[0],
+                    data_set.get_natoms(),
+                    data_set_data["coord"],
+                    data_set_data["type"],
+                    data_set_data["box"] if data_set.pbc else None,
+                )
+                yield np.max(max_nnei, axis=0), np.min(minrr2), jj
+
+    def _execute(
+        self,
+        coord: np.ndarray,
+        atype: np.ndarray,
+        cell: Optional[np.ndarray],
+    ):
+        """Execute the operation.
+
+        Parameters
+        ----------
+        coord
+            The coordinates of atoms.
+        atype
+            The atom types.
+        cell
+            The cell.
+        """
+        minrr2, max_nnei = self.op(
+            torch.from_numpy(coord).to(DEVICE),
+            torch.from_numpy(atype).to(DEVICE),
+            torch.from_numpy(cell).to(DEVICE) if cell is not None else None,
+        )
+        minrr2 = minrr2.detach().cpu().numpy()
+        max_nnei = max_nnei.detach().cpu().numpy()
+        return minrr2, max_nnei
diff --git a/deepmd/pt/utils/nlist.py b/deepmd/pt/utils/nlist.py
new file mode 100644
index 0000000000..cdee6e3722
--- /dev/null
+++ b/deepmd/pt/utils/nlist.py
@@ -0,0 +1,356 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Dict,
+    List,
+    Optional,
+    Union,
+)
+
+import torch
+
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.region import (
+    normalize_coord,
+    to_face_distance,
+)
+
+
+def extend_input_and_build_neighbor_list(
+    coord,
+    atype,
+    rcut: float,
+    sel: List[int],
+    mixed_types: bool = False,
+    box: Optional[torch.Tensor] = None,
+):
+    nframes, nloc = atype.shape[:2]
+    if box is not None:
+        box_gpu = box.to(coord.device, non_blocking=True)
+        coord_normalized = normalize_coord(
+            coord.view(nframes, nloc, 3),
+            box_gpu.reshape(nframes, 3, 3),
+        )
+    else:
+        box_gpu = None
+        coord_normalized = coord.clone()
+    extended_coord, extended_atype, mapping = extend_coord_with_ghosts(
+        coord_normalized, atype, box_gpu, rcut, box
+    )
+    nlist = build_neighbor_list(
+        extended_coord,
+        extended_atype,
+        nloc,
+        rcut,
+        sel,
+        distinguish_types=(not mixed_types),
+    )
+    extended_coord = extended_coord.view(nframes, -1, 3)
+    return extended_coord, extended_atype, mapping, nlist
+
+
+def build_neighbor_list(
+    coord: torch.Tensor,
+    atype: torch.Tensor,
+    nloc: int,
+    rcut: float,
+    sel: Union[int, List[int]],
+    distinguish_types: bool = True,
+) -> torch.Tensor:
+    """Build neightbor list for a single frame. keeps nsel neighbors.
+
+    Parameters
+    ----------
+    coord : torch.Tensor
+        exptended coordinates of shape [batch_size, nall x 3]
+    atype : torch.Tensor
+        extended atomic types of shape [batch_size, nall]
+        if type < 0 the atom is treat as virtual atoms.
+    nloc : int
+        number of local atoms.
+    rcut : float
+        cut-off radius
+    sel : int or List[int]
+        maximal number of neighbors (of each type).
+        if distinguish_types==True, nsel should be list and
+        the length of nsel should be equal to number of
+        types.
+    distinguish_types : bool
+        distinguish different types.
+
+    Returns
+    -------
+    neighbor_list : torch.Tensor
+        Neighbor list of shape [batch_size, nloc, nsel], the neighbors
+        are stored in an ascending order. If the number of
+        neighbors is less than nsel, the positions are masked
+        with -1. The neighbor list of an atom looks like
+        |------ nsel ------|
+        xx xx xx xx -1 -1 -1
+        if distinguish_types==True and we have two types
+        |---- nsel[0] -----| |---- nsel[1] -----|
+        xx xx xx xx -1 -1 -1 xx xx xx -1 -1 -1 -1
+        For virtual atoms all neighboring positions are filled with -1.
+
+    """
+    batch_size = coord.shape[0]
+    coord = coord.view(batch_size, -1)
+    nall = coord.shape[1] // 3
+    # fill virtual atoms with large coords so they are not neighbors of any
+    # real atom.
+    xmax = torch.max(coord) + 2.0 * rcut
+    # nf x nall
+    is_vir = atype < 0
+    coord1 = torch.where(is_vir[:, :, None], xmax, coord.view(-1, nall, 3)).view(
+        -1, nall * 3
+    )
+    if isinstance(sel, int):
+        sel = [sel]
+    nsel = sum(sel)
+    # nloc x 3
+    coord0 = coord1[:, : nloc * 3]
+    # nloc x nall x 3
+    diff = coord1.view([batch_size, -1, 3]).unsqueeze(1) - coord0.view(
+        [batch_size, -1, 3]
+    ).unsqueeze(2)
+    assert list(diff.shape) == [batch_size, nloc, nall, 3]
+    # nloc x nall
+    rr = torch.linalg.norm(diff, dim=-1)
+    # if central atom has two zero distances, sorting sometimes can not exclude itself
+    rr -= torch.eye(nloc, nall, dtype=rr.dtype, device=rr.device).unsqueeze(0)
+    rr, nlist = torch.sort(rr, dim=-1)
+    # nloc x (nall-1)
+    rr = rr[:, :, 1:]
+    nlist = nlist[:, :, 1:]
+    # nloc x nsel
+    nnei = rr.shape[2]
+    if nsel <= nnei:
+        rr = rr[:, :, :nsel]
+        nlist = nlist[:, :, :nsel]
+    else:
+        rr = torch.cat(
+            [rr, torch.ones([batch_size, nloc, nsel - nnei], device=rr.device) + rcut],
+            dim=-1,
+        )
+        nlist = torch.cat(
+            [
+                nlist,
+                torch.ones(
+                    [batch_size, nloc, nsel - nnei], dtype=nlist.dtype, device=rr.device
+                ),
+            ],
+            dim=-1,
+        )
+    assert list(nlist.shape) == [batch_size, nloc, nsel]
+    nlist = torch.where(
+        torch.logical_or((rr > rcut), is_vir[:, :nloc, None]), -1, nlist
+    )
+
+    if distinguish_types:
+        return nlist_distinguish_types(nlist, atype, sel)
+    else:
+        return nlist
+
+
+def nlist_distinguish_types(
+    nlist: torch.Tensor,
+    atype: torch.Tensor,
+    sel: List[int],
+):
+    """Given a nlist that does not distinguish atom types, return a nlist that
+    distinguish atom types.
+
+    """
+    nf, nloc, nnei = nlist.shape
+    ret_nlist = []
+    # nloc x nall
+    tmp_atype = torch.tile(atype.unsqueeze(1), [1, nloc, 1])
+    mask = nlist == -1
+    # nloc x s(nsel)
+    tnlist = torch.gather(
+        tmp_atype,
+        2,
+        nlist.masked_fill(mask, 0),
+    )
+    tnlist = tnlist.masked_fill(mask, -1)
+    snsel = tnlist.shape[2]
+    for ii, ss in enumerate(sel):
+        # nloc x s(nsel)
+        # to int because bool cannot be sort on GPU
+        pick_mask = (tnlist == ii).to(torch.int32)
+        # nloc x s(nsel), stable sort, nearer neighbors first
+        pick_mask, imap = torch.sort(pick_mask, dim=-1, descending=True, stable=True)
+        # nloc x s(nsel)
+        inlist = torch.gather(nlist, 2, imap)
+        inlist = inlist.masked_fill(~(pick_mask.to(torch.bool)), -1)
+        # nloc x nsel[ii]
+        ret_nlist.append(torch.split(inlist, [ss, snsel - ss], dim=-1)[0])
+    return torch.concat(ret_nlist, dim=-1)
+
+
+# build_neighbor_list = torch.vmap(
+#   build_neighbor_list_lower,
+#   in_dims=(0,0,None,None,None),
+#   out_dims=(0),
+# )
+
+
+def get_multiple_nlist_key(
+    rcut: float,
+    nsel: int,
+) -> str:
+    return str(rcut) + "_" + str(nsel)
+
+
+def build_multiple_neighbor_list(
+    coord: torch.Tensor,
+    nlist: torch.Tensor,
+    rcuts: List[float],
+    nsels: List[int],
+) -> Dict[str, torch.Tensor]:
+    """Input one neighbor list, and produce multiple neighbor lists with
+    different cutoff radius and numbers of selection out of it.  The
+    required rcuts and nsels should be smaller or equal to the input nlist.
+
+    Parameters
+    ----------
+    coord : torch.Tensor
+        exptended coordinates of shape [batch_size, nall x 3]
+    nlist : torch.Tensor
+        Neighbor list of shape [batch_size, nloc, nsel], the neighbors
+        should be stored in an ascending order.
+    rcuts : List[float]
+        list of cut-off radius in ascending order.
+    nsels : List[int]
+        maximal number of neighbors in ascending order.
+
+    Returns
+    -------
+    nlist_dict : Dict[str, torch.Tensor]
+        A dict of nlists, key given by get_multiple_nlist_key(rc, nsel)
+        value being the corresponding nlist.
+
+    """
+    assert len(rcuts) == len(nsels)
+    if len(rcuts) == 0:
+        return {}
+    nb, nloc, nsel = nlist.shape
+    if nsel < nsels[-1]:
+        pad = -1 * torch.ones(
+            [nb, nloc, nsels[-1] - nsel],
+            dtype=nlist.dtype,
+            device=nlist.device,
+        )
+        # nb x nloc x nsel
+        nlist = torch.cat([nlist, pad], dim=-1)
+        nsel = nsels[-1]
+    # nb x nall x 3
+    coord1 = coord.view(nb, -1, 3)
+    nall = coord1.shape[1]
+    # nb x nloc x 3
+    coord0 = coord1[:, :nloc, :]
+    nlist_mask = nlist == -1
+    # nb x (nloc x nsel) x 3
+    index = (
+        nlist.masked_fill(nlist_mask, 0)
+        .view(nb, nloc * nsel)
+        .unsqueeze(-1)
+        .expand(-1, -1, 3)
+    )
+    # nb x nloc x nsel x 3
+    coord2 = torch.gather(coord1, dim=1, index=index).view(nb, nloc, nsel, 3)
+    # nb x nloc x nsel x 3
+    diff = coord2 - coord0[:, :, None, :]
+    # nb x nloc x nsel
+    rr = torch.linalg.norm(diff, dim=-1)
+    rr.masked_fill(nlist_mask, float("inf"))
+    nlist0 = nlist
+    ret = {}
+    for rc, ns in zip(rcuts[::-1], nsels[::-1]):
+        nlist0 = nlist0[:, :, :ns].masked_fill(rr[:, :, :ns] > rc, -1)
+        ret[get_multiple_nlist_key(rc, ns)] = nlist0
+    return ret
+
+
+def extend_coord_with_ghosts(
+    coord: torch.Tensor,
+    atype: torch.Tensor,
+    cell: Optional[torch.Tensor],
+    rcut: float,
+    cell_cpu: Optional[torch.Tensor] = None,
+):
+    """Extend the coordinates of the atoms by appending peridoc images.
+    The number of images is large enough to ensure all the neighbors
+    within rcut are appended.
+
+    Parameters
+    ----------
+    coord : torch.Tensor
+        original coordinates of shape [-1, nloc*3].
+    atype : torch.Tensor
+        atom type of shape [-1, nloc].
+    cell : torch.Tensor
+        simulation cell tensor of shape [-1, 9].
+    rcut : float
+        the cutoff radius
+    cell_cpu : torch.Tensor
+        cell on cpu for performance
+
+    Returns
+    -------
+    extended_coord: torch.Tensor
+        extended coordinates of shape [-1, nall*3].
+    extended_atype: torch.Tensor
+        extended atom type of shape [-1, nall].
+    index_mapping: torch.Tensor
+        maping extended index to the local index
+
+    """
+    device = coord.device
+    nf, nloc = atype.shape
+    aidx = torch.tile(torch.arange(nloc, device=device).unsqueeze(0), [nf, 1])
+    if cell is None:
+        nall = nloc
+        extend_coord = coord.clone()
+        extend_atype = atype.clone()
+        extend_aidx = aidx.clone()
+    else:
+        coord = coord.view([nf, nloc, 3])
+        cell = cell.view([nf, 3, 3])
+        cell_cpu = cell_cpu.view([nf, 3, 3]) if cell_cpu is not None else cell
+        # nf x 3
+        to_face = to_face_distance(cell_cpu)
+        # nf x 3
+        # *2: ghost copies on + and - directions
+        # +1: central cell
+        nbuff = torch.ceil(rcut / to_face).to(torch.long)
+        # 3
+        nbuff = torch.max(nbuff, dim=0, keepdim=False).values
+        nbuff_cpu = nbuff.cpu()
+        xi = torch.arange(-nbuff_cpu[0], nbuff_cpu[0] + 1, 1, device="cpu")
+        yi = torch.arange(-nbuff_cpu[1], nbuff_cpu[1] + 1, 1, device="cpu")
+        zi = torch.arange(-nbuff_cpu[2], nbuff_cpu[2] + 1, 1, device="cpu")
+        eye_3 = torch.eye(3, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device="cpu")
+        xyz = xi.view(-1, 1, 1, 1) * eye_3[0]
+        xyz = xyz + yi.view(1, -1, 1, 1) * eye_3[1]
+        xyz = xyz + zi.view(1, 1, -1, 1) * eye_3[2]
+        xyz = xyz.view(-1, 3)
+        xyz = xyz.to(device=device, non_blocking=True)
+        # ns x 3
+        shift_idx = xyz[torch.argsort(torch.norm(xyz, dim=1))]
+        ns, _ = shift_idx.shape
+        nall = ns * nloc
+        # nf x ns x 3
+        shift_vec = torch.einsum("sd,fdk->fsk", shift_idx, cell)
+        # nf x ns x nloc x 3
+        extend_coord = coord[:, None, :, :] + shift_vec[:, :, None, :]
+        # nf x ns x nloc
+        extend_atype = torch.tile(atype.unsqueeze(-2), [1, ns, 1])
+        # nf x ns x nloc
+        extend_aidx = torch.tile(aidx.unsqueeze(-2), [1, ns, 1])
+    return (
+        extend_coord.reshape([nf, nall * 3]).to(device),
+        extend_atype.view([nf, nall]).to(device),
+        extend_aidx.view([nf, nall]).to(device),
+    )
diff --git a/deepmd/pt/utils/plugin.py b/deepmd/pt/utils/plugin.py
new file mode 100644
index 0000000000..aa901c06e8
--- /dev/null
+++ b/deepmd/pt/utils/plugin.py
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Base of plugin systems."""
+
+from deepmd.utils.plugin import (
+    Plugin,
+    PluginVariant,
+    VariantABCMeta,
+    VariantMeta,
+)
+
+__all__ = [
+    "Plugin",
+    "VariantMeta",
+    "VariantABCMeta",
+    "PluginVariant",
+]
diff --git a/deepmd/pt/utils/preprocess.py b/deepmd/pt/utils/preprocess.py
new file mode 100644
index 0000000000..ed46292f84
--- /dev/null
+++ b/deepmd/pt/utils/preprocess.py
@@ -0,0 +1,305 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
+from typing import (
+    Union,
+)
+
+import torch
+
+from deepmd.pt.utils import (
+    env,
+)
+
+log = logging.getLogger(__name__)
+
+
+class Region3D:
+    def __init__(self, boxt):
+        """Construct a simulation box."""
+        boxt = boxt.reshape([3, 3])
+        self.boxt = boxt  # convert physical coordinates to internal ones
+        self.rec_boxt = torch.linalg.inv(
+            self.boxt
+        )  # convert internal coordinates to physical ones
+
+        self.volume = torch.linalg.det(self.boxt)  # compute the volume
+
+        # boxt = boxt.permute(1, 0)
+        c_yz = torch.cross(boxt[1], boxt[2])
+        self._h2yz = self.volume / torch.linalg.norm(c_yz)
+        c_zx = torch.cross(boxt[2], boxt[0])
+        self._h2zx = self.volume / torch.linalg.norm(c_zx)
+        c_xy = torch.cross(boxt[0], boxt[1])
+        self._h2xy = self.volume / torch.linalg.norm(c_xy)
+
+    def phys2inter(self, coord):
+        """Convert physical coordinates to internal ones."""
+        return coord @ self.rec_boxt
+
+    def inter2phys(self, coord):
+        """Convert internal coordinates to physical ones."""
+        return coord @ self.boxt
+
+    def get_face_distance(self):
+        """Return face distinces to each surface of YZ, ZX, XY."""
+        return torch.stack([self._h2yz, self._h2zx, self._h2xy])
+
+
+def normalize_coord(coord, region: Region3D, nloc: int):
+    """Move outer atoms into region by mirror.
+
+    Args:
+    - coord: shape is [nloc*3]
+    """
+    tmp_coord = coord.clone()
+    inter_cood = torch.remainder(region.phys2inter(tmp_coord), 1.0)
+    tmp_coord = region.inter2phys(inter_cood)
+    return tmp_coord
+
+
+def compute_serial_cid(cell_offset, ncell):
+    """Tell the sequential cell ID in its 3D space.
+
+    Args:
+    - cell_offset: shape is [3]
+    - ncell: shape is [3]
+    """
+    cell_offset[:, 0] *= ncell[1] * ncell[2]
+    cell_offset[:, 1] *= ncell[2]
+    return cell_offset.sum(-1)
+
+
+def compute_pbc_shift(cell_offset, ncell):
+    """Tell shift count to move the atom into region."""
+    shift = torch.zeros_like(cell_offset)
+    shift = shift + (cell_offset < 0) * -(
+        torch.div(cell_offset, ncell, rounding_mode="floor")
+    )
+    shift = shift + (cell_offset >= ncell) * -(
+        torch.div((cell_offset - ncell), ncell, rounding_mode="floor") + 1
+    )
+    assert torch.all(cell_offset + shift * ncell >= 0)
+    assert torch.all(cell_offset + shift * ncell < ncell)
+    return shift
+
+
+def build_inside_clist(coord, region: Region3D, ncell):
+    """Build cell list on atoms inside region.
+
+    Args:
+    - coord: shape is [nloc*3]
+    - ncell: shape is [3]
+    """
+    loc_ncell = int(torch.prod(ncell))  # num of local cells
+    nloc = coord.numel() // 3  # num of local atoms
+    inter_cell_size = 1.0 / ncell
+
+    inter_cood = region.phys2inter(coord.view(-1, 3))
+    cell_offset = torch.floor(inter_cood / inter_cell_size).to(torch.long)
+    # numerical error brought by conversion from phys to inter back and force
+    # may lead to negative value
+    cell_offset[cell_offset < 0] = 0
+    delta = cell_offset - ncell
+    a2c = compute_serial_cid(cell_offset, ncell)  # cell id of atoms
+    arange = torch.arange(0, loc_ncell, 1)
+    cellid = a2c == arange.unsqueeze(-1)  # one hot cellid
+    c2a = cellid.nonzero()
+    lst = []
+    cnt = 0
+    bincount = torch.bincount(a2c, minlength=loc_ncell)
+    for i in range(loc_ncell):
+        n = bincount[i]
+        lst.append(c2a[cnt : cnt + n, 1])
+        cnt += n
+    return a2c, lst
+
+
+def append_neighbors(coord, region: Region3D, atype, rcut: float):
+    """Make ghost atoms who are valid neighbors.
+
+    Args:
+    - coord: shape is [nloc*3]
+    - atype: shape is [nloc]
+    """
+    to_face = region.get_face_distance()
+
+    # compute num and size of local cells
+    ncell = torch.floor(to_face / rcut).to(torch.long)
+    ncell[ncell == 0] = 1
+    cell_size = to_face / ncell
+    ngcell = (
+        torch.floor(rcut / cell_size).to(torch.long) + 1
+    )  # num of cells out of local, which contain ghost atoms
+
+    # add ghost atoms
+    a2c, c2a = build_inside_clist(coord, region, ncell)
+    xi = torch.arange(-ngcell[0], ncell[0] + ngcell[0], 1)
+    yi = torch.arange(-ngcell[1], ncell[1] + ngcell[1], 1)
+    zi = torch.arange(-ngcell[2], ncell[2] + ngcell[2], 1)
+    xyz = xi.view(-1, 1, 1, 1) * torch.tensor([1, 0, 0], dtype=torch.long)
+    xyz = xyz + yi.view(1, -1, 1, 1) * torch.tensor([0, 1, 0], dtype=torch.long)
+    xyz = xyz + zi.view(1, 1, -1, 1) * torch.tensor([0, 0, 1], dtype=torch.long)
+    xyz = xyz.view(-1, 3)
+    mask_a = (xyz >= 0).all(dim=-1)
+    mask_b = (xyz < ncell).all(dim=-1)
+    mask = ~torch.logical_and(mask_a, mask_b)
+    xyz = xyz[mask]  # cell coord
+    shift = compute_pbc_shift(xyz, ncell)
+    coord_shift = region.inter2phys(shift.to(env.GLOBAL_PT_FLOAT_PRECISION))
+    mirrored = shift * ncell + xyz
+    cid = compute_serial_cid(mirrored, ncell)
+
+    n_atoms = coord.shape[0]
+    aid = [c2a[ci] + i * n_atoms for i, ci in enumerate(cid)]
+    aid = torch.cat(aid)
+    tmp = torch.div(aid, n_atoms, rounding_mode="trunc")
+    aid = aid % n_atoms
+    tmp_coord = coord[aid] - coord_shift[tmp]
+    tmp_atype = atype[aid]
+
+    # merge local and ghost atoms
+    merged_coord = torch.cat([coord, tmp_coord])
+    merged_coord_shift = torch.cat([torch.zeros_like(coord), coord_shift[tmp]])
+    merged_atype = torch.cat([atype, tmp_atype])
+    merged_mapping = torch.cat([torch.arange(atype.numel()), aid])
+    return merged_coord_shift, merged_atype, merged_mapping
+
+
+def build_neighbor_list(
+    nloc: int, coord, atype, rcut: float, sec, mapping, type_split=True, min_check=False
+):
+    """For each atom inside region, build its neighbor list.
+
+    Args:
+    - coord: shape is [nall*3]
+    - atype: shape is [nall]
+    """
+    nall = coord.numel() // 3
+    coord = coord.float()
+    nlist = [[] for _ in range(nloc)]
+    coord_l = coord.view(-1, 1, 3)[:nloc]
+    coord_r = coord.view(1, -1, 3)
+    distance = coord_l - coord_r
+    distance = torch.linalg.norm(distance, dim=-1)
+    DISTANCE_INF = distance.max().detach() + rcut
+    distance[:nloc, :nloc] += torch.eye(nloc, dtype=torch.bool) * DISTANCE_INF
+    if min_check:
+        if distance.min().abs() < 1e-6:
+            RuntimeError("Atom dist too close!")
+    if not type_split:
+        sec = sec[-1:]
+    lst = []
+    nlist = torch.zeros((nloc, sec[-1].item())).long() - 1
+    nlist_loc = torch.zeros((nloc, sec[-1].item())).long() - 1
+    nlist_type = torch.zeros((nloc, sec[-1].item())).long() - 1
+    for i, nnei in enumerate(sec):
+        if i > 0:
+            nnei = nnei - sec[i - 1]
+        if not type_split:
+            tmp = distance
+        else:
+            mask = atype.unsqueeze(0) == i
+            tmp = distance + (~mask) * DISTANCE_INF
+        if tmp.shape[1] >= nnei:
+            _sorted, indices = torch.topk(tmp, nnei, dim=1, largest=False)
+        else:
+            # when nnei > nall
+            indices = torch.zeros((nloc, nnei)).long() - 1
+            _sorted = torch.ones((nloc, nnei)).long() * DISTANCE_INF
+            _sorted_nnei, indices_nnei = torch.topk(
+                tmp, tmp.shape[1], dim=1, largest=False
+            )
+            _sorted[:, : tmp.shape[1]] = _sorted_nnei
+            indices[:, : tmp.shape[1]] = indices_nnei
+        mask = (_sorted < rcut).to(torch.long)
+        indices_loc = mapping[indices]
+        indices = indices * mask + -1 * (1 - mask)  # -1 for padding
+        indices_loc = indices_loc * mask + -1 * (1 - mask)  # -1 for padding
+        if i == 0:
+            start = 0
+        else:
+            start = sec[i - 1]
+        end = min(sec[i], start + indices.shape[1])
+        nlist[:, start:end] = indices[:, :nnei]
+        nlist_loc[:, start:end] = indices_loc[:, :nnei]
+        nlist_type[:, start:end] = atype[indices[:, :nnei]] * mask + -1 * (1 - mask)
+    return nlist, nlist_loc, nlist_type
+
+
+def compute_smooth_weight(distance, rmin: float, rmax: float):
+    """Compute smooth weight for descriptor elements."""
+    if rmin >= rmax:
+        raise ValueError("rmin should be less than rmax.")
+    min_mask = distance <= rmin
+    max_mask = distance >= rmax
+    mid_mask = torch.logical_not(torch.logical_or(min_mask, max_mask))
+    uu = (distance - rmin) / (rmax - rmin)
+    vv = uu * uu * uu * (-6 * uu * uu + 15 * uu - 10) + 1
+    return vv * mid_mask + min_mask
+
+
+def make_env_mat(
+    coord,
+    atype,
+    region,
+    rcut: Union[float, list],
+    sec,
+    pbc=True,
+    type_split=True,
+    min_check=False,
+):
+    """Based on atom coordinates, return environment matrix.
+
+    Returns
+    -------
+    nlist: nlist, [nloc, nnei]
+    merged_coord_shift: shift on nall atoms, [nall, 3]
+    merged_mapping: mapping from nall index to nloc index, [nall]
+    """
+    # move outer atoms into cell
+    hybrid = isinstance(rcut, list)
+    _rcut = rcut
+    if hybrid:
+        _rcut = max(rcut)
+    if pbc:
+        merged_coord_shift, merged_atype, merged_mapping = append_neighbors(
+            coord, region, atype, _rcut
+        )
+        merged_coord = coord[merged_mapping] - merged_coord_shift
+        if merged_coord.shape[0] <= coord.shape[0]:
+            log.warning("No ghost atom is added for system ")
+    else:
+        merged_coord_shift = torch.zeros_like(coord)
+        merged_atype = atype.clone()
+        merged_mapping = torch.arange(atype.numel())
+        merged_coord = coord.clone()
+
+    # build nlist
+    if not hybrid:
+        nlist, nlist_loc, nlist_type = build_neighbor_list(
+            coord.shape[0],
+            merged_coord,
+            merged_atype,
+            rcut,
+            sec,
+            merged_mapping,
+            type_split=type_split,
+            min_check=min_check,
+        )
+    else:
+        nlist, nlist_loc, nlist_type = [], [], []
+        for ii, single_rcut in enumerate(rcut):
+            nlist_tmp, nlist_loc_tmp, nlist_type_tmp = build_neighbor_list(
+                coord.shape[0],
+                merged_coord,
+                merged_atype,
+                single_rcut,
+                sec[ii],
+                merged_mapping,
+                type_split=type_split,
+                min_check=min_check,
+            )
+            nlist.append(nlist_tmp)
+            nlist_loc.append(nlist_loc_tmp)
+            nlist_type.append(nlist_type_tmp)
+    return nlist, nlist_loc, nlist_type, merged_coord_shift, merged_mapping
diff --git a/deepmd/pt/utils/region.py b/deepmd/pt/utils/region.py
new file mode 100644
index 0000000000..9d811acb9b
--- /dev/null
+++ b/deepmd/pt/utils/region.py
@@ -0,0 +1,116 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import torch
+
+
+def phys2inter(
+    coord: torch.Tensor,
+    cell: torch.Tensor,
+) -> torch.Tensor:
+    """Convert physical coordinates to internal(direct) coordinates.
+
+    Parameters
+    ----------
+    coord : torch.Tensor
+        physical coordinates of shape [*, na, 3].
+    cell : torch.Tensor
+        simulation cell tensor of shape [*, 3, 3].
+
+    Returns
+    -------
+    inter_coord: torch.Tensor
+        the internal coordinates
+
+    """
+    rec_cell, _ = torch.linalg.inv_ex(cell)
+    return torch.matmul(coord, rec_cell)
+
+
+def inter2phys(
+    coord: torch.Tensor,
+    cell: torch.Tensor,
+) -> torch.Tensor:
+    """Convert internal(direct) coordinates to physical coordinates.
+
+    Parameters
+    ----------
+    coord : torch.Tensor
+        internal coordinates of shape [*, na, 3].
+    cell : torch.Tensor
+        simulation cell tensor of shape [*, 3, 3].
+
+    Returns
+    -------
+    phys_coord: torch.Tensor
+        the physical coordinates
+
+    """
+    return torch.matmul(coord, cell)
+
+
+def to_face_distance(
+    cell: torch.Tensor,
+) -> torch.Tensor:
+    """Compute the to-face-distance of the simulation cell.
+
+    Parameters
+    ----------
+    cell : torch.Tensor
+        simulation cell tensor of shape [*, 3, 3].
+
+    Returns
+    -------
+    dist: torch.Tensor
+        the to face distances of shape [*, 3]
+
+    """
+    cshape = cell.shape
+    dist = b_to_face_distance(cell.view([-1, 3, 3]))
+    return dist.view(list(cshape[:-2]) + [3])  # noqa:RUF005
+
+
+def _to_face_distance(cell):
+    volume = torch.linalg.det(cell)
+    c_yz = torch.cross(cell[1], cell[2])
+    _h2yz = volume / torch.linalg.norm(c_yz)
+    c_zx = torch.cross(cell[2], cell[0])
+    _h2zx = volume / torch.linalg.norm(c_zx)
+    c_xy = torch.cross(cell[0], cell[1])
+    _h2xy = volume / torch.linalg.norm(c_xy)
+    return torch.stack([_h2yz, _h2zx, _h2xy])
+
+
+def b_to_face_distance(cell):
+    volume = torch.linalg.det(cell)
+    c_yz = torch.cross(cell[:, 1], cell[:, 2], dim=-1)
+    _h2yz = volume / torch.linalg.norm(c_yz, dim=-1)
+    c_zx = torch.cross(cell[:, 2], cell[:, 0], dim=-1)
+    _h2zx = volume / torch.linalg.norm(c_zx, dim=-1)
+    c_xy = torch.cross(cell[:, 0], cell[:, 1], dim=-1)
+    _h2xy = volume / torch.linalg.norm(c_xy, dim=-1)
+    return torch.stack([_h2yz, _h2zx, _h2xy], dim=1)
+
+
+# b_to_face_distance = torch.vmap(
+#   _to_face_distance, in_dims=(0), out_dims=(0))
+
+
+def normalize_coord(
+    coord: torch.Tensor,
+    cell: torch.Tensor,
+) -> torch.Tensor:
+    """Apply PBC according to the atomic coordinates.
+
+    Parameters
+    ----------
+    coord : torch.Tensor
+        orignal coordinates of shape [*, na, 3].
+
+    Returns
+    -------
+    wrapped_coord: torch.Tensor
+        wrapped coordinates of shape [*, na, 3].
+
+    """
+    icoord = phys2inter(coord, cell)
+    icoord = torch.remainder(icoord, 1.0)
+    return inter2phys(icoord, cell)
diff --git a/deepmd/pt/utils/serialization.py b/deepmd/pt/utils/serialization.py
new file mode 100644
index 0000000000..c99ddbb3c6
--- /dev/null
+++ b/deepmd/pt/utils/serialization.py
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+
+import torch
+
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.model.model.model import (
+    BaseModel,
+)
+from deepmd.pt.train.wrapper import (
+    ModelWrapper,
+)
+
+
+def serialize_from_file(model_file: str) -> dict:
+    """Serialize the model file to a dictionary.
+
+    Parameters
+    ----------
+    model_file : str
+        The model file to be serialized.
+
+    Returns
+    -------
+    dict
+        The serialized model data.
+    """
+    if model_file.endswith(".pth"):
+        saved_model = torch.jit.load(model_file, map_location="cpu")
+        model_def_script = json.loads(saved_model.model_def_script)
+        model = get_model(model_def_script)
+        model.load_state_dict(saved_model.state_dict())
+    elif model_file.endswith(".pt"):
+        state_dict = torch.load(model_file, map_location="cpu")
+        if "model" in state_dict:
+            state_dict = state_dict["model"]
+        model_def_script = state_dict["_extra_state"]["model_params"]
+        model = get_model(model_def_script)
+        modelwrapper = ModelWrapper(model)
+        modelwrapper.load_state_dict(state_dict)
+        model = modelwrapper.model["Default"]
+    else:
+        raise ValueError("PyTorch backend only supports converting .pth or .pt file")
+
+    model_dict = model.serialize()
+    data = {
+        "backend": "PyTorch",
+        "pt_version": torch.__version__,
+        "model": model_dict,
+        "model_def_script": model_def_script,
+        # TODO
+        "@variables": {},
+    }
+    return data
+
+
+def deserialize_to_file(model_file: str, data: dict) -> None:
+    """Deserialize the dictionary to a model file.
+
+    Parameters
+    ----------
+    model_file : str
+        The model file to be saved.
+    data : dict
+        The dictionary to be deserialized.
+    """
+    if not model_file.endswith(".pth"):
+        raise ValueError("PyTorch backend only supports converting .pth file")
+    model = BaseModel.deserialize(data["model"])
+    # JIT will happy in this way...
+    model.model_def_script = json.dumps(data["model_def_script"])
+    model = torch.jit.script(model)
+    torch.jit.save(model, model_file)
diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py
new file mode 100644
index 0000000000..a29d98addc
--- /dev/null
+++ b/deepmd/pt/utils/stat.py
@@ -0,0 +1,254 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
+from typing import (
+    Callable,
+    List,
+    Optional,
+    Union,
+)
+
+import numpy as np
+import torch
+
+from deepmd.pt.utils import (
+    AtomExcludeMask,
+)
+from deepmd.pt.utils.auto_batch_size import (
+    AutoBatchSize,
+)
+from deepmd.pt.utils.utils import (
+    dict_to_device,
+    to_numpy_array,
+    to_torch_tensor,
+)
+from deepmd.utils.out_stat import (
+    compute_stats_from_redu,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+
+log = logging.getLogger(__name__)
+
+
+def make_stat_input(datasets, dataloaders, nbatches):
+    """Pack data for statistics.
+
+    Args:
+    - dataset: A list of dataset to analyze.
+    - nbatches: Batch count for collecting stats.
+
+    Returns
+    -------
+    - a list of dicts, each of which contains data from a system
+    """
+    lst = []
+    log.info(f"Packing data for statistics from {len(datasets)} systems")
+    for i in range(len(datasets)):
+        sys_stat = {}
+        with torch.device("cpu"):
+            iterator = iter(dataloaders[i])
+            for _ in range(nbatches):
+                try:
+                    stat_data = next(iterator)
+                except StopIteration:
+                    iterator = iter(dataloaders[i])
+                    stat_data = next(iterator)
+                for dd in stat_data:
+                    if stat_data[dd] is None:
+                        sys_stat[dd] = None
+                    elif isinstance(stat_data[dd], torch.Tensor):
+                        if dd not in sys_stat:
+                            sys_stat[dd] = []
+                        sys_stat[dd].append(stat_data[dd])
+                    elif isinstance(stat_data[dd], np.float32):
+                        sys_stat[dd] = stat_data[dd]
+                    else:
+                        pass
+
+        for key in sys_stat:
+            if isinstance(sys_stat[key], np.float32):
+                pass
+            elif sys_stat[key] is None or sys_stat[key][0] is None:
+                sys_stat[key] = None
+            elif isinstance(stat_data[dd], torch.Tensor):
+                sys_stat[key] = torch.cat(sys_stat[key], dim=0)
+        dict_to_device(sys_stat)
+        lst.append(sys_stat)
+    return lst
+
+
+def restore_from_file(
+    stat_file_path: DPPath,
+    keys: List[str] = ["energy"],
+) -> Optional[dict]:
+    if stat_file_path is None:
+        return None
+    stat_files = [stat_file_path / f"bias_atom_{kk}" for kk in keys]
+    if any(not (ii.is_file()) for ii in stat_files):
+        return None
+    ret = {}
+
+    for kk in keys:
+        fp = stat_file_path / f"bias_atom_{kk}"
+        assert fp.is_file()
+        ret[kk] = fp.load_numpy()
+    return ret
+
+
+def save_to_file(
+    stat_file_path: DPPath,
+    results: dict,
+):
+    assert stat_file_path is not None
+    stat_file_path.mkdir(exist_ok=True, parents=True)
+    for kk, vv in results.items():
+        fp = stat_file_path / f"bias_atom_{kk}"
+        fp.save_numpy(vv)
+
+
+def compute_output_stats(
+    merged: Union[Callable[[], List[dict]], List[dict]],
+    ntypes: int,
+    keys: List[str] = ["energy"],
+    stat_file_path: Optional[DPPath] = None,
+    rcond: Optional[float] = None,
+    atom_ener: Optional[List[float]] = None,
+    model_forward: Optional[Callable[..., torch.Tensor]] = None,
+):
+    """
+    Compute the output statistics (e.g. energy bias) for the fitting net from packed data.
+
+    Parameters
+    ----------
+    merged : Union[Callable[[], List[dict]], List[dict]]
+        - List[dict]: A list of data samples from various data systems.
+            Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+            originating from the `i`-th data system.
+        - Callable[[], List[dict]]: A lazy function that returns data samples in the above format
+            only when needed. Since the sampling process can be slow and memory-intensive,
+            the lazy function helps by only sampling once.
+    ntypes : int
+        The number of atom types.
+    stat_file_path : DPPath, optional
+        The path to the stat file.
+    rcond : float, optional
+        The condition number for the regression of atomic energy.
+    atom_ener : List[float], optional
+        Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descrptor should be set.
+    model_forward : Callable[..., torch.Tensor], optional
+        The wrapped forward function of atomic model.
+        If not None, the model will be utilized to generate the original energy prediction,
+        which will be subtracted from the energy label of the data.
+        The difference will then be used to calculate the delta complement energy bias for each type.
+    """
+    bias_atom_e = restore_from_file(stat_file_path, keys)
+
+    if bias_atom_e is None:
+        if callable(merged):
+            # only get data for once
+            sampled = merged()
+        else:
+            sampled = merged
+        outputs = {kk: [item[kk] for item in sampled] for kk in keys}
+        data_mixed_type = "real_natoms_vec" in sampled[0]
+        natoms_key = "natoms" if not data_mixed_type else "real_natoms_vec"
+        for system in sampled:
+            if "atom_exclude_types" in system:
+                type_mask = AtomExcludeMask(
+                    ntypes, system["atom_exclude_types"]
+                ).get_type_mask()
+                system[natoms_key][:, 2:] *= type_mask.unsqueeze(0)
+        input_natoms = [item[natoms_key] for item in sampled]
+        # shape: (nframes, ndim)
+        merged_output = {kk: to_numpy_array(torch.cat(outputs[kk])) for kk in keys}
+        # shape: (nframes, ntypes)
+        merged_natoms = to_numpy_array(torch.cat(input_natoms)[:, 2:])
+        if atom_ener is not None and len(atom_ener) > 0:
+            assigned_atom_ener = np.array(
+                [ee if ee is not None else np.nan for ee in atom_ener]
+            )
+        else:
+            assigned_atom_ener = None
+        if model_forward is None:
+            # only use statistics result
+            # [0]: take the first otuput (mean) of compute_stats_from_redu
+            bias_atom_e = {
+                kk: compute_stats_from_redu(
+                    merged_output[kk],
+                    merged_natoms,
+                    assigned_bias=assigned_atom_ener,
+                    rcond=rcond,
+                )[0]
+                for kk in keys
+            }
+        else:
+            # subtract the model bias and output the delta bias
+            auto_batch_size = AutoBatchSize()
+            model_predict = {kk: [] for kk in keys}
+            for system in sampled:
+                nframes = system["coord"].shape[0]
+                coord, atype, box, natoms = (
+                    system["coord"],
+                    system["atype"],
+                    system["box"],
+                    system["natoms"],
+                )
+                fparam = system.get("fparam", None)
+                aparam = system.get("aparam", None)
+
+                def model_forward_auto_batch_size(*args, **kwargs):
+                    return auto_batch_size.execute_all(
+                        model_forward,
+                        nframes,
+                        system["atype"].shape[-1],
+                        *args,
+                        **kwargs,
+                    )
+
+                sample_predict = model_forward_auto_batch_size(
+                    coord, atype, box, fparam=fparam, aparam=aparam
+                )
+
+                for kk in keys:
+                    model_predict[kk].append(
+                        to_numpy_array(
+                            torch.sum(sample_predict[kk], dim=1)  # nf x nloc x odims
+                        )
+                    )
+
+            model_predict = {kk: np.concatenate(model_predict[kk]) for kk in keys}
+
+            bias_diff = {kk: merged_output[kk] - model_predict[kk] for kk in keys}
+            bias_atom_e = {
+                kk: compute_stats_from_redu(
+                    bias_diff[kk],
+                    merged_natoms,
+                    assigned_bias=assigned_atom_ener,
+                    rcond=rcond,
+                )[0]
+                for kk in keys
+            }
+            unbias_e = {
+                kk: model_predict[kk] + merged_natoms @ bias_atom_e[kk] for kk in keys
+            }
+            atom_numbs = merged_natoms.sum(-1)
+            for kk in keys:
+                rmse_ae = np.sqrt(
+                    np.mean(
+                        np.square(
+                            (unbias_e[kk].ravel() - merged_output[kk].ravel())
+                            / atom_numbs
+                        )
+                    )
+                )
+                log.info(
+                    f"RMSE of {kk} per atom after linear regression is: {rmse_ae} in the unit of {kk}."
+                )
+
+        if stat_file_path is not None:
+            save_to_file(stat_file_path, bias_atom_e)
+
+    ret = {kk: to_torch_tensor(bias_atom_e[kk]) for kk in keys}
+
+    return ret
diff --git a/deepmd/pt/utils/update_sel.py b/deepmd/pt/utils/update_sel.py
new file mode 100644
index 0000000000..8c2d0699f2
--- /dev/null
+++ b/deepmd/pt/utils/update_sel.py
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Type,
+)
+
+from deepmd.pt.utils.neighbor_stat import (
+    NeighborStat,
+)
+from deepmd.utils.update_sel import (
+    BaseUpdateSel,
+)
+
+
+class UpdateSel(BaseUpdateSel):
+    @property
+    def neighbor_stat(self) -> Type[NeighborStat]:
+        return NeighborStat
+
+    def hook(self, min_nbor_dist, max_nbor_size):
+        # TODO: save to the model in UpdateSel.hook
+        pass
diff --git a/deepmd/pt/utils/utils.py b/deepmd/pt/utils/utils.py
new file mode 100644
index 0000000000..d1ef089e49
--- /dev/null
+++ b/deepmd/pt/utils/utils.py
@@ -0,0 +1,111 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Optional,
+    overload,
+)
+
+import ml_dtypes
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+from deepmd.dpmodel.common import PRECISION_DICT as NP_PRECISION_DICT
+
+from .env import (
+    DEVICE,
+)
+from .env import PRECISION_DICT as PT_PRECISION_DICT
+
+
+class ActivationFn(torch.nn.Module):
+    def __init__(self, activation: Optional[str]):
+        super().__init__()
+        self.activation: str = activation if activation is not None else "linear"
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Returns the tensor after applying activation function corresponding to `activation`."""
+        # See jit supported types: https://pytorch.org/docs/stable/jit_language_reference.html#supported-type
+
+        if self.activation.lower() == "relu":
+            return F.relu(x)
+        elif self.activation.lower() == "gelu" or self.activation.lower() == "gelu_tf":
+            return F.gelu(x, approximate="tanh")
+        elif self.activation.lower() == "tanh":
+            return torch.tanh(x)
+        elif self.activation.lower() == "relu6":
+            return F.relu6(x)
+        elif self.activation.lower() == "softplus":
+            return F.softplus(x)
+        elif self.activation.lower() == "sigmoid":
+            return torch.sigmoid(x)
+        elif self.activation.lower() == "linear" or self.activation.lower() == "none":
+            return x
+        else:
+            raise RuntimeError(f"activation function {self.activation} not supported")
+
+
+@overload
+def to_numpy_array(xx: torch.Tensor) -> np.ndarray: ...
+
+
+@overload
+def to_numpy_array(xx: None) -> None: ...
+
+
+def to_numpy_array(
+    xx,
+):
+    if xx is None:
+        return None
+    assert xx is not None
+    # Create a reverse mapping of PT_PRECISION_DICT
+    reverse_precision_dict = {v: k for k, v in PT_PRECISION_DICT.items()}
+    # Use the reverse mapping to find keys with the desired value
+    prec = reverse_precision_dict.get(xx.dtype, None)
+    prec = NP_PRECISION_DICT.get(prec, None)
+    if prec is None:
+        raise ValueError(f"unknown precision {xx.dtype}")
+    if xx.dtype == torch.bfloat16:
+        # https://github.com/pytorch/pytorch/issues/109873
+        xx = xx.float()
+    return xx.detach().cpu().numpy().astype(prec)
+
+
+@overload
+def to_torch_tensor(xx: np.ndarray) -> torch.Tensor: ...
+
+
+@overload
+def to_torch_tensor(xx: None) -> None: ...
+
+
+def to_torch_tensor(
+    xx,
+):
+    if xx is None:
+        return None
+    assert xx is not None
+    # Create a reverse mapping of NP_PRECISION_DICT
+    reverse_precision_dict = {v: k for k, v in NP_PRECISION_DICT.items()}
+    # Use the reverse mapping to find keys with the desired value
+    prec = reverse_precision_dict.get(xx.dtype.type, None)
+    prec = PT_PRECISION_DICT.get(prec, None)
+    if prec is None:
+        raise ValueError(f"unknown precision {xx.dtype}")
+    if xx.dtype == ml_dtypes.bfloat16:
+        # https://github.com/pytorch/pytorch/issues/109873
+        xx = xx.astype(np.float32)
+    return torch.tensor(xx, dtype=prec, device=DEVICE)
+
+
+def dict_to_device(sample_dict):
+    for key in sample_dict:
+        if isinstance(sample_dict[key], list):
+            sample_dict[key] = [item.to(DEVICE) for item in sample_dict[key]]
+        if isinstance(sample_dict[key], np.float32):
+            sample_dict[key] = (
+                torch.ones(1, dtype=torch.float32, device=DEVICE) * sample_dict[key]
+            )
+        else:
+            if sample_dict[key] is not None:
+                sample_dict[key] = sample_dict[key].to(DEVICE)
diff --git a/deepmd/__about__.py b/deepmd/tf/__about__.py
similarity index 100%
rename from deepmd/__about__.py
rename to deepmd/tf/__about__.py
diff --git a/deepmd/tf/__init__.py b/deepmd/tf/__init__.py
new file mode 100644
index 0000000000..65aa03b39e
--- /dev/null
+++ b/deepmd/tf/__init__.py
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Root of the deepmd package, exposes all public classes and submodules."""
+
+try:
+    from importlib import (
+        metadata,
+    )
+except ImportError:  # for Python<3.8
+    import importlib_metadata as metadata
+
+import deepmd.tf.utils.network as network
+
+from . import (
+    cluster,
+    descriptor,
+    fit,
+    loss,
+    nvnmd,
+    utils,
+)
+from .env import (
+    set_mkl,
+)
+from .infer import (
+    DeepEval,
+    DeepPotential,
+)
+from .infer.data_modifier import (
+    DipoleChargeModifier,
+)
+
+set_mkl()
+
+try:
+    from deepmd._version import version as __version__
+except ImportError:
+    from .__about__ import (
+        __version__,
+    )
+
+# load third-party plugins
+try:
+    eps = metadata.entry_points(group="deepmd")
+except TypeError:
+    eps = metadata.entry_points().get("deepmd", [])
+for ep in eps:
+    ep.load()
+
+__all__ = [
+    "__version__",
+    "descriptor",
+    "fit",
+    "loss",
+    "utils",
+    "cluster",
+    "network",
+    "DeepEval",
+    "DeepPotential",
+    "DipoleChargeModifier",
+    "nvnmd",
+]
diff --git a/deepmd/tf/__main__.py b/deepmd/tf/__main__.py
new file mode 100644
index 0000000000..6026b1c269
--- /dev/null
+++ b/deepmd/tf/__main__.py
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Package dp entry point."""
+
+from .entrypoints.main import (
+    main,
+)
+
+if __name__ == "__main__":
+    main()
diff --git a/deepmd/tf/calculator.py b/deepmd/tf/calculator.py
new file mode 100644
index 0000000000..5fc4b59f5f
--- /dev/null
+++ b/deepmd/tf/calculator.py
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.calculator import (
+    DP,
+)
+
+__all__ = [
+    "DP",
+]
diff --git a/deepmd/cluster/__init__.py b/deepmd/tf/cluster/__init__.py
similarity index 74%
rename from deepmd/cluster/__init__.py
rename to deepmd/tf/cluster/__init__.py
index 3c15778fe5..6735ce92f4 100644
--- a/deepmd/cluster/__init__.py
+++ b/deepmd/tf/cluster/__init__.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 """Module that reads node resources, auto detects if running local or on SLURM."""
 
-import os
 from typing import (
     List,
     Optional,
@@ -9,7 +8,6 @@
 )
 
 from .local import get_resource as get_local_res
-from .slurm import get_resource as get_slurm_res
 
 __all__ = ["get_resource"]
 
@@ -22,7 +20,4 @@ def get_resource() -> Tuple[str, List[str], Optional[List[int]]]:
     Tuple[str, List[str], Optional[List[int]]]
         nodename, nodelist, and gpus
     """
-    if "SLURM_JOB_NODELIST" in os.environ:
-        return get_slurm_res()
-    else:
-        return get_local_res()
+    return get_local_res()
diff --git a/deepmd/cluster/local.py b/deepmd/tf/cluster/local.py
similarity index 92%
rename from deepmd/cluster/local.py
rename to deepmd/tf/cluster/local.py
index 3c12c9dc85..60961a0d65 100644
--- a/deepmd/cluster/local.py
+++ b/deepmd/tf/cluster/local.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 """Get local GPU resources."""
 
-import socket
 import subprocess as sp
 import sys
 from typing import (
@@ -10,9 +9,12 @@
     Tuple,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
+from deepmd.utils.hostlist import (
+    get_host_names,
+)
 
 __all__ = ["get_gpus", "get_resource"]
 
@@ -57,7 +59,6 @@ def get_resource() -> Tuple[str, List[str], Optional[List[int]]]:
     Tuple[str, List[str], Optional[List[int]]]
         nodename, nodelist, and gpus
     """
-    nodename = socket.gethostname()
-    nodelist = [nodename]
+    nodename, nodelist = get_host_names()
     gpus = get_gpus()
     return nodename, nodelist, gpus
diff --git a/deepmd/tf/common.py b/deepmd/tf/common.py
new file mode 100644
index 0000000000..5f2d0d882e
--- /dev/null
+++ b/deepmd/tf/common.py
@@ -0,0 +1,291 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Collection of functions and classes used throughout the whole package."""
+
+import warnings
+from functools import (
+    wraps,
+)
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Union,
+)
+
+import tensorflow
+from tensorflow.python.framework import (
+    tensor_util,
+)
+
+from deepmd.common import (
+    VALID_ACTIVATION,
+    VALID_PRECISION,
+    add_data_requirement,
+    data_requirement,
+    expand_sys_str,
+    get_np_precision,
+    j_loader,
+    j_must_have,
+    make_default_mesh,
+    select_idx_map,
+)
+from deepmd.tf.env import (
+    GLOBAL_TF_FLOAT_PRECISION,
+    op_module,
+    tf,
+)
+
+if TYPE_CHECKING:
+    from deepmd.common import (
+        _ACTIVATION,
+        _PRECISION,
+    )
+
+__all__ = [
+    # from deepmd.common
+    "data_requirement",
+    "add_data_requirement",
+    "select_idx_map",
+    "make_default_mesh",
+    "j_must_have",
+    "j_loader",
+    "expand_sys_str",
+    "get_np_precision",
+    # from self
+    "PRECISION_DICT",
+    "gelu",
+    "gelu_tf",
+    "ACTIVATION_FN_DICT",
+    "get_activation_func",
+    "get_precision",
+    "safe_cast_tensor",
+    "cast_precision",
+    "clear_session",
+]
+
+# define constants
+PRECISION_DICT = {
+    "default": GLOBAL_TF_FLOAT_PRECISION,
+    "float16": tf.float16,
+    "float32": tf.float32,
+    "float64": tf.float64,
+    "bfloat16": tf.bfloat16,
+}
+assert VALID_PRECISION.issubset(PRECISION_DICT.keys())
+
+
+def gelu(x: tf.Tensor) -> tf.Tensor:
+    """Gaussian Error Linear Unit.
+
+    This is a smoother version of the RELU, implemented by custom operator.
+
+    Parameters
+    ----------
+    x : tf.Tensor
+        float Tensor to perform activation
+
+    Returns
+    -------
+    tf.Tensor
+        `x` with the GELU activation applied
+
+    References
+    ----------
+    Original paper
+    https://arxiv.org/abs/1606.08415
+    """
+    return op_module.gelu_custom(x)
+
+
+def gelu_tf(x: tf.Tensor) -> tf.Tensor:
+    """Gaussian Error Linear Unit.
+
+    This is a smoother version of the RELU, implemented by TF.
+
+    Parameters
+    ----------
+    x : tf.Tensor
+        float Tensor to perform activation
+
+    Returns
+    -------
+    tf.Tensor
+        `x` with the GELU activation applied
+
+    References
+    ----------
+    Original paper
+    https://arxiv.org/abs/1606.08415
+    """
+
+    def gelu_wrapper(x):
+        try:
+            return tensorflow.nn.gelu(x, approximate=True)
+        except AttributeError:
+            warnings.warn(
+                "TensorFlow does not provide an implementation of gelu, please upgrade your TensorFlow version. Fallback to the custom gelu operator."
+            )
+            return op_module.gelu_custom(x)
+
+    return (lambda x: gelu_wrapper(x))(x)
+
+
+ACTIVATION_FN_DICT = {
+    "relu": tf.nn.relu,
+    "relu6": tf.nn.relu6,
+    "softplus": tf.nn.softplus,
+    "sigmoid": tf.sigmoid,
+    "tanh": tf.nn.tanh,
+    "gelu": gelu,
+    "gelu_tf": gelu_tf,
+    "linear": lambda x: x,
+    "none": lambda x: x,
+}
+assert VALID_ACTIVATION.issubset(ACTIVATION_FN_DICT.keys())
+
+
+def get_activation_func(
+    activation_fn: Union["_ACTIVATION", None],
+) -> Callable[[tf.Tensor], tf.Tensor]:
+    """Get activation function callable based on string name.
+
+    Parameters
+    ----------
+    activation_fn : _ACTIVATION
+        one of the defined activation functions
+
+    Returns
+    -------
+    Callable[[tf.Tensor], tf.Tensor]
+        correspondingg TF callable
+
+    Raises
+    ------
+    RuntimeError
+        if unknown activation function is specified
+    """
+    if activation_fn is None:
+        activation_fn = "none"
+    assert activation_fn is not None
+    if activation_fn.lower() not in ACTIVATION_FN_DICT:
+        raise RuntimeError(f"{activation_fn} is not a valid activation function")
+    return ACTIVATION_FN_DICT[activation_fn.lower()]
+
+
+def get_precision(precision: "_PRECISION") -> Any:
+    """Convert str to TF DType constant.
+
+    Parameters
+    ----------
+    precision : _PRECISION
+        one of the allowed precisions
+
+    Returns
+    -------
+    tf.python.framework.dtypes.DType
+        appropriate TF constant
+
+    Raises
+    ------
+    RuntimeError
+        if supplied precision string does not have acorresponding TF constant
+    """
+    if precision not in PRECISION_DICT:
+        raise RuntimeError(f"{precision} is not a valid precision")
+    return PRECISION_DICT[precision]
+
+
+def safe_cast_tensor(
+    input: tf.Tensor, from_precision: tf.DType, to_precision: tf.DType
+) -> tf.Tensor:
+    """Convert a Tensor from a precision to another precision.
+
+    If input is not a Tensor or without the specific precision, the method will not
+    cast it.
+
+    Parameters
+    ----------
+    input : tf.Tensor
+        input tensor
+    from_precision : tf.DType
+        Tensor data type that is casted from
+    to_precision : tf.DType
+        Tensor data type that casts to
+
+    Returns
+    -------
+    tf.Tensor
+        casted Tensor
+    """
+    if tensor_util.is_tensor(input) and input.dtype == from_precision:
+        return tf.cast(input, to_precision)
+    return input
+
+
+def cast_precision(func: Callable) -> Callable:
+    """A decorator that casts and casts back the input
+    and output tensor of a method.
+
+    The decorator should be used in a classmethod.
+
+    The decorator will do the following thing:
+    (1) It casts input Tensors from `GLOBAL_TF_FLOAT_PRECISION`
+    to precision defined by property `precision`.
+    (2) It casts output Tensors from `precision` to
+    `GLOBAL_TF_FLOAT_PRECISION`.
+    (3) It checks inputs and outputs and only casts when
+    input or output is a Tensor and its dtype matches
+    `GLOBAL_TF_FLOAT_PRECISION` and `precision`, respectively.
+    If it does not match (e.g. it is an integer), the decorator
+    will do nothing on it.
+
+    Returns
+    -------
+    Callable
+        a decorator that casts and casts back the input and
+        output tensor of a method
+
+    Examples
+    --------
+    >>> class A:
+    ...     @property
+    ...     def precision(self):
+    ...         return tf.float32
+    ...
+    ...     @cast_precision
+    ...     def f(x: tf.Tensor, y: tf.Tensor) -> tf.Tensor:
+    ...         return x**2 + y
+    """
+
+    @wraps(func)
+    def wrapper(self, *args, **kwargs):
+        # only convert tensors
+        returned_tensor = func(
+            self,
+            *[
+                safe_cast_tensor(vv, GLOBAL_TF_FLOAT_PRECISION, self.precision)
+                for vv in args
+            ],
+            **{
+                kk: safe_cast_tensor(vv, GLOBAL_TF_FLOAT_PRECISION, self.precision)
+                for kk, vv in kwargs.items()
+            },
+        )
+        if isinstance(returned_tensor, tuple):
+            return tuple(
+                safe_cast_tensor(vv, self.precision, GLOBAL_TF_FLOAT_PRECISION)
+                for vv in returned_tensor
+            )
+        else:
+            return safe_cast_tensor(
+                returned_tensor, self.precision, GLOBAL_TF_FLOAT_PRECISION
+            )
+
+    return wrapper
+
+
+def clear_session():
+    """Reset all state generated by DeePMD-kit."""
+    tf.reset_default_graph()
+    # TODO: remove this line when data_requirement is not a global variable
+    data_requirement.clear()
diff --git a/deepmd/descriptor/__init__.py b/deepmd/tf/descriptor/__init__.py
similarity index 100%
rename from deepmd/descriptor/__init__.py
rename to deepmd/tf/descriptor/__init__.py
diff --git a/deepmd/descriptor/descriptor.py b/deepmd/tf/descriptor/descriptor.py
similarity index 88%
rename from deepmd/descriptor/descriptor.py
rename to deepmd/tf/descriptor/descriptor.py
index bd731004cb..82b09c95fb 100644
--- a/deepmd/descriptor/descriptor.py
+++ b/deepmd/tf/descriptor/descriptor.py
@@ -4,7 +4,6 @@
 )
 from typing import (
     Any,
-    Callable,
     Dict,
     List,
     Optional,
@@ -13,17 +12,22 @@
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.common import (
+    j_get_type,
+)
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     tf,
 )
-from deepmd.utils import (
-    Plugin,
+from deepmd.tf.utils import (
     PluginVariant,
 )
+from deepmd.utils.plugin import (
+    make_plugin_registry,
+)
 
 
-class Descriptor(PluginVariant):
+class Descriptor(PluginVariant, make_plugin_registry("descriptor")):
     r"""The abstract class for descriptors. All specific descriptors should
     be based on this class.
 
@@ -32,9 +36,9 @@ class Descriptor(PluginVariant):
 
     Examples
     --------
-    >>> descript = Descriptor(type="se_e2_a", rcut=6., rcut_smth=0.5, sel=[50])
+    >>> descript = Descriptor(type="se_e2_a", rcut=6.0, rcut_smth=0.5, sel=[50])
     >>> type(descript)
-    <class 'deepmd.descriptor.se_a.DescrptSeA'>
+    <class 'deepmd.tf.descriptor.se_a.DescrptSeA'>
 
     Notes
     -----
@@ -42,44 +46,9 @@ class Descriptor(PluginVariant):
     that can be called by other classes.
     """
 
-    __plugins = Plugin()
-
-    @staticmethod
-    def register(key: str) -> Callable:
-        """Register a descriptor plugin.
-
-        Parameters
-        ----------
-        key : str
-            the key of a descriptor
-
-        Returns
-        -------
-        Descriptor
-            the registered descriptor
-
-        Examples
-        --------
-        >>> @Descriptor.register("some_descrpt")
-            class SomeDescript(Descriptor):
-                pass
-        """
-        return Descriptor.__plugins.register(key)
-
-    @classmethod
-    def get_class_by_input(cls, input: dict):
-        try:
-            descrpt_type = input["type"]
-        except KeyError:
-            raise KeyError("the type of descriptor should be set by `type`")
-        if descrpt_type in Descriptor.__plugins.plugins:
-            return Descriptor.__plugins.plugins[descrpt_type]
-        else:
-            raise RuntimeError("Unknown descriptor type: " + descrpt_type)
-
     def __new__(cls, *args, **kwargs):
         if cls is Descriptor:
-            cls = cls.get_class_by_input(kwargs)
+            cls = cls.get_class_by_type(j_get_type(kwargs, cls.__name__))
         return super().__new__(cls)
 
     @abstractmethod
@@ -133,9 +102,6 @@ def get_dim_rot_mat_1(self) -> int:
         int
             the first dimension of the rotation matrix
         """
-        # TODO: I think this method should be implemented as it's called by dipole and
-        # polar fitting network. However, currently not all descriptors have this
-        # method.
         raise NotImplementedError
 
     def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]:
@@ -152,8 +118,6 @@ def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]:
         sel_r : list[int]
             The number of neighbors with only radial information
         """
-        # TODO: I think this method should be implemented as it's called by energy
-        # model. However, se_ar and hybrid doesn't have this method.
         raise NotImplementedError
 
     @abstractmethod
@@ -174,18 +138,18 @@ def compute_input_stats(
         ----------
         data_coord : list[np.ndarray]
             The coordinates. Can be generated by
-            :meth:`deepmd.model.model_stat.make_stat_input`
+            :meth:`deepmd.tf.model.model_stat.make_stat_input`
         data_box : list[np.ndarray]
             The box. Can be generated by
-            :meth:`deepmd.model.model_stat.make_stat_input`
+            :meth:`deepmd.tf.model.model_stat.make_stat_input`
         data_atype : list[np.ndarray]
-            The atom types. Can be generated by :meth:`deepmd.model.model_stat.make_stat_input`
+            The atom types. Can be generated by :meth:`deepmd.tf.model.model_stat.make_stat_input`
         natoms_vec : list[np.ndarray]
             The vector for the number of atoms of the system and different types of
-            atoms. Can be generated by :meth:`deepmd.model.model_stat.make_stat_input`
+            atoms. Can be generated by :meth:`deepmd.tf.model.model_stat.make_stat_input`
         mesh : list[np.ndarray]
             The mesh for neighbor searching. Can be generated by
-            :meth:`deepmd.model.model_stat.make_stat_input`
+            :meth:`deepmd.tf.model.model_stat.make_stat_input`
         input_dict : dict[str, list[np.ndarray]]
             Dictionary for additional input
         **kwargs
@@ -507,5 +471,45 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict):
             The local data refer to the current class
         """
         # call subprocess
-        cls = cls.get_class_by_input(local_jdata)
+        cls = cls.get_class_by_type(j_get_type(local_jdata, cls.__name__))
         return cls.update_sel(global_jdata, local_jdata)
+
+    @classmethod
+    def deserialize(cls, data: dict, suffix: str = "") -> "Descriptor":
+        """Deserialize the model.
+
+        There is no suffix in a native DP model, but it is important
+        for the TF backend.
+
+        Parameters
+        ----------
+        data : dict
+            The serialized data
+        suffix : str, optional
+            Name suffix to identify this descriptor
+
+        Returns
+        -------
+        Descriptor
+            The deserialized descriptor
+        """
+        if cls is Descriptor:
+            return Descriptor.get_class_by_type(
+                j_get_type(data, cls.__name__)
+            ).deserialize(data, suffix=suffix)
+        raise NotImplementedError("Not implemented in class %s" % cls.__name__)
+
+    def serialize(self, suffix: str = "") -> dict:
+        """Serialize the model.
+
+        There is no suffix in a native DP model, but it is important
+        for the TF backend.
+
+        Returns
+        -------
+        dict
+            The serialized data
+        suffix : str, optional
+            Name suffix to identify this descriptor
+        """
+        raise NotImplementedError("Not implemented in class %s" % self.__name__)
diff --git a/deepmd/descriptor/hybrid.py b/deepmd/tf/descriptor/hybrid.py
similarity index 87%
rename from deepmd/descriptor/hybrid.py
rename to deepmd/tf/descriptor/hybrid.py
index 5ee5ec884b..4e7eaa2c92 100644
--- a/deepmd/descriptor/hybrid.py
+++ b/deepmd/tf/descriptor/hybrid.py
@@ -1,26 +1,32 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 from typing import (
+    Any,
+    Dict,
     List,
     Optional,
     Tuple,
+    Union,
 )
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     tf,
 )
-from deepmd.utils.spin import (
+from deepmd.tf.utils.spin import (
     Spin,
 )
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
 
-# from deepmd.descriptor import DescrptLocFrame
-# from deepmd.descriptor import DescrptSeA
-# from deepmd.descriptor import DescrptSeT
-# from deepmd.descriptor import DescrptSeAEbd
-# from deepmd.descriptor import DescrptSeAEf
-# from deepmd.descriptor import DescrptSeR
+# from deepmd.tf.descriptor import DescrptLocFrame
+# from deepmd.tf.descriptor import DescrptSeA
+# from deepmd.tf.descriptor import DescrptSeT
+# from deepmd.tf.descriptor import DescrptSeAEbd
+# from deepmd.tf.descriptor import DescrptSeAEf
+# from deepmd.tf.descriptor import DescrptSeR
 from .descriptor import (
     Descriptor,
 )
@@ -32,13 +38,14 @@ class DescrptHybrid(Descriptor):
 
     Parameters
     ----------
-    list : list
+    list : list : List[Union[Descriptor, Dict[str, Any]]]
             Build a descriptor from the concatenation of the list of descriptors.
+            The descriptor can be either an object or a dictionary.
     """
 
     def __init__(
         self,
-        list: list,
+        list: List[Union[Descriptor, Dict[str, Any]]],
         multi_task: bool = False,
         ntypes: Optional[int] = None,
         spin: Optional[Spin] = None,
@@ -146,15 +153,15 @@ def compute_input_stats(
         Parameters
         ----------
         data_coord
-            The coordinates. Can be generated by deepmd.model.make_stat_input
+            The coordinates. Can be generated by deepmd.tf.model.make_stat_input
         data_box
-            The box. Can be generated by deepmd.model.make_stat_input
+            The box. Can be generated by deepmd.tf.model.make_stat_input
         data_atype
-            The atom types. Can be generated by deepmd.model.make_stat_input
+            The atom types. Can be generated by deepmd.tf.model.make_stat_input
         natoms_vec
-            The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.model.make_stat_input
+            The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.tf.model.make_stat_input
         mesh
-            The mesh for neighbor searching. Can be generated by deepmd.model.make_stat_input
+            The mesh for neighbor searching. Can be generated by deepmd.tf.model.make_stat_input
         input_dict
             Dictionary for additional input
         mixed_type
@@ -434,3 +441,30 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict):
             for sub_jdata in local_jdata["list"]
         ]
         return local_jdata_cpy
+
+    def serialize(self, suffix: str = "") -> dict:
+        return {
+            "@class": "Descriptor",
+            "type": "hybrid",
+            "@version": 1,
+            "list": [
+                descrpt.serialize(suffix=f"{suffix}_{idx}")
+                for idx, descrpt in enumerate(self.descrpt_list)
+            ],
+        }
+
+    @classmethod
+    def deserialize(cls, data: dict, suffix: str = "") -> "DescrptHybrid":
+        data = data.copy()
+        class_name = data.pop("@class")
+        assert class_name == "Descriptor"
+        class_type = data.pop("type")
+        assert class_type == "hybrid"
+        check_version_compatibility(data.pop("@version"), 1, 1)
+        obj = cls(
+            list=[
+                Descriptor.deserialize(ii, suffix=f"{suffix}_{idx}")
+                for idx, ii in enumerate(data["list"])
+            ],
+        )
+        return obj
diff --git a/deepmd/descriptor/loc_frame.py b/deepmd/tf/descriptor/loc_frame.py
similarity index 95%
rename from deepmd/descriptor/loc_frame.py
rename to deepmd/tf/descriptor/loc_frame.py
index 0765be55f8..ee414fc0bb 100644
--- a/deepmd/descriptor/loc_frame.py
+++ b/deepmd/tf/descriptor/loc_frame.py
@@ -7,17 +7,17 @@
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     default_tf_session_config,
     op_module,
     tf,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_tensor_by_name_from_graph,
 )
-from deepmd.utils.sess import (
+from deepmd.tf.utils.sess import (
     run_sess,
 )
 
@@ -35,11 +35,11 @@ class DescrptLocFrame(Descriptor):
     ----------
     rcut
             The cut-off radius
-    sel_a : list[str]
+    sel_a : list[int]
             The length of the list should be the same as the number of atom types in the system.
             `sel_a[i]` gives the selected number of type-i neighbors.
             The full relative coordinates of the neighbors are used by the descriptor.
-    sel_r : list[str]
+    sel_r : list[int]
             The length of the list should be the same as the number of atom types in the system.
             `sel_r[i]` gives the selected number of type-i neighbors.
             Only relative distance of the neighbors are used by the descriptor.
@@ -168,15 +168,15 @@ def compute_input_stats(
         Parameters
         ----------
         data_coord
-            The coordinates. Can be generated by deepmd.model.make_stat_input
+            The coordinates. Can be generated by deepmd.tf.model.make_stat_input
         data_box
-            The box. Can be generated by deepmd.model.make_stat_input
+            The box. Can be generated by deepmd.tf.model.make_stat_input
         data_atype
-            The atom types. Can be generated by deepmd.model.make_stat_input
+            The atom types. Can be generated by deepmd.tf.model.make_stat_input
         natoms_vec
-            The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.model.make_stat_input
+            The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.tf.model.make_stat_input
         mesh
-            The mesh for neighbor searching. Can be generated by deepmd.model.make_stat_input
+            The mesh for neighbor searching. Can be generated by deepmd.tf.model.make_stat_input
         input_dict
             Dictionary for additional input
         **kwargs
@@ -343,7 +343,10 @@ def prod_force_virial(
         tf.summary.histogram("net_derivative", net_deriv)
         net_deriv_reshape = tf.reshape(
             net_deriv,
-            [np.cast["int64"](-1), natoms[0] * np.cast["int64"](self.ndescrpt)],
+            [
+                np.asarray(-1, dtype=np.int64),
+                natoms[0] * np.asarray(self.ndescrpt, dtype=np.int64),
+            ],
         )
         force = op_module.prod_force(
             net_deriv_reshape,
diff --git a/deepmd/tf/descriptor/se.py b/deepmd/tf/descriptor/se.py
new file mode 100644
index 0000000000..4232503464
--- /dev/null
+++ b/deepmd/tf/descriptor/se.py
@@ -0,0 +1,325 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import re
+from typing import (
+    List,
+    Set,
+    Tuple,
+)
+
+from deepmd.dpmodel.utils.network import (
+    EmbeddingNet,
+    NetworkCollection,
+)
+from deepmd.tf.env import (
+    EMBEDDING_NET_PATTERN,
+    tf,
+)
+from deepmd.tf.utils.graph import (
+    get_embedding_net_variables_from_graph_def,
+    get_tensor_by_name_from_graph,
+)
+from deepmd.tf.utils.update_sel import (
+    UpdateSel,
+)
+
+from .descriptor import (
+    Descriptor,
+)
+
+
+class DescrptSe(Descriptor):
+    """A base class for smooth version of descriptors.
+
+    Notes
+    -----
+    All of these descriptors have an environmental matrix and an
+    embedding network (:meth:`deepmd.tf.utils.network.embedding_net`), so
+    they can share some similiar methods without defining them twice.
+
+    Attributes
+    ----------
+    embedding_net_variables : dict
+        initial embedding network variables
+    descrpt_reshape : tf.Tensor
+        the reshaped descriptor
+    descrpt_deriv : tf.Tensor
+        the descriptor derivative
+    rij : tf.Tensor
+        distances between two atoms
+    nlist : tf.Tensor
+        the neighbor list
+
+    """
+
+    def _identity_tensors(self, suffix: str = "") -> None:
+        """Identify tensors which are expected to be stored and restored.
+
+        Notes
+        -----
+        These tensors will be indentitied:
+            self.descrpt_reshape : o_rmat
+            self.descrpt_deriv : o_rmat_deriv
+            self.rij : o_rij
+            self.nlist : o_nlist
+        Thus, this method should be called during building the descriptor and
+        after these tensors are initialized.
+
+        Parameters
+        ----------
+        suffix : str
+            The suffix of the scope
+        """
+        self.descrpt_reshape = tf.identity(self.descrpt_reshape, name="o_rmat" + suffix)
+        self.descrpt_deriv = tf.identity(
+            self.descrpt_deriv, name="o_rmat_deriv" + suffix
+        )
+        self.rij = tf.identity(self.rij, name="o_rij" + suffix)
+        self.nlist = tf.identity(self.nlist, name="o_nlist" + suffix)
+
+    def get_tensor_names(self, suffix: str = "") -> Tuple[str]:
+        """Get names of tensors.
+
+        Parameters
+        ----------
+        suffix : str
+            The suffix of the scope
+
+        Returns
+        -------
+        Tuple[str]
+            Names of tensors
+        """
+        return (
+            f"o_rmat{suffix}:0",
+            f"o_rmat_deriv{suffix}:0",
+            f"o_rij{suffix}:0",
+            f"o_nlist{suffix}:0",
+        )
+
+    def pass_tensors_from_frz_model(
+        self,
+        descrpt_reshape: tf.Tensor,
+        descrpt_deriv: tf.Tensor,
+        rij: tf.Tensor,
+        nlist: tf.Tensor,
+    ):
+        """Pass the descrpt_reshape tensor as well as descrpt_deriv tensor from the frz graph_def.
+
+        Parameters
+        ----------
+        descrpt_reshape
+            The passed descrpt_reshape tensor
+        descrpt_deriv
+            The passed descrpt_deriv tensor
+        rij
+            The passed rij tensor
+        nlist
+            The passed nlist tensor
+        """
+        self.rij = rij
+        self.nlist = nlist
+        self.descrpt_deriv = descrpt_deriv
+        self.descrpt_reshape = descrpt_reshape
+
+    def init_variables(
+        self,
+        graph: tf.Graph,
+        graph_def: tf.GraphDef,
+        suffix: str = "",
+    ) -> None:
+        """Init the embedding net variables with the given dict.
+
+        Parameters
+        ----------
+        graph : tf.Graph
+            The input frozen model graph
+        graph_def : tf.GraphDef
+            The input frozen model graph_def
+        suffix : str, optional
+            The suffix of the scope
+        """
+        self.embedding_net_variables = get_embedding_net_variables_from_graph_def(
+            graph_def, suffix=suffix
+        )
+        self.davg = get_tensor_by_name_from_graph(
+            graph, "descrpt_attr%s/t_avg" % suffix
+        )
+        self.dstd = get_tensor_by_name_from_graph(
+            graph, "descrpt_attr%s/t_std" % suffix
+        )
+
+    @property
+    def precision(self) -> tf.DType:
+        """Precision of filter network."""
+        return self.filter_precision
+
+    @classmethod
+    def update_sel(cls, global_jdata: dict, local_jdata: dict):
+        """Update the selection and perform neighbor statistics.
+
+        Parameters
+        ----------
+        global_jdata : dict
+            The global data, containing the training section
+        local_jdata : dict
+            The local data refer to the current class
+        """
+        # default behavior is to update sel which is a list
+        local_jdata_cpy = local_jdata.copy()
+        return UpdateSel().update_one_sel(global_jdata, local_jdata_cpy, False)
+
+    def serialize_network(
+        self,
+        ntypes: int,
+        ndim: int,
+        in_dim: int,
+        neuron: List[int],
+        activation_function: str,
+        resnet_dt: bool,
+        variables: dict,
+        excluded_types: Set[Tuple[int, int]] = set(),
+        suffix: str = "",
+    ) -> dict:
+        """Serialize network.
+
+        Parameters
+        ----------
+        ntypes : int
+            The number of types
+        ndim : int
+            The dimension of elements
+        in_dim : int
+            The input dimension
+        neuron : List[int]
+            The neuron list
+        activation_function : str
+            The activation function
+        resnet_dt : bool
+            Whether to use resnet
+        variables : dict
+            The input variables
+        excluded_types : Set[Tuple[int, int]], optional
+            The excluded types
+        suffix : str, optional
+            The suffix of the scope
+
+        Returns
+        -------
+        dict
+            The converted network data
+        """
+        embeddings = NetworkCollection(
+            ntypes=ntypes,
+            ndim=ndim,
+            network_type="embedding_network",
+        )
+        if ndim == 2:
+            for type_i, type_j in excluded_types:
+                # initialize an empty network for the excluded types
+                embeddings[(type_i, type_j)] = EmbeddingNet(
+                    in_dim=in_dim,
+                    neuron=neuron,
+                    activation_function=activation_function,
+                    resnet_dt=resnet_dt,
+                    precision=self.precision.name,
+                )
+                embeddings[(type_j, type_i)] = EmbeddingNet(
+                    in_dim=in_dim,
+                    neuron=neuron,
+                    activation_function=activation_function,
+                    resnet_dt=resnet_dt,
+                    precision=self.precision.name,
+                )
+                embeddings[(type_i, type_j)].clear()
+                embeddings[(type_j, type_i)].clear()
+
+        if suffix != "":
+            embedding_net_pattern = (
+                EMBEDDING_NET_PATTERN.replace("/(idt)", suffix + "/(idt)")
+                .replace("/(bias)", suffix + "/(bias)")
+                .replace("/(matrix)", suffix + "/(matrix)")
+            )
+        else:
+            embedding_net_pattern = EMBEDDING_NET_PATTERN
+        for key, value in variables.items():
+            m = re.search(embedding_net_pattern, key)
+            m = [mm for mm in m.groups() if mm is not None]
+            typei = m[0]
+            typej = "_".join(m[3:]) if len(m[3:]) else "all"
+            layer_idx = int(m[2]) - 1
+            weight_name = m[1]
+            if ndim == 0:
+                network_idx = ()
+            elif ndim == 1:
+                network_idx = (int(typej),)
+            elif ndim == 2:
+                network_idx = (int(typei), int(typej))
+            else:
+                raise ValueError(f"Invalid ndim: {ndim}")
+            if embeddings[network_idx] is None:
+                # initialize the network if it is not initialized
+                embeddings[network_idx] = EmbeddingNet(
+                    in_dim=in_dim,
+                    neuron=neuron,
+                    activation_function=activation_function,
+                    resnet_dt=resnet_dt,
+                    precision=self.precision.name,
+                )
+            assert embeddings[network_idx] is not None
+            if weight_name == "idt":
+                value = value.ravel()
+            embeddings[network_idx][layer_idx][weight_name] = value
+        return embeddings.serialize()
+
+    @classmethod
+    def deserialize_network(cls, data: dict, suffix: str = "") -> dict:
+        """Deserialize network.
+
+        Parameters
+        ----------
+        data : dict
+            The input network data
+        suffix : str, optional
+            The suffix of the scope
+
+        Returns
+        -------
+        variables : dict
+            The input variables
+        """
+        embedding_net_variables = {}
+        embeddings = NetworkCollection.deserialize(data)
+        for ii in range(embeddings.ntypes**embeddings.ndim):
+            net_idx = []
+            rest_ii = ii
+            for _ in range(embeddings.ndim):
+                net_idx.append(rest_ii % embeddings.ntypes)
+                rest_ii //= embeddings.ntypes
+            net_idx = tuple(net_idx)
+            if embeddings.ndim in (0, 1):
+                key0 = "all"
+                key1 = f"_{ii}"
+            elif embeddings.ndim == 2:
+                key0 = f"{net_idx[0]}"
+                key1 = f"_{net_idx[1]}"
+            else:
+                raise ValueError(f"Invalid ndim: {embeddings.ndim}")
+            network = embeddings[net_idx]
+            assert network is not None
+            for layer_idx, layer in enumerate(network.layers):
+                embedding_net_variables[
+                    f"filter_type_{key0}{suffix}/matrix_{layer_idx + 1}{key1}"
+                ] = layer.w
+                embedding_net_variables[
+                    f"filter_type_{key0}{suffix}/bias_{layer_idx + 1}{key1}"
+                ] = layer.b
+                if layer.idt is not None:
+                    embedding_net_variables[
+                        f"filter_type_{key0}{suffix}/idt_{layer_idx + 1}{key1}"
+                    ] = layer.idt.reshape(1, -1)
+                else:
+                    # prevent keyError
+                    embedding_net_variables[
+                        f"filter_type_{key0}{suffix}/idt_{layer_idx + 1}{key1}"
+                    ] = 0.0
+        return embedding_net_variables
diff --git a/deepmd/descriptor/se_a.py b/deepmd/tf/descriptor/se_a.py
similarity index 90%
rename from deepmd/descriptor/se_a.py
rename to deepmd/tf/descriptor/se_a.py
index 721bb0d534..7b22b3efd2 100644
--- a/deepmd/descriptor/se_a.py
+++ b/deepmd/tf/descriptor/se_a.py
@@ -7,20 +7,23 @@
 
 import numpy as np
 
-from deepmd.common import (
+from deepmd.dpmodel.utils.env_mat import (
+    EnvMat,
+)
+from deepmd.tf.common import (
     cast_precision,
     get_activation_func,
     get_np_precision,
     get_precision,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     default_tf_session_config,
     op_module,
     tf,
 )
-from deepmd.nvnmd.descriptor.se_a import (
+from deepmd.tf.nvnmd.descriptor.se_a import (
     build_davg_dstd,
     build_op_descriptor,
     check_switch_range,
@@ -28,40 +31,43 @@
     filter_GR2D,
     filter_lower_R42GR,
 )
-from deepmd.nvnmd.utils.config import (
+from deepmd.tf.nvnmd.utils.config import (
     nvnmd_cfg,
 )
-from deepmd.utils.compress import (
+from deepmd.tf.utils.compress import (
     get_extra_side_embedding_net_variable,
     get_two_side_type_embedding,
     get_type_embedding,
     make_data,
 )
-from deepmd.utils.errors import (
+from deepmd.tf.utils.errors import (
     GraphWithoutTensorError,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_extra_embedding_net_suffix,
     get_extra_embedding_net_variables_from_graph_def,
     get_pattern_nodes_from_graph_def,
     get_tensor_by_name_from_graph,
 )
-from deepmd.utils.network import (
+from deepmd.tf.utils.network import (
     embedding_net,
     embedding_net_rand_seed_shift,
 )
-from deepmd.utils.sess import (
+from deepmd.tf.utils.sess import (
     run_sess,
 )
-from deepmd.utils.spin import (
+from deepmd.tf.utils.spin import (
     Spin,
 )
-from deepmd.utils.tabulate import (
+from deepmd.tf.utils.tabulate import (
     DPTabulate,
 )
-from deepmd.utils.type_embed import (
+from deepmd.tf.utils.type_embed import (
     embed_atom_type,
 )
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
 
 from .descriptor import (
     Descriptor,
@@ -112,7 +118,7 @@ class DescrptSeA(DescrptSe):
 
     :math:`\mathcal{G}^i_< \in \mathbb{R}^{N \times M_2}` takes first :math:`M_2` columns of
     :math:`\mathcal{G}^i`. The equation of embedding network :math:`\mathcal{N}` can be found at
-    :meth:`deepmd.utils.network.embedding_net`.
+    :meth:`deepmd.tf.utils.network.embedding_net`.
 
     Parameters
     ----------
@@ -120,7 +126,7 @@ class DescrptSeA(DescrptSe):
             The cut-off radius :math:`r_c`
     rcut_smth
             From where the environment matrix should be smoothed :math:`r_s`
-    sel : list[str]
+    sel : list[int]
             sel[i] specifies the maxmum number of type i atoms in the cut-off radius
     neuron : list[int]
             Number of neurons in each hidden layers of the embedding net :math:`\mathcal{N}`
@@ -148,6 +154,8 @@ class DescrptSeA(DescrptSe):
             Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed
     multi_task
             If the model has multi fitting nets to train.
+    env_protection: float
+            Protection parameter to prevent division by zero errors during environment matrix calculations.
 
     References
     ----------
@@ -161,7 +169,7 @@ def __init__(
         self,
         rcut: float,
         rcut_smth: float,
-        sel: List[str],
+        sel: List[int],
         neuron: List[int] = [24, 48, 96],
         axis_neuron: int = 8,
         resnet_dt: bool = False,
@@ -176,6 +184,7 @@ def __init__(
         multi_task: bool = False,
         spin: Optional[Spin] = None,
         stripped_type_embedding: bool = False,
+        env_protection: float = 0.0,  # not implement!!
         **kwargs,
     ) -> None:
         """Constructor."""
@@ -183,6 +192,8 @@ def __init__(
             raise RuntimeError(
                 f"rcut_smth ({rcut_smth:f}) should be no more than rcut ({rcut:f})!"
             )
+        if env_protection != 0.0:
+            raise NotImplementedError("env_protection != 0.0 is not supported.")
         self.sel_a = sel
         self.rcut_r = rcut
         self.rcut_r_smth = rcut_smth
@@ -195,9 +206,12 @@ def __init__(
         self.trainable = trainable
         self.compress_activation_fn = get_activation_func(activation_function)
         self.filter_activation_fn = get_activation_func(activation_function)
+        self.activation_function_name = activation_function
         self.filter_precision = get_precision(precision)
         self.filter_np_precision = get_np_precision(precision)
+        self.orig_exclude_types = exclude_types
         self.exclude_types = set()
+        self.env_protection = env_protection
         for tt in exclude_types:
             assert len(tt) == 2
             self.exclude_types.add((tt[0], tt[1]))
@@ -333,15 +347,15 @@ def compute_input_stats(
         Parameters
         ----------
         data_coord
-            The coordinates. Can be generated by deepmd.model.make_stat_input
+            The coordinates. Can be generated by deepmd.tf.model.make_stat_input
         data_box
-            The box. Can be generated by deepmd.model.make_stat_input
+            The box. Can be generated by deepmd.tf.model.make_stat_input
         data_atype
-            The atom types. Can be generated by deepmd.model.make_stat_input
+            The atom types. Can be generated by deepmd.tf.model.make_stat_input
         natoms_vec
-            The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.model.make_stat_input
+            The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.tf.model.make_stat_input
         mesh
-            The mesh for neighbor searching. Can be generated by deepmd.model.make_stat_input
+            The mesh for neighbor searching. Can be generated by deepmd.tf.model.make_stat_input
         input_dict
             Dictionary for additional input
         **kwargs
@@ -708,7 +722,10 @@ def prod_force_virial(
         tf.summary.histogram("net_derivative", net_deriv)
         net_deriv_reshape = tf.reshape(
             net_deriv,
-            [np.cast["int64"](-1), natoms[0] * np.cast["int64"](self.ndescrpt)],
+            [
+                np.asarray(-1, dtype=np.int64),
+                natoms[0] * np.asarray(self.ndescrpt, dtype=np.int64),
+            ],
         )
         force = op_module.prod_force_se_a(
             net_deriv_reshape,
@@ -1342,3 +1359,109 @@ def explicit_ntypes(self) -> bool:
         if self.stripped_type_embedding:
             return True
         return False
+
+    @classmethod
+    def deserialize(cls, data: dict, suffix: str = ""):
+        """Deserialize the model.
+
+        Parameters
+        ----------
+        data : dict
+            The serialized data
+
+        Returns
+        -------
+        Model
+            The deserialized model
+        """
+        if cls is not DescrptSeA:
+            raise NotImplementedError("Not implemented in class %s" % cls.__name__)
+        data = data.copy()
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data.pop("@class", None)
+        data.pop("type", None)
+        embedding_net_variables = cls.deserialize_network(
+            data.pop("embeddings"), suffix=suffix
+        )
+        data.pop("env_mat")
+        variables = data.pop("@variables")
+        descriptor = cls(**data)
+        descriptor.embedding_net_variables = embedding_net_variables
+        descriptor.davg = variables["davg"].reshape(
+            descriptor.ntypes, descriptor.ndescrpt
+        )
+        descriptor.dstd = variables["dstd"].reshape(
+            descriptor.ntypes, descriptor.ndescrpt
+        )
+        return descriptor
+
+    def serialize(self, suffix: str = "") -> dict:
+        """Serialize the model.
+
+        Parameters
+        ----------
+        suffix : str, optional
+            The suffix of the scope
+
+        Returns
+        -------
+        dict
+            The serialized data
+        """
+        if type(self) is not DescrptSeA:
+            raise NotImplementedError(
+                "Not implemented in class %s" % self.__class__.__name__
+            )
+        if self.stripped_type_embedding:
+            raise NotImplementedError(
+                "stripped_type_embedding is unsupported by the native model"
+            )
+        if (self.original_sel != self.sel_a).any():
+            raise NotImplementedError(
+                "Adjusting sel is unsupported by the native model"
+            )
+        if self.embedding_net_variables is None:
+            raise RuntimeError("init_variables must be called before serialize")
+        if self.spin is not None:
+            raise NotImplementedError("spin is unsupported")
+        assert self.davg is not None
+        assert self.dstd is not None
+        # TODO: tf: handle type embedding in DescrptSeA.serialize
+        # not sure how to handle type embedding - type embedding is not a model parameter,
+        # but instead a part of the input data. Maybe the interface should be refactored...
+
+        return {
+            "@class": "Descriptor",
+            "type": "se_e2_a",
+            "@version": 1,
+            "rcut": self.rcut_r,
+            "rcut_smth": self.rcut_r_smth,
+            "sel": self.sel_a,
+            "neuron": self.filter_neuron,
+            "axis_neuron": self.n_axis_neuron,
+            "resnet_dt": self.filter_resnet_dt,
+            "trainable": self.trainable,
+            "type_one_side": self.type_one_side,
+            "exclude_types": list(self.orig_exclude_types),
+            "env_protection": self.env_protection,
+            "set_davg_zero": self.set_davg_zero,
+            "activation_function": self.activation_function_name,
+            "precision": self.filter_precision.name,
+            "embeddings": self.serialize_network(
+                ntypes=self.ntypes,
+                ndim=(1 if self.type_one_side else 2),
+                in_dim=1,
+                neuron=self.filter_neuron,
+                activation_function=self.activation_function_name,
+                resnet_dt=self.filter_resnet_dt,
+                variables=self.embedding_net_variables,
+                excluded_types=self.exclude_types,
+                suffix=suffix,
+            ),
+            "env_mat": EnvMat(self.rcut_r, self.rcut_r_smth).serialize(),
+            "@variables": {
+                "davg": self.davg.reshape(self.ntypes, self.nnei_a, 4),
+                "dstd": self.dstd.reshape(self.ntypes, self.nnei_a, 4),
+            },
+            "spin": self.spin,
+        }
diff --git a/deepmd/descriptor/se_a_ebd.py b/deepmd/tf/descriptor/se_a_ebd.py
similarity index 99%
rename from deepmd/descriptor/se_a_ebd.py
rename to deepmd/tf/descriptor/se_a_ebd.py
index 4816ec1569..f252bf114c 100644
--- a/deepmd/descriptor/se_a_ebd.py
+++ b/deepmd/tf/descriptor/se_a_ebd.py
@@ -6,15 +6,15 @@
 
 import numpy as np
 
-from deepmd.common import (
+from deepmd.tf.common import (
     add_data_requirement,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     op_module,
     tf,
 )
-from deepmd.utils.network import (
+from deepmd.tf.utils.network import (
     embedding_net,
     one_layer,
 )
@@ -38,7 +38,7 @@ class DescrptSeAEbd(DescrptSeA):
             The cut-off radius
     rcut_smth
             From where the environment matrix should be smoothed
-    sel : list[str]
+    sel : list[int]
             sel[i] specifies the maxmum number of type i atoms in the cut-off radius
     neuron : list[int]
             Number of neurons in each hidden layers of the embedding net
@@ -74,7 +74,7 @@ def __init__(
         self,
         rcut: float,
         rcut_smth: float,
-        sel: List[str],
+        sel: List[int],
         neuron: List[int] = [24, 48, 96],
         axis_neuron: int = 8,
         resnet_dt: bool = False,
diff --git a/deepmd/descriptor/se_a_ebd_v2.py b/deepmd/tf/descriptor/se_a_ebd_v2.py
similarity index 96%
rename from deepmd/descriptor/se_a_ebd_v2.py
rename to deepmd/tf/descriptor/se_a_ebd_v2.py
index c6e3cebc71..0d2acbc9d5 100644
--- a/deepmd/descriptor/se_a_ebd_v2.py
+++ b/deepmd/tf/descriptor/se_a_ebd_v2.py
@@ -5,7 +5,7 @@
     Optional,
 )
 
-from deepmd.utils.spin import (
+from deepmd.tf.utils.spin import (
     Spin,
 )
 
@@ -31,7 +31,7 @@ def __init__(
         self,
         rcut: float,
         rcut_smth: float,
-        sel: List[str],
+        sel: List[int],
         neuron: List[int] = [24, 48, 96],
         axis_neuron: int = 8,
         resnet_dt: bool = False,
diff --git a/deepmd/descriptor/se_a_ef.py b/deepmd/tf/descriptor/se_a_ef.py
similarity index 97%
rename from deepmd/descriptor/se_a_ef.py
rename to deepmd/tf/descriptor/se_a_ef.py
index 32a62b48f3..f1201d30fb 100644
--- a/deepmd/descriptor/se_a_ef.py
+++ b/deepmd/tf/descriptor/se_a_ef.py
@@ -7,17 +7,17 @@
 
 import numpy as np
 
-from deepmd.common import (
+from deepmd.tf.common import (
     add_data_requirement,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     default_tf_session_config,
     op_module,
     tf,
 )
-from deepmd.utils.sess import (
+from deepmd.tf.utils.sess import (
     run_sess,
 )
 
@@ -42,7 +42,7 @@ class DescrptSeAEf(DescrptSe):
             The cut-off radius
     rcut_smth
             From where the environment matrix should be smoothed
-    sel : list[str]
+    sel : list[int]
             sel[i] specifies the maxmum number of type i atoms in the cut-off radius
     neuron : list[int]
             Number of neurons in each hidden layers of the embedding net
@@ -74,7 +74,7 @@ def __init__(
         self,
         rcut: float,
         rcut_smth: float,
-        sel: List[str],
+        sel: List[int],
         neuron: List[int] = [24, 48, 96],
         axis_neuron: int = 8,
         resnet_dt: bool = False,
@@ -180,15 +180,15 @@ def compute_input_stats(
         Parameters
         ----------
         data_coord
-            The coordinates. Can be generated by deepmd.model.make_stat_input
+            The coordinates. Can be generated by deepmd.tf.model.make_stat_input
         data_box
-            The box. Can be generated by deepmd.model.make_stat_input
+            The box. Can be generated by deepmd.tf.model.make_stat_input
         data_atype
-            The atom types. Can be generated by deepmd.model.make_stat_input
+            The atom types. Can be generated by deepmd.tf.model.make_stat_input
         natoms_vec
-            The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.model.make_stat_input
+            The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.tf.model.make_stat_input
         mesh
-            The mesh for neighbor searching. Can be generated by deepmd.model.make_stat_input
+            The mesh for neighbor searching. Can be generated by deepmd.tf.model.make_stat_input
         input_dict
             Dictionary for additional input
         **kwargs
@@ -305,7 +305,7 @@ def __init__(
         op,
         rcut: float,
         rcut_smth: float,
-        sel: List[str],
+        sel: List[int],
         neuron: List[int] = [24, 48, 96],
         axis_neuron: int = 8,
         resnet_dt: bool = False,
diff --git a/deepmd/descriptor/se_a_mask.py b/deepmd/tf/descriptor/se_a_mask.py
similarity index 95%
rename from deepmd/descriptor/se_a_mask.py
rename to deepmd/tf/descriptor/se_a_mask.py
index cc2e6b4fc8..d1ae5d7bad 100644
--- a/deepmd/descriptor/se_a_mask.py
+++ b/deepmd/tf/descriptor/se_a_mask.py
@@ -10,18 +10,18 @@
 
 import numpy as np
 
-from deepmd.common import (
+from deepmd.tf.common import (
     get_activation_func,
     get_precision,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     default_tf_session_config,
     op_module,
     tf,
 )
-from deepmd.utils.network import (
+from deepmd.tf.utils.network import (
     embedding_net_rand_seed_shift,
 )
 
@@ -73,7 +73,7 @@ class DescrptSeAMask(DescrptSeA):
 
     :math:`\mathcal{G}^i_< \in \mathbb{R}^{N \times M_2}` takes first :math:`M_2` columns of
     :math:`\mathcal{G}^i`. The equation of embedding network :math:`\mathcal{N}` can be found at
-    :meth:`deepmd.utils.network.embedding_net`.
+    :meth:`deepmd.tf.utils.network.embedding_net`.
     Specially for descriptor se_a_mask is a concise implementation of se_a.
     The difference is that se_a_mask only considered a non-pbc system.
     And accept a mask matrix to indicate the atom i in frame j is a real atom or not.
@@ -82,7 +82,7 @@ class DescrptSeAMask(DescrptSeA):
 
     Parameters
     ----------
-    sel : list[str]
+    sel : list[int]
             sel[i] specifies the maxmum number of type i atoms in the neighbor list.
     neuron : list[int]
             Number of neurons in each hidden layers of the embedding net :math:`\mathcal{N}`
@@ -117,7 +117,7 @@ class DescrptSeAMask(DescrptSeA):
 
     def __init__(
         self,
-        sel: List[str],
+        sel: List[int],
         neuron: List[int] = [24, 48, 96],
         axis_neuron: int = 8,
         resnet_dt: bool = False,
@@ -235,24 +235,23 @@ def compute_input_stats(
         Parameters
         ----------
         data_coord
-            The coordinates. Can be generated by deepmd.model.make_stat_input
+            The coordinates. Can be generated by deepmd.tf.model.make_stat_input
         data_box
-            The box. Can be generated by deepmd.model.make_stat_input
+            The box. Can be generated by deepmd.tf.model.make_stat_input
         data_atype
-            The atom types. Can be generated by deepmd.model.make_stat_input
+            The atom types. Can be generated by deepmd.tf.model.make_stat_input
         natoms_vec
-            The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.model.make_stat_input
+            The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.tf.model.make_stat_input
         mesh
-            The mesh for neighbor searching. Can be generated by deepmd.model.make_stat_input
+            The mesh for neighbor searching. Can be generated by deepmd.tf.model.make_stat_input
         input_dict
             Dictionary for additional input
         **kwargs
             Additional keyword arguments.
         """
-        """
-        TODO: Since not all input atoms are real in se_a_mask,
-        statistics should be reimplemented for se_a_mask descriptor.
-        """
+        # TODO: implement compute_input_stats for DescrptSeAMask
+        # Since not all input atoms are real in se_a_mask,
+        # statistics should be reimplemented for se_a_mask descriptor.
 
         self.davg = None
         self.dstd = None
diff --git a/deepmd/descriptor/se_atten.py b/deepmd/tf/descriptor/se_atten.py
similarity index 96%
rename from deepmd/descriptor/se_atten.py
rename to deepmd/tf/descriptor/se_atten.py
index 1ceda23065..51e34e9b08 100644
--- a/deepmd/descriptor/se_atten.py
+++ b/deepmd/tf/descriptor/se_atten.py
@@ -12,11 +12,11 @@
     Version,
 )
 
-from deepmd.common import (
+from deepmd.tf.common import (
     cast_precision,
     get_np_precision,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     TF_VERSION,
@@ -24,7 +24,7 @@
     op_module,
     tf,
 )
-from deepmd.nvnmd.descriptor.se_atten import (
+from deepmd.tf.nvnmd.descriptor.se_atten import (
     build_davg_dstd,
     build_op_descriptor,
     check_switch_range,
@@ -32,31 +32,34 @@
     filter_GR2D,
     filter_lower_R42GR,
 )
-from deepmd.nvnmd.utils.config import (
+from deepmd.tf.nvnmd.utils.config import (
     nvnmd_cfg,
 )
-from deepmd.utils.compress import (
+from deepmd.tf.utils.compress import (
     get_extra_side_embedding_net_variable,
     get_two_side_type_embedding,
     make_data,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_attention_layer_variables_from_graph_def,
     get_extra_embedding_net_suffix,
     get_extra_embedding_net_variables_from_graph_def,
     get_pattern_nodes_from_graph_def,
     get_tensor_by_name_from_graph,
 )
-from deepmd.utils.network import (
+from deepmd.tf.utils.network import (
     embedding_net,
     one_layer,
 )
-from deepmd.utils.sess import (
+from deepmd.tf.utils.sess import (
     run_sess,
 )
-from deepmd.utils.tabulate import (
+from deepmd.tf.utils.tabulate import (
     DPTabulate,
 )
+from deepmd.tf.utils.update_sel import (
+    UpdateSel,
+)
 
 from .descriptor import (
     Descriptor,
@@ -68,6 +71,7 @@
 log = logging.getLogger(__name__)
 
 
+@Descriptor.register("dpa1")
 @Descriptor.register("se_atten")
 class DescrptSeAtten(DescrptSeA):
     r"""Smooth version descriptor with attention.
@@ -78,7 +82,7 @@ class DescrptSeAtten(DescrptSeA):
             The cut-off radius :math:`r_c`
     rcut_smth
             From where the environment matrix should be smoothed :math:`r_s`
-    sel : list[str]
+    sel : int
             sel[i] specifies the maxmum number of type i atoms in the cut-off radius
     neuron : list[int]
             Number of neurons in each hidden layers of the embedding net :math:`\mathcal{N}`
@@ -152,6 +156,16 @@ def __init__(
         multi_task: bool = False,
         stripped_type_embedding: bool = False,
         smooth_type_embdding: bool = False,
+        # not implemented
+        post_ln=True,
+        ffn=False,
+        ffn_embed_dim=1024,
+        scaling_factor=1.0,
+        head_num=1,
+        normalize=True,
+        temperature=None,
+        return_rot=False,
+        concat_output_tebd: bool = True,
         **kwargs,
     ) -> None:
         if not set_davg_zero and not (stripped_type_embedding and smooth_type_embdding):
@@ -159,6 +173,24 @@ def __init__(
                 "Set 'set_davg_zero' False in descriptor 'se_atten' "
                 "may cause unexpected incontinuity during model inference!"
             )
+        if not post_ln:
+            raise NotImplementedError("post_ln is not supported.")
+        if ffn:
+            raise NotImplementedError("ffn is not supported.")
+        if ffn_embed_dim != 1024:
+            raise NotImplementedError("ffn_embed_dim is not supported.")
+        if scaling_factor != 1.0:
+            raise NotImplementedError("scaling_factor is not supported.")
+        if head_num != 1:
+            raise NotImplementedError("head_num is not supported.")
+        if not normalize:
+            raise NotImplementedError("normalize is not supported.")
+        if temperature is not None:
+            raise NotImplementedError("temperature is not supported.")
+        if return_rot:
+            raise NotImplementedError("return_rot is not supported.")
+        if not concat_output_tebd:
+            raise NotImplementedError("concat_output_tebd is not supported.")
         DescrptSeA.__init__(
             self,
             rcut,
@@ -269,16 +301,16 @@ def compute_input_stats(
         Parameters
         ----------
         data_coord
-            The coordinates. Can be generated by deepmd.model.make_stat_input
+            The coordinates. Can be generated by deepmd.tf.model.make_stat_input
         data_box
-            The box. Can be generated by deepmd.model.make_stat_input
+            The box. Can be generated by deepmd.tf.model.make_stat_input
         data_atype
-            The atom types. Can be generated by deepmd.model.make_stat_input
+            The atom types. Can be generated by deepmd.tf.model.make_stat_input
         natoms_vec
             The vector for the number of atoms of the system and different types of atoms.
             If mixed_type is True, this para is blank. See real_natoms_vec.
         mesh
-            The mesh for neighbor searching. Can be generated by deepmd.model.make_stat_input
+            The mesh for neighbor searching. Can be generated by deepmd.tf.model.make_stat_input
         input_dict
             Dictionary for additional input
         mixed_type
@@ -990,6 +1022,7 @@ def _attention_layers(
                 input_xyz = tf.keras.layers.LayerNormalization(
                     beta_initializer=tf.constant_initializer(self.beta[i]),
                     gamma_initializer=tf.constant_initializer(self.gamma[i]),
+                    dtype=self.filter_precision,
                 )(input_xyz)
                 # input_xyz = self._feedforward(input_xyz, outputs_size[-1], self.att_n)
         return input_xyz
@@ -1339,7 +1372,7 @@ def build_type_exclude_mask(
         Notes
         -----
         This method has the similiar way to build the type exclude mask as
-        :meth:`deepmd.descriptor.descriptor.Descriptor.build_type_exclude_mask`.
+        :meth:`deepmd.tf.descriptor.descriptor.Descriptor.build_type_exclude_mask`.
         The mathmatical expression has been explained in that method.
         The difference is that the attention descriptor has provided the type of
         the neighbors (idx_j) that is not in order, so we use it from an extra
@@ -1373,7 +1406,7 @@ def build_type_exclude_mask(
 
         See Also
         --------
-        deepmd.descriptor.descriptor.Descriptor.build_type_exclude_mask
+        deepmd.tf.descriptor.descriptor.Descriptor.build_type_exclude_mask
         """
         # generate a mask
         # op returns ntypes when the neighbor doesn't exist, so we need to add 1
@@ -1424,9 +1457,5 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict):
         local_jdata : dict
             The local data refer to the current class
         """
-        from deepmd.entrypoints.train import (
-            update_one_sel,
-        )
-
         local_jdata_cpy = local_jdata.copy()
-        return update_one_sel(global_jdata, local_jdata_cpy, True)
+        return UpdateSel().update_one_sel(global_jdata, local_jdata_cpy, True)
diff --git a/deepmd/descriptor/se_atten_v2.py b/deepmd/tf/descriptor/se_atten_v2.py
similarity index 99%
rename from deepmd/descriptor/se_atten_v2.py
rename to deepmd/tf/descriptor/se_atten_v2.py
index 0e1a70262f..784e02d84d 100644
--- a/deepmd/descriptor/se_atten_v2.py
+++ b/deepmd/tf/descriptor/se_atten_v2.py
@@ -25,7 +25,7 @@ class DescrptSeAttenV2(DescrptSeAtten):
             The cut-off radius :math:`r_c`
     rcut_smth
             From where the environment matrix should be smoothed :math:`r_s`
-    sel : list[str]
+    sel : int
             sel[i] specifies the maxmum number of type i atoms in the cut-off radius
     neuron : list[int]
             Number of neurons in each hidden layers of the embedding net :math:`\mathcal{N}`
diff --git a/deepmd/descriptor/se_r.py b/deepmd/tf/descriptor/se_r.py
similarity index 83%
rename from deepmd/descriptor/se_r.py
rename to deepmd/tf/descriptor/se_r.py
index ae926c339f..1443914aab 100644
--- a/deepmd/descriptor/se_r.py
+++ b/deepmd/tf/descriptor/se_r.py
@@ -7,34 +7,40 @@
 
 import numpy as np
 
-from deepmd.common import (
+from deepmd.dpmodel.utils.env_mat import (
+    EnvMat,
+)
+from deepmd.tf.common import (
     cast_precision,
     get_activation_func,
     get_precision,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     default_tf_session_config,
     op_module,
     tf,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_tensor_by_name_from_graph,
 )
-from deepmd.utils.network import (
+from deepmd.tf.utils.network import (
     embedding_net,
     embedding_net_rand_seed_shift,
 )
-from deepmd.utils.sess import (
+from deepmd.tf.utils.sess import (
     run_sess,
 )
-from deepmd.utils.spin import (
+from deepmd.tf.utils.spin import (
     Spin,
 )
-from deepmd.utils.tabulate import (
+from deepmd.tf.utils.tabulate import (
     DPTabulate,
 )
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
 
 from .descriptor import (
     Descriptor,
@@ -57,7 +63,7 @@ class DescrptSeR(DescrptSe):
             The cut-off radius
     rcut_smth
             From where the environment matrix should be smoothed
-    sel : list[str]
+    sel : list[int]
             sel[i] specifies the maxmum number of type i atoms in the cut-off radius
     neuron : list[int]
             Number of neurons in each hidden layers of the embedding net
@@ -85,7 +91,7 @@ def __init__(
         self,
         rcut: float,
         rcut_smth: float,
-        sel: List[str],
+        sel: List[int],
         neuron: List[int] = [24, 48, 96],
         resnet_dt: bool = False,
         trainable: bool = True,
@@ -98,6 +104,7 @@ def __init__(
         uniform_seed: bool = False,
         multi_task: bool = False,
         spin: Optional[Spin] = None,
+        env_protection: float = 0.0,  # not implement!!
         **kwargs,
     ) -> None:
         """Constructor."""
@@ -105,6 +112,8 @@ def __init__(
             raise RuntimeError(
                 f"rcut_smth ({rcut_smth:f}) should be no more than rcut ({rcut:f})!"
             )
+        if env_protection != 0.0:
+            raise NotImplementedError("env_protection != 0.0 is not supported.")
         self.sel_r = sel
         self.rcut = rcut
         self.rcut_smth = rcut_smth
@@ -115,9 +124,11 @@ def __init__(
         self.seed_shift = embedding_net_rand_seed_shift(self.filter_neuron)
         self.trainable = trainable
         self.filter_activation_fn = get_activation_func(activation_function)
+        self.activation_function_name = activation_function
         self.filter_precision = get_precision(precision)
-        exclude_types = exclude_types
+        self.orig_exclude_types = exclude_types
         self.exclude_types = set()
+        self.env_protection = env_protection
         for tt in exclude_types:
             assert len(tt) == 2
             self.exclude_types.add((tt[0], tt[1]))
@@ -235,15 +246,15 @@ def compute_input_stats(
         Parameters
         ----------
         data_coord
-            The coordinates. Can be generated by deepmd.model.make_stat_input
+            The coordinates. Can be generated by deepmd.tf.model.make_stat_input
         data_box
-            The box. Can be generated by deepmd.model.make_stat_input
+            The box. Can be generated by deepmd.tf.model.make_stat_input
         data_atype
-            The atom types. Can be generated by deepmd.model.make_stat_input
+            The atom types. Can be generated by deepmd.tf.model.make_stat_input
         natoms_vec
-            The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.model.make_stat_input
+            The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.tf.model.make_stat_input
         mesh
-            The mesh for neighbor searching. Can be generated by deepmd.model.make_stat_input
+            The mesh for neighbor searching. Can be generated by deepmd.tf.model.make_stat_input
         input_dict
             Dictionary for additional input
         **kwargs
@@ -500,7 +511,10 @@ def prod_force_virial(
         tf.summary.histogram("net_derivative", net_deriv)
         net_deriv_reshape = tf.reshape(
             net_deriv,
-            [np.cast["int64"](-1), natoms[0] * np.cast["int64"](self.ndescrpt)],
+            [
+                np.asarray(-1, dtype=np.int64),
+                natoms[0] * np.asarray(self.ndescrpt, dtype=np.int64),
+            ],
         )
         force = op_module.prod_force_se_r(
             net_deriv_reshape, self.descrpt_deriv, self.nlist, natoms
@@ -695,3 +709,97 @@ def _filter_r(
             result = tf.reduce_mean(xyz_scatter, axis=1) * res_rescale
 
         return result
+
+    @classmethod
+    def deserialize(cls, data: dict, suffix: str = ""):
+        """Deserialize the model.
+
+        Parameters
+        ----------
+        data : dict
+            The serialized data
+
+        Returns
+        -------
+        Model
+            The deserialized model
+        """
+        if cls is not DescrptSeR:
+            raise NotImplementedError("Not implemented in class %s" % cls.__name__)
+        data = data.copy()
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        embedding_net_variables = cls.deserialize_network(
+            data.pop("embeddings"), suffix=suffix
+        )
+        data.pop("env_mat")
+        variables = data.pop("@variables")
+        descriptor = cls(**data)
+        descriptor.embedding_net_variables = embedding_net_variables
+        descriptor.davg = variables["davg"].reshape(
+            descriptor.ntypes, descriptor.ndescrpt
+        )
+        descriptor.dstd = variables["dstd"].reshape(
+            descriptor.ntypes, descriptor.ndescrpt
+        )
+        return descriptor
+
+    def serialize(self, suffix: str = "") -> dict:
+        """Serialize the model.
+
+        Parameters
+        ----------
+        suffix : str, optional
+            The suffix of the scope
+
+        Returns
+        -------
+        dict
+            The serialized data
+        """
+        if type(self) is not DescrptSeR:
+            raise NotImplementedError(
+                "Not implemented in class %s" % self.__class__.__name__
+            )
+        if self.embedding_net_variables is None:
+            raise RuntimeError("init_variables must be called before serialize")
+        if self.spin is not None:
+            raise NotImplementedError("spin is unsupported")
+        assert self.davg is not None
+        assert self.dstd is not None
+        # TODO: tf: handle type embedding in DescrptSeR.serialize
+        # not sure how to handle type embedding - type embedding is not a model parameter,
+        # but instead a part of the input data. Maybe the interface should be refactored...
+        return {
+            "@class": "Descriptor",
+            "type": "se_r",
+            "@version": 1,
+            "rcut": self.rcut,
+            "rcut_smth": self.rcut_smth,
+            "sel": self.sel_r,
+            "neuron": self.filter_neuron,
+            "resnet_dt": self.filter_resnet_dt,
+            "trainable": self.trainable,
+            "type_one_side": self.type_one_side,
+            "exclude_types": list(self.orig_exclude_types),
+            "env_protection": self.env_protection,
+            "set_davg_zero": self.set_davg_zero,
+            "activation_function": self.activation_function_name,
+            "precision": self.filter_precision.name,
+            "embeddings": self.serialize_network(
+                ntypes=self.ntypes,
+                ndim=(1 if self.type_one_side else 2),
+                in_dim=1,
+                neuron=self.filter_neuron,
+                activation_function=self.activation_function_name,
+                resnet_dt=self.filter_resnet_dt,
+                variables=self.embedding_net_variables,
+                excluded_types=self.exclude_types,
+                suffix=suffix,
+            ),
+            "env_mat": EnvMat(self.rcut, self.rcut_smth).serialize(),
+            "@variables": {
+                "davg": self.davg.reshape(self.ntypes, self.nnei_r, 1),
+                "dstd": self.dstd.reshape(self.ntypes, self.nnei_r, 1),
+            },
+            "spin": self.spin,
+        }
diff --git a/deepmd/descriptor/se_t.py b/deepmd/tf/descriptor/se_t.py
similarity index 97%
rename from deepmd/descriptor/se_t.py
rename to deepmd/tf/descriptor/se_t.py
index d0c9fcbc2e..4f6cda6c9c 100644
--- a/deepmd/descriptor/se_t.py
+++ b/deepmd/tf/descriptor/se_t.py
@@ -7,29 +7,29 @@
 
 import numpy as np
 
-from deepmd.common import (
+from deepmd.tf.common import (
     cast_precision,
     get_activation_func,
     get_precision,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     default_tf_session_config,
     op_module,
     tf,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_tensor_by_name_from_graph,
 )
-from deepmd.utils.network import (
+from deepmd.tf.utils.network import (
     embedding_net,
     embedding_net_rand_seed_shift,
 )
-from deepmd.utils.sess import (
+from deepmd.tf.utils.sess import (
     run_sess,
 )
-from deepmd.utils.tabulate import (
+from deepmd.tf.utils.tabulate import (
     DPTabulate,
 )
 
@@ -56,7 +56,7 @@ class DescrptSeT(DescrptSe):
             The cut-off radius
     rcut_smth
             From where the environment matrix should be smoothed
-    sel : list[str]
+    sel : list[int]
             sel[i] specifies the maxmum number of type i atoms in the cut-off radius
     neuron : list[int]
             Number of neurons in each hidden layers of the embedding net
@@ -81,7 +81,7 @@ def __init__(
         self,
         rcut: float,
         rcut_smth: float,
-        sel: List[str],
+        sel: List[int],
         neuron: List[int] = [24, 48, 96],
         resnet_dt: bool = False,
         trainable: bool = True,
@@ -225,15 +225,15 @@ def compute_input_stats(
         Parameters
         ----------
         data_coord
-            The coordinates. Can be generated by deepmd.model.make_stat_input
+            The coordinates. Can be generated by deepmd.tf.model.make_stat_input
         data_box
-            The box. Can be generated by deepmd.model.make_stat_input
+            The box. Can be generated by deepmd.tf.model.make_stat_input
         data_atype
-            The atom types. Can be generated by deepmd.model.make_stat_input
+            The atom types. Can be generated by deepmd.tf.model.make_stat_input
         natoms_vec
-            The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.model.make_stat_input
+            The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.tf.model.make_stat_input
         mesh
-            The mesh for neighbor searching. Can be generated by deepmd.model.make_stat_input
+            The mesh for neighbor searching. Can be generated by deepmd.tf.model.make_stat_input
         input_dict
             Dictionary for additional input
         **kwargs
@@ -513,7 +513,10 @@ def prod_force_virial(
         [net_deriv] = tf.gradients(atom_ener, self.descrpt_reshape)
         net_deriv_reshape = tf.reshape(
             net_deriv,
-            [np.cast["int64"](-1), natoms[0] * np.cast["int64"](self.ndescrpt)],
+            [
+                np.asarray(-1, dtype=np.int64),
+                natoms[0] * np.asarray(self.ndescrpt, dtype=np.int64),
+            ],
         )
         force = op_module.prod_force_se_a(
             net_deriv_reshape,
diff --git a/deepmd/tf/entrypoints/__init__.py b/deepmd/tf/entrypoints/__init__.py
new file mode 100644
index 0000000000..9c3a8b31e1
--- /dev/null
+++ b/deepmd/tf/entrypoints/__init__.py
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Submodule that contains all the DeePMD-Kit entry point scripts."""
+
+from ..infer.model_devi import (
+    make_model_devi,
+)
+from .compress import (
+    compress,
+)
+from .convert import (
+    convert,
+)
+from .doc import (
+    doc_train_input,
+)
+from .freeze import (
+    freeze,
+)
+from .gui import (
+    start_dpgui,
+)
+from .neighbor_stat import (
+    neighbor_stat,
+)
+from .test import (
+    test,
+)
+
+# import `train` as `train_dp` to avoid the conflict of the
+# module name `train` and the function name `train`
+from .train import train as train_dp
+from .transfer import (
+    transfer,
+)
+
+__all__ = [
+    "doc_train_input",
+    "freeze",
+    "test",
+    "train_dp",
+    "transfer",
+    "compress",
+    "doc_train_input",
+    "make_model_devi",
+    "convert",
+    "neighbor_stat",
+    "start_dpgui",
+]
diff --git a/deepmd/entrypoints/compress.py b/deepmd/tf/entrypoints/compress.py
similarity index 93%
rename from deepmd/entrypoints/compress.py
rename to deepmd/tf/entrypoints/compress.py
index 61d6dfcb44..1f2bbc93a0 100644
--- a/deepmd/entrypoints/compress.py
+++ b/deepmd/tf/entrypoints/compress.py
@@ -8,34 +8,35 @@
     Optional,
 )
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_loader,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_ENER_FLOAT_PRECISION,
     tf,
 )
-from deepmd.utils.argcheck import (
+from deepmd.tf.utils.argcheck import (
     normalize,
 )
-from deepmd.utils.compat import (
+from deepmd.tf.utils.compat import (
     update_deepmd_input,
 )
-from deepmd.utils.errors import (
+from deepmd.tf.utils.errors import (
     GraphTooLargeError,
     GraphWithoutTensorError,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_tensor_by_name_from_graph,
     load_graph_def,
 )
+from deepmd.tf.utils.update_sel import (
+    UpdateSel,
+)
 
 from .freeze import (
     freeze,
 )
 from .train import (
-    get_min_nbor_dist,
-    get_rcut,
     train,
 )
 
@@ -115,7 +116,10 @@ def compress(
             log.info("stage 0: compute the min_nbor_dist")
             jdata = j_loader(training_script)
             jdata = update_deepmd_input(jdata)
-            t_min_nbor_dist = get_min_nbor_dist(jdata, get_rcut(jdata))
+            update_sel = UpdateSel()
+            t_min_nbor_dist = update_sel.get_min_nbor_dist(
+                jdata, update_sel.get_rcut(jdata)
+            )
 
     _check_compress_type(graph)
 
diff --git a/deepmd/entrypoints/convert.py b/deepmd/tf/entrypoints/convert.py
similarity index 97%
rename from deepmd/entrypoints/convert.py
rename to deepmd/tf/entrypoints/convert.py
index bea047ba72..17c8667362 100644
--- a/deepmd/entrypoints/convert.py
+++ b/deepmd/tf/entrypoints/convert.py
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-from deepmd.utils.convert import (
+from deepmd.tf.utils.convert import (
     convert_10_to_21,
     convert_012_to_21,
     convert_12_to_21,
diff --git a/deepmd/tf/entrypoints/doc.py b/deepmd/tf/entrypoints/doc.py
new file mode 100644
index 0000000000..941f989109
--- /dev/null
+++ b/deepmd/tf/entrypoints/doc.py
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.entrypoints.doc import (
+    doc_train_input,
+)
+
+__all__ = ["doc_train_input"]
diff --git a/deepmd/entrypoints/freeze.py b/deepmd/tf/entrypoints/freeze.py
similarity index 93%
rename from deepmd/entrypoints/freeze.py
rename to deepmd/tf/entrypoints/freeze.py
index 22f3cb80b4..3d2a609797 100755
--- a/deepmd/entrypoints/freeze.py
+++ b/deepmd/tf/entrypoints/freeze.py
@@ -12,6 +12,9 @@
 from os.path import (
     abspath,
 )
+from pathlib import (
+    Path,
+)
 from typing import (
     List,
     Optional,
@@ -21,22 +24,22 @@
 import google.protobuf.message
 
 # load grad of force module
-import deepmd.op  # noqa: F401
-from deepmd.env import (
+import deepmd.tf.op  # noqa: F401
+from deepmd.tf.env import (
     FITTING_NET_PATTERN,
     REMOVE_SUFFIX_DICT,
     tf,
 )
-from deepmd.nvnmd.entrypoints.freeze import (
+from deepmd.tf.nvnmd.entrypoints.freeze import (
     save_weight,
 )
-from deepmd.utils.errors import (
+from deepmd.tf.utils.errors import (
     GraphTooLargeError,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_pattern_nodes_from_graph_def,
 )
-from deepmd.utils.sess import (
+from deepmd.tf.utils.sess import (
     run_sess,
 )
 
@@ -149,10 +152,8 @@ def _modify_model_suffix(output_graph_def, out_suffix, freeze_type):
             else:
                 jdata["training"]["training_data"] = {}
                 log.warning(
-                    "The fitting net {} has no training data in input script, resulting in "
-                    "untrained frozen model, and cannot be compressed directly! ".format(
-                        out_suffix
-                    )
+                    f"The fitting net {out_suffix} has no training data in input script, resulting in "
+                    "untrained frozen model, and cannot be compressed directly! "
                 )
             # loss
             if "loss_dict" in jdata:
@@ -356,13 +357,21 @@ def freeze_graph(
     output_node = _make_node_names(
         freeze_type, modifier, out_suffix=out_suffix, node_names=node_names
     )
+    # see #3334
+    optional_node = [
+        "train_attr/min_nbor_dist",
+        "fitting_attr/aparam_nall",
+        "spin_attr/ntypes_spin",
+    ]
     different_set = set(output_node) - set(input_node)
     if different_set:
-        log.warning(
-            "The following nodes are not in the graph: %s. "
-            "Skip freezeing these nodes. You may be freezing "
-            "a checkpoint generated by an old version." % different_set
-        )
+        different_set -= set(optional_node)
+        if different_set:
+            log.warning(
+                "The following nodes are not in the graph: %s. "
+                "Skip freezeing these nodes. You may be freezing "
+                "a checkpoint generated by an old version." % different_set
+            )
         # use intersection as output list
         output_node = list(set(output_node) & set(input_node))
     log.info(f"The following nodes will be frozen: {output_node}")
@@ -479,7 +488,7 @@ def freeze(
     Parameters
     ----------
     checkpoint_folder : str
-        location of the folder with model
+        location of either the folder with checkpoint or the checkpoint prefix
     output : str
         output file name
     node_names : Optional[str], optional
@@ -492,8 +501,11 @@ def freeze(
         other arguments
     """
     # We retrieve our checkpoint fullpath
-    checkpoint = tf.train.get_checkpoint_state(checkpoint_folder)
-    input_checkpoint = checkpoint.model_checkpoint_path
+    if Path(checkpoint_folder).is_dir():
+        checkpoint = tf.train.get_checkpoint_state(checkpoint_folder)
+        input_checkpoint = checkpoint.model_checkpoint_path
+    else:
+        input_checkpoint = checkpoint_folder
 
     # expand the output file to full path
     output_graph = abspath(output)
diff --git a/deepmd/tf/entrypoints/gui.py b/deepmd/tf/entrypoints/gui.py
new file mode 100644
index 0000000000..ffeee29f7d
--- /dev/null
+++ b/deepmd/tf/entrypoints/gui.py
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.entrypoints.gui import (
+    start_dpgui,
+)
+
+__all__ = ["start_dpgui"]
diff --git a/deepmd/entrypoints/ipi.py b/deepmd/tf/entrypoints/ipi.py
similarity index 95%
rename from deepmd/entrypoints/ipi.py
rename to deepmd/tf/entrypoints/ipi.py
index da287ff3de..1631a35c2e 100644
--- a/deepmd/entrypoints/ipi.py
+++ b/deepmd/tf/entrypoints/ipi.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 """Use dp_ipi inside the Python package."""
+
 import os
 import subprocess
 import sys
@@ -7,7 +8,7 @@
     List,
 )
 
-from deepmd.lmp import (
+from deepmd.tf.lmp import (
     get_op_dir,
 )
 
diff --git a/deepmd/tf/entrypoints/main.py b/deepmd/tf/entrypoints/main.py
new file mode 100644
index 0000000000..493e5b7aa4
--- /dev/null
+++ b/deepmd/tf/entrypoints/main.py
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""DeePMD-Kit entry point module."""
+
+import argparse
+from pathlib import (
+    Path,
+)
+from typing import (
+    List,
+    Optional,
+    Union,
+)
+
+from deepmd.backend.suffix import (
+    format_model_suffix,
+)
+from deepmd.main import (
+    get_ll,
+    main_parser,
+    parse_args,
+)
+from deepmd.tf.common import (
+    clear_session,
+)
+from deepmd.tf.entrypoints import (
+    compress,
+    convert,
+    freeze,
+    train_dp,
+    transfer,
+)
+from deepmd.tf.loggers import (
+    set_log_handles,
+)
+from deepmd.tf.nvnmd.entrypoints.train import (
+    train_nvnmd,
+)
+
+__all__ = ["main", "parse_args", "get_ll", "main_parser"]
+
+
+def main(args: Optional[Union[List[str], argparse.Namespace]] = None):
+    """DeePMD-Kit entry point.
+
+    Parameters
+    ----------
+    args : List[str] or argparse.Namespace, optional
+        list of command line arguments, used to avoid calling from the subprocess,
+        as it is quite slow to import tensorflow; if Namespace is given, it will
+        be used directly
+
+    Raises
+    ------
+    RuntimeError
+        if no command was input
+    """
+    if args is not None:
+        clear_session()
+
+    if not isinstance(args, argparse.Namespace):
+        args = parse_args(args=args)
+
+    # do not set log handles for None, it is useless
+    # log handles for train will be set separatelly
+    # when the use of MPI will be determined in `RunOptions`
+    if args.command not in (None, "train"):
+        set_log_handles(args.log_level, Path(args.log_path) if args.log_path else None)
+
+    dict_args = vars(args)
+
+    if args.command == "train":
+        train_dp(**dict_args)
+    elif args.command == "freeze":
+        dict_args["output"] = format_model_suffix(
+            dict_args["output"], preferred_backend=args.backend, strict_prefer=True
+        )
+        freeze(**dict_args)
+    elif args.command == "transfer":
+        transfer(**dict_args)
+    elif args.command == "compress":
+        compress(**dict_args)
+    elif args.command == "convert-from":
+        convert(**dict_args)
+    elif args.command == "train-nvnmd":  # nvnmd
+        train_nvnmd(**dict_args)
+    elif args.command is None:
+        pass
+    else:
+        raise RuntimeError(f"unknown command {args.command}")
+
+    if args is not None:
+        clear_session()
diff --git a/deepmd/tf/entrypoints/neighbor_stat.py b/deepmd/tf/entrypoints/neighbor_stat.py
new file mode 100644
index 0000000000..5d31cdd179
--- /dev/null
+++ b/deepmd/tf/entrypoints/neighbor_stat.py
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.entrypoints.neighbor_stat import (
+    neighbor_stat,
+)
+
+__all__ = ["neighbor_stat"]
diff --git a/deepmd/tf/entrypoints/test.py b/deepmd/tf/entrypoints/test.py
new file mode 100644
index 0000000000..8b4ca64179
--- /dev/null
+++ b/deepmd/tf/entrypoints/test.py
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.entrypoints.test import (
+    test,
+)
+
+__all__ = ["test"]
diff --git a/deepmd/entrypoints/train.py b/deepmd/tf/entrypoints/train.py
similarity index 55%
rename from deepmd/entrypoints/train.py
rename to deepmd/tf/entrypoints/train.py
index 227aa13644..e573423fc3 100755
--- a/deepmd/entrypoints/train.py
+++ b/deepmd/tf/entrypoints/train.py
@@ -13,53 +13,41 @@
     Optional,
 )
 
-from deepmd.common import (
-    data_requirement,
-    expand_sys_str,
+from deepmd.tf.common import (
     j_loader,
     j_must_have,
 )
-from deepmd.env import (
-    GLOBAL_ENER_FLOAT_PRECISION,
+from deepmd.tf.env import (
     reset_default_tf_session_config,
     tf,
 )
-from deepmd.infer.data_modifier import (
+from deepmd.tf.infer.data_modifier import (
     DipoleChargeModifier,
 )
-from deepmd.model.model import (
+from deepmd.tf.model.model import (
     Model,
 )
-from deepmd.train.run_options import (
-    BUILD,
-    CITATION,
-    WELCOME,
+from deepmd.tf.train.run_options import (
     RunOptions,
 )
-from deepmd.train.trainer import (
+from deepmd.tf.train.trainer import (
     DPTrainer,
 )
-from deepmd.utils import random as dp_random
-from deepmd.utils.argcheck import (
+from deepmd.tf.utils import random as dp_random
+from deepmd.tf.utils.argcheck import (
     normalize,
 )
-from deepmd.utils.compat import (
+from deepmd.tf.utils.compat import (
     update_deepmd_input,
 )
-from deepmd.utils.data_system import (
-    DeepmdDataSystem,
-)
-from deepmd.utils.finetune import (
+from deepmd.tf.utils.finetune import (
     replace_model_params_with_pretrained_model,
 )
-from deepmd.utils.multi_init import (
+from deepmd.tf.utils.multi_init import (
     replace_model_params_with_frz_multi_model,
 )
-from deepmd.utils.neighbor_stat import (
-    NeighborStat,
-)
-from deepmd.utils.path import (
-    DPPath,
+from deepmd.utils.data_system import (
+    get_data,
 )
 
 __all__ = ["train"]
@@ -159,9 +147,6 @@ def train(
         dtype=tf.string,
     )
 
-    for message in WELCOME + CITATION + BUILD:
-        log.info(message)
-
     run_opt.print_resource_summary()
     if origin_type_map is not None:
         jdata["model"]["origin_type_map"] = origin_type_map
@@ -291,53 +276,6 @@ def _do_work(jdata: Dict[str, Any], run_opt: RunOptions, is_compress: bool = Fal
         log.info("finished compressing")
 
 
-def get_data(jdata: Dict[str, Any], rcut, type_map, modifier, multi_task_mode=False):
-    systems = j_must_have(jdata, "systems")
-    if isinstance(systems, str):
-        systems = expand_sys_str(systems)
-    elif isinstance(systems, list):
-        systems = systems.copy()
-    help_msg = "Please check your setting for data systems"
-    # check length of systems
-    if len(systems) == 0:
-        msg = "cannot find valid a data system"
-        log.fatal(msg)
-        raise OSError(msg, help_msg)
-    # rougly check all items in systems are valid
-    for ii in systems:
-        ii = DPPath(ii)
-        if not ii.is_dir():
-            msg = f"dir {ii} is not a valid dir"
-            log.fatal(msg)
-            raise OSError(msg, help_msg)
-        if not (ii / "type.raw").is_file():
-            msg = f"dir {ii} is not a valid data system dir"
-            log.fatal(msg)
-            raise OSError(msg, help_msg)
-
-    batch_size = j_must_have(jdata, "batch_size")
-    sys_probs = jdata.get("sys_probs", None)
-    auto_prob = jdata.get("auto_prob", "prob_sys_size")
-    optional_type_map = not multi_task_mode
-
-    data = DeepmdDataSystem(
-        systems=systems,
-        batch_size=batch_size,
-        test_size=1,  # to satisfy the old api
-        shuffle_test=True,  # to satisfy the old api
-        rcut=rcut,
-        type_map=type_map,
-        optional_type_map=optional_type_map,
-        modifier=modifier,
-        trn_all_set=True,  # sample from all sets
-        sys_probs=sys_probs,
-        auto_prob_style=auto_prob,
-    )
-    data.add_dict(data_requirement)
-
-    return data
-
-
 def get_modifier(modi_data=None):
     modifier: Optional[DipoleChargeModifier]
     if modi_data is not None:
@@ -356,154 +294,6 @@ def get_modifier(modi_data=None):
     return modifier
 
 
-def get_rcut(jdata):
-    if jdata["model"].get("type") == "pairwise_dprc":
-        return max(
-            jdata["model"]["qm_model"]["descriptor"]["rcut"],
-            jdata["model"]["qmmm_model"]["descriptor"]["rcut"],
-        )
-    descrpt_data = jdata["model"]["descriptor"]
-    rcut_list = []
-    if descrpt_data["type"] == "hybrid":
-        for ii in descrpt_data["list"]:
-            rcut_list.append(ii["rcut"])
-    else:
-        rcut_list.append(descrpt_data["rcut"])
-    return max(rcut_list)
-
-
-def get_type_map(jdata):
-    return jdata["model"].get("type_map", None)
-
-
-def get_nbor_stat(jdata, rcut, one_type: bool = False):
-    # it seems that DeepmdDataSystem does not need rcut
-    # it's not clear why there is an argument...
-    # max_rcut = get_rcut(jdata)
-    max_rcut = rcut
-    type_map = get_type_map(jdata)
-
-    if type_map and len(type_map) == 0:
-        type_map = None
-    multi_task_mode = "data_dict" in jdata["training"]
-    if not multi_task_mode:
-        train_data = get_data(
-            jdata["training"]["training_data"], max_rcut, type_map, None
-        )
-        train_data.get_batch()
-    else:
-        assert (
-            type_map is not None
-        ), "Data stat in multi-task mode must have available type_map! "
-        train_data = None
-        for systems in jdata["training"]["data_dict"]:
-            tmp_data = get_data(
-                jdata["training"]["data_dict"][systems]["training_data"],
-                max_rcut,
-                type_map,
-                None,
-            )
-            tmp_data.get_batch()
-            assert tmp_data.get_type_map(), f"In multi-task mode, 'type_map.raw' must be defined in data systems {systems}! "
-            if train_data is None:
-                train_data = tmp_data
-            else:
-                train_data.system_dirs += tmp_data.system_dirs
-                train_data.data_systems += tmp_data.data_systems
-                train_data.natoms += tmp_data.natoms
-                train_data.natoms_vec += tmp_data.natoms_vec
-                train_data.default_mesh += tmp_data.default_mesh
-    data_ntypes = train_data.get_ntypes()
-    if type_map is not None:
-        map_ntypes = len(type_map)
-    else:
-        map_ntypes = data_ntypes
-    ntypes = max([map_ntypes, data_ntypes])
-
-    neistat = NeighborStat(ntypes, rcut, one_type=one_type)
-
-    min_nbor_dist, max_nbor_size = neistat.get_stat(train_data)
-
-    # moved from traier.py as duplicated
-    # TODO: this is a simple fix but we should have a clear
-    #       architecture to call neighbor stat
-    tf.constant(
-        min_nbor_dist,
-        name="train_attr/min_nbor_dist",
-        dtype=GLOBAL_ENER_FLOAT_PRECISION,
-    )
-    tf.constant(max_nbor_size, name="train_attr/max_nbor_size", dtype=tf.int32)
-    return min_nbor_dist, max_nbor_size
-
-
-def get_sel(jdata, rcut, one_type: bool = False):
-    _, max_nbor_size = get_nbor_stat(jdata, rcut, one_type=one_type)
-    return max_nbor_size
-
-
-def get_min_nbor_dist(jdata, rcut):
-    min_nbor_dist, _ = get_nbor_stat(jdata, rcut)
-    return min_nbor_dist
-
-
-def parse_auto_sel(sel):
-    if not isinstance(sel, str):
-        return False
-    words = sel.split(":")
-    if words[0] == "auto":
-        return True
-    else:
-        return False
-
-
-def parse_auto_sel_ratio(sel):
-    if not parse_auto_sel(sel):
-        raise RuntimeError(f"invalid auto sel format {sel}")
-    else:
-        words = sel.split(":")
-        if len(words) == 1:
-            ratio = 1.1
-        elif len(words) == 2:
-            ratio = float(words[1])
-        else:
-            raise RuntimeError(f"invalid auto sel format {sel}")
-        return ratio
-
-
-def wrap_up_4(xx):
-    return 4 * ((int(xx) + 3) // 4)
-
-
-def update_one_sel(jdata, descriptor, one_type: bool = False):
-    rcut = descriptor["rcut"]
-    tmp_sel = get_sel(
-        jdata,
-        rcut,
-        one_type=one_type,
-    )
-    sel = descriptor["sel"]
-    if isinstance(sel, int):
-        # convert to list and finnally convert back to int
-        sel = [sel]
-    if parse_auto_sel(descriptor["sel"]):
-        ratio = parse_auto_sel_ratio(descriptor["sel"])
-        descriptor["sel"] = sel = [int(wrap_up_4(ii * ratio)) for ii in tmp_sel]
-    else:
-        # sel is set by user
-        for ii, (tt, dd) in enumerate(zip(tmp_sel, sel)):
-            if dd and tt > dd:
-                # we may skip warning for sel=0, where the user is likely
-                # to exclude such type in the descriptor
-                log.warning(
-                    "sel of type %d is not enough! The expected value is "
-                    "not less than %d, but you set it to %d. The accuracy"
-                    " of your model may get worse." % (ii, tt, dd)
-                )
-    if one_type:
-        descriptor["sel"] = sel = sum(sel)
-    return descriptor
-
-
 def update_sel(jdata):
     log.info(
         "Calculate neighbor statistics... (add --skip-neighbor-stat to skip this step)"
diff --git a/deepmd/entrypoints/transfer.py b/deepmd/tf/entrypoints/transfer.py
similarity index 99%
rename from deepmd/entrypoints/transfer.py
rename to deepmd/tf/entrypoints/transfer.py
index 535b32ec09..7c90c77de8 100644
--- a/deepmd/entrypoints/transfer.py
+++ b/deepmd/tf/entrypoints/transfer.py
@@ -11,7 +11,7 @@
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     TRANSFER_PATTERN,
     tf,
 )
diff --git a/deepmd/tf/env.py b/deepmd/tf/env.py
new file mode 100644
index 0000000000..c7873b951c
--- /dev/null
+++ b/deepmd/tf/env.py
@@ -0,0 +1,456 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Module that sets tensorflow working environment and exports inportant constants."""
+
+import ctypes
+import os
+import platform
+from importlib import (
+    import_module,
+    reload,
+)
+from pathlib import (
+    Path,
+)
+from typing import (
+    TYPE_CHECKING,
+    Any,
+)
+
+import numpy as np
+from packaging.version import (
+    Version,
+)
+
+from deepmd.env import (
+    GLOBAL_CONFIG,
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    SHARED_LIB_DIR,
+    SHARED_LIB_MODULE,
+)
+from deepmd.env import get_default_nthreads as get_tf_default_nthreads
+from deepmd.env import (
+    global_float_prec,
+)
+from deepmd.env import set_default_nthreads as set_tf_default_nthreads
+from deepmd.env import (
+    set_env_if_empty,
+)
+
+if TYPE_CHECKING:
+    from types import (
+        ModuleType,
+    )
+
+
+def dlopen_library(module: str, filename: str):
+    """Dlopen a library from a module.
+
+    Parameters
+    ----------
+    module : str
+        The module name.
+    filename : str
+        The library filename pattern.
+    """
+    try:
+        m = import_module(module)
+    except ModuleNotFoundError:
+        pass
+    else:
+        libs = sorted(Path(m.__path__[0]).glob(filename))
+        # hope that there is only one version installed...
+        if len(libs):
+            ctypes.CDLL(str(libs[0].absolute()))
+
+
+# dlopen pip cuda library before tensorflow
+if platform.system() == "Linux":
+    dlopen_library("nvidia.cuda_runtime.lib", "libcudart.so*")
+    dlopen_library("nvidia.cublas.lib", "libcublasLt.so*")
+    dlopen_library("nvidia.cublas.lib", "libcublas.so*")
+    dlopen_library("nvidia.cufft.lib", "libcufft.so*")
+    dlopen_library("nvidia.curand.lib", "libcurand.so*")
+    dlopen_library("nvidia.cusolver.lib", "libcusolver.so*")
+    dlopen_library("nvidia.cusparse.lib", "libcusparse.so*")
+    dlopen_library("nvidia.cudnn.lib", "libcudnn.so*")
+
+# keras 3 is incompatible with tf.compat.v1
+# https://keras.io/getting_started/#tensorflow--keras-2-backwards-compatibility
+os.environ["TF_USE_LEGACY_KERAS"] = "1"
+# import tensorflow v1 compatability
+try:
+    import tensorflow.compat.v1 as tf
+
+    tf.disable_v2_behavior()
+except ImportError:
+    import tensorflow as tf
+try:
+    import tensorflow.compat.v2 as tfv2
+except ImportError:
+    tfv2 = None
+
+__all__ = [
+    "GLOBAL_CONFIG",
+    "GLOBAL_TF_FLOAT_PRECISION",
+    "GLOBAL_NP_FLOAT_PRECISION",
+    "GLOBAL_ENER_FLOAT_PRECISION",
+    "global_float_prec",
+    "global_cvt_2_tf_float",
+    "global_cvt_2_ener_float",
+    "MODEL_VERSION",
+    "SHARED_LIB_DIR",
+    "SHARED_LIB_MODULE",
+    "default_tf_session_config",
+    "reset_default_tf_session_config",
+    "op_module",
+    "op_grads_module",
+    "TRANSFER_PATTERN",
+    "FITTING_NET_PATTERN",
+    "EMBEDDING_NET_PATTERN",
+    "TYPE_EMBEDDING_PATTERN",
+    "ATTENTION_LAYER_PATTERN",
+    "REMOVE_SUFFIX_DICT",
+    "TF_VERSION",
+    "tf_py_version",
+]
+
+
+# Python library version
+try:
+    tf_py_version = tf.version.VERSION
+except AttributeError:
+    tf_py_version = tf.__version__
+
+# subpatterns:
+# \1: type of centeral atom
+# \2: weight name
+# \3: layer index
+# The rest: types of neighbor atoms
+# IMPORTANT: the order is critical to match the pattern
+EMBEDDING_NET_PATTERN = str(
+    r"filter_type_(\d+)/(matrix)_(\d+)_(\d+)|"
+    r"filter_type_(\d+)/(bias)_(\d+)_(\d+)|"
+    r"filter_type_(\d+)/(idt)_(\d+)_(\d+)|"
+    r"filter_type_(all)/(matrix)_(\d+)_(\d+)_(\d+)|"
+    r"filter_type_(all)/(matrix)_(\d+)_(\d+)|"
+    r"filter_type_(all)/(matrix)_(\d+)|"
+    r"filter_type_(all)/(bias)_(\d+)_(\d+)_(\d+)|"
+    r"filter_type_(all)/(bias)_(\d+)_(\d+)|"
+    r"filter_type_(all)/(bias)_(\d+)|"
+    r"filter_type_(all)/(idt)_(\d+)_(\d+)|"
+    r"filter_type_(all)/(idt)_(\d+)|"
+)[:-1]
+
+# subpatterns:
+# \1: layer index or "final"
+# \2: type of centeral atom, optional
+# the last: weight name
+FITTING_NET_PATTERN = str(
+    r"layer_(\d+)/(matrix)|"
+    r"layer_(\d+)_type_(\d+)/(matrix)|"
+    r"layer_(\d+)/(bias)|"
+    r"layer_(\d+)_type_(\d+)/(bias)|"
+    r"layer_(\d+)/(idt)|"
+    r"layer_(\d+)_type_(\d+)/(idt)|"
+    r"(final)_layer/(matrix)|"
+    r"(final)_layer_type_(\d+)/(matrix)|"
+    r"(final)_layer/(bias)|"
+    r"(final)_layer_type_(\d+)/(bias)|"
+    # TODO: supporting extracting parameters for shared layers
+    # not sure how to parse for shared layers...
+    # layer_name
+    r"share_.+_type_\d/matrix|"
+    r"share_.+_type_\d/bias|"
+    r"share_.+_type_\d/idt|"
+    r"share_.+/matrix|"
+    r"share_.+/bias|"
+    r"share_.+/idt|"
+)[:-1]
+
+# subpatterns:
+# \1: weight name
+# \2: layer index
+TYPE_EMBEDDING_PATTERN = str(
+    r"type_embed_net/(matrix)_(\d+)|"
+    r"type_embed_net/(bias)_(\d+)|"
+    r"type_embed_net/(idt)_(\d+)|"
+)[:-1]
+
+ATTENTION_LAYER_PATTERN = str(
+    r"attention_layer_\d+/c_query/matrix|"
+    r"attention_layer_\d+/c_query/bias|"
+    r"attention_layer_\d+/c_key/matrix|"
+    r"attention_layer_\d+/c_key/bias|"
+    r"attention_layer_\d+/c_value/matrix|"
+    r"attention_layer_\d+/c_value/bias|"
+    r"attention_layer_\d+/c_out/matrix|"
+    r"attention_layer_\d+/c_out/bias|"
+    r"attention_layer_\d+/layer_normalization/beta|"
+    r"attention_layer_\d+/layer_normalization/gamma|"
+    r"attention_layer_\d+/layer_normalization_\d+/beta|"
+    r"attention_layer_\d+/layer_normalization_\d+/gamma|"
+)
+
+TRANSFER_PATTERN = (
+    EMBEDDING_NET_PATTERN
+    + FITTING_NET_PATTERN
+    + TYPE_EMBEDDING_PATTERN
+    + str(
+        r"descrpt_attr/t_avg|"
+        r"descrpt_attr/t_std|"
+        r"fitting_attr/t_fparam_avg|"
+        r"fitting_attr/t_fparam_istd|"
+        r"fitting_attr/t_aparam_avg|"
+        r"fitting_attr/t_aparam_istd|"
+        r"model_attr/t_tab_info|"
+        r"model_attr/t_tab_data|"
+    )
+)
+
+REMOVE_SUFFIX_DICT = {
+    "model_attr/sel_type_{}": "model_attr/sel_type",
+    "model_attr/output_dim_{}": "model_attr/output_dim",
+    "_{}/": "/",
+    # when atom_ener is set
+    "_{}_1/": "_1/",
+    "o_energy_{}": "o_energy",
+    "o_force_{}": "o_force",
+    "o_virial_{}": "o_virial",
+    "o_atom_energy_{}": "o_atom_energy",
+    "o_atom_virial_{}": "o_atom_virial",
+    "o_dipole_{}": "o_dipole",
+    "o_global_dipole_{}": "o_global_dipole",
+    "o_polar_{}": "o_polar",
+    "o_global_polar_{}": "o_global_polar",
+    "o_rmat_{}": "o_rmat",
+    "o_rmat_deriv_{}": "o_rmat_deriv",
+    "o_nlist_{}": "o_nlist",
+    "o_rij_{}": "o_rij",
+    "o_dm_force_{}": "o_dm_force",
+    "o_dm_virial_{}": "o_dm_virial",
+    "o_dm_av_{}": "o_dm_av",
+    "o_wfc_{}": "o_wfc",
+}
+
+
+def set_mkl():
+    """Tuning MKL for the best performance.
+
+    References
+    ----------
+    TF overview
+    https://www.tensorflow.org/guide/performance/overview
+
+    Fixing an issue in numpy built by MKL
+    https://github.com/ContinuumIO/anaconda-issues/issues/11367
+    https://github.com/numpy/numpy/issues/12374
+
+    check whether the numpy is built by mkl, see
+    https://github.com/numpy/numpy/issues/14751
+    """
+    try:
+        is_mkl = (
+            np.show_config("dicts")
+            .get("Build Dependencies", {})
+            .get("blas", {})
+            .get("name", "")
+            .lower()
+            .startswith("mkl")
+        )
+    except TypeError:
+        is_mkl = "mkl_rt" in np.__config__.get_info("blas_mkl_info").get(
+            "libraries", []
+        )
+    if is_mkl:
+        set_env_if_empty("KMP_BLOCKTIME", "0")
+        set_env_if_empty("KMP_AFFINITY", "granularity=fine,verbose,compact,1,0")
+        reload(np)
+
+
+def get_tf_session_config() -> Any:
+    """Configure tensorflow session.
+
+    Returns
+    -------
+    Any
+        session configure object
+    """
+    set_tf_default_nthreads()
+    intra, inter = get_tf_default_nthreads()
+    if int(os.environ.get("DP_JIT", 0)):
+        set_env_if_empty("TF_XLA_FLAGS", "--tf_xla_auto_jit=2")
+        # pip cuda package
+        if platform.system() == "Linux":
+            try:
+                m = import_module("nvidia.cuda_nvcc")
+            except ModuleNotFoundError:
+                pass
+            else:
+                cuda_data_dir = str(Path(m.__file__).parent.absolute())
+                set_env_if_empty(
+                    "XLA_FLAGS", "--xla_gpu_cuda_data_dir=" + cuda_data_dir
+                )
+    config = tf.ConfigProto(
+        gpu_options=tf.GPUOptions(allow_growth=True),
+        intra_op_parallelism_threads=intra,
+        inter_op_parallelism_threads=inter,
+    )
+    if Version(tf_py_version) >= Version("1.15") and int(
+        os.environ.get("DP_AUTO_PARALLELIZATION", 0)
+    ):
+        config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
+    return config
+
+
+default_tf_session_config = get_tf_session_config()
+
+
+def reset_default_tf_session_config(cpu_only: bool):
+    """Limit tensorflow session to CPU or not.
+
+    Parameters
+    ----------
+    cpu_only : bool
+        If enabled, no GPU device is visible to the TensorFlow Session.
+    """
+    global default_tf_session_config
+    if cpu_only:
+        default_tf_session_config.device_count["GPU"] = 0
+    else:
+        if "GPU" in default_tf_session_config.device_count:
+            del default_tf_session_config.device_count["GPU"]
+
+
+def get_module(module_name: str) -> "ModuleType":
+    """Load force module.
+
+    Returns
+    -------
+    ModuleType
+        loaded force module
+
+    Raises
+    ------
+    FileNotFoundError
+        if module is not found in directory
+    """
+    if platform.system() == "Windows":
+        ext = ".dll"
+        prefix = ""
+    # elif platform.system() == "Darwin":
+    #    ext = ".dylib"
+    else:
+        ext = ".so"
+        prefix = "lib"
+
+    module_file = (SHARED_LIB_DIR / (prefix + module_name)).with_suffix(ext).resolve()
+
+    if not module_file.is_file():
+        raise FileNotFoundError(f"module {module_name} does not exist")
+    else:
+        try:
+            module = tf.load_op_library(str(module_file))
+        except tf.errors.NotFoundError as e:
+            # check CXX11_ABI_FLAG is compatiblity
+            # see https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html
+            # ABI should be the same
+            if "CXX11_ABI_FLAG" in tf.__dict__:
+                tf_cxx11_abi_flag = tf.CXX11_ABI_FLAG
+            else:
+                tf_cxx11_abi_flag = tf.sysconfig.CXX11_ABI_FLAG
+            if TF_CXX11_ABI_FLAG != tf_cxx11_abi_flag:
+                raise RuntimeError(
+                    "This deepmd-kit package was compiled with "
+                    "CXX11_ABI_FLAG=%d, but TensorFlow runtime was compiled "
+                    "with CXX11_ABI_FLAG=%d. These two library ABIs are "
+                    "incompatible and thus an error is raised when loading %s. "
+                    "You need to rebuild deepmd-kit against this TensorFlow "
+                    "runtime."
+                    % (
+                        TF_CXX11_ABI_FLAG,
+                        tf_cxx11_abi_flag,
+                        module_name,
+                    )
+                ) from e
+
+            # different versions may cause incompatibility
+            # see #406, #447, #557, #774, and #796 for example
+            # throw a message if versions are different
+            if TF_VERSION != tf_py_version:
+                raise RuntimeError(
+                    "The version of TensorFlow used to compile this "
+                    f"deepmd-kit package is {TF_VERSION}, but the version of TensorFlow "
+                    f"runtime you are using is {tf_py_version}. These two versions are "
+                    f"incompatible and thus an error is raised when loading {module_name}. "
+                    f"You need to install TensorFlow {TF_VERSION}, or rebuild deepmd-kit "
+                    f"against TensorFlow {tf_py_version}.\nIf you are using a wheel from "
+                    "pypi, you may consider to install deepmd-kit execuating "
+                    "`pip install deepmd-kit --no-binary deepmd-kit` "
+                    "instead."
+                ) from e
+            error_message = (
+                "This deepmd-kit package is inconsitent with TensorFlow "
+                f"Runtime, thus an error is raised when loading {module_name}. "
+                "You need to rebuild deepmd-kit against this TensorFlow "
+                "runtime."
+            )
+            if TF_CXX11_ABI_FLAG == 1:
+                # #1791
+                error_message += (
+                    "\nWARNING: devtoolset on RHEL6 and RHEL7 does not support _GLIBCXX_USE_CXX11_ABI=1. "
+                    "See https://bugzilla.redhat.com/show_bug.cgi?id=1546704"
+                )
+            raise RuntimeError(error_message) from e
+        return module
+
+
+if GLOBAL_CONFIG["enable_tensorflow"] == "0":
+    raise RuntimeError(
+        "TensorFlow backend is not built. To enable it, "
+        "set the environmental variable DP_ENABLE_TENSORFLOW=1."
+    )
+MODEL_VERSION = GLOBAL_CONFIG["model_version"]
+TF_VERSION = GLOBAL_CONFIG["tf_version"]
+TF_CXX11_ABI_FLAG = int(GLOBAL_CONFIG["tf_cxx11_abi_flag"])
+
+op_module = get_module("deepmd_op")
+op_grads_module = get_module("op_grads")
+# prevent OOM when using with other backends
+# tf.config doesn't work for unclear reason
+set_env_if_empty("TF_FORCE_GPU_ALLOW_GROWTH", "true", verbose=False)
+
+# FLOAT_PREC
+GLOBAL_TF_FLOAT_PRECISION = tf.dtypes.as_dtype(GLOBAL_NP_FLOAT_PRECISION)
+
+
+def global_cvt_2_tf_float(xx: tf.Tensor) -> tf.Tensor:
+    """Cast tensor to globally set TF precision.
+
+    Parameters
+    ----------
+    xx : tf.Tensor
+        input tensor
+
+    Returns
+    -------
+    tf.Tensor
+        output tensor cast to `GLOBAL_TF_FLOAT_PRECISION`
+    """
+    return tf.cast(xx, GLOBAL_TF_FLOAT_PRECISION)
+
+
+def global_cvt_2_ener_float(xx: tf.Tensor) -> tf.Tensor:
+    """Cast tensor to globally set energy precision.
+
+    Parameters
+    ----------
+    xx : tf.Tensor
+        input tensor
+
+    Returns
+    -------
+    tf.Tensor
+        output tensor cast to `GLOBAL_ENER_FLOAT_PRECISION`
+    """
+    return tf.cast(xx, GLOBAL_ENER_FLOAT_PRECISION)
diff --git a/deepmd/fit/__init__.py b/deepmd/tf/fit/__init__.py
similarity index 100%
rename from deepmd/fit/__init__.py
rename to deepmd/tf/fit/__init__.py
diff --git a/deepmd/fit/dipole.py b/deepmd/tf/fit/dipole.py
similarity index 77%
rename from deepmd/fit/dipole.py
rename to deepmd/tf/fit/dipole.py
index 312bcc9bf1..f98d52c7bd 100644
--- a/deepmd/fit/dipole.py
+++ b/deepmd/tf/fit/dipole.py
@@ -6,30 +6,33 @@
 
 import numpy as np
 
-from deepmd.common import (
+from deepmd.tf.common import (
     cast_precision,
     get_activation_func,
     get_precision,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit.fitting import (
+from deepmd.tf.fit.fitting import (
     Fitting,
 )
-from deepmd.loss.loss import (
+from deepmd.tf.loss.loss import (
     Loss,
 )
-from deepmd.loss.tensor import (
+from deepmd.tf.loss.tensor import (
     TensorLoss,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_fitting_net_variables_from_graph_def,
 )
-from deepmd.utils.network import (
+from deepmd.tf.utils.network import (
     one_layer,
     one_layer_rand_seed_shift,
 )
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
 
 
 @Fitting.register("dipole")
@@ -38,8 +41,12 @@ class DipoleFittingSeA(Fitting):
 
     Parameters
     ----------
-    descrpt : tf.Tensor
-            The descrptor
+    ntypes
+            The ntypes of the descrptor :math:`\mathcal{D}`
+    dim_descrpt
+            The dimension of the descrptor :math:`\mathcal{D}`
+    embedding_width
+            The rotation matrix dimension of the descrptor :math:`\mathcal{D}`
     neuron : List[int]
             Number of neurons in each hidden layer of the fitting net
     resnet_dt : bool
@@ -55,11 +62,16 @@ class DipoleFittingSeA(Fitting):
             The precision of the embedding net parameters. Supported options are |PRECISION|
     uniform_seed
             Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed
+    mixed_types : bool
+        If true, use a uniform fitting net for all atom types, otherwise use
+        different fitting nets for different atom types.
     """
 
     def __init__(
         self,
-        descrpt: tf.Tensor,
+        ntypes: int,
+        dim_descrpt: int,
+        embedding_width: int,
         neuron: List[int] = [120, 120, 120],
         resnet_dt: bool = True,
         sel_type: Optional[List[int]] = None,
@@ -67,11 +79,12 @@ def __init__(
         activation_function: str = "tanh",
         precision: str = "default",
         uniform_seed: bool = False,
+        mixed_types: bool = False,
         **kwargs,
     ) -> None:
         """Constructor."""
-        self.ntypes = descrpt.get_ntypes()
-        self.dim_descrpt = descrpt.get_dim_out()
+        self.ntypes = ntypes
+        self.dim_descrpt = dim_descrpt
         self.n_neuron = neuron
         self.resnet_dt = resnet_dt
         self.sel_type = sel_type
@@ -83,13 +96,15 @@ def __init__(
         self.seed = seed
         self.uniform_seed = uniform_seed
         self.seed_shift = one_layer_rand_seed_shift()
+        self.activation_function_name = activation_function
         self.fitting_activation_fn = get_activation_func(activation_function)
         self.fitting_precision = get_precision(precision)
-        self.dim_rot_mat_1 = descrpt.get_dim_rot_mat_1()
+        self.dim_rot_mat_1 = embedding_width
         self.dim_rot_mat = self.dim_rot_mat_1 * 3
         self.useBN = False
         self.fitting_net_variables = None
         self.mixed_prec = None
+        self.mixed_types = mixed_types
 
     def get_sel_type(self) -> int:
         """Get selected type."""
@@ -99,6 +114,7 @@ def get_out_size(self) -> int:
         """Get the output size. Should be 3."""
         return 3
 
+    @cast_precision
     def _build_lower(self, start_index, natoms, inputs, rot_mat, suffix="", reuse=None):
         # cut-out inputs
         inputs_i = tf.slice(inputs, [0, start_index, 0], [-1, natoms, -1])
@@ -162,7 +178,6 @@ def _build_lower(self, start_index, natoms, inputs, rot_mat, suffix="", reuse=No
         final_layer = tf.reshape(final_layer, [tf.shape(inputs)[0], natoms, 3])
         return final_layer
 
-    @cast_precision
     def build(
         self,
         input_d: tf.Tensor,
@@ -205,8 +220,12 @@ def build(
         start_index = 0
         inputs = tf.reshape(input_d, [-1, natoms[0], self.dim_descrpt])
         rot_mat = tf.reshape(rot_mat, [-1, natoms[0], self.dim_rot_mat])
+        if nframes is None:
+            nframes = tf.shape(inputs)[0]
 
-        if type_embedding is not None:
+        if self.mixed_types or type_embedding is not None:
+            # keep old behavior
+            self.mixed_types = True
             nloc_mask = tf.reshape(
                 tf.tile(tf.repeat(self.sel_mask, natoms[2:]), [nframes]), [nframes, -1]
             )
@@ -218,13 +237,30 @@ def build(
             self.nloc_masked = tf.shape(
                 tf.reshape(self.atype_nloc_masked, [nframes, -1])
             )[1]
+
+        if type_embedding is not None:
             atype_embed = tf.nn.embedding_lookup(type_embedding, self.atype_nloc_masked)
         else:
             atype_embed = None
 
         self.atype_embed = atype_embed
+        if atype_embed is not None:
+            inputs = tf.reshape(
+                tf.reshape(inputs, [nframes, natoms[0], self.dim_descrpt])[nloc_mask],
+                [-1, self.dim_descrpt],
+            )
+            rot_mat = tf.reshape(
+                tf.reshape(rot_mat, [nframes, natoms[0], self.dim_rot_mat_1 * 3])[
+                    nloc_mask
+                ],
+                [-1, self.dim_rot_mat_1, 3],
+            )
+            atype_embed = tf.cast(atype_embed, self.fitting_precision)
+            type_shape = atype_embed.get_shape().as_list()
+            inputs = tf.concat([inputs, atype_embed], axis=1)
+            self.dim_descrpt = self.dim_descrpt + type_shape[1]
 
-        if atype_embed is None:
+        if not self.mixed_types:
             count = 0
             outs_list = []
             for type_i in range(self.ntypes):
@@ -245,20 +281,6 @@ def build(
                 count += 1
             outs = tf.concat(outs_list, axis=1)
         else:
-            inputs = tf.reshape(
-                tf.reshape(inputs, [nframes, natoms[0], self.dim_descrpt])[nloc_mask],
-                [-1, self.dim_descrpt],
-            )
-            rot_mat = tf.reshape(
-                tf.reshape(rot_mat, [nframes, natoms[0], self.dim_rot_mat_1 * 3])[
-                    nloc_mask
-                ],
-                [-1, self.dim_rot_mat_1, 3],
-            )
-            atype_embed = tf.cast(atype_embed, self.fitting_precision)
-            type_shape = atype_embed.get_shape().as_list()
-            inputs = tf.concat([inputs, atype_embed], axis=1)
-            self.dim_descrpt = self.dim_descrpt + type_shape[1]
             inputs = tf.reshape(inputs, [nframes, self.nloc_masked, self.dim_descrpt])
             rot_mat = tf.reshape(
                 rot_mat, [nframes, self.nloc_masked, self.dim_rot_mat_1 * 3]
@@ -327,3 +349,63 @@ def get_loss(self, loss: dict, lr) -> Loss:
             tensor_size=3,
             label_name="dipole",
         )
+
+    def serialize(self, suffix: str) -> dict:
+        """Serialize the model.
+
+        Returns
+        -------
+        dict
+            The serialized data
+        """
+        data = {
+            "@class": "Fitting",
+            "type": "dipole",
+            "@version": 1,
+            "var_name": "dipole",
+            "ntypes": self.ntypes,
+            "dim_descrpt": self.dim_descrpt,
+            "embedding_width": self.dim_rot_mat_1,
+            "mixed_types": self.mixed_types,
+            "dim_out": 3,
+            "neuron": self.n_neuron,
+            "resnet_dt": self.resnet_dt,
+            "activation_function": self.activation_function_name,
+            "precision": self.fitting_precision.name,
+            "exclude_types": [],
+            "nets": self.serialize_network(
+                ntypes=self.ntypes,
+                ndim=0 if self.mixed_types else 1,
+                in_dim=self.dim_descrpt,
+                out_dim=self.dim_rot_mat_1,
+                neuron=self.n_neuron,
+                activation_function=self.activation_function_name,
+                resnet_dt=self.resnet_dt,
+                variables=self.fitting_net_variables,
+                suffix=suffix,
+            ),
+        }
+        return data
+
+    @classmethod
+    def deserialize(cls, data: dict, suffix: str):
+        """Deserialize the model.
+
+        Parameters
+        ----------
+        data : dict
+            The serialized data
+
+        Returns
+        -------
+        Model
+            The deserialized model
+        """
+        data = data.copy()
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        fitting = cls(**data)
+        fitting.fitting_net_variables = cls.deserialize_network(
+            data["nets"],
+            suffix=suffix,
+        )
+        return fitting
diff --git a/deepmd/fit/dos.py b/deepmd/tf/fit/dos.py
similarity index 83%
rename from deepmd/fit/dos.py
rename to deepmd/tf/fit/dos.py
index bbf7d39a09..7989752e5a 100644
--- a/deepmd/fit/dos.py
+++ b/deepmd/tf/fit/dos.py
@@ -7,42 +7,48 @@
 
 import numpy as np
 
-from deepmd.common import (
+from deepmd.tf.common import (
     add_data_requirement,
     cast_precision,
     get_activation_func,
     get_precision,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     tf,
 )
-from deepmd.fit.fitting import (
+from deepmd.tf.fit.fitting import (
     Fitting,
 )
-from deepmd.loss.dos import (
+from deepmd.tf.loss.dos import (
     DOSLoss,
 )
-from deepmd.loss.loss import (
+from deepmd.tf.loss.loss import (
     Loss,
 )
-from deepmd.nvnmd.fit.ener import (
+from deepmd.tf.nvnmd.fit.ener import (
     one_layer_nvnmd,
 )
-from deepmd.nvnmd.utils.config import (
+from deepmd.tf.nvnmd.utils.config import (
     nvnmd_cfg,
 )
-from deepmd.utils.errors import (
+from deepmd.tf.utils.errors import (
     GraphWithoutTensorError,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_fitting_net_variables_from_graph_def,
     get_tensor_by_name_from_graph,
 )
-from deepmd.utils.network import one_layer as one_layer_deepmd
-from deepmd.utils.network import (
+from deepmd.tf.utils.network import one_layer as one_layer_deepmd
+from deepmd.tf.utils.network import (
     one_layer_rand_seed_shift,
 )
+from deepmd.utils.out_stat import (
+    compute_stats_from_redu,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
 
 log = logging.getLogger(__name__)
 
@@ -54,8 +60,10 @@ class DOSFitting(Fitting):
 
     Parameters
     ----------
-    descrpt
-            The descrptor :math:`\mathcal{D}`
+    ntypes
+            The ntypes of the descrptor :math:`\mathcal{D}`
+    dim_descrpt
+            The dimension of the descrptor :math:`\mathcal{D}`
     neuron
             Number of neurons :math:`N` in each hidden layer of the fitting net
     resnet_dt
@@ -87,11 +95,15 @@ class DOSFitting(Fitting):
     use_aparam_as_mask: bool, optional
             If True, the atomic parameters will be used as a mask that determines the atom is real/virtual.
             And the aparam will not be used as the atomic parameters for embedding.
+    mixed_types : bool
+        If true, use a uniform fitting net for all atom types, otherwise use
+        different fitting nets for different atom types.
     """
 
     def __init__(
         self,
-        descrpt: tf.Tensor,
+        ntypes: int,
+        dim_descrpt: int,
         neuron: List[int] = [120, 120, 120],
         resnet_dt: bool = True,
         numb_fparam: int = 0,
@@ -105,12 +117,13 @@ def __init__(
         uniform_seed: bool = False,
         layer_name: Optional[List[Optional[str]]] = None,
         use_aparam_as_mask: bool = False,
+        mixed_types: bool = False,
         **kwargs,
     ) -> None:
         """Constructor."""
         # model param
-        self.ntypes = descrpt.get_ntypes()
-        self.dim_descrpt = descrpt.get_dim_out()
+        self.ntypes = ntypes
+        self.dim_descrpt = dim_descrpt
         self.use_aparam_as_mask = use_aparam_as_mask
 
         self.numb_fparam = numb_fparam
@@ -124,6 +137,7 @@ def __init__(
         self.seed = seed
         self.uniform_seed = uniform_seed
         self.seed_shift = one_layer_rand_seed_shift()
+        self.activation_function = activation_function
         self.fitting_activation_fn = get_activation_func(activation_function)
         self.fitting_precision = get_precision(precision)
         self.trainable = trainable
@@ -142,16 +156,16 @@ def __init__(
             add_data_requirement(
                 "fparam", self.numb_fparam, atomic=False, must=True, high_prec=False
             )
-            self.fparam_avg = None
-            self.fparam_std = None
-            self.fparam_inv_std = None
+        self.fparam_avg = None
+        self.fparam_std = None
+        self.fparam_inv_std = None
         if self.numb_aparam > 0:
             add_data_requirement(
                 "aparam", self.numb_aparam, atomic=True, must=True, high_prec=False
             )
-            self.aparam_avg = None
-            self.aparam_std = None
-            self.aparam_inv_std = None
+        self.aparam_avg = None
+        self.aparam_std = None
+        self.aparam_inv_std = None
 
         self.fitting_net_variables = None
         self.mixed_prec = None
@@ -161,6 +175,7 @@ def __init__(
             assert (
                 len(self.layer_name) == len(self.n_neuron) + 1
             ), "length of layer_name should be that of n_neuron + 1"
+        self.mixed_types = mixed_types
 
     def get_numb_fparam(self) -> int:
         """Get the number of frame parameters."""
@@ -225,8 +240,10 @@ def _compute_output_stats(self, all_stat, rcond=1e-3, mixed_type=False):
         sys_tynatom = np.reshape(sys_tynatom, [nsys, -1])
         sys_tynatom = sys_tynatom[:, 2:]
 
-        dos_shift, resd, rank, s_value = np.linalg.lstsq(
-            sys_tynatom, sys_dos, rcond=rcond
+        dos_shift, _ = compute_stats_from_redu(
+            sys_dos,
+            sys_tynatom,
+            rcond=rcond,
         )
 
         return dos_shift
@@ -492,13 +509,22 @@ def build(
             tf.slice(atype_nall, [0, 0], [-1, natoms[0]]), [-1]
         )  ## lammps will make error
         if type_embedding is not None:
+            # keep old behavior
+            self.mixed_types = True
             atype_embed = tf.nn.embedding_lookup(type_embedding, self.atype_nloc)
         else:
             atype_embed = None
 
         self.atype_embed = atype_embed
+        if atype_embed is not None:
+            atype_embed = tf.cast(atype_embed, GLOBAL_TF_FLOAT_PRECISION)
+            type_shape = atype_embed.get_shape().as_list()
+            inputs = tf.concat(
+                [tf.reshape(inputs, [-1, self.dim_descrpt]), atype_embed], axis=1
+            )
+            self.dim_descrpt = self.dim_descrpt + type_shape[1]
 
-        if atype_embed is None:
+        if not self.mixed_types:
             start_index = 0
             outs_list = []
             for type_i in range(self.ntypes):
@@ -516,7 +542,11 @@ def build(
 
                 final_layer = tf.reshape(
                     final_layer,
-                    [tf.shape(inputs)[0] * self.numb_dos, natoms[2 + type_i]],
+                    [
+                        tf.shape(inputs)[0],
+                        natoms[2 + type_i],
+                        self.numb_dos,
+                    ],
                 )
                 outs_list.append(final_layer)
                 start_index += natoms[2 + type_i]
@@ -525,13 +555,6 @@ def build(
             outs = tf.concat(outs_list, axis=1)
         # with type embedding
         else:
-            atype_embed = tf.cast(atype_embed, GLOBAL_TF_FLOAT_PRECISION)
-            type_shape = atype_embed.get_shape().as_list()
-            inputs = tf.concat(
-                [tf.reshape(inputs, [-1, self.dim_descrpt]), atype_embed], axis=1
-            )
-            original_dim_descrpt = self.dim_descrpt
-            self.dim_descrpt = self.dim_descrpt + type_shape[1]
             inputs = tf.reshape(inputs, [-1, natoms[0], self.dim_descrpt])
             final_layer = self._build_lower(
                 0,
@@ -545,7 +568,8 @@ def build(
             )
 
             outs = tf.reshape(
-                final_layer, [tf.shape(inputs)[0] * self.numb_dos, natoms[0]]
+                final_layer,
+                [tf.shape(inputs)[0], natoms[0], self.numb_dos],
             )
         # add bias
         # self.atom_ener_before = outs
@@ -557,7 +581,7 @@ def build(
         # self.atom_ener_after = outs
 
         tf.summary.histogram("fitting_net_output", outs)
-        return tf.reshape(outs, [-1])
+        return outs
 
     def init_variables(
         self,
@@ -636,3 +660,81 @@ def get_loss(self, loss: dict, lr) -> Loss:
         return DOSLoss(
             **loss, starter_learning_rate=lr.start_lr(), numb_dos=self.get_numb_dos()
         )
+
+    @classmethod
+    def deserialize(cls, data: dict, suffix: str = ""):
+        """Deserialize the model.
+
+        Parameters
+        ----------
+        data : dict
+            The serialized data
+
+        Returns
+        -------
+        Model
+            The deserialized model
+        """
+        data = data.copy()
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data["numb_dos"] = data.pop("dim_out")
+        fitting = cls(**data)
+        fitting.fitting_net_variables = cls.deserialize_network(
+            data["nets"],
+            suffix=suffix,
+        )
+        fitting.bias_dos = data["@variables"]["bias_atom_e"]
+        if fitting.numb_fparam > 0:
+            fitting.fparam_avg = data["@variables"]["fparam_avg"]
+            fitting.fparam_inv_std = data["@variables"]["fparam_inv_std"]
+        if fitting.numb_aparam > 0:
+            fitting.aparam_avg = data["@variables"]["aparam_avg"]
+            fitting.aparam_inv_std = data["@variables"]["aparam_inv_std"]
+        return fitting
+
+    def serialize(self, suffix: str = "") -> dict:
+        """Serialize the model.
+
+        Returns
+        -------
+        dict
+            The serialized data
+        """
+        data = {
+            "@class": "Fitting",
+            "type": "dos",
+            "@version": 1,
+            "var_name": "dos",
+            "ntypes": self.ntypes,
+            "dim_descrpt": self.dim_descrpt,
+            "mixed_types": self.mixed_types,
+            "dim_out": self.numb_dos,
+            "neuron": self.n_neuron,
+            "resnet_dt": self.resnet_dt,
+            "numb_fparam": self.numb_fparam,
+            "numb_aparam": self.numb_aparam,
+            "rcond": self.rcond,
+            "trainable": self.trainable,
+            "activation_function": self.activation_function,
+            "precision": self.fitting_precision.name,
+            "exclude_types": [],
+            "nets": self.serialize_network(
+                ntypes=self.ntypes,
+                ndim=0 if self.mixed_types else 1,
+                in_dim=self.dim_descrpt + self.numb_fparam + self.numb_aparam,
+                out_dim=self.numb_dos,
+                neuron=self.n_neuron,
+                activation_function=self.activation_function,
+                resnet_dt=self.resnet_dt,
+                variables=self.fitting_net_variables,
+                suffix=suffix,
+            ),
+            "@variables": {
+                "bias_atom_e": self.bias_dos,
+                "fparam_avg": self.fparam_avg,
+                "fparam_inv_std": self.fparam_inv_std,
+                "aparam_avg": self.aparam_avg,
+                "aparam_inv_std": self.aparam_inv_std,
+            },
+        }
+        return data
diff --git a/deepmd/fit/ener.py b/deepmd/tf/fit/ener.py
similarity index 84%
rename from deepmd/fit/ener.py
rename to deepmd/tf/fit/ener.py
index 4c15e57124..d38d0416af 100644
--- a/deepmd/fit/ener.py
+++ b/deepmd/tf/fit/ener.py
@@ -1,57 +1,70 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import logging
 from typing import (
+    TYPE_CHECKING,
     List,
     Optional,
 )
 
 import numpy as np
 
-from deepmd.common import (
+from deepmd.tf.common import (
     add_data_requirement,
     cast_precision,
     get_activation_func,
     get_precision,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     global_cvt_2_tf_float,
     tf,
 )
-from deepmd.fit.fitting import (
+from deepmd.tf.fit.fitting import (
     Fitting,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPotential,
 )
-from deepmd.loss.ener import (
+from deepmd.tf.loss.ener import (
     EnerDipoleLoss,
     EnerSpinLoss,
     EnerStdLoss,
 )
-from deepmd.loss.loss import (
+from deepmd.tf.loss.loss import (
     Loss,
 )
-from deepmd.nvnmd.fit.ener import (
+from deepmd.tf.nvnmd.fit.ener import (
     one_layer_nvnmd,
 )
-from deepmd.nvnmd.utils.config import (
+from deepmd.tf.nvnmd.utils.config import (
     nvnmd_cfg,
 )
-from deepmd.utils.errors import (
+from deepmd.tf.utils.errors import (
     GraphWithoutTensorError,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_fitting_net_variables_from_graph_def,
     get_tensor_by_name_from_graph,
 )
-from deepmd.utils.network import one_layer as one_layer_deepmd
-from deepmd.utils.network import (
+from deepmd.tf.utils.network import one_layer as one_layer_deepmd
+from deepmd.tf.utils.network import (
     one_layer_rand_seed_shift,
 )
-from deepmd.utils.spin import (
+from deepmd.tf.utils.spin import (
     Spin,
 )
+from deepmd.utils.finetune import (
+    change_energy_bias_lower,
+)
+from deepmd.utils.out_stat import (
+    compute_stats_from_redu,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+if TYPE_CHECKING:
+    pass
 
 log = logging.getLogger(__name__)
 
@@ -91,8 +104,10 @@ class EnerFitting(Fitting):
 
     Parameters
     ----------
-    descrpt
-            The descrptor :math:`\mathcal{D}`
+    ntypes
+            The ntypes of the descrptor :math:`\mathcal{D}`
+    dim_descrpt
+            The dimension of the descrptor :math:`\mathcal{D}`
     neuron
             Number of neurons :math:`N` in each hidden layer of the fitting net
     resnet_dt
@@ -126,11 +141,15 @@ class EnerFitting(Fitting):
     use_aparam_as_mask: bool, optional
             If True, the atomic parameters will be used as a mask that determines the atom is real/virtual.
             And the aparam will not be used as the atomic parameters for embedding.
+    mixed_types : bool
+        If true, use a uniform fitting net for all atom types, otherwise use
+        different fitting nets for different atom types.
     """
 
     def __init__(
         self,
-        descrpt: tf.Tensor,
+        ntypes: int,
+        dim_descrpt: int,
         neuron: List[int] = [120, 120, 120],
         resnet_dt: bool = True,
         numb_fparam: int = 0,
@@ -146,12 +165,13 @@ def __init__(
         layer_name: Optional[List[Optional[str]]] = None,
         use_aparam_as_mask: bool = False,
         spin: Optional[Spin] = None,
+        mixed_types: bool = False,
         **kwargs,
     ) -> None:
         """Constructor."""
         # model param
-        self.ntypes = descrpt.get_ntypes()
-        self.dim_descrpt = descrpt.get_dim_out()
+        self.ntypes = ntypes
+        self.dim_descrpt = dim_descrpt
         self.use_aparam_as_mask = use_aparam_as_mask
         # args = ()\
         #        .add('numb_fparam',      int,    default = 0)\
@@ -176,6 +196,7 @@ def __init__(
         self.ntypes_spin = self.spin.get_ntypes_spin() if self.spin is not None else 0
         self.seed_shift = one_layer_rand_seed_shift()
         self.tot_ener_zero = tot_ener_zero
+        self.activation_function_name = activation_function
         self.fitting_activation_fn = get_activation_func(activation_function)
         self.fitting_precision = get_precision(precision)
         self.trainable = trainable
@@ -188,7 +209,7 @@ def __init__(
         ), "length of trainable should be that of n_neuron + 1"
         self.atom_ener = []
         self.atom_ener_v = atom_ener
-        for at, ae in enumerate(atom_ener):
+        for at, ae in enumerate(atom_ener if atom_ener is not None else []):
             if ae is not None:
                 self.atom_ener.append(
                     tf.constant(ae, GLOBAL_TF_FLOAT_PRECISION, name="atom_%d_ener" % at)
@@ -202,16 +223,16 @@ def __init__(
             add_data_requirement(
                 "fparam", self.numb_fparam, atomic=False, must=True, high_prec=False
             )
-            self.fparam_avg = None
-            self.fparam_std = None
-            self.fparam_inv_std = None
+        self.fparam_avg = None
+        self.fparam_std = None
+        self.fparam_inv_std = None
         if self.numb_aparam > 0:
             add_data_requirement(
                 "aparam", self.numb_aparam, atomic=True, must=True, high_prec=False
             )
-            self.aparam_avg = None
-            self.aparam_std = None
-            self.aparam_inv_std = None
+        self.aparam_avg = None
+        self.aparam_std = None
+        self.aparam_inv_std = None
 
         self.fitting_net_variables = None
         self.mixed_prec = None
@@ -221,6 +242,7 @@ def __init__(
             assert (
                 len(self.layer_name) == len(self.n_neuron) + 1
             ), "length of layer_name should be that of n_neuron + 1"
+        self.mixed_types = mixed_types
 
     def get_numb_fparam(self) -> int:
         """Get the number of frame parameters."""
@@ -284,21 +306,17 @@ def _compute_output_stats(self, all_stat, rcond=1e-3, mixed_type=False):
             # In this situation, we directly use these assigned energies instead of computing stats.
             # This will make the loss decrease quickly
             assigned_atom_ener = np.array(
-                [ee for ee in self.atom_ener_v if ee is not None]
+                [ee if ee is not None else np.nan for ee in self.atom_ener_v]
             )
-            assigned_ener_idx = [
-                ii for ii, ee in enumerate(self.atom_ener_v) if ee is not None
-            ]
-            # np.dot out size: nframe
-            sys_ener -= np.dot(sys_tynatom[:, assigned_ener_idx], assigned_atom_ener)
-            sys_tynatom[:, assigned_ener_idx] = 0.0
-        energy_shift, resd, rank, s_value = np.linalg.lstsq(
-            sys_tynatom, sys_ener, rcond=rcond
+        else:
+            assigned_atom_ener = None
+        energy_shift, _ = compute_stats_from_redu(
+            sys_ener.reshape(-1, 1),
+            sys_tynatom,
+            assigned_bias=assigned_atom_ener,
+            rcond=rcond,
         )
-        if len(self.atom_ener) > 0:
-            for ii in assigned_ener_idx:
-                energy_shift[ii] = self.atom_ener_v[ii]
-        return energy_shift
+        return energy_shift.ravel()
 
     def compute_input_stats(self, all_stat: dict, protection: float = 1e-2) -> None:
         """Compute the input statistics.
@@ -572,6 +590,8 @@ def build(
                 )
             else:
                 inputs_zero = tf.zeros_like(inputs, dtype=GLOBAL_TF_FLOAT_PRECISION)
+        else:
+            inputs_zero = None
 
         if bias_atom_e is not None:
             assert len(bias_atom_e) == self.ntypes
@@ -615,13 +635,29 @@ def build(
         ):
             type_embedding = nvnmd_cfg.map["t_ebd"]
         if type_embedding is not None:
+            # keep old behavior
+            self.mixed_types = True
             atype_embed = tf.nn.embedding_lookup(type_embedding, self.atype_nloc)
         else:
             atype_embed = None
 
         self.atype_embed = atype_embed
+        original_dim_descrpt = self.dim_descrpt
+        if atype_embed is not None:
+            atype_embed = tf.cast(atype_embed, GLOBAL_TF_FLOAT_PRECISION)
+            type_shape = atype_embed.get_shape().as_list()
+            inputs = tf.concat(
+                [tf.reshape(inputs, [-1, self.dim_descrpt]), atype_embed], axis=1
+            )
+            self.dim_descrpt = self.dim_descrpt + type_shape[1]
+            if len(self.atom_ener):
+                assert inputs_zero is not None
+                inputs_zero = tf.concat(
+                    [tf.reshape(inputs_zero, [-1, original_dim_descrpt]), atype_embed],
+                    axis=1,
+                )
 
-        if atype_embed is None:
+        if not self.mixed_types:
             start_index = 0
             outs_list = []
             for type_i in range(ntypes_atom):
@@ -660,13 +696,6 @@ def build(
             outs = tf.concat(outs_list, axis=1)
         # with type embedding
         else:
-            atype_embed = tf.cast(atype_embed, GLOBAL_TF_FLOAT_PRECISION)
-            type_shape = atype_embed.get_shape().as_list()
-            inputs = tf.concat(
-                [tf.reshape(inputs, [-1, self.dim_descrpt]), atype_embed], axis=1
-            )
-            original_dim_descrpt = self.dim_descrpt
-            self.dim_descrpt = self.dim_descrpt + type_shape[1]
             inputs = tf.reshape(inputs, [-1, natoms[0], self.dim_descrpt])
             final_layer = self._build_lower(
                 0,
@@ -680,10 +709,6 @@ def build(
             )
             if len(self.atom_ener):
                 # remove contribution in vacuum
-                inputs_zero = tf.concat(
-                    [tf.reshape(inputs_zero, [-1, original_dim_descrpt]), atype_embed],
-                    axis=1,
-                )
                 inputs_zero = tf.reshape(inputs_zero, [-1, natoms[0], self.dim_descrpt])
                 zero_layer = self._build_lower(
                     0,
@@ -780,109 +805,21 @@ def change_energy_bias(
         frozen_model,
         origin_type_map,
         full_type_map,
-        bias_shift="delta",
+        bias_adjust_mode="change-by-statistic",
         ntest=10,
     ) -> None:
-        """Change the energy bias according to the input data and the pretrained model.
-
-        Parameters
-        ----------
-        data : DeepmdDataSystem
-            The training data.
-        frozen_model : str
-            The path file of frozen model.
-        origin_type_map : list
-            The original type_map in dataset, they are targets to change the energy bias.
-        full_type_map : str
-            The full type_map in pretrained model
-        bias_shift : str
-            The mode for changing energy bias : ['delta', 'statistic']
-            'delta' : perform predictions on energies of target dataset,
-                    and do least sqaure on the errors to obtain the target shift as bias.
-            'statistic' : directly use the statistic energy bias in the target dataset.
-        ntest : int
-            The number of test samples in a system to change the energy bias.
-        """
-        type_numbs = []
-        energy_ground_truth = []
-        energy_predict = []
-        sorter = np.argsort(full_type_map)
-        idx_type_map = sorter[
-            np.searchsorted(full_type_map, origin_type_map, sorter=sorter)
-        ]
-        mixed_type = data.mixed_type
-        numb_type = len(full_type_map)
         dp = None
-        if bias_shift == "delta":
+        if bias_adjust_mode == "change-by-statistic":
             # init model
             dp = DeepPotential(frozen_model)
-        for sys in data.data_systems:
-            test_data = sys.get_test()
-            nframes = test_data["box"].shape[0]
-            numb_test = min(nframes, ntest)
-            if mixed_type:
-                atype = test_data["type"][:numb_test].reshape([numb_test, -1])
-            else:
-                atype = test_data["type"][0]
-            assert np.array(
-                [i in idx_type_map for i in list(set(atype.reshape(-1)))]
-            ).all(), "Some types are not in 'type_map'!"
-            energy_ground_truth.append(
-                test_data["energy"][:numb_test].reshape([numb_test, 1])
-            )
-            if mixed_type:
-                type_numbs.append(
-                    np.array(
-                        [(atype == i).sum(axis=-1) for i in idx_type_map],
-                        dtype=np.int32,
-                    ).T
-                )
-            else:
-                type_numbs.append(
-                    np.tile(
-                        np.bincount(atype, minlength=numb_type)[idx_type_map],
-                        (numb_test, 1),
-                    )
-                )
-            if bias_shift == "delta":
-                coord = test_data["coord"][:numb_test].reshape([numb_test, -1])
-                if sys.pbc:
-                    box = test_data["box"][:numb_test]
-                else:
-                    box = None
-                ret = dp.eval(coord, box, atype, mixed_type=mixed_type)
-                energy_predict.append(ret[0].reshape([numb_test, 1]))
-        type_numbs = np.concatenate(type_numbs)
-        energy_ground_truth = np.concatenate(energy_ground_truth)
-        old_bias = self.bias_atom_e[idx_type_map]
-        if bias_shift == "delta":
-            energy_predict = np.concatenate(energy_predict)
-            bias_diff = energy_ground_truth - energy_predict
-            delta_bias = np.linalg.lstsq(type_numbs, bias_diff, rcond=None)[0]
-            unbias_e = energy_predict + type_numbs @ delta_bias
-            atom_numbs = type_numbs.sum(-1)
-            rmse_ae = np.sqrt(
-                np.mean(
-                    np.square(
-                        (unbias_e.ravel() - energy_ground_truth.ravel()) / atom_numbs
-                    )
-                )
-            )
-            self.bias_atom_e[idx_type_map] += delta_bias.reshape(-1)
-            log.info(
-                f"RMSE of atomic energy after linear regression is: {rmse_ae} eV/atom."
-            )
-        elif bias_shift == "statistic":
-            statistic_bias = np.linalg.lstsq(
-                type_numbs, energy_ground_truth, rcond=None
-            )[0]
-            self.bias_atom_e[idx_type_map] = statistic_bias.reshape(-1)
-        else:
-            raise RuntimeError("Unknown bias_shift mode: " + bias_shift)
-        log.info(
-            "Change energy bias of {} from {} to {}.".format(
-                str(origin_type_map), str(old_bias), str(self.bias_atom_e[idx_type_map])
-            )
+        self.bias_atom_e = change_energy_bias_lower(
+            data,
+            dp,
+            origin_type_map,
+            full_type_map,
+            self.bias_atom_e,
+            bias_adjust_mode=bias_adjust_mode,
+            ntest=ntest,
         )
 
     def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None:
@@ -921,3 +858,84 @@ def get_loss(self, loss: dict, lr) -> Loss:
             return EnerSpinLoss(**loss, use_spin=self.spin.use_spin)
         else:
             raise RuntimeError("unknown loss type")
+
+    @classmethod
+    def deserialize(cls, data: dict, suffix: str = ""):
+        """Deserialize the model.
+
+        Parameters
+        ----------
+        data : dict
+            The serialized data
+
+        Returns
+        -------
+        Model
+            The deserialized model
+        """
+        data = data.copy()
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        fitting = cls(**data)
+        fitting.fitting_net_variables = cls.deserialize_network(
+            data["nets"],
+            suffix=suffix,
+        )
+        fitting.bias_atom_e = data["@variables"]["bias_atom_e"].ravel()
+        if fitting.numb_fparam > 0:
+            fitting.fparam_avg = data["@variables"]["fparam_avg"]
+            fitting.fparam_inv_std = data["@variables"]["fparam_inv_std"]
+        if fitting.numb_aparam > 0:
+            fitting.aparam_avg = data["@variables"]["aparam_avg"]
+            fitting.aparam_inv_std = data["@variables"]["aparam_inv_std"]
+        return fitting
+
+    def serialize(self, suffix: str = "") -> dict:
+        """Serialize the model.
+
+        Returns
+        -------
+        dict
+            The serialized data
+        """
+        data = {
+            "@class": "Fitting",
+            "type": "ener",
+            "@version": 1,
+            "var_name": "energy",
+            "ntypes": self.ntypes,
+            "dim_descrpt": self.dim_descrpt,
+            "mixed_types": self.mixed_types,
+            "dim_out": 1,
+            "neuron": self.n_neuron,
+            "resnet_dt": self.resnet_dt,
+            "numb_fparam": self.numb_fparam,
+            "numb_aparam": self.numb_aparam,
+            "rcond": self.rcond,
+            "tot_ener_zero": self.tot_ener_zero,
+            "trainable": self.trainable,
+            "atom_ener": self.atom_ener_v,
+            "activation_function": self.activation_function_name,
+            "precision": self.fitting_precision.name,
+            "layer_name": self.layer_name,
+            "use_aparam_as_mask": self.use_aparam_as_mask,
+            "spin": self.spin,
+            "exclude_types": [],
+            "nets": self.serialize_network(
+                ntypes=self.ntypes,
+                ndim=0 if self.mixed_types else 1,
+                in_dim=self.dim_descrpt + self.numb_fparam + self.numb_aparam,
+                neuron=self.n_neuron,
+                activation_function=self.activation_function_name,
+                resnet_dt=self.resnet_dt,
+                variables=self.fitting_net_variables,
+                suffix=suffix,
+            ),
+            "@variables": {
+                "bias_atom_e": self.bias_atom_e.reshape(-1, 1),
+                "fparam_avg": self.fparam_avg,
+                "fparam_inv_std": self.fparam_inv_std,
+                "aparam_avg": self.aparam_avg,
+                "aparam_inv_std": self.aparam_inv_std,
+            },
+        }
+        return data
diff --git a/deepmd/tf/fit/fitting.py b/deepmd/tf/fit/fitting.py
new file mode 100644
index 0000000000..0f73230bc8
--- /dev/null
+++ b/deepmd/tf/fit/fitting.py
@@ -0,0 +1,255 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import re
+from abc import (
+    abstractmethod,
+)
+from typing import (
+    List,
+    Optional,
+)
+
+from deepmd.common import (
+    j_get_type,
+)
+from deepmd.dpmodel.utils.network import (
+    FittingNet,
+    NetworkCollection,
+)
+from deepmd.tf.env import (
+    FITTING_NET_PATTERN,
+    tf,
+)
+from deepmd.tf.loss.loss import (
+    Loss,
+)
+from deepmd.tf.utils import (
+    PluginVariant,
+)
+from deepmd.utils.plugin import (
+    make_plugin_registry,
+)
+
+
+class Fitting(PluginVariant, make_plugin_registry("fitting")):
+    def __new__(cls, *args, **kwargs):
+        if cls is Fitting:
+            cls = cls.get_class_by_type(j_get_type(kwargs, cls.__name__))
+        return super().__new__(cls)
+
+    @property
+    def precision(self) -> tf.DType:
+        """Precision of fitting network."""
+        return self.fitting_precision
+
+    def init_variables(
+        self,
+        graph: tf.Graph,
+        graph_def: tf.GraphDef,
+        suffix: str = "",
+    ) -> None:
+        """Init the fitting net variables with the given dict.
+
+        Parameters
+        ----------
+        graph : tf.Graph
+            The input frozen model graph
+        graph_def : tf.GraphDef
+            The input frozen model graph_def
+        suffix : str
+            suffix to name scope
+
+        Notes
+        -----
+        This method is called by others when the fitting supported initialization from the given variables.
+        """
+        raise NotImplementedError(
+            "Fitting %s doesn't support initialization from the given variables!"
+            % type(self).__name__
+        )
+
+    @abstractmethod
+    def get_loss(self, loss: dict, lr) -> Loss:
+        """Get the loss function.
+
+        Parameters
+        ----------
+        loss : dict
+            the loss dict
+        lr : LearningRateExp
+            the learning rate
+
+        Returns
+        -------
+        Loss
+            the loss function
+        """
+
+    @classmethod
+    def deserialize(cls, data: dict, suffix: str = "") -> "Fitting":
+        """Deserialize the fitting.
+
+        There is no suffix in a native DP model, but it is important
+        for the TF backend.
+
+        Parameters
+        ----------
+        data : dict
+            The serialized data
+        suffix : str, optional
+            Name suffix to identify this fitting
+
+        Returns
+        -------
+        Fitting
+            The deserialized fitting
+        """
+        if cls is Fitting:
+            return Fitting.get_class_by_type(
+                j_get_type(data, cls.__name__)
+            ).deserialize(data, suffix=suffix)
+        raise NotImplementedError("Not implemented in class %s" % cls.__name__)
+
+    def serialize(self, suffix: str = "") -> dict:
+        """Serialize the fitting.
+
+        There is no suffix in a native DP model, but it is important
+        for the TF backend.
+
+        Returns
+        -------
+        dict
+            The serialized data
+        suffix : str, optional
+            Name suffix to identify this fitting
+        """
+        raise NotImplementedError("Not implemented in class %s" % self.__name__)
+
+    def serialize_network(
+        self,
+        ntypes: int,
+        ndim: int,
+        in_dim: int,
+        neuron: List[int],
+        activation_function: str,
+        resnet_dt: bool,
+        variables: dict,
+        out_dim: Optional[int] = 1,
+        suffix: str = "",
+    ) -> dict:
+        """Serialize network.
+
+        Parameters
+        ----------
+        ntypes : int
+            The number of types
+        ndim : int
+            The dimension of elements
+        in_dim : int
+            The input dimension
+        neuron : List[int]
+            The neuron list
+        activation_function : str
+            The activation function
+        resnet_dt : bool
+            Whether to use resnet
+        variables : dict
+            The input variables
+        suffix : str, optional
+            The suffix of the scope
+        out_dim : int, optional
+            The output dimension
+
+        Returns
+        -------
+        dict
+            The converted network data
+        """
+        fittings = NetworkCollection(
+            ntypes=ntypes,
+            ndim=ndim,
+            network_type="fitting_network",
+        )
+        if suffix != "":
+            fitting_net_pattern = (
+                FITTING_NET_PATTERN.replace("/(idt)", suffix + "/(idt)")
+                .replace("/(bias)", suffix + "/(bias)")
+                .replace("/(matrix)", suffix + "/(matrix)")
+            )
+        else:
+            fitting_net_pattern = FITTING_NET_PATTERN
+        for key, value in variables.items():
+            m = re.search(fitting_net_pattern, key)
+            m = [mm for mm in m.groups() if mm is not None]
+            layer_idx = int(m[0]) if m[0] != "final" else len(neuron)
+            weight_name = m[-1]
+            if ndim == 0:
+                network_idx = ()
+            elif ndim == 1:
+                network_idx = (int(m[1]),)
+            else:
+                raise ValueError(f"Invalid ndim: {ndim}")
+            if fittings[network_idx] is None:
+                # initialize the network if it is not initialized
+                fittings[network_idx] = FittingNet(
+                    in_dim=in_dim,
+                    out_dim=out_dim,
+                    neuron=neuron,
+                    activation_function=activation_function,
+                    resnet_dt=resnet_dt,
+                    precision=self.precision.name,
+                    bias_out=True,
+                )
+            assert fittings[network_idx] is not None
+            if weight_name == "idt":
+                value = value.ravel()
+            fittings[network_idx][layer_idx][weight_name] = value
+        return fittings.serialize()
+
+    @classmethod
+    def deserialize_network(cls, data: dict, suffix: str = "") -> dict:
+        """Deserialize network.
+
+        Parameters
+        ----------
+        data : dict
+            The input network data
+        suffix : str, optional
+            The suffix of the scope
+
+        Returns
+        -------
+        variables : dict
+            The input variables
+        """
+        fitting_net_variables = {}
+        fittings = NetworkCollection.deserialize(data)
+        for ii in range(fittings.ntypes**fittings.ndim):
+            net_idx = []
+            rest_ii = ii
+            for _ in range(fittings.ndim):
+                net_idx.append(rest_ii % fittings.ntypes)
+                rest_ii //= fittings.ntypes
+            net_idx = tuple(net_idx)
+            if fittings.ndim == 0:
+                key = ""
+            elif fittings.ndim == 1:
+                key = "_type_" + str(net_idx[0])
+            else:
+                raise ValueError(f"Invalid ndim: {fittings.ndim}")
+            network = fittings[net_idx]
+            assert network is not None
+            for layer_idx, layer in enumerate(network.layers):
+                if layer_idx == len(network.layers) - 1:
+                    layer_name = "final_layer"
+                else:
+                    layer_name = f"layer_{layer_idx}"
+                fitting_net_variables[f"{layer_name}{key}{suffix}/matrix"] = layer.w
+                fitting_net_variables[f"{layer_name}{key}{suffix}/bias"] = layer.b
+                if layer.idt is not None:
+                    fitting_net_variables[f"{layer_name}{key}{suffix}/idt"] = (
+                        layer.idt.reshape(1, -1)
+                    )
+                else:
+                    # prevent keyError
+                    fitting_net_variables[f"{layer_name}{key}{suffix}/idt"] = 0.0
+        return fitting_net_variables
diff --git a/deepmd/fit/polar.py b/deepmd/tf/fit/polar.py
similarity index 83%
rename from deepmd/fit/polar.py
rename to deepmd/tf/fit/polar.py
index 8f6631866c..473b57ff54 100644
--- a/deepmd/fit/polar.py
+++ b/deepmd/tf/fit/polar.py
@@ -7,33 +7,37 @@
 
 import numpy as np
 
-from deepmd.common import (
+from deepmd.tf.common import (
     cast_precision,
     get_activation_func,
     get_precision,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
+    GLOBAL_TF_FLOAT_PRECISION,
     tf,
 )
-from deepmd.fit.fitting import (
+from deepmd.tf.fit.fitting import (
     Fitting,
 )
-from deepmd.loss.loss import (
+from deepmd.tf.loss.loss import (
     Loss,
 )
-from deepmd.loss.tensor import (
+from deepmd.tf.loss.tensor import (
     TensorLoss,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_fitting_net_variables_from_graph_def,
 )
-from deepmd.utils.network import (
+from deepmd.tf.utils.network import (
     one_layer,
     one_layer_rand_seed_shift,
 )
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
 
 
 @Fitting.register("polar")
@@ -42,8 +46,12 @@ class PolarFittingSeA(Fitting):
 
     Parameters
     ----------
-    descrpt : tf.Tensor
-            The descrptor
+    ntypes
+            The ntypes of the descrptor :math:`\mathcal{D}`
+    dim_descrpt
+            The dimension of the descrptor :math:`\mathcal{D}`
+    embedding_width
+            The rotation matrix dimension of the descrptor :math:`\mathcal{D}`
     neuron : List[int]
             Number of neurons in each hidden layer of the fitting net
     resnet_dt : bool
@@ -65,11 +73,16 @@ class PolarFittingSeA(Fitting):
             The precision of the embedding net parameters. Supported options are |PRECISION|
     uniform_seed
             Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed
+    mixed_types : bool
+        If true, use a uniform fitting net for all atom types, otherwise use
+        different fitting nets for different atom types.
     """
 
     def __init__(
         self,
-        descrpt: tf.Tensor,
+        ntypes: int,
+        dim_descrpt: int,
+        embedding_width: int,
         neuron: List[int] = [120, 120, 120],
         resnet_dt: bool = True,
         sel_type: Optional[List[int]] = None,
@@ -81,11 +94,12 @@ def __init__(
         activation_function: str = "tanh",
         precision: str = "default",
         uniform_seed: bool = False,
+        mixed_types: bool = False,
         **kwargs,
     ) -> None:
         """Constructor."""
-        self.ntypes = descrpt.get_ntypes()
-        self.dim_descrpt = descrpt.get_dim_out()
+        self.ntypes = ntypes
+        self.dim_descrpt = dim_descrpt
         self.n_neuron = neuron
         self.resnet_dt = resnet_dt
         self.sel_type = sel_type
@@ -96,6 +110,7 @@ def __init__(
         # self.diag_shift = diag_shift
         self.shift_diag = shift_diag
         self.scale = scale
+        self.activation_function_name = activation_function
         self.fitting_activation_fn = get_activation_func(activation_function)
         self.fitting_precision = get_precision(precision)
         if self.sel_type is None:
@@ -104,7 +119,19 @@ def __init__(
             [ii in self.sel_type for ii in range(self.ntypes)], dtype=bool
         )
         if self.scale is None:
-            self.scale = [1.0 for ii in range(self.ntypes)]
+            self.scale = np.array([1.0 for ii in range(self.ntypes)])
+        else:
+            if isinstance(self.scale, list):
+                assert (
+                    len(self.scale) == ntypes
+                ), "Scale should be a list of length ntypes."
+            elif isinstance(self.scale, float):
+                self.scale = [self.scale for _ in range(ntypes)]
+            else:
+                raise ValueError(
+                    "Scale must be a list of float of length ntypes or a float."
+                )
+            self.scale = np.array(self.scale)
         # if self.diag_shift is None:
         #    self.diag_shift = [0.0 for ii in range(self.ntypes)]
         if not isinstance(self.sel_type, list):
@@ -115,14 +142,12 @@ def __init__(
         )  # self.ntypes x 1, store the average diagonal value
         # if type(self.diag_shift) is not list:
         #    self.diag_shift = [self.diag_shift]
-        if not isinstance(self.scale, list):
-            self.scale = [self.scale for ii in range(self.ntypes)]
-        self.scale = np.array(self.scale)
-        self.dim_rot_mat_1 = descrpt.get_dim_rot_mat_1()
+        self.dim_rot_mat_1 = embedding_width
         self.dim_rot_mat = self.dim_rot_mat_1 * 3
         self.useBN = False
         self.fitting_net_variables = None
         self.mixed_prec = None
+        self.mixed_types = mixed_types
 
     def get_sel_type(self) -> List[int]:
         """Get selected atom types."""
@@ -132,16 +157,14 @@ def get_out_size(self) -> int:
         """Get the output size. Should be 9."""
         return 9
 
-    def compute_input_stats(self, all_stat, protection=1e-2):
-        """Compute the input statistics.
+    def compute_output_stats(self, all_stat):
+        """Compute the output statistics.
 
         Parameters
         ----------
         all_stat
             Dictionary of inputs.
             can be prepared by model.make_stat_input
-        protection
-            Divided-by-zero protection
         """
         if "polarizability" not in all_stat.keys():
             self.avgeig = np.zeros([9])
@@ -166,6 +189,7 @@ def compute_input_stats(self, all_stat, protection=1e-2):
             mean_polar = np.zeros([len(self.sel_type), 9])
             sys_matrix, polar_bias = [], []
             for ss in range(len(all_stat["type"])):
+                nframes = all_stat["type"][ss].shape[0]
                 atom_has_polar = [
                     w for w in all_stat["type"][ss][0] if (w in self.sel_type)
                 ]  # select atom with polar
@@ -176,7 +200,7 @@ def compute_input_stats(self, all_stat, protection=1e-2):
                         index_lis = [
                             index
                             for index, w in enumerate(atom_has_polar)
-                            if atom_has_polar[index] == self.sel_type[itype]
+                            if w == self.sel_type[itype]
                         ]  # select index in this type
 
                         sys_matrix.append(np.zeros((1, len(self.sel_type))))
@@ -184,10 +208,11 @@ def compute_input_stats(self, all_stat, protection=1e-2):
 
                         polar_bias.append(
                             np.sum(
-                                all_stat["atomic_polarizability"][ss].reshape((-1, 9))[
-                                    index_lis
-                                ],
-                                axis=0,
+                                all_stat["atomic_polarizability"][ss].reshape(
+                                    nframes, len(atom_has_polar), -1
+                                )[:, index_lis, :]
+                                / nframes,
+                                axis=(0, 1),
                             ).reshape((1, 9))
                         )
                 else:  # No atomic polar in this system, so it should have global polar
@@ -211,7 +236,9 @@ def compute_input_stats(self, all_stat, protection=1e-2):
                         sys_matrix[-1][0, itype] = len(index_lis)
 
                     # add polar_bias
-                    polar_bias.append(all_stat["polarizability"][ss].reshape((1, 9)))
+                    polar_bias.append(
+                        np.mean(all_stat["polarizability"][ss], axis=0).reshape((1, 9))
+                    )
 
             matrix, bias = (
                 np.concatenate(sys_matrix, axis=0),
@@ -223,6 +250,7 @@ def compute_input_stats(self, all_stat, protection=1e-2):
                     np.diagonal(atom_polar[itype].reshape((3, 3)))
                 )
 
+    @cast_precision
     def _build_lower(self, start_index, natoms, inputs, rot_mat, suffix="", reuse=None):
         # cut-out inputs
         inputs_i = tf.slice(
@@ -331,7 +359,6 @@ def _build_lower(self, start_index, natoms, inputs, rot_mat, suffix="", reuse=No
         final_layer = tf.reshape(final_layer, [tf.shape(inputs)[0], natoms, 3, 3])
         return final_layer
 
-    @cast_precision
     def build(
         self,
         input_d: tf.Tensor,
@@ -374,8 +401,12 @@ def build(
         start_index = 0
         inputs = tf.reshape(input_d, [-1, self.dim_descrpt * natoms[0]])
         rot_mat = tf.reshape(rot_mat, [-1, self.dim_rot_mat * natoms[0]])
+        if nframes is None:
+            nframes = tf.shape(inputs)[0]
 
-        if type_embedding is not None:
+        if self.mixed_types or type_embedding is not None:
+            # keep old behavior
+            self.mixed_types = True
             # nframes x nloc
             nloc_mask = tf.reshape(
                 tf.tile(tf.repeat(self.sel_mask, natoms[2:]), [nframes]), [nframes, -1]
@@ -404,13 +435,28 @@ def build(
             self.nloc_masked = tf.shape(
                 tf.reshape(self.atype_nloc_masked, [nframes, -1])
             )[1]
+
+        if type_embedding is not None:
             atype_embed = tf.nn.embedding_lookup(type_embedding, self.atype_nloc_masked)
         else:
             atype_embed = None
 
         self.atype_embed = atype_embed
+        if atype_embed is not None:
+            inputs = tf.reshape(
+                tf.reshape(inputs, [nframes, natoms[0], self.dim_descrpt])[nloc_mask],
+                [-1, self.dim_descrpt],
+            )
+            rot_mat = tf.reshape(
+                tf.reshape(rot_mat, [nframes, natoms[0], self.dim_rot_mat])[nloc_mask],
+                [-1, self.dim_rot_mat * self.nloc_masked],
+            )
+            atype_embed = tf.cast(atype_embed, self.fitting_precision)
+            type_shape = atype_embed.get_shape().as_list()
+            inputs = tf.concat([inputs, atype_embed], axis=1)
+            self.dim_descrpt = self.dim_descrpt + type_shape[1]
 
-        if atype_embed is None:
+        if not self.mixed_types:
             count = 0
             outs_list = []
             for type_i in range(self.ntypes):
@@ -431,7 +477,7 @@ def build(
                 final_layer = final_layer + self.constant_matrix[sel_type_idx] * tf.eye(
                     3,
                     batch_shape=[tf.shape(inputs)[0], natoms[2 + type_i]],
-                    dtype=self.fitting_precision,
+                    dtype=GLOBAL_TF_FLOAT_PRECISION,
                 )
                 start_index += natoms[2 + type_i]
 
@@ -440,18 +486,6 @@ def build(
                 count += 1
             outs = tf.concat(outs_list, axis=1)
         else:
-            inputs = tf.reshape(
-                tf.reshape(inputs, [nframes, natoms[0], self.dim_descrpt])[nloc_mask],
-                [-1, self.dim_descrpt],
-            )
-            rot_mat = tf.reshape(
-                tf.reshape(rot_mat, [nframes, natoms[0], self.dim_rot_mat])[nloc_mask],
-                [-1, self.dim_rot_mat * self.nloc_masked],
-            )
-            atype_embed = tf.cast(atype_embed, self.fitting_precision)
-            type_shape = atype_embed.get_shape().as_list()
-            inputs = tf.concat([inputs, atype_embed], axis=1)
-            self.dim_descrpt = self.dim_descrpt + type_shape[1]
             inputs = tf.reshape(inputs, [-1, self.dim_descrpt * self.nloc_masked])
             final_layer = self._build_lower(
                 0, self.nloc_masked, inputs, rot_mat, suffix=suffix, reuse=reuse
@@ -461,7 +495,7 @@ def build(
             if self.shift_diag:
                 final_layer += tf.expand_dims(
                     tf.expand_dims(constant_matrix, -1), -1
-                ) * tf.eye(3, batch_shape=[1, 1], dtype=self.fitting_precision)
+                ) * tf.eye(3, batch_shape=[1, 1], dtype=GLOBAL_TF_FLOAT_PRECISION)
             outs = final_layer
 
         tf.summary.histogram("fitting_net_output", outs)
@@ -509,6 +543,71 @@ def get_loss(self, loss: dict, lr) -> Loss:
             label_name="polarizability",
         )
 
+    def serialize(self, suffix: str) -> dict:
+        """Serialize the model.
+
+        Returns
+        -------
+        dict
+            The serialized data
+        """
+        data = {
+            "@class": "Fitting",
+            "type": "polar",
+            "@version": 1,
+            "var_name": "polar",
+            "ntypes": self.ntypes,
+            "dim_descrpt": self.dim_descrpt,
+            "embedding_width": self.dim_rot_mat_1,
+            "mixed_types": self.mixed_types,
+            "dim_out": 3,
+            "neuron": self.n_neuron,
+            "resnet_dt": self.resnet_dt,
+            "activation_function": self.activation_function_name,
+            "precision": self.fitting_precision.name,
+            "exclude_types": [],
+            "fit_diag": self.fit_diag,
+            "scale": list(self.scale),
+            "shift_diag": self.shift_diag,
+            "nets": self.serialize_network(
+                ntypes=self.ntypes,
+                ndim=0 if self.mixed_types else 1,
+                in_dim=self.dim_descrpt,
+                out_dim=self.dim_rot_mat_1,
+                neuron=self.n_neuron,
+                activation_function=self.activation_function_name,
+                resnet_dt=self.resnet_dt,
+                variables=self.fitting_net_variables,
+                suffix=suffix,
+            ),
+        }
+        return data
+
+    @classmethod
+    def deserialize(cls, data: dict, suffix: str):
+        """Deserialize the model.
+
+        Parameters
+        ----------
+        data : dict
+            The serialized data
+
+        Returns
+        -------
+        Model
+            The deserialized model
+        """
+        data = data.copy()
+        check_version_compatibility(
+            data.pop("@version", 1), 2, 1
+        )  # to allow PT version.
+        fitting = cls(**data)
+        fitting.fitting_net_variables = cls.deserialize_network(
+            data["nets"],
+            suffix=suffix,
+        )
+        return fitting
+
 
 class GlobalPolarFittingSeA:
     r"""Fit the system polarizability with descriptor se_a.
diff --git a/deepmd/tf/infer/__init__.py b/deepmd/tf/infer/__init__.py
new file mode 100644
index 0000000000..9ef9c0d348
--- /dev/null
+++ b/deepmd/tf/infer/__init__.py
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Submodule containing all the implemented potentials."""
+
+from typing import (
+    TYPE_CHECKING,
+)
+
+from deepmd.infer import (
+    DeepPotential,
+)
+
+from .data_modifier import (
+    DipoleChargeModifier,
+)
+from .deep_dipole import (
+    DeepDipole,
+)
+from .deep_dos import (
+    DeepDOS,
+)
+from .deep_eval import (
+    DeepEval,
+)
+from .deep_polar import (
+    DeepGlobalPolar,
+    DeepPolar,
+)
+from .deep_pot import (
+    DeepPot,
+)
+from .deep_wfc import (
+    DeepWFC,
+)
+from .ewald_recp import (
+    EwaldRecp,
+)
+from .model_devi import (
+    calc_model_devi,
+)
+
+if TYPE_CHECKING:
+    from deepmd.infer.deep_eval import (
+        DeepEval,
+    )
+
+__all__ = [
+    "DeepPotential",
+    "DeepDipole",
+    "DeepEval",
+    "DeepGlobalPolar",
+    "DeepPolar",
+    "DeepPot",
+    "DeepDOS",
+    "DeepWFC",
+    "DipoleChargeModifier",
+    "EwaldRecp",
+    "calc_model_devi",
+]
diff --git a/deepmd/infer/data_modifier.py b/deepmd/tf/infer/data_modifier.py
similarity index 98%
rename from deepmd/infer/data_modifier.py
rename to deepmd/tf/infer/data_modifier.py
index 62c4b879e9..ccd072673d 100644
--- a/deepmd/infer/data_modifier.py
+++ b/deepmd/tf/infer/data_modifier.py
@@ -7,26 +7,24 @@
 
 import numpy as np
 
-import deepmd.op  # noqa: F401
-from deepmd.common import (
+import deepmd.tf.op  # noqa: F401
+from deepmd.tf.common import (
     make_default_mesh,
     select_idx_map,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     op_module,
     tf,
 )
-from deepmd.infer.deep_dipole import (
-    DeepDipole,
-)
-from deepmd.infer.ewald_recp import (
+from deepmd.tf.infer.deep_dipole import DeepDipoleOld as DeepDipole
+from deepmd.tf.infer.ewald_recp import (
     EwaldRecp,
 )
-from deepmd.utils.data import (
+from deepmd.tf.utils.data import (
     DeepmdData,
 )
-from deepmd.utils.sess import (
+from deepmd.tf.utils.sess import (
     run_sess,
 )
 
diff --git a/deepmd/tf/infer/deep_dipole.py b/deepmd/tf/infer/deep_dipole.py
new file mode 100644
index 0000000000..e10d09564d
--- /dev/null
+++ b/deepmd/tf/infer/deep_dipole.py
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from pathlib import (
+    Path,
+)
+from typing import (
+    Optional,
+)
+
+from deepmd.infer.deep_dipole import (
+    DeepDipole,
+)
+from deepmd.tf.infer.deep_tensor import (
+    DeepTensor,
+)
+
+__all__ = [
+    "DeepDipole",
+]
+
+
+class DeepDipoleOld(DeepTensor):
+    # used for DipoleChargeModifier only
+    """Constructor.
+
+    Parameters
+    ----------
+    model_file : Path
+        The name of the frozen model file.
+    load_prefix: str
+        The prefix in the load computational graph
+    default_tf_graph : bool
+        If uses the default tf graph, otherwise build a new tf graph for evaluation
+    input_map : dict, optional
+        The input map for tf.import_graph_def. Only work with default tf graph
+    neighbor_list : ase.neighborlist.NeighborList, optional
+        The neighbor list object. If None, then build the native neighbor list.
+
+    Warnings
+    --------
+    For developers: `DeepTensor` initializer must be called at the end after
+    `self.tensors` are modified because it uses the data in `self.tensors` dict.
+    Do not chanage the order!
+    """
+
+    def __init__(
+        self,
+        model_file: "Path",
+        load_prefix: str = "load",
+        default_tf_graph: bool = False,
+        input_map: Optional[dict] = None,
+        neighbor_list=None,
+    ) -> None:
+        # use this in favor of dict update to move attribute from class to
+        # instance namespace
+        self.tensors = dict(
+            {
+                # output tensor
+                "t_tensor": "o_dipole:0",
+            },
+            **self.tensors,
+        )
+
+        DeepTensor.__init__(
+            self,
+            model_file,
+            load_prefix=load_prefix,
+            default_tf_graph=default_tf_graph,
+            input_map=input_map,
+            neighbor_list=neighbor_list,
+        )
+
+    def get_dim_fparam(self) -> int:
+        """Unsupported in this model."""
+        raise NotImplementedError("This model type does not support this attribute")
+
+    def get_dim_aparam(self) -> int:
+        """Unsupported in this model."""
+        raise NotImplementedError("This model type does not support this attribute")
diff --git a/deepmd/tf/infer/deep_dos.py b/deepmd/tf/infer/deep_dos.py
new file mode 100644
index 0000000000..7a9f9b781c
--- /dev/null
+++ b/deepmd/tf/infer/deep_dos.py
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.infer.deep_dos import (
+    DeepDOS,
+)
+
+__all__ = [
+    "DeepDOS",
+]
diff --git a/deepmd/tf/infer/deep_eval.py b/deepmd/tf/infer/deep_eval.py
new file mode 100644
index 0000000000..ccbd44cf97
--- /dev/null
+++ b/deepmd/tf/infer/deep_eval.py
@@ -0,0 +1,1547 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from functools import (
+    lru_cache,
+)
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Tuple,
+    Union,
+)
+
+import numpy as np
+
+from deepmd.common import (
+    make_default_mesh,
+)
+from deepmd.dpmodel.output_def import (
+    ModelOutputDef,
+    OutputVariableCategory,
+)
+from deepmd.infer.deep_dipole import (
+    DeepDipole,
+)
+from deepmd.infer.deep_dos import (
+    DeepDOS,
+)
+from deepmd.infer.deep_eval import (
+    DeepEvalBackend,
+)
+from deepmd.infer.deep_polar import (
+    DeepGlobalPolar,
+    DeepPolar,
+)
+from deepmd.infer.deep_pot import (
+    DeepPot,
+)
+from deepmd.infer.deep_wfc import (
+    DeepWFC,
+)
+from deepmd.tf.env import (
+    MODEL_VERSION,
+    default_tf_session_config,
+    tf,
+)
+from deepmd.tf.utils.batch_size import (
+    AutoBatchSize,
+)
+from deepmd.tf.utils.sess import (
+    run_sess,
+)
+
+if TYPE_CHECKING:
+    from pathlib import (
+        Path,
+    )
+
+    from deepmd.infer.deep_eval import DeepEval as DeepEvalWrapper
+
+
+class DeepEval(DeepEvalBackend):
+    """TensorFlow backend implementation for DeepEval.
+
+    Parameters
+    ----------
+    model_file : Path
+        The name of the frozen model file.
+    output_def : ModelOutputDef
+        The output definition of the model.
+    *args : list
+        Positional arguments.
+    load_prefix: str
+        The prefix in the load computational graph
+    default_tf_graph : bool
+        If uses the default tf graph, otherwise build a new tf graph for evaluation
+    auto_batch_size : bool or int or AutomaticBatchSize, default: False
+        If True, automatic batch size will be used. If int, it will be used
+        as the initial batch size.
+    input_map : dict, optional
+        The input map for tf.import_graph_def. Only work with default tf graph
+    neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional
+        The ASE neighbor list class to produce the neighbor list. If None, the
+        neighbor list will be built natively in the model.
+    **kwargs : dict
+        Keyword arguments.
+    """
+
+    def __init__(
+        self,
+        model_file: "Path",
+        output_def: ModelOutputDef,
+        *args: list,
+        load_prefix: str = "load",
+        default_tf_graph: bool = False,
+        auto_batch_size: Union[bool, int, AutoBatchSize] = False,
+        input_map: Optional[dict] = None,
+        neighbor_list=None,
+        **kwargs: dict,
+    ):
+        self.graph = self._load_graph(
+            model_file,
+            prefix=load_prefix,
+            default_tf_graph=default_tf_graph,
+            input_map=input_map,
+        )
+        self.load_prefix = load_prefix
+
+        # graph_compatable should be called after graph and prefix are set
+        if not self._graph_compatable():
+            raise RuntimeError(
+                f"model in graph (version {self.model_version}) is incompatible"
+                f"with the model (version {MODEL_VERSION}) supported by the current code."
+                "See https://deepmd.rtfd.io/compatability/ for details."
+            )
+
+        # set default to False, as subclasses may not support
+        if isinstance(auto_batch_size, bool):
+            if auto_batch_size:
+                self.auto_batch_size = AutoBatchSize()
+            else:
+                self.auto_batch_size = None
+        elif isinstance(auto_batch_size, int):
+            self.auto_batch_size = AutoBatchSize(auto_batch_size)
+        elif isinstance(auto_batch_size, AutoBatchSize):
+            self.auto_batch_size = auto_batch_size
+        else:
+            raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize")
+
+        self.neighbor_list = neighbor_list
+
+        self.output_def = output_def
+        self._init_tensors()
+        self._init_attr()
+        self.has_efield = self.tensors["efield"] is not None
+        self.has_fparam = self.tensors["fparam"] is not None
+        self.has_aparam = self.tensors["aparam"] is not None
+        self.has_spin = self.ntypes_spin > 0
+        self.modifier_type = None
+
+        # looks ugly...
+        if self.modifier_type == "dipole_charge":
+            from deepmd.tf.infer.data_modifier import (
+                DipoleChargeModifier,
+            )
+
+            t_mdl_name = self._get_tensor("modifier_attr/mdl_name:0")
+            t_mdl_charge_map = self._get_tensor("modifier_attr/mdl_charge_map:0")
+            t_sys_charge_map = self._get_tensor("modifier_attr/sys_charge_map:0")
+            t_ewald_h = self._get_tensor("modifier_attr/ewald_h:0")
+            t_ewald_beta = self._get_tensor("modifier_attr/ewald_beta:0")
+            [mdl_name, mdl_charge_map, sys_charge_map, ewald_h, ewald_beta] = run_sess(
+                self.sess,
+                [
+                    t_mdl_name,
+                    t_mdl_charge_map,
+                    t_sys_charge_map,
+                    t_ewald_h,
+                    t_ewald_beta,
+                ],
+            )
+            mdl_name = mdl_name.decode("UTF-8")
+            mdl_charge_map = [int(ii) for ii in mdl_charge_map.decode("UTF-8").split()]
+            sys_charge_map = [int(ii) for ii in sys_charge_map.decode("UTF-8").split()]
+            self.dm = DipoleChargeModifier(
+                mdl_name,
+                mdl_charge_map,
+                sys_charge_map,
+                ewald_h=ewald_h,
+                ewald_beta=ewald_beta,
+            )
+
+    def _init_tensors(self):
+        tensor_names = {
+            # descrpt attrs
+            "ntypes": "descrpt_attr/ntypes:0",
+            "rcut": "descrpt_attr/rcut:0",
+            # model attrs
+            "tmap": "model_attr/tmap:0",
+            # inputs
+            "coord": "t_coord:0",
+            "type": "t_type:0",
+            "natoms": "t_natoms:0",
+            "box": "t_box:0",
+            "mesh": "t_mesh:0",
+        }
+        optional_tensor_names = {
+            # fitting attrs
+            "dfparam": "fitting_attr/dfparam:0",
+            "daparam": "fitting_attr/daparam:0",
+            "numb_dos": "fitting_attr/numb_dos:0",
+            # model attrs
+            "sel_type": "model_attr/sel_type:0",
+            # additonal inputs
+            "efield": "t_efield:0",
+            "fparam": "t_fparam:0",
+            "aparam": "t_aparam:0",
+            "ntypes_spin": "spin_attr/ntypes_spin:0",
+            # descriptor
+            "descriptor": "o_descriptor:0",
+        }
+        # output tensors
+        output_tensor_names = {}
+        for vv in self.output_def.var_defs:
+            output_tensor_names[vv] = f"o_{self._OUTDEF_DP2BACKEND[vv]}:0"
+
+        self.tensors = {}
+        for tensor_key, tensor_name in tensor_names.items():
+            self.tensors[tensor_key] = self._get_tensor(tensor_name)
+        for tensor_key, tensor_name in optional_tensor_names.items():
+            try:
+                self.tensors[tensor_key] = self._get_tensor(tensor_name)
+            except KeyError:
+                self.tensors[tensor_key] = None
+        self.output_tensors = {}
+        removed_defs = []
+        for ii, (tensor_key, tensor_name) in enumerate(output_tensor_names.items()):
+            try:
+                self.output_tensors[tensor_key] = self._get_tensor(tensor_name)
+            except KeyError:
+                # do not output
+                removed_defs.append(ii)
+        for ii in sorted(removed_defs, reverse=True):
+            del self.output_def.var_defs[list(self.output_def.var_defs.keys())[ii]]
+
+    def _init_attr(self):
+        [
+            self.ntypes,
+            self.rcut,
+            tmap,
+        ] = run_sess(
+            self.sess,
+            [
+                self.tensors["ntypes"],
+                self.tensors["rcut"],
+                self.tensors["tmap"],
+            ],
+        )
+        if self.tensors["ntypes_spin"] is not None:
+            self.ntypes_spin = run_sess(self.sess, [self.tensors["ntypes_spin"]])[0]
+        else:
+            self.ntypes_spin = 0
+        if self.tensors["dfparam"] is not None:
+            self.dfparam = run_sess(self.sess, [self.tensors["dfparam"]])[0]
+        else:
+            self.dfparam = 0
+        if self.tensors["daparam"] is not None:
+            self.daparam = run_sess(self.sess, [self.tensors["daparam"]])[0]
+        else:
+            self.daparam = 0
+        if self.tensors["sel_type"] is not None:
+            self.sel_type = run_sess(self.sess, [self.tensors["sel_type"]])[0]
+        else:
+            self.sel_type = None
+        if self.tensors["numb_dos"] is not None:
+            self.numb_dos = run_sess(self.sess, [self.tensors["numb_dos"]])[0]
+        else:
+            self.numb_dos = 0
+        self.tmap = tmap.decode("utf-8").split()
+
+    @property
+    @lru_cache(maxsize=None)
+    def model_type(self) -> "DeepEvalWrapper":
+        """Get type of model.
+
+        :type:str
+        """
+        t_mt = self._get_tensor("model_attr/model_type:0")
+        [mt] = run_sess(self.sess, [t_mt], feed_dict={})
+        model_type = mt.decode("utf-8")
+        if model_type == "ener":
+            return DeepPot
+        elif model_type == "dos":
+            return DeepDOS
+        elif model_type == "dipole":
+            return DeepDipole
+        elif model_type == "polar":
+            return DeepPolar
+        elif model_type == "global_polar":
+            return DeepGlobalPolar
+        elif model_type == "wfc":
+            return DeepWFC
+        else:
+            raise RuntimeError(f"unknown model type {model_type}")
+
+    @property
+    @lru_cache(maxsize=None)
+    def model_version(self) -> str:
+        """Get version of model.
+
+        Returns
+        -------
+        str
+            version of model
+        """
+        try:
+            t_mt = self._get_tensor("model_attr/model_version:0")
+        except KeyError:
+            # For deepmd-kit version 0.x - 1.x, set model version to 0.0
+            return "0.0"
+        else:
+            [mt] = run_sess(self.sess, [t_mt], feed_dict={})
+            return mt.decode("utf-8")
+
+    @property
+    @lru_cache(maxsize=None)
+    def sess(self) -> tf.Session:
+        """Get TF session."""
+        # start a tf session associated to the graph
+        return tf.Session(graph=self.graph, config=default_tf_session_config)
+
+    def _graph_compatable(self) -> bool:
+        """Check the model compatability.
+
+        Returns
+        -------
+        bool
+            If the model stored in the graph file is compatable with the current code
+        """
+        model_version_major = int(self.model_version.split(".")[0])
+        model_version_minor = int(self.model_version.split(".")[1])
+        MODEL_VERSION_MAJOR = int(MODEL_VERSION.split(".")[0])
+        MODEL_VERSION_MINOR = int(MODEL_VERSION.split(".")[1])
+        if (model_version_major != MODEL_VERSION_MAJOR) or (
+            model_version_minor > MODEL_VERSION_MINOR
+        ):
+            return False
+        else:
+            return True
+
+    def _get_tensor(
+        self,
+        tensor_name: str,
+    ) -> tf.Tensor:
+        """Get TF graph tensor.
+
+        Parameters
+        ----------
+        tensor_name : str
+            name of tensor to get
+
+        Returns
+        -------
+        tf.Tensor
+            loaded tensor
+        """
+        # do not use os.path.join as it doesn't work on Windows
+        tensor_path = "/".join((self.load_prefix, tensor_name))
+        tensor = self.graph.get_tensor_by_name(tensor_path)
+        return tensor
+
+    @staticmethod
+    def _load_graph(
+        frozen_graph_filename: "Path",
+        prefix: str = "load",
+        default_tf_graph: bool = False,
+        input_map: Optional[dict] = None,
+    ):
+        # We load the protobuf file from the disk and parse it to retrieve the
+        # unserialized graph_def
+        with tf.gfile.GFile(str(frozen_graph_filename), "rb") as f:
+            graph_def = tf.GraphDef()
+            graph_def.ParseFromString(f.read())
+
+            if default_tf_graph:
+                tf.import_graph_def(
+                    graph_def,
+                    input_map=input_map,
+                    return_elements=None,
+                    name=prefix,
+                    producer_op_list=None,
+                )
+                graph = tf.get_default_graph()
+            else:
+                # Then, we can use again a convenient built-in function to import
+                # a graph_def into the  current default Graph
+                with tf.Graph().as_default() as graph:
+                    tf.import_graph_def(
+                        graph_def,
+                        input_map=None,
+                        return_elements=None,
+                        name=prefix,
+                        producer_op_list=None,
+                    )
+
+            return graph
+
+    @staticmethod
+    def sort_input(
+        coord: np.ndarray,
+        atom_type: np.ndarray,
+        sel_atoms: Optional[List[int]] = None,
+    ):
+        """Sort atoms in the system according their types.
+
+        Parameters
+        ----------
+        coord
+            The coordinates of atoms.
+            Should be of shape [nframes, natoms, 3]
+        atom_type
+            The type of atoms
+            Should be of shape [natoms]
+        sel_atoms
+            The selected atoms by type
+
+        Returns
+        -------
+        coord_out
+            The coordinates after sorting
+        atom_type_out
+            The atom types after sorting
+        idx_map
+            The index mapping from the input to the output.
+            For example coord_out = coord[:,idx_map,:]
+        sel_atom_type
+            Only output if sel_atoms is not None
+            The sorted selected atom types
+        sel_idx_map
+            Only output if sel_atoms is not None
+            The index mapping from the selected atoms to sorted selected atoms.
+        """
+        natoms = atom_type.shape[1]
+        if sel_atoms is not None:
+            selection = np.array([False] * natoms, dtype=bool)
+            for ii in sel_atoms:
+                selection += atom_type[0] == ii
+            sel_atom_type = atom_type[:, selection]
+        idx = np.arange(natoms)
+        idx_map = np.lexsort((idx, atom_type[0]))
+        nframes = coord.shape[0]
+        coord = coord.reshape([nframes, -1, 3])
+        coord = np.reshape(coord[:, idx_map, :], [nframes, -1])
+        atom_type = atom_type[:, idx_map]
+        if sel_atoms is not None:
+            sel_natoms = sel_atom_type.shape[1]
+            sel_idx = np.arange(sel_natoms)
+            sel_idx_map = np.lexsort((sel_idx, sel_atom_type[0]))
+            sel_atom_type = sel_atom_type[:, sel_idx_map]
+            return coord, atom_type, idx_map, sel_atom_type, sel_idx_map
+        else:
+            return coord, atom_type, idx_map, atom_type, idx_map
+
+    @staticmethod
+    def reverse_map(vec: np.ndarray, imap: List[int]) -> np.ndarray:
+        """Reverse mapping of a vector according to the index map.
+
+        Parameters
+        ----------
+        vec
+            Input vector. Be of shape [nframes, natoms, -1]
+        imap
+            Index map. Be of shape [natoms]
+
+        Returns
+        -------
+        vec_out
+            Reverse mapped vector.
+        """
+        ret = np.zeros(vec.shape)
+        ret[:, imap, :] = vec
+        return ret
+
+    def make_natoms_vec(
+        self,
+        atom_types: np.ndarray,
+    ) -> np.ndarray:
+        """Make the natom vector used by deepmd-kit.
+
+        Parameters
+        ----------
+        atom_types
+            The type of atoms
+
+        Returns
+        -------
+        natoms
+            The number of atoms. This tensor has the length of Ntypes + 2
+            natoms[0]: number of local atoms
+            natoms[1]: total number of atoms held by this processor
+            natoms[i]: 2 <= i < Ntypes+2, number of type i atoms
+
+        """
+        natoms_vec = np.zeros(self.ntypes + 2).astype(int)
+        natoms = atom_types[0].size
+        natoms_vec[0] = natoms
+        natoms_vec[1] = natoms
+        for ii in range(self.ntypes):
+            natoms_vec[ii + 2] = np.count_nonzero(atom_types[0] == ii)
+        if np.count_nonzero(atom_types[0] == -1) > 0:
+            # contains virtual atoms
+            # energy fitting sums over natoms_vec[2:] instead of reading from natoms_vec[0]
+            # causing errors for shape mismatch
+            natoms_vec[2] += np.count_nonzero(atom_types[0] == -1)
+        return natoms_vec
+
+    def eval_typeebd(self) -> np.ndarray:
+        """Evaluate output of type embedding network by using this model.
+
+        Returns
+        -------
+        np.ndarray
+            The output of type embedding network. The shape is [ntypes, o_size],
+            where ntypes is the number of types, and o_size is the number of nodes
+            in the output layer.
+
+        Raises
+        ------
+        KeyError
+            If the model does not enable type embedding.
+
+        See Also
+        --------
+        deepmd.tf.utils.type_embed.TypeEmbedNet : The type embedding network.
+
+        Examples
+        --------
+        Get the output of type embedding network of `graph.pb`:
+
+        >>> from deepmd.tf.infer import DeepPotential
+        >>> dp = DeepPotential("graph.pb")
+        >>> dp.eval_typeebd()
+        """
+        t_typeebd = self._get_tensor("t_typeebd:0")
+        [typeebd] = run_sess(self.sess, [t_typeebd], feed_dict={})
+        return typeebd
+
+    def build_neighbor_list(
+        self,
+        coords: np.ndarray,
+        cell: Optional[np.ndarray],
+        atype: np.ndarray,
+        imap: np.ndarray,
+        neighbor_list,
+    ):
+        """Make the mesh with neighbor list for a single frame.
+
+        Parameters
+        ----------
+        coords : np.ndarray
+            The coordinates of atoms. Should be of shape [natoms, 3]
+        cell : Optional[np.ndarray]
+            The cell of the system. Should be of shape [3, 3]
+        atype : np.ndarray
+            The type of atoms. Should be of shape [natoms]
+        imap : np.ndarray
+            The index map of atoms. Should be of shape [natoms]
+        neighbor_list : ase.neighborlist.NewPrimitiveNeighborList
+            ASE neighbor list. The following method or attribute will be
+            used/set: bothways, self_interaction, update, build, first_neigh,
+            pair_second, offset_vec.
+
+        Returns
+        -------
+        natoms_vec : np.ndarray
+            The number of atoms. This tensor has the length of Ntypes + 2
+            natoms[0]: nloc
+            natoms[1]: nall
+            natoms[i]: 2 <= i < Ntypes+2, number of type i atoms for nloc
+        coords : np.ndarray
+            The coordinates of atoms, including ghost atoms. Should be of
+            shape [nframes, nall, 3]
+        atype : np.ndarray
+            The type of atoms, including ghost atoms. Should be of shape [nall]
+        mesh : np.ndarray
+            The mesh in nei_mode=4.
+        imap : np.ndarray
+            The index map of atoms. Should be of shape [nall]
+        ghost_map : np.ndarray
+            The index map of ghost atoms. Should be of shape [nghost]
+        """
+        pbc = np.repeat(cell is not None, 3)
+        cell = cell.reshape(3, 3)
+        positions = coords.reshape(-1, 3)
+        neighbor_list.bothways = True
+        neighbor_list.self_interaction = False
+        if neighbor_list.update(pbc, cell, positions):
+            neighbor_list.build(pbc, cell, positions)
+        first_neigh = neighbor_list.first_neigh.copy()
+        pair_second = neighbor_list.pair_second.copy()
+        offset_vec = neighbor_list.offset_vec.copy()
+        # get out-of-box neighbors
+        out_mask = np.any(offset_vec != 0, axis=1)
+        out_idx = pair_second[out_mask]
+        out_offset = offset_vec[out_mask]
+        out_coords = positions[out_idx] + out_offset.dot(cell)
+        atype = np.array(atype, dtype=int).reshape(-1)
+        out_atype = atype[out_idx]
+
+        nloc = positions.shape[0]
+        nghost = out_idx.size
+        all_coords = np.concatenate((positions, out_coords), axis=0)
+        all_atype = np.concatenate((atype, out_atype), axis=0)
+        # convert neighbor indexes
+        ghost_map = pair_second[out_mask]
+        pair_second[out_mask] = np.arange(nloc, nloc + nghost)
+        # get the mesh
+        mesh = np.zeros(16 + nloc * 2 + pair_second.size, dtype=int)
+        mesh[0] = nloc
+        # ilist
+        mesh[16 : 16 + nloc] = np.arange(nloc)
+        # numnei
+        mesh[16 + nloc : 16 + nloc * 2] = first_neigh[1:] - first_neigh[:-1]
+        # jlist
+        mesh[16 + nloc * 2 :] = pair_second
+
+        # natoms_vec
+        natoms_vec = np.zeros(self.ntypes + 2).astype(int)
+        natoms_vec[0] = nloc
+        natoms_vec[1] = nloc + nghost
+        for ii in range(self.ntypes):
+            natoms_vec[ii + 2] = np.count_nonzero(atype == ii)
+        # imap append ghost atoms
+        imap = np.concatenate((imap, np.arange(nloc, nloc + nghost)))
+        return natoms_vec, all_coords, all_atype, mesh, imap, ghost_map
+
+    def get_ntypes(self) -> int:
+        """Get the number of atom types of this model."""
+        return self.ntypes
+
+    def get_ntypes_spin(self) -> int:
+        """Get the number of spin atom types of this model."""
+        return self.ntypes_spin
+
+    def get_rcut(self) -> float:
+        """Get the cut-off radius of this model."""
+        return self.rcut
+
+    def get_type_map(self) -> List[str]:
+        """Get the type map (element name of the atom types) of this model."""
+        return self.tmap
+
+    def get_sel_type(self) -> Optional[np.ndarray]:
+        """Get the selected atom types of this model.
+
+        Only atoms with selected atom types have atomic contribution
+        to the result of the model.
+        If returning an empty list, all atom types are selected.
+        """
+        return np.array(self.sel_type).ravel()
+
+    def get_dim_fparam(self) -> int:
+        """Get the number (dimension) of frame parameters of this DP."""
+        return self.dfparam
+
+    def get_dim_aparam(self) -> int:
+        """Get the number (dimension) of atomic parameters of this DP."""
+        return self.daparam
+
+    def _eval_func(self, inner_func: Callable, numb_test: int, natoms: int) -> Callable:
+        """Wrapper method with auto batch size.
+
+        Parameters
+        ----------
+        inner_func : Callable
+            the method to be wrapped
+        numb_test : int
+            number of tests
+        natoms : int
+            number of atoms
+
+        Returns
+        -------
+        Callable
+            the wrapper
+        """
+        if self.auto_batch_size is not None:
+
+            def eval_func(*args, **kwargs):
+                return self.auto_batch_size.execute_all(
+                    inner_func, numb_test, natoms, *args, **kwargs
+                )
+
+        else:
+            eval_func = inner_func
+        return eval_func
+
+    def _get_natoms_and_nframes(
+        self,
+        coords: np.ndarray,
+        atom_types: Union[List[int], np.ndarray],
+    ) -> Tuple[int, int]:
+        natoms = len(atom_types[0])
+        if natoms == 0:
+            assert coords.size == 0
+        else:
+            coords = np.reshape(np.array(coords), [-1, natoms * 3])
+        nframes = coords.shape[0]
+        return natoms, nframes
+
+    def eval(
+        self,
+        coords: np.ndarray,
+        cells: np.ndarray,
+        atom_types: np.ndarray,
+        atomic: bool = False,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+        efield: Optional[np.ndarray] = None,
+        **kwargs: Dict[str, Any],
+    ) -> Dict[str, np.ndarray]:
+        """Evaluate the energy, force and virial by using this DP.
+
+        Parameters
+        ----------
+        coords
+            The coordinates of atoms.
+            The array should be of size nframes x natoms x 3
+        cells
+            The cell of the region.
+            If None then non-PBC is assumed, otherwise using PBC.
+            The array should be of size nframes x 9
+        atom_types
+            The atom types
+            The list should contain natoms ints
+        atomic
+            Calculate the atomic energy and virial
+        fparam
+            The frame parameter.
+            The array can be of size :
+            - nframes x dim_fparam.
+            - dim_fparam. Then all frames are assumed to be provided with the same fparam.
+        aparam
+            The atomic parameter
+            The array can be of size :
+            - nframes x natoms x dim_aparam.
+            - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
+            - dim_aparam. Then all frames and atoms are provided with the same aparam.
+        efield
+            The external field on atoms.
+            The array should be of size nframes x natoms x 3
+        **kwargs
+            Other parameters
+
+        Returns
+        -------
+        output_dict : dict
+            The output of the evaluation. The keys are the names of the output
+            variables, and the values are the corresponding output arrays.
+        """
+        # reshape coords before getting shape
+        natoms, numb_test = self._get_natoms_and_nframes(
+            coords,
+            atom_types,
+        )
+        output = self._eval_func(self._eval_inner, numb_test, natoms)(
+            coords,
+            cells,
+            atom_types,
+            fparam=fparam,
+            aparam=aparam,
+            atomic=atomic,
+            efield=efield,
+        )
+        if not isinstance(output, tuple):
+            output = (output,)
+
+        output_dict = {
+            odef.name: oo for oo, odef in zip(output, self.output_def.var_defs.values())
+        }
+        # ugly!!
+        if self.modifier_type is not None and isinstance(self.model_type, DeepPot):
+            if atomic:
+                raise RuntimeError("modifier does not support atomic modification")
+            me, mf, mv = self.dm.eval(coords, cells, atom_types)
+            output = list(output)  # tuple to list
+            e, f, v = output[:3]
+            output_dict["energy_redu"] += me.reshape(e.shape)
+            output_dict["energy_deri_r"] += mf.reshape(f.shape)
+            output_dict["energy_deri_c_redu"] += mv.reshape(v.shape)
+        return output_dict
+
+    def _prepare_feed_dict(
+        self,
+        coords,
+        cells,
+        atom_types,
+        fparam=None,
+        aparam=None,
+        efield=None,
+    ):
+        # standarize the shape of inputs
+        natoms, nframes = self._get_natoms_and_nframes(
+            coords,
+            atom_types,
+        )
+        atom_types = np.array(atom_types, dtype=int).reshape([nframes, natoms])
+        coords = np.reshape(np.array(coords), [nframes, natoms * 3])
+        if cells is None:
+            pbc = False
+            # make cells to work around the requirement of pbc
+            cells = np.tile(np.eye(3), [nframes, 1]).reshape([nframes, 9])
+        else:
+            pbc = True
+            cells = np.array(cells).reshape([nframes, 9])
+
+        if self.has_fparam:
+            assert fparam is not None
+            fparam = np.array(fparam)
+        if self.has_aparam:
+            assert aparam is not None
+            aparam = np.array(aparam)
+        if self.has_efield:
+            assert (
+                efield is not None
+            ), "you are using a model with external field, parameter efield should be provided"
+            efield = np.array(efield)
+
+        # reshape the inputs
+        if self.has_fparam:
+            fdim = self.get_dim_fparam()
+            if fparam.size == nframes * fdim:
+                fparam = np.reshape(fparam, [nframes, fdim])
+            elif fparam.size == fdim:
+                fparam = np.tile(fparam.reshape([-1]), [nframes, 1])
+            else:
+                raise RuntimeError(
+                    "got wrong size of frame param, should be either %d x %d or %d"
+                    % (nframes, fdim, fdim)
+                )
+        if self.has_aparam:
+            fdim = self.get_dim_aparam()
+            if aparam.size == nframes * natoms * fdim:
+                aparam = np.reshape(aparam, [nframes, natoms * fdim])
+            elif aparam.size == natoms * fdim:
+                aparam = np.tile(aparam.reshape([-1]), [nframes, 1])
+            elif aparam.size == fdim:
+                aparam = np.tile(aparam.reshape([-1]), [nframes, natoms])
+            else:
+                raise RuntimeError(
+                    "got wrong size of frame param, should be either %d x %d x %d or %d x %d or %d"
+                    % (nframes, natoms, fdim, natoms, fdim, fdim)
+                )
+
+        # sort inputs
+        coords, atom_types, imap, sel_at, sel_imap = self.sort_input(
+            coords, atom_types, sel_atoms=self.get_sel_type()
+        )
+        if self.has_efield:
+            efield = np.reshape(efield, [nframes, natoms, 3])
+            efield = efield[:, imap, :]
+            efield = np.reshape(efield, [nframes, natoms * 3])
+        if self.has_aparam:
+            aparam = np.reshape(aparam, [nframes, natoms, fdim])
+            aparam = aparam[:, imap, :]
+            aparam = np.reshape(aparam, [nframes, natoms * fdim])
+
+        # make natoms_vec and default_mesh
+        if self.neighbor_list is None:
+            natoms_vec = self.make_natoms_vec(atom_types)
+            assert natoms_vec[0] == natoms
+            mesh = make_default_mesh(pbc, not self._check_mixed_types(atom_types))
+            ghost_map = None
+        else:
+            if nframes > 1:
+                raise NotImplementedError(
+                    "neighbor_list does not support multiple frames"
+                )
+            (
+                natoms_vec,
+                coords,
+                atom_types,
+                mesh,
+                imap,
+                ghost_map,
+            ) = self.build_neighbor_list(
+                coords,
+                cells if cells is not None else None,
+                atom_types,
+                imap,
+                self.neighbor_list,
+            )
+
+        # evaluate
+        feed_dict_test = {}
+        feed_dict_test[self.tensors["natoms"]] = natoms_vec
+        feed_dict_test[self.tensors["type"]] = atom_types.reshape([-1])
+        feed_dict_test[self.tensors["coord"]] = np.reshape(coords, [-1])
+
+        if len(self.tensors["box"].shape) == 1:
+            feed_dict_test[self.tensors["box"]] = np.reshape(cells, [-1])
+        elif len(self.tensors["box"].shape) == 2:
+            feed_dict_test[self.tensors["box"]] = cells
+        else:
+            raise RuntimeError
+        if self.has_efield:
+            feed_dict_test[self.tensors["efield"]] = np.reshape(efield, [-1])
+        feed_dict_test[self.tensors["mesh"]] = mesh
+        if self.has_fparam:
+            feed_dict_test[self.tensors["fparam"]] = np.reshape(fparam, [-1])
+        if self.has_aparam:
+            feed_dict_test[self.tensors["aparam"]] = np.reshape(aparam, [-1])
+        return feed_dict_test, imap, natoms_vec, ghost_map, sel_at, sel_imap
+
+    def _eval_inner(
+        self,
+        coords,
+        cells,
+        atom_types,
+        fparam=None,
+        aparam=None,
+        efield=None,
+        **kwargs,
+    ):
+        natoms, nframes = self._get_natoms_and_nframes(
+            coords,
+            atom_types,
+        )
+        (
+            feed_dict_test,
+            imap,
+            natoms_vec,
+            ghost_map,
+            sel_at,
+            sel_imap,
+        ) = self._prepare_feed_dict(
+            coords,
+            cells,
+            atom_types,
+            fparam,
+            aparam,
+            efield,
+        )
+
+        nloc = natoms_vec[0]
+        nloc_sel = sel_at.shape[1]
+        nall = natoms_vec[1]
+
+        t_out = list(self.output_tensors.values())
+
+        v_out = run_sess(self.sess, t_out, feed_dict=feed_dict_test)
+
+        if nloc_sel == 0:
+            nloc_sel = nloc
+            sel_imap = imap
+        if self.has_spin:
+            ntypes_real = self.ntypes - self.ntypes_spin
+            natoms_real = sum(
+                [
+                    np.count_nonzero(np.array(atom_types[0]) == ii)
+                    for ii in range(ntypes_real)
+                ]
+            )
+        else:
+            natoms_real = nloc_sel
+        if ghost_map is not None:
+            # add the value of ghost atoms to real atoms
+            for ii, odef in enumerate(self.output_def.var_defs.values()):
+                # when the shape is nall
+                if odef.category in (
+                    OutputVariableCategory.DERV_R,
+                    OutputVariableCategory.DERV_C,
+                ):
+                    odef_shape = self._get_output_shape(odef, nframes, nall)
+                    tmp_shape = [np.prod(odef_shape[:-2]), *odef_shape[-2:]]
+                    v_out[ii] = np.reshape(v_out[ii], tmp_shape)
+                    for jj in range(v_out[ii].shape[0]):
+                        np.add.at(v_out[ii][jj], ghost_map, v_out[ii][jj, nloc:])
+
+        for ii, odef in enumerate(self.output_def.var_defs.values()):
+            if odef.category in (
+                OutputVariableCategory.DERV_R,
+                OutputVariableCategory.DERV_C,
+            ):
+                odef_shape = self._get_output_shape(odef, nframes, nall)
+                tmp_shape = [np.prod(odef_shape[:-2]), *odef_shape[-2:]]
+                # reverse map of the outputs
+                v_out[ii] = self.reverse_map(np.reshape(v_out[ii], tmp_shape), imap)
+                v_out[ii] = np.reshape(v_out[ii], odef_shape)
+                if nloc < nall:
+                    v_out[ii] = v_out[ii][:, :, :nloc]
+            elif odef.category == OutputVariableCategory.OUT:
+                odef_shape = self._get_output_shape(odef, nframes, natoms_real)
+                v_out[ii] = self.reverse_map(
+                    np.reshape(v_out[ii], odef_shape), sel_imap[:natoms_real]
+                )
+                if nloc_sel < nloc:
+                    # convert shape from nsel to nloc
+                    # sel_atoms was applied before sort; see sort_input
+                    # do not consider mixed_types here (as it is never supported)
+                    sel_mask = np.isin(atom_types[0], self.sel_type)
+                    out_nsel = v_out[ii]
+                    out_nloc = np.zeros(
+                        (nframes, nloc, *out_nsel.shape[2:]), dtype=out_nsel.dtype
+                    )
+                    out_nloc[:, sel_mask] = out_nsel
+                    v_out[ii] = out_nloc
+                    odef_shape = self._get_output_shape(odef, nframes, nloc)
+                v_out[ii] = np.reshape(v_out[ii], odef_shape)
+            elif odef.category in (
+                OutputVariableCategory.REDU,
+                OutputVariableCategory.DERV_C_REDU,
+            ):
+                odef_shape = self._get_output_shape(odef, nframes, 0)
+                v_out[ii] = np.reshape(v_out[ii], odef_shape)
+            else:
+                raise RuntimeError("unknown category")
+        return tuple(v_out)
+
+    def _get_output_shape(self, odef, nframes, natoms):
+        if odef.category == OutputVariableCategory.DERV_C_REDU:
+            # virial
+            return [nframes, *odef.shape[:-1], 9]
+        elif odef.category == OutputVariableCategory.REDU:
+            # energy
+            return [nframes, *odef.shape, 1]
+        elif odef.category == OutputVariableCategory.DERV_C:
+            # atom_virial
+            return [nframes, *odef.shape[:-1], natoms, 9]
+        elif odef.category == OutputVariableCategory.DERV_R:
+            # force
+            return [nframes, *odef.shape[:-1], natoms, 3]
+        elif odef.category == OutputVariableCategory.OUT:
+            # atom_energy, atom_tensor
+            # Something wrong here?
+            # return [nframes, *shape, natoms, 1]
+            return [nframes, natoms, *odef.shape, 1]
+        else:
+            raise RuntimeError("unknown category")
+
+    def eval_descriptor(
+        self,
+        coords: np.ndarray,
+        cells: np.ndarray,
+        atom_types: np.ndarray,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+        efield: Optional[np.ndarray] = None,
+    ) -> np.ndarray:
+        """Evaluate descriptors by using this DP.
+
+        Parameters
+        ----------
+        coords
+            The coordinates of atoms.
+            The array should be of size nframes x natoms x 3
+        cells
+            The cell of the region.
+            If None then non-PBC is assumed, otherwise using PBC.
+            The array should be of size nframes x 9
+        atom_types
+            The atom types
+            The list should contain natoms ints
+        fparam
+            The frame parameter.
+            The array can be of size :
+            - nframes x dim_fparam.
+            - dim_fparam. Then all frames are assumed to be provided with the same fparam.
+        aparam
+            The atomic parameter
+            The array can be of size :
+            - nframes x natoms x dim_aparam.
+            - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
+            - dim_aparam. Then all frames and atoms are provided with the same aparam.
+        efield
+            The external field on atoms.
+            The array should be of size nframes x natoms x 3
+
+        Returns
+        -------
+        descriptor
+            Descriptors.
+        """
+        natoms, numb_test = self._get_natoms_and_nframes(
+            coords,
+            atom_types,
+        )
+        descriptor = self._eval_func(self._eval_descriptor_inner, numb_test, natoms)(
+            coords,
+            cells,
+            atom_types,
+            fparam=fparam,
+            aparam=aparam,
+            efield=efield,
+        )
+        return descriptor
+
+    def _eval_descriptor_inner(
+        self,
+        coords: np.ndarray,
+        cells: np.ndarray,
+        atom_types: np.ndarray,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+        efield: Optional[np.ndarray] = None,
+    ) -> np.ndarray:
+        natoms, nframes = self._get_natoms_and_nframes(
+            coords,
+            atom_types,
+        )
+        (
+            feed_dict_test,
+            imap,
+            natoms_vec,
+            ghost_map,
+            sel_at,
+            sel_imap,
+        ) = self._prepare_feed_dict(
+            coords,
+            cells,
+            atom_types,
+            fparam,
+            aparam,
+            efield,
+        )
+        (descriptor,) = run_sess(
+            self.sess, [self.tensors["descriptor"]], feed_dict=feed_dict_test
+        )
+        imap = imap[:natoms]
+        return self.reverse_map(np.reshape(descriptor, [nframes, natoms, -1]), imap)
+
+    def get_numb_dos(self) -> int:
+        return self.numb_dos
+
+    def get_has_efield(self) -> bool:
+        return self.has_efield
+
+
+class DeepEvalOld:
+    # old class for DipoleChargeModifier only
+    """Common methods for DeepPot, DeepWFC, DeepPolar, ...
+
+    Parameters
+    ----------
+    model_file : Path
+        The name of the frozen model file.
+    load_prefix: str
+        The prefix in the load computational graph
+    default_tf_graph : bool
+        If uses the default tf graph, otherwise build a new tf graph for evaluation
+    auto_batch_size : bool or int or AutomaticBatchSize, default: False
+        If True, automatic batch size will be used. If int, it will be used
+        as the initial batch size.
+    input_map : dict, optional
+        The input map for tf.import_graph_def. Only work with default tf graph
+    neighbor_list : ase.neighborlist.NewPrimitiveNeighborList, optional
+        The ASE neighbor list class to produce the neighbor list. If None, the
+        neighbor list will be built natively in the model.
+    """
+
+    load_prefix: str  # set by subclass
+
+    def __init__(
+        self,
+        model_file: "Path",
+        load_prefix: str = "load",
+        default_tf_graph: bool = False,
+        auto_batch_size: Union[bool, int, AutoBatchSize] = False,
+        input_map: Optional[dict] = None,
+        neighbor_list=None,
+    ):
+        self.graph = self._load_graph(
+            model_file,
+            prefix=load_prefix,
+            default_tf_graph=default_tf_graph,
+            input_map=input_map,
+        )
+        self.load_prefix = load_prefix
+
+        # graph_compatable should be called after graph and prefix are set
+        if not self._graph_compatable():
+            raise RuntimeError(
+                f"model in graph (version {self.model_version}) is incompatible"
+                f"with the model (version {MODEL_VERSION}) supported by the current code."
+                "See https://deepmd.rtfd.io/compatability/ for details."
+            )
+
+        # set default to False, as subclasses may not support
+        if isinstance(auto_batch_size, bool):
+            if auto_batch_size:
+                self.auto_batch_size = AutoBatchSize()
+            else:
+                self.auto_batch_size = None
+        elif isinstance(auto_batch_size, int):
+            self.auto_batch_size = AutoBatchSize(auto_batch_size)
+        elif isinstance(auto_batch_size, AutoBatchSize):
+            self.auto_batch_size = auto_batch_size
+        else:
+            raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize")
+
+        self.neighbor_list = neighbor_list
+
+    @property
+    @lru_cache(maxsize=None)
+    def model_type(self) -> str:
+        """Get type of model.
+
+        :type:str
+        """
+        t_mt = self._get_tensor("model_attr/model_type:0")
+        [mt] = run_sess(self.sess, [t_mt], feed_dict={})
+        return mt.decode("utf-8")
+
+    @property
+    @lru_cache(maxsize=None)
+    def model_version(self) -> str:
+        """Get version of model.
+
+        Returns
+        -------
+        str
+            version of model
+        """
+        try:
+            t_mt = self._get_tensor("model_attr/model_version:0")
+        except KeyError:
+            # For deepmd-kit version 0.x - 1.x, set model version to 0.0
+            return "0.0"
+        else:
+            [mt] = run_sess(self.sess, [t_mt], feed_dict={})
+            return mt.decode("utf-8")
+
+    @property
+    @lru_cache(maxsize=None)
+    def sess(self) -> tf.Session:
+        """Get TF session."""
+        # start a tf session associated to the graph
+        return tf.Session(graph=self.graph, config=default_tf_session_config)
+
+    def _graph_compatable(self) -> bool:
+        """Check the model compatability.
+
+        Returns
+        -------
+        bool
+            If the model stored in the graph file is compatable with the current code
+        """
+        model_version_major = int(self.model_version.split(".")[0])
+        model_version_minor = int(self.model_version.split(".")[1])
+        MODEL_VERSION_MAJOR = int(MODEL_VERSION.split(".")[0])
+        MODEL_VERSION_MINOR = int(MODEL_VERSION.split(".")[1])
+        if (model_version_major != MODEL_VERSION_MAJOR) or (
+            model_version_minor > MODEL_VERSION_MINOR
+        ):
+            return False
+        else:
+            return True
+
+    def _get_tensor(
+        self, tensor_name: str, attr_name: Optional[str] = None
+    ) -> tf.Tensor:
+        """Get TF graph tensor and assign it to class namespace.
+
+        Parameters
+        ----------
+        tensor_name : str
+            name of tensor to get
+        attr_name : Optional[str], optional
+            if specified, class attribute with this name will be created and tensor will
+            be assigned to it, by default None
+
+        Returns
+        -------
+        tf.Tensor
+            loaded tensor
+        """
+        # do not use os.path.join as it doesn't work on Windows
+        tensor_path = "/".join((self.load_prefix, tensor_name))
+        tensor = self.graph.get_tensor_by_name(tensor_path)
+        if attr_name:
+            setattr(self, attr_name, tensor)
+            return tensor
+        else:
+            return tensor
+
+    @staticmethod
+    def _load_graph(
+        frozen_graph_filename: "Path",
+        prefix: str = "load",
+        default_tf_graph: bool = False,
+        input_map: Optional[dict] = None,
+    ):
+        # We load the protobuf file from the disk and parse it to retrieve the
+        # unserialized graph_def
+        with tf.gfile.GFile(str(frozen_graph_filename), "rb") as f:
+            graph_def = tf.GraphDef()
+            graph_def.ParseFromString(f.read())
+
+            if default_tf_graph:
+                tf.import_graph_def(
+                    graph_def,
+                    input_map=input_map,
+                    return_elements=None,
+                    name=prefix,
+                    producer_op_list=None,
+                )
+                graph = tf.get_default_graph()
+            else:
+                # Then, we can use again a convenient built-in function to import
+                # a graph_def into the  current default Graph
+                with tf.Graph().as_default() as graph:
+                    tf.import_graph_def(
+                        graph_def,
+                        input_map=None,
+                        return_elements=None,
+                        name=prefix,
+                        producer_op_list=None,
+                    )
+
+            return graph
+
+    @staticmethod
+    def sort_input(
+        coord: np.ndarray,
+        atom_type: np.ndarray,
+        sel_atoms: Optional[List[int]] = None,
+        mixed_type: bool = False,
+    ):
+        """Sort atoms in the system according their types.
+
+        Parameters
+        ----------
+        coord
+            The coordinates of atoms.
+            Should be of shape [nframes, natoms, 3]
+        atom_type
+            The type of atoms
+            Should be of shape [natoms]
+        sel_atoms
+            The selected atoms by type
+        mixed_type
+            Whether to perform the mixed_type mode.
+            If True, the input data has the mixed_type format (see doc/model/train_se_atten.md),
+            in which frames in a system may have different natoms_vec(s), with the same nloc.
+
+        Returns
+        -------
+        coord_out
+            The coordinates after sorting
+        atom_type_out
+            The atom types after sorting
+        idx_map
+            The index mapping from the input to the output.
+            For example coord_out = coord[:,idx_map,:]
+        sel_atom_type
+            Only output if sel_atoms is not None
+            The sorted selected atom types
+        sel_idx_map
+            Only output if sel_atoms is not None
+            The index mapping from the selected atoms to sorted selected atoms.
+        """
+        if mixed_type:
+            # mixed_type need not to resort
+            natoms = atom_type[0].size
+            idx_map = np.arange(natoms)
+            return coord, atom_type, idx_map
+        if sel_atoms is not None:
+            selection = [False] * np.size(atom_type)
+            for ii in sel_atoms:
+                selection += atom_type == ii
+            sel_atom_type = atom_type[selection]
+        natoms = atom_type.size
+        idx = np.arange(natoms)
+        idx_map = np.lexsort((idx, atom_type))
+        nframes = coord.shape[0]
+        coord = coord.reshape([nframes, -1, 3])
+        coord = np.reshape(coord[:, idx_map, :], [nframes, -1])
+        atom_type = atom_type[idx_map]
+        if sel_atoms is not None:
+            sel_natoms = np.size(sel_atom_type)
+            sel_idx = np.arange(sel_natoms)
+            sel_idx_map = np.lexsort((sel_idx, sel_atom_type))
+            sel_atom_type = sel_atom_type[sel_idx_map]
+            return coord, atom_type, idx_map, sel_atom_type, sel_idx_map
+        else:
+            return coord, atom_type, idx_map
+
+    @staticmethod
+    def reverse_map(vec: np.ndarray, imap: List[int]) -> np.ndarray:
+        """Reverse mapping of a vector according to the index map.
+
+        Parameters
+        ----------
+        vec
+            Input vector. Be of shape [nframes, natoms, -1]
+        imap
+            Index map. Be of shape [natoms]
+
+        Returns
+        -------
+        vec_out
+            Reverse mapped vector.
+        """
+        ret = np.zeros(vec.shape)
+        # for idx,ii in enumerate(imap) :
+        #     ret[:,ii,:] = vec[:,idx,:]
+        ret[:, imap, :] = vec
+        return ret
+
+    def make_natoms_vec(
+        self, atom_types: np.ndarray, mixed_type: bool = False
+    ) -> np.ndarray:
+        """Make the natom vector used by deepmd-kit.
+
+        Parameters
+        ----------
+        atom_types
+            The type of atoms
+        mixed_type
+            Whether to perform the mixed_type mode.
+            If True, the input data has the mixed_type format (see doc/model/train_se_atten.md),
+            in which frames in a system may have different natoms_vec(s), with the same nloc.
+
+        Returns
+        -------
+        natoms
+            The number of atoms. This tensor has the length of Ntypes + 2
+            natoms[0]: number of local atoms
+            natoms[1]: total number of atoms held by this processor
+            natoms[i]: 2 <= i < Ntypes+2, number of type i atoms
+
+        """
+        natoms_vec = np.zeros(self.ntypes + 2).astype(int)
+        if mixed_type:
+            natoms = atom_types[0].size
+        else:
+            natoms = atom_types.size
+        natoms_vec[0] = natoms
+        natoms_vec[1] = natoms
+        if mixed_type:
+            natoms_vec[2] = natoms
+            return natoms_vec
+        for ii in range(self.ntypes):
+            natoms_vec[ii + 2] = np.count_nonzero(atom_types == ii)
+        return natoms_vec
+
+    def eval_typeebd(self) -> np.ndarray:
+        """Evaluate output of type embedding network by using this model.
+
+        Returns
+        -------
+        np.ndarray
+            The output of type embedding network. The shape is [ntypes, o_size],
+            where ntypes is the number of types, and o_size is the number of nodes
+            in the output layer.
+
+        Raises
+        ------
+        KeyError
+            If the model does not enable type embedding.
+
+        See Also
+        --------
+        deepmd.tf.utils.type_embed.TypeEmbedNet : The type embedding network.
+
+        Examples
+        --------
+        Get the output of type embedding network of `graph.pb`:
+
+        >>> from deepmd.tf.infer import DeepPotential
+        >>> dp = DeepPotential("graph.pb")
+        >>> dp.eval_typeebd()
+        """
+        t_typeebd = self._get_tensor("t_typeebd:0")
+        [typeebd] = run_sess(self.sess, [t_typeebd], feed_dict={})
+        return typeebd
+
+    def build_neighbor_list(
+        self,
+        coords: np.ndarray,
+        cell: Optional[np.ndarray],
+        atype: np.ndarray,
+        imap: np.ndarray,
+        neighbor_list,
+    ):
+        """Make the mesh with neighbor list for a single frame.
+
+        Parameters
+        ----------
+        coords : np.ndarray
+            The coordinates of atoms. Should be of shape [natoms, 3]
+        cell : Optional[np.ndarray]
+            The cell of the system. Should be of shape [3, 3]
+        atype : np.ndarray
+            The type of atoms. Should be of shape [natoms]
+        imap : np.ndarray
+            The index map of atoms. Should be of shape [natoms]
+        neighbor_list : ase.neighborlist.NewPrimitiveNeighborList
+            ASE neighbor list. The following method or attribute will be
+            used/set: bothways, self_interaction, update, build, first_neigh,
+            pair_second, offset_vec.
+
+        Returns
+        -------
+        natoms_vec : np.ndarray
+            The number of atoms. This tensor has the length of Ntypes + 2
+            natoms[0]: nloc
+            natoms[1]: nall
+            natoms[i]: 2 <= i < Ntypes+2, number of type i atoms for nloc
+        coords : np.ndarray
+            The coordinates of atoms, including ghost atoms. Should be of
+            shape [nframes, nall, 3]
+        atype : np.ndarray
+            The type of atoms, including ghost atoms. Should be of shape [nall]
+        mesh : np.ndarray
+            The mesh in nei_mode=4.
+        imap : np.ndarray
+            The index map of atoms. Should be of shape [nall]
+        ghost_map : np.ndarray
+            The index map of ghost atoms. Should be of shape [nghost]
+        """
+        pbc = np.repeat(cell is not None, 3)
+        cell = cell.reshape(3, 3)
+        positions = coords.reshape(-1, 3)
+        neighbor_list.bothways = True
+        neighbor_list.self_interaction = False
+        if neighbor_list.update(pbc, cell, positions):
+            neighbor_list.build(pbc, cell, positions)
+        first_neigh = neighbor_list.first_neigh.copy()
+        pair_second = neighbor_list.pair_second.copy()
+        offset_vec = neighbor_list.offset_vec.copy()
+        # get out-of-box neighbors
+        out_mask = np.any(offset_vec != 0, axis=1)
+        out_idx = pair_second[out_mask]
+        out_offset = offset_vec[out_mask]
+        out_coords = positions[out_idx] + out_offset.dot(cell)
+        atype = np.array(atype, dtype=int)
+        out_atype = atype[out_idx]
+
+        nloc = positions.shape[0]
+        nghost = out_idx.size
+        all_coords = np.concatenate((positions, out_coords), axis=0)
+        all_atype = np.concatenate((atype, out_atype), axis=0)
+        # convert neighbor indexes
+        ghost_map = pair_second[out_mask]
+        pair_second[out_mask] = np.arange(nloc, nloc + nghost)
+        # get the mesh
+        mesh = np.zeros(16 + nloc * 2 + pair_second.size, dtype=int)
+        mesh[0] = nloc
+        # ilist
+        mesh[16 : 16 + nloc] = np.arange(nloc)
+        # numnei
+        mesh[16 + nloc : 16 + nloc * 2] = first_neigh[1:] - first_neigh[:-1]
+        # jlist
+        mesh[16 + nloc * 2 :] = pair_second
+
+        # natoms_vec
+        natoms_vec = np.zeros(self.ntypes + 2).astype(int)
+        natoms_vec[0] = nloc
+        natoms_vec[1] = nloc + nghost
+        for ii in range(self.ntypes):
+            natoms_vec[ii + 2] = np.count_nonzero(atype == ii)
+        # imap append ghost atoms
+        imap = np.concatenate((imap, np.arange(nloc, nloc + nghost)))
+        return natoms_vec, all_coords, all_atype, mesh, imap, ghost_map
diff --git a/deepmd/tf/infer/deep_polar.py b/deepmd/tf/infer/deep_polar.py
new file mode 100644
index 0000000000..c3d42fd537
--- /dev/null
+++ b/deepmd/tf/infer/deep_polar.py
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.infer.deep_polar import (
+    DeepGlobalPolar,
+    DeepPolar,
+)
+
+__all__ = [
+    "DeepPolar",
+    "DeepGlobalPolar",
+]
diff --git a/deepmd/tf/infer/deep_pot.py b/deepmd/tf/infer/deep_pot.py
new file mode 100644
index 0000000000..587a13996a
--- /dev/null
+++ b/deepmd/tf/infer/deep_pot.py
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.infer import (
+    DeepPot,
+)
+
+__all__ = ["DeepPot"]
diff --git a/deepmd/tf/infer/deep_tensor.py b/deepmd/tf/infer/deep_tensor.py
new file mode 100644
index 0000000000..59fdab7cd1
--- /dev/null
+++ b/deepmd/tf/infer/deep_tensor.py
@@ -0,0 +1,443 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    TYPE_CHECKING,
+    ClassVar,
+    Dict,
+    List,
+    Optional,
+    Tuple,
+)
+
+import numpy as np
+
+from deepmd.tf.common import (
+    make_default_mesh,
+)
+from deepmd.tf.infer.deep_eval import DeepEvalOld as DeepEval
+from deepmd.tf.utils.sess import (
+    run_sess,
+)
+
+if TYPE_CHECKING:
+    from pathlib import (
+        Path,
+    )
+
+
+class DeepTensor(DeepEval):
+    """Evaluates a tensor model.
+
+    Parameters
+    ----------
+    model_file: str
+        The name of the frozen model file.
+    load_prefix: str
+        The prefix in the load computational graph
+    default_tf_graph : bool
+        If uses the default tf graph, otherwise build a new tf graph for evaluation
+    input_map : dict, optional
+        The input map for tf.import_graph_def. Only work with default tf graph
+    neighbor_list : ase.neighborlist.NeighborList, optional
+        The neighbor list object. If None, then build the native neighbor list.
+    """
+
+    tensors: ClassVar[Dict[str, str]] = {
+        # descriptor attrs
+        "t_ntypes": "descrpt_attr/ntypes:0",
+        "t_rcut": "descrpt_attr/rcut:0",
+        # model attrs
+        "t_tmap": "model_attr/tmap:0",
+        "t_sel_type": "model_attr/sel_type:0",
+        "t_ouput_dim": "model_attr/output_dim:0",
+        # inputs
+        "t_coord": "t_coord:0",
+        "t_type": "t_type:0",
+        "t_natoms": "t_natoms:0",
+        "t_box": "t_box:0",
+        "t_mesh": "t_mesh:0",
+    }
+
+    def __init__(
+        self,
+        model_file: "Path",
+        load_prefix: str = "load",
+        default_tf_graph: bool = False,
+        input_map: Optional[dict] = None,
+        neighbor_list=None,
+    ) -> None:
+        """Constructor."""
+        DeepEval.__init__(
+            self,
+            model_file,
+            load_prefix=load_prefix,
+            default_tf_graph=default_tf_graph,
+            input_map=input_map,
+            neighbor_list=neighbor_list,
+        )
+        # check model type
+        model_type = self.tensors["t_tensor"][2:-2]
+        assert (
+            self.model_type == model_type
+        ), f"expect {model_type} model but got {self.model_type}"
+
+        # now load tensors to object attributes
+        for attr_name, tensor_name in self.tensors.items():
+            self._get_tensor(tensor_name, attr_name)
+
+        # load optional tensors if possible
+        optional_tensors = {
+            "t_global_tensor": f"o_global_{model_type}:0",
+            "t_force": "o_force:0",
+            "t_virial": "o_virial:0",
+            "t_atom_virial": "o_atom_virial:0",
+        }
+        try:
+            # first make sure these tensor all exists (but do not modify self attr)
+            for attr_name, tensor_name in optional_tensors.items():
+                self._get_tensor(tensor_name)
+            # then put those into self.attrs
+            for attr_name, tensor_name in optional_tensors.items():
+                self._get_tensor(tensor_name, attr_name)
+        except KeyError:
+            self._support_gfv = False
+        else:
+            self.tensors.update(optional_tensors)
+            self._support_gfv = True
+
+        self._run_default_sess()
+        self.tmap = self.tmap.decode("UTF-8").split()
+
+    def _run_default_sess(self):
+        [self.ntypes, self.rcut, self.tmap, self.tselt, self.output_dim] = run_sess(
+            self.sess,
+            [
+                self.t_ntypes,
+                self.t_rcut,
+                self.t_tmap,
+                self.t_sel_type,
+                self.t_ouput_dim,
+            ],
+        )
+
+    def get_ntypes(self) -> int:
+        """Get the number of atom types of this model."""
+        return self.ntypes
+
+    def get_rcut(self) -> float:
+        """Get the cut-off radius of this model."""
+        return self.rcut
+
+    def get_type_map(self) -> List[str]:
+        """Get the type map (element name of the atom types) of this model."""
+        return self.tmap
+
+    def get_sel_type(self) -> List[int]:
+        """Get the selected atom types of this model."""
+        return self.tselt
+
+    def get_dim_fparam(self) -> int:
+        """Get the number (dimension) of frame parameters of this DP."""
+        return self.dfparam
+
+    def get_dim_aparam(self) -> int:
+        """Get the number (dimension) of atomic parameters of this DP."""
+        return self.daparam
+
+    def eval(
+        self,
+        coords: np.ndarray,
+        cells: np.ndarray,
+        atom_types: List[int],
+        atomic: bool = True,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+        efield: Optional[np.ndarray] = None,
+        mixed_type: bool = False,
+    ) -> np.ndarray:
+        """Evaluate the model.
+
+        Parameters
+        ----------
+        coords
+            The coordinates of atoms.
+            The array should be of size nframes x natoms x 3
+        cells
+            The cell of the region.
+            If None then non-PBC is assumed, otherwise using PBC.
+            The array should be of size nframes x 9
+        atom_types
+            The atom types
+            The list should contain natoms ints
+        atomic
+            If True (default), return the atomic tensor
+            Otherwise return the global tensor
+        fparam
+            Not used in this model
+        aparam
+            Not used in this model
+        efield
+            Not used in this model
+        mixed_type
+            Whether to perform the mixed_type mode.
+            If True, the input data has the mixed_type format (see doc/model/train_se_atten.md),
+            in which frames in a system may have different natoms_vec(s), with the same nloc.
+
+        Returns
+        -------
+        tensor
+            The returned tensor
+            If atomic == False then of size nframes x output_dim
+            else of size nframes x natoms x output_dim
+        """
+        # standarize the shape of inputs
+        if mixed_type:
+            natoms = atom_types[0].size
+            atom_types = np.array(atom_types, dtype=int).reshape([-1, natoms])
+        else:
+            atom_types = np.array(atom_types, dtype=int).reshape([-1])
+            natoms = atom_types.size
+        coords = np.reshape(np.array(coords), [-1, natoms * 3])
+        nframes = coords.shape[0]
+        if cells is None:
+            pbc = False
+            cells = np.tile(np.eye(3), [nframes, 1]).reshape([nframes, 9])
+        else:
+            pbc = True
+            cells = np.array(cells).reshape([nframes, 9])
+
+        # sort inputs
+        coords, atom_types, imap, sel_at, sel_imap = self.sort_input(
+            coords, atom_types, sel_atoms=self.get_sel_type(), mixed_type=mixed_type
+        )
+
+        # make natoms_vec and default_mesh
+        if self.neighbor_list is None:
+            natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type)
+            assert natoms_vec[0] == natoms
+            mesh = make_default_mesh(pbc, mixed_type)
+        else:
+            if nframes > 1:
+                raise NotImplementedError(
+                    "neighbor_list does not support multiple frames"
+                )
+            (
+                natoms_vec,
+                coords,
+                atom_types,
+                mesh,
+                imap,
+                _,
+            ) = self.build_neighbor_list(
+                coords,
+                cells if cells is not None else None,
+                atom_types,
+                imap,
+                self.neighbor_list,
+            )
+
+        # evaluate
+        feed_dict_test = {}
+        feed_dict_test[self.t_natoms] = natoms_vec
+        if mixed_type:
+            feed_dict_test[self.t_type] = atom_types.reshape([-1])
+        else:
+            feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape(
+                [-1]
+            )
+        feed_dict_test[self.t_coord] = np.reshape(coords, [-1])
+        feed_dict_test[self.t_box] = np.reshape(cells, [-1])
+        feed_dict_test[self.t_mesh] = mesh
+
+        if atomic:
+            assert (
+                "global" not in self.model_type
+            ), f"cannot do atomic evaluation with model type {self.model_type}"
+            t_out = [self.t_tensor]
+        else:
+            assert (
+                self._support_gfv or "global" in self.model_type
+            ), f"do not support global tensor evaluation with old {self.model_type} model"
+            t_out = [self.t_global_tensor if self._support_gfv else self.t_tensor]
+        v_out = self.sess.run(t_out, feed_dict=feed_dict_test)
+        tensor = v_out[0]
+
+        # reverse map of the outputs
+        if atomic:
+            tensor = np.array(tensor)
+            tensor = self.reverse_map(
+                np.reshape(tensor, [nframes, -1, self.output_dim]), sel_imap
+            )
+            tensor = np.reshape(tensor, [nframes, len(sel_at), self.output_dim])
+        else:
+            tensor = np.reshape(tensor, [nframes, self.output_dim])
+
+        return tensor
+
+    def eval_full(
+        self,
+        coords: np.ndarray,
+        cells: np.ndarray,
+        atom_types: List[int],
+        atomic: bool = False,
+        fparam: Optional[np.array] = None,
+        aparam: Optional[np.array] = None,
+        efield: Optional[np.array] = None,
+        mixed_type: bool = False,
+    ) -> Tuple[np.ndarray, ...]:
+        """Evaluate the model with interface similar to the energy model.
+        Will return global tensor, component-wise force and virial
+        and optionally atomic tensor and atomic virial.
+
+        Parameters
+        ----------
+        coords
+            The coordinates of atoms.
+            The array should be of size nframes x natoms x 3
+        cells
+            The cell of the region.
+            If None then non-PBC is assumed, otherwise using PBC.
+            The array should be of size nframes x 9
+        atom_types
+            The atom types
+            The list should contain natoms ints
+        atomic
+            Whether to calculate atomic tensor and virial
+        fparam
+            Not used in this model
+        aparam
+            Not used in this model
+        efield
+            Not used in this model
+        mixed_type
+            Whether to perform the mixed_type mode.
+            If True, the input data has the mixed_type format (see doc/model/train_se_atten.md),
+            in which frames in a system may have different natoms_vec(s), with the same nloc.
+
+        Returns
+        -------
+        tensor
+            The global tensor.
+            shape: [nframes x nout]
+        force
+            The component-wise force (negative derivative) on each atom.
+            shape: [nframes x nout x natoms x 3]
+        virial
+            The component-wise virial of the tensor.
+            shape: [nframes x nout x 9]
+        atom_tensor
+            The atomic tensor. Only returned when atomic == True
+            shape: [nframes x natoms x nout]
+        atom_virial
+            The atomic virial. Only returned when atomic == True
+            shape: [nframes x nout x natoms x 9]
+        """
+        assert self._support_gfv, "do not support eval_full with old tensor model"
+
+        # standarize the shape of inputs
+        if mixed_type:
+            natoms = atom_types[0].size
+            atom_types = np.array(atom_types, dtype=int).reshape([-1, natoms])
+        else:
+            atom_types = np.array(atom_types, dtype=int).reshape([-1])
+            natoms = atom_types.size
+        coords = np.reshape(np.array(coords), [-1, natoms * 3])
+        nframes = coords.shape[0]
+        if cells is None:
+            pbc = False
+            cells = np.tile(np.eye(3), [nframes, 1]).reshape([nframes, 9])
+        else:
+            pbc = True
+            cells = np.array(cells).reshape([nframes, 9])
+        nout = self.output_dim
+
+        # sort inputs
+        coords, atom_types, imap, sel_at, sel_imap = self.sort_input(
+            coords, atom_types, sel_atoms=self.get_sel_type(), mixed_type=mixed_type
+        )
+
+        # make natoms_vec and default_mesh
+        if self.neighbor_list is None:
+            natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type)
+            assert natoms_vec[0] == natoms
+            mesh = make_default_mesh(pbc, mixed_type)
+            ghost_map = None
+        else:
+            if nframes > 1:
+                raise NotImplementedError(
+                    "neighbor_list does not support multiple frames"
+                )
+            (
+                natoms_vec,
+                coords,
+                atom_types,
+                mesh,
+                imap,
+                ghost_map,
+            ) = self.build_neighbor_list(
+                coords,
+                cells if cells is not None else None,
+                atom_types,
+                imap,
+                self.neighbor_list,
+            )
+
+        # evaluate
+        feed_dict_test = {}
+        feed_dict_test[self.t_natoms] = natoms_vec
+        if mixed_type:
+            feed_dict_test[self.t_type] = atom_types.reshape([-1])
+        else:
+            feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape(
+                [-1]
+            )
+        feed_dict_test[self.t_coord] = np.reshape(coords, [-1])
+        feed_dict_test[self.t_box] = np.reshape(cells, [-1])
+        feed_dict_test[self.t_mesh] = mesh
+
+        t_out = [self.t_global_tensor, self.t_force, self.t_virial]
+        if atomic:
+            t_out += [self.t_tensor, self.t_atom_virial]
+
+        v_out = self.sess.run(t_out, feed_dict=feed_dict_test)
+        gt = v_out[0]  # global tensor
+        force = v_out[1]
+        virial = v_out[2]
+        if atomic:
+            at = v_out[3]  # atom tensor
+            av = v_out[4]  # atom virial
+
+        nloc = natoms_vec[0]
+        nall = natoms_vec[1]
+
+        if ghost_map is not None:
+            # add the value of ghost atoms to real atoms
+            force = np.reshape(force, [nframes * nout, -1, 3])
+            for ii in range(nframes * nout):
+                np.add.at(force[ii], ghost_map, force[ii, nloc:])
+            if atomic:
+                av = np.reshape(av, [nframes * nout, -1, 9])
+                for ii in range(nframes * nout):
+                    np.add.at(av[ii], ghost_map, av[ii, nloc:])
+
+        # please note here the shape are wrong!
+        force = self.reverse_map(np.reshape(force, [nframes * nout, nall, 3]), imap)
+        if atomic:
+            at = self.reverse_map(
+                np.reshape(at, [nframes, len(sel_at), nout]), sel_imap
+            )
+            av = self.reverse_map(np.reshape(av, [nframes * nout, nall, 9]), imap)
+
+        # make sure the shapes are correct here
+        gt = np.reshape(gt, [nframes, nout])
+        force = np.reshape(force, [nframes, nout, nall, 3])
+        if nloc < nall:
+            force = force[:, :, :nloc, :]
+        virial = np.reshape(virial, [nframes, nout, 9])
+        if atomic:
+            at = np.reshape(at, [nframes, len(sel_at), self.output_dim])
+            av = np.reshape(av, [nframes, nout, nall, 9])
+            if nloc < nall:
+                av = av[:, :, :nloc, :]
+            return gt, force, virial, at, av
+        else:
+            return gt, force, virial
diff --git a/deepmd/tf/infer/deep_wfc.py b/deepmd/tf/infer/deep_wfc.py
new file mode 100644
index 0000000000..f7674bdde7
--- /dev/null
+++ b/deepmd/tf/infer/deep_wfc.py
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.infer.deep_wfc import (
+    DeepWFC,
+)
+
+__all__ = [
+    "DeepWFC",
+]
diff --git a/deepmd/infer/ewald_recp.py b/deepmd/tf/infer/ewald_recp.py
similarity index 97%
rename from deepmd/infer/ewald_recp.py
rename to deepmd/tf/infer/ewald_recp.py
index 429a3cdfd6..110188c34f 100644
--- a/deepmd/infer/ewald_recp.py
+++ b/deepmd/tf/infer/ewald_recp.py
@@ -5,13 +5,13 @@
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     default_tf_session_config,
     op_module,
     tf,
 )
-from deepmd.utils.sess import (
+from deepmd.tf.utils.sess import (
     run_sess,
 )
 
diff --git a/deepmd/tf/infer/model_devi.py b/deepmd/tf/infer/model_devi.py
new file mode 100644
index 0000000000..4ee979ac67
--- /dev/null
+++ b/deepmd/tf/infer/model_devi.py
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.infer.model_devi import (
+    calc_model_devi,
+    calc_model_devi_e,
+    calc_model_devi_f,
+    calc_model_devi_v,
+    make_model_devi,
+    write_model_devi_out,
+)
+
+__all__ = [
+    "make_model_devi",
+    "calc_model_devi",
+    "write_model_devi_out",
+    "calc_model_devi_e",
+    "calc_model_devi_f",
+    "calc_model_devi_v",
+]
diff --git a/deepmd/lmp.py b/deepmd/tf/lmp.py
similarity index 99%
rename from deepmd/lmp.py
rename to deepmd/tf/lmp.py
index 5238cd9935..b2e47308ed 100644
--- a/deepmd/lmp.py
+++ b/deepmd/tf/lmp.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 """Register entry points for lammps-wheel."""
+
 import os
 import platform
 from importlib import (
@@ -17,7 +18,7 @@
     Version,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     SHARED_LIB_DIR,
     TF_VERSION,
     tf,
diff --git a/deepmd_utils/loggers/__init__.py b/deepmd/tf/loggers/__init__.py
similarity index 51%
rename from deepmd_utils/loggers/__init__.py
rename to deepmd/tf/loggers/__init__.py
index 39aa76139d..d9227d3620 100644
--- a/deepmd_utils/loggers/__init__.py
+++ b/deepmd/tf/loggers/__init__.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Module taking care of logging duties."""
+"""Alias of deepmd.loggers for backward compatibility."""
 
-from .loggers import (
+from deepmd.loggers.loggers import (
     set_log_handles,
 )
 
diff --git a/deepmd/tf/loggers/loggers.py b/deepmd/tf/loggers/loggers.py
new file mode 100644
index 0000000000..be948c9858
--- /dev/null
+++ b/deepmd/tf/loggers/loggers.py
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Alias of deepmd.loggers.loggers for backward compatibility."""
+
+from deepmd.loggers.loggers import (
+    set_log_handles,
+)
+
+__all__ = ["set_log_handles"]
diff --git a/deepmd/loss/__init__.py b/deepmd/tf/loss/__init__.py
similarity index 100%
rename from deepmd/loss/__init__.py
rename to deepmd/tf/loss/__init__.py
diff --git a/deepmd/loss/dos.py b/deepmd/tf/loss/dos.py
similarity index 98%
rename from deepmd/loss/dos.py
rename to deepmd/tf/loss/dos.py
index 7d38f2b17a..763e75638f 100644
--- a/deepmd/loss/dos.py
+++ b/deepmd/tf/loss/dos.py
@@ -1,15 +1,15 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
 
-from deepmd.common import (
+from deepmd.tf.common import (
     add_data_requirement,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     global_cvt_2_ener_float,
     global_cvt_2_tf_float,
     tf,
 )
-from deepmd.utils.sess import (
+from deepmd.tf.utils.sess import (
     run_sess,
 )
 
diff --git a/deepmd/loss/ener.py b/deepmd/tf/loss/ener.py
similarity index 99%
rename from deepmd/loss/ener.py
rename to deepmd/tf/loss/ener.py
index d7f83f09e5..baa4aa3e02 100644
--- a/deepmd/loss/ener.py
+++ b/deepmd/tf/loss/ener.py
@@ -5,15 +5,15 @@
 
 import numpy as np
 
-from deepmd.common import (
+from deepmd.tf.common import (
     add_data_requirement,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     global_cvt_2_ener_float,
     global_cvt_2_tf_float,
     tf,
 )
-from deepmd.utils.sess import (
+from deepmd.tf.utils.sess import (
     run_sess,
 )
 
@@ -120,7 +120,6 @@ def __init__(
             "atom_pref", 1, atomic=True, must=False, high_prec=False, repeat=3
         )
         # drdq: the partial derivative of atomic coordinates w.r.t. generalized coordinates
-        # TODO: could numb_generalized_coord decided from the training data?
         if self.has_gf > 0:
             add_data_requirement(
                 "drdq",
diff --git a/deepmd/loss/loss.py b/deepmd/tf/loss/loss.py
similarity index 98%
rename from deepmd/loss/loss.py
rename to deepmd/tf/loss/loss.py
index a719a08d81..327aea5230 100644
--- a/deepmd/loss/loss.py
+++ b/deepmd/tf/loss/loss.py
@@ -10,7 +10,7 @@
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
 
diff --git a/deepmd/loss/tensor.py b/deepmd/tf/loss/tensor.py
similarity index 98%
rename from deepmd/loss/tensor.py
rename to deepmd/tf/loss/tensor.py
index a40f95a18e..3be01d3871 100644
--- a/deepmd/loss/tensor.py
+++ b/deepmd/tf/loss/tensor.py
@@ -1,14 +1,14 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
 
-from deepmd.common import (
+from deepmd.tf.common import (
     add_data_requirement,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     global_cvt_2_tf_float,
     tf,
 )
-from deepmd.utils.sess import (
+from deepmd.tf.utils.sess import (
     run_sess,
 )
 
diff --git a/deepmd/model/__init__.py b/deepmd/tf/model/__init__.py
similarity index 54%
rename from deepmd/model/__init__.py
rename to deepmd/tf/model/__init__.py
index d366ca1441..1d100f2b09 100644
--- a/deepmd/model/__init__.py
+++ b/deepmd/tf/model/__init__.py
@@ -1,4 +1,17 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.tf.model.frozen import (
+    FrozenModel,
+)
+from deepmd.tf.model.linear import (
+    LinearEnergyModel,
+)
+from deepmd.tf.model.pairtab import (
+    PairTabModel,
+)
+from deepmd.tf.model.pairwise_dprc import (
+    PairwiseDPRc,
+)
+
 from .dos import (
     DOSModel,
 )
@@ -23,4 +36,8 @@
     "GlobalPolarModel",
     "PolarModel",
     "WFCModel",
+    "FrozenModel",
+    "LinearEnergyModel",
+    "PairTabModel",
+    "PairwiseDPRc",
 ]
diff --git a/deepmd/model/dos.py b/deepmd/tf/model/dos.py
similarity index 99%
rename from deepmd/model/dos.py
rename to deepmd/tf/model/dos.py
index 22e291a0f0..265026b60a 100644
--- a/deepmd/model/dos.py
+++ b/deepmd/tf/model/dos.py
@@ -5,12 +5,12 @@
     Union,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     MODEL_VERSION,
     global_cvt_2_ener_float,
     tf,
 )
-from deepmd.utils.type_embed import (
+from deepmd.tf.utils.type_embed import (
     TypeEmbedNet,
 )
 
diff --git a/deepmd/model/ener.py b/deepmd/tf/model/ener.py
similarity index 97%
rename from deepmd/model/ener.py
rename to deepmd/tf/model/ener.py
index 0d8d66b305..a493fe0517 100644
--- a/deepmd/model/ener.py
+++ b/deepmd/tf/model/ener.py
@@ -7,19 +7,19 @@
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     MODEL_VERSION,
     global_cvt_2_ener_float,
     op_module,
     tf,
 )
-from deepmd.utils.data_system import (
+from deepmd.tf.utils.data_system import (
     DeepmdDataSystem,
 )
-from deepmd.utils.spin import (
+from deepmd.tf.utils.spin import (
     Spin,
 )
-from deepmd.utils.type_embed import (
+from deepmd.tf.utils.type_embed import (
     TypeEmbedNet,
 )
 
@@ -486,7 +486,7 @@ def change_energy_bias(
         frozen_model: str,
         origin_type_map: list,
         full_type_map: str,
-        bias_shift: str = "delta",
+        bias_adjust_mode: str = "change-by-statistic",
     ) -> None:
         """Change the energy bias according to the input data and the pretrained model.
 
@@ -500,17 +500,17 @@ def change_energy_bias(
             The original type_map in dataset, they are targets to change the energy bias.
         full_type_map : str
             The full type_map in pretrained model
-        bias_shift : str
-            The mode for changing energy bias : ['delta', 'statistic']
-            'delta' : perform predictions on energies of target dataset,
+        bias_adjust_mode : str
+            The mode for changing energy bias : ['change-by-statistic', 'set-by-statistic']
+            'change-by-statistic' : perform predictions on energies of target dataset,
                     and do least sqaure on the errors to obtain the target shift as bias.
-            'statistic' : directly use the statistic energy bias in the target dataset.
+            'set-by-statistic' : directly use the statistic energy bias in the target dataset.
         """
         self.fitting.change_energy_bias(
             data,
             frozen_model,
             origin_type_map,
             full_type_map,
-            bias_shift,
+            bias_adjust_mode,
             self.data_bias_nsample,
         )
diff --git a/deepmd/model/frozen.py b/deepmd/tf/model/frozen.py
similarity index 68%
rename from deepmd/model/frozen.py
rename to deepmd/tf/model/frozen.py
index 38f342ebec..86676bfe0b 100644
--- a/deepmd/model/frozen.py
+++ b/deepmd/tf/model/frozen.py
@@ -1,4 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import tempfile
 from enum import (
     Enum,
 )
@@ -7,26 +10,37 @@
     Union,
 )
 
-from deepmd.env import (
+from deepmd.entrypoints.convert_backend import (
+    convert_backend,
+)
+from deepmd.infer.deep_pot import (
+    DeepPot,
+)
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     MODEL_VERSION,
     tf,
 )
-from deepmd.fit.fitting import (
+from deepmd.tf.fit.fitting import (
     Fitting,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPotential,
 )
-from deepmd.loss.loss import (
+from deepmd.tf.loss.loss import (
     Loss,
 )
+from deepmd.tf.utils.graph import (
+    get_tensor_by_name_from_graph,
+    load_graph_def,
+)
 
 from .model import (
     Model,
 )
 
 
+@Model.register("frozen")
 class FrozenModel(Model):
     """Load model from a frozen model, which cannot be trained.
 
@@ -39,8 +53,20 @@ class FrozenModel(Model):
     def __init__(self, model_file: str, **kwargs):
         super().__init__(**kwargs)
         self.model_file = model_file
-        self.model = DeepPotential(model_file)
-        self.model_type = self.model.model_type
+        if not model_file.endswith(".pb"):
+            # try to convert from other formats
+            with tempfile.NamedTemporaryFile(
+                suffix=".pb", dir=os.curdir, delete=False
+            ) as f:
+                convert_backend(INPUT=model_file, OUTPUT=f.name)
+                self.model_file = f.name
+        self.model = DeepPotential(self.model_file)
+        if isinstance(self.model, DeepPot):
+            self.model_type = "ener"
+        else:
+            raise NotImplementedError(
+                "This model type has not been implemented. " "Contribution is welcome!"
+            )
 
     def build(
         self,
@@ -122,14 +148,26 @@ def build(
             )
         if self.model_type == "ener":
             return {
-                "energy": tf.identity(self.model.t_energy, name="o_energy" + suffix),
-                "force": tf.identity(self.model.t_force, name="o_force" + suffix),
-                "virial": tf.identity(self.model.t_virial, name="o_virial" + suffix),
+                # must visit the backend class
+                "energy": tf.identity(
+                    self.model.deep_eval.output_tensors["energy_redu"],
+                    name="o_energy" + suffix,
+                ),
+                "force": tf.identity(
+                    self.model.deep_eval.output_tensors["energy_derv_r"],
+                    name="o_force" + suffix,
+                ),
+                "virial": tf.identity(
+                    self.model.deep_eval.output_tensors["energy_derv_c_redu"],
+                    name="o_virial" + suffix,
+                ),
                 "atom_ener": tf.identity(
-                    self.model.t_ae, name="o_atom_energy" + suffix
+                    self.model.deep_eval.output_tensors["energy"],
+                    name="o_atom_energy" + suffix,
                 ),
                 "atom_virial": tf.identity(
-                    self.model.t_av, name="o_atom_virial" + suffix
+                    self.model.deep_eval.output_tensors["energy_derv_c"],
+                    name="o_atom_virial" + suffix,
                 ),
                 "coord": coord_,
                 "atype": atype_,
@@ -207,3 +245,19 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict):
         """
         # we don't know how to compress it, so no neighbor statistics here
         return local_jdata
+
+    def serialize(self, suffix: str = "") -> dict:
+        # try to recover the original model
+        # the current graph contains a prefix "load",
+        # so it cannot used to recover the original model
+        graph, graph_def = load_graph_def(self.model_file)
+        t_jdata = get_tensor_by_name_from_graph(graph, "train_attr/training_script")
+        jdata = json.loads(t_jdata)
+        model = Model(**jdata["model"])
+        # important! must be called before serialize
+        model.init_variables(graph=graph, graph_def=graph_def)
+        return model.serialize()
+
+    @classmethod
+    def deserialize(cls, data: dict, suffix: str = ""):
+        raise RuntimeError("Should not touch here.")
diff --git a/deepmd/model/linear.py b/deepmd/tf/model/linear.py
similarity index 98%
rename from deepmd/model/linear.py
rename to deepmd/tf/model/linear.py
index 7c527fe9dc..ae1b0b5c78 100644
--- a/deepmd/model/linear.py
+++ b/deepmd/tf/model/linear.py
@@ -11,15 +11,15 @@
     Union,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     MODEL_VERSION,
     tf,
 )
-from deepmd.fit.fitting import (
+from deepmd.tf.fit.fitting import (
     Fitting,
 )
-from deepmd.loss.loss import (
+from deepmd.tf.loss.loss import (
     Loss,
 )
 
@@ -54,7 +54,6 @@ def __init__(self, models: List[dict], weights: List[float], **kwargs):
             self.weights = [1 / len(models) for _ in range(len(models))]
         elif weights == "sum":
             self.weights = [1 for _ in range(len(models))]
-        # TODO: add more weights, for example, so-called committee models
         else:
             raise ValueError(f"Invalid weights {weights}")
 
@@ -147,6 +146,7 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict):
         return local_jdata_cpy
 
 
+@Model.register("linear_ener")
 class LinearEnergyModel(LinearModel):
     """Linear energy model make linear combinations of several existing energy models."""
 
diff --git a/deepmd/model/model.py b/deepmd/tf/model/model.py
similarity index 78%
rename from deepmd/model/model.py
rename to deepmd/tf/model/model.py
index 6117b4942d..76bcc6072b 100644
--- a/deepmd/model/model.py
+++ b/deepmd/tf/model/model.py
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
 from abc import (
     ABC,
     abstractmethod,
@@ -13,40 +14,61 @@
     Union,
 )
 
-from deepmd.descriptor.descriptor import (
+from deepmd.common import (
+    j_get_type,
+)
+from deepmd.tf.descriptor.descriptor import (
     Descriptor,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     tf,
 )
-from deepmd.fit.fitting import (
+from deepmd.tf.fit.dipole import (
+    DipoleFittingSeA,
+)
+from deepmd.tf.fit.dos import (
+    DOSFitting,
+)
+from deepmd.tf.fit.ener import (
+    EnerFitting,
+)
+from deepmd.tf.fit.fitting import (
     Fitting,
 )
-from deepmd.loss.loss import (
+from deepmd.tf.fit.polar import (
+    PolarFittingSeA,
+)
+from deepmd.tf.loss.loss import (
     Loss,
 )
-from deepmd.utils.argcheck import (
+from deepmd.tf.utils.argcheck import (
     type_embedding_args,
 )
-from deepmd.utils.data_system import (
+from deepmd.tf.utils.data_system import (
     DeepmdDataSystem,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     load_graph_def,
 )
-from deepmd.utils.pair_tab import (
+from deepmd.tf.utils.pair_tab import (
     PairTab,
 )
-from deepmd.utils.spin import (
+from deepmd.tf.utils.spin import (
     Spin,
 )
-from deepmd.utils.type_embed import (
+from deepmd.tf.utils.type_embed import (
     TypeEmbedNet,
 )
+from deepmd.utils.plugin import (
+    make_plugin_registry,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
 
 
-class Model(ABC):
+class Model(ABC, make_plugin_registry("model")):
     """Abstract base model.
 
     Parameters
@@ -78,52 +100,10 @@ class Model(ABC):
         Compression information for internal use
     """
 
-    @classmethod
-    def get_class_by_input(cls, input: dict):
-        """Get the class by input data.
-
-        Parameters
-        ----------
-        input : dict
-            The input data
-        """
-        # infer model type by fitting_type
-        from deepmd.model.frozen import (
-            FrozenModel,
-        )
-        from deepmd.model.linear import (
-            LinearEnergyModel,
-        )
-        from deepmd.model.multi import (
-            MultiModel,
-        )
-        from deepmd.model.pairtab import (
-            PairTabModel,
-        )
-        from deepmd.model.pairwise_dprc import (
-            PairwiseDPRc,
-        )
-
-        model_type = input.get("type", "standard")
-        if model_type == "standard":
-            return StandardModel
-        elif model_type == "multi":
-            return MultiModel
-        elif model_type == "pairwise_dprc":
-            return PairwiseDPRc
-        elif model_type == "frozen":
-            return FrozenModel
-        elif model_type == "linear_ener":
-            return LinearEnergyModel
-        elif model_type == "pairtab":
-            return PairTabModel
-        else:
-            raise ValueError(f"unknown model type: {model_type}")
-
     def __new__(cls, *args, **kwargs):
         if cls is Model:
             # init model
-            cls = cls.get_class_by_input(kwargs)
+            cls = cls.get_class_by_type(kwargs.get("type", "standard"))
             return cls.__new__(cls, *args, **kwargs)
         return super().__new__(cls)
 
@@ -428,7 +408,7 @@ def change_energy_bias(
         frozen_model: str,
         origin_type_map: list,
         full_type_map: str,
-        bias_shift: str = "delta",
+        bias_adjust_mode: str = "change-by-statistic",
     ) -> None:
         """Change the energy bias according to the input data and the pretrained model.
 
@@ -442,11 +422,11 @@ def change_energy_bias(
             The original type_map in dataset, they are targets to change the energy bias.
         full_type_map : str
             The full type_map in pretrained model
-        bias_shift : str
-            The mode for changing energy bias : ['delta', 'statistic']
-            'delta' : perform predictions on energies of target dataset,
+        bias_adjust_mode : str
+            The mode for changing energy bias : ['change-by-statistic', 'set-by-statistic']
+            'change-by-statistic' : perform predictions on energies of target dataset,
                     and do least sqaure on the errors to obtain the target shift as bias.
-            'statistic' : directly use the statistic energy bias in the target dataset.
+            'set-by-statistic' : directly use the statistic energy bias in the target dataset.
         """
         raise RuntimeError("Not supported")
 
@@ -515,7 +495,7 @@ def get_feed_dict(
             natoms[1]: total number of atoms held by this processor
             natoms[i]: 2 <= i < Ntypes+2, number of type i atoms
         box : tf.Tensor
-            The box. Can be generated by deepmd.model.make_stat_input
+            The box. Can be generated by deepmd.tf.model.make_stat_input
         mesh : tf.Tensor
             For historical reasons, only the length of the Tensor matters.
             if size of mesh == 6, pbc is assumed.
@@ -562,10 +542,52 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict) -> dict:
         dict
             The updated local data
         """
-        cls = cls.get_class_by_input(local_jdata)
+        cls = cls.get_class_by_type(local_jdata.get("type", "standard"))
         return cls.update_sel(global_jdata, local_jdata)
 
+    @classmethod
+    def deserialize(cls, data: dict, suffix: str = "") -> "Model":
+        """Deserialize the model.
+
+        There is no suffix in a native DP model, but it is important
+        for the TF backend.
+
+        Parameters
+        ----------
+        data : dict
+            The serialized data
+        suffix : str, optional
+            Name suffix to identify this model
+
+        Returns
+        -------
+        Model
+            The deserialized Model
+        """
+        if cls is Model:
+            return Model.get_class_by_type(data.get("type", "standard")).deserialize(
+                data,
+                suffix=suffix,
+            )
+        raise NotImplementedError("Not implemented in class %s" % cls.__name__)
+
+    def serialize(self, suffix: str = "") -> dict:
+        """Serialize the model.
 
+        There is no suffix in a native DP model, but it is important
+        for the TF backend.
+
+        Returns
+        -------
+        dict
+            The serialized data
+        suffix : str, optional
+            Name suffix to identify this descriptor
+        """
+        raise NotImplementedError("Not implemented in class %s" % self.__name__)
+
+
+@Model.register("standard")
 class StandardModel(Model):
     """Standard model, which must contain a descriptor and a fitting.
 
@@ -594,16 +616,23 @@ def __new__(cls, *args, **kwargs):
         )
 
         if cls is StandardModel:
-            fitting_type = kwargs["fitting_net"]["type"]
+            if isinstance(kwargs["fitting_net"], dict):
+                fitting_type = Fitting.get_class_by_type(
+                    j_get_type(kwargs["fitting_net"], cls.__name__)
+                )
+            elif isinstance(kwargs["fitting_net"], Fitting):
+                fitting_type = type(kwargs["fitting_net"])
+            else:
+                raise RuntimeError("get unknown fitting type when building model")
             # init model
             # infer model type by fitting_type
-            if fitting_type == "ener":
+            if issubclass(fitting_type, EnerFitting):
                 cls = EnerModel
-            elif fitting_type == "dos":
+            elif issubclass(fitting_type, DOSFitting):
                 cls = DOSModel
-            elif fitting_type == "dipole":
+            elif issubclass(fitting_type, DipoleFittingSeA):
                 cls = DipoleModel
-            elif fitting_type == "polar":
+            elif issubclass(fitting_type, PolarFittingSeA):
                 cls = PolarModel
             else:
                 raise RuntimeError("get unknown fitting type when building model")
@@ -631,7 +660,16 @@ def __init__(
         if isinstance(fitting_net, Fitting):
             self.fitting = fitting_net
         else:
-            self.fitting = Fitting(**fitting_net, descrpt=self.descrpt, spin=self.spin)
+            if fitting_net["type"] in ["dipole", "polar"]:
+                fitting_net["embedding_width"] = self.descrpt.get_dim_rot_mat_1()
+            self.fitting = Fitting(
+                **fitting_net,
+                descrpt=self.descrpt,
+                spin=self.spin,
+                ntypes=self.descrpt.get_ntypes(),
+                dim_descrpt=self.descrpt.get_dim_out(),
+                mixed_types=type_embedding is not None or self.descrpt.explicit_ntypes,
+            )
         self.rcut = self.descrpt.get_rcut()
         self.ntypes = self.descrpt.get_ntypes()
 
@@ -640,6 +678,7 @@ def __init__(
             self.typeebd = type_embedding
         elif type_embedding is not None:
             self.typeebd = TypeEmbedNet(
+                ntypes=self.ntypes,
                 **type_embedding,
                 padding=self.descrpt.explicit_ntypes,
             )
@@ -648,6 +687,7 @@ def __init__(
             default_args_dict = {i.name: i.default for i in default_args}
             default_args_dict["activation_function"] = None
             self.typeebd = TypeEmbedNet(
+                ntypes=self.ntypes,
                 **default_args_dict,
                 padding=True,
             )
@@ -724,3 +764,63 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict):
             global_jdata, local_jdata["descriptor"]
         )
         return local_jdata_cpy
+
+    @classmethod
+    def deserialize(cls, data: dict, suffix: str = "") -> "Descriptor":
+        """Deserialize the model.
+
+        There is no suffix in a native DP model, but it is important
+        for the TF backend.
+
+        Parameters
+        ----------
+        data : dict
+            The serialized data
+        suffix : str, optional
+            Name suffix to identify this descriptor
+
+        Returns
+        -------
+        Descriptor
+            The deserialized descriptor
+        """
+        data = copy.deepcopy(data)
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        descriptor = Descriptor.deserialize(data.pop("descriptor"), suffix=suffix)
+        fitting = Fitting.deserialize(data.pop("fitting"), suffix=suffix)
+        data.pop("atom_exclude_types")
+        data.pop("pair_exclude_types")
+        return cls(
+            descriptor=descriptor,
+            fitting_net=fitting,
+            **data,
+        )
+
+    def serialize(self, suffix: str = "") -> dict:
+        """Serialize the model.
+
+        There is no suffix in a native DP model, but it is important
+        for the TF backend.
+
+        Returns
+        -------
+        dict
+            The serialized data
+        suffix : str, optional
+            Name suffix to identify this descriptor
+        """
+        if self.typeebd is not None:
+            raise NotImplementedError("type embedding is not supported")
+        if self.spin is not None:
+            raise NotImplementedError("spin is not supported")
+        return {
+            "@class": "Model",
+            "type": "standard",
+            "@version": 1,
+            "type_map": self.type_map,
+            "descriptor": self.descrpt.serialize(suffix=suffix),
+            "fitting": self.fitting.serialize(suffix=suffix),
+            # not supported yet
+            "atom_exclude_types": [],
+            "pair_exclude_types": [],
+        }
diff --git a/deepmd/model/model_stat.py b/deepmd/tf/model/model_stat.py
similarity index 85%
rename from deepmd/model/model_stat.py
rename to deepmd/tf/model/model_stat.py
index 933a634ce8..db70262d50 100644
--- a/deepmd/model/model_stat.py
+++ b/deepmd/tf/model/model_stat.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 """Alias for backward compatibility."""
-from deepmd_utils.utils.model_stat import (
+
+from deepmd.utils.model_stat import (
     _make_all_stat_ref,
     make_stat_input,
     merge_sys_stat,
diff --git a/deepmd/model/multi.py b/deepmd/tf/model/multi.py
similarity index 96%
rename from deepmd/model/multi.py
rename to deepmd/tf/model/multi.py
index 83b231c0e8..e49ad47ee3 100644
--- a/deepmd/model/multi.py
+++ b/deepmd/tf/model/multi.py
@@ -8,41 +8,41 @@
 
 import numpy as np
 
-from deepmd.descriptor.descriptor import (
+from deepmd.tf.descriptor.descriptor import (
     Descriptor,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     MODEL_VERSION,
     global_cvt_2_ener_float,
     op_module,
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     DipoleFittingSeA,
     DOSFitting,
     EnerFitting,
     GlobalPolarFittingSeA,
     PolarFittingSeA,
 )
-from deepmd.fit.fitting import (
+from deepmd.tf.fit.fitting import (
     Fitting,
 )
-from deepmd.loss.loss import (
+from deepmd.tf.loss.loss import (
     Loss,
 )
-from deepmd.utils.argcheck import (
+from deepmd.tf.utils.argcheck import (
     type_embedding_args,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_tensor_by_name_from_graph,
 )
-from deepmd.utils.pair_tab import (
+from deepmd.tf.utils.pair_tab import (
     PairTab,
 )
-from deepmd.utils.spin import (
+from deepmd.tf.utils.spin import (
     Spin,
 )
-from deepmd.utils.type_embed import (
+from deepmd.tf.utils.type_embed import (
     TypeEmbedNet,
 )
 
@@ -55,6 +55,7 @@
 )
 
 
+@Model.register("multi")
 class MultiModel(Model):
     """Multi-task model.
 
@@ -133,15 +134,25 @@ def __init__(
             if isinstance(item_fitting_param, Fitting):
                 fitting_dict[item] = item_fitting_param
             else:
+                if item_fitting_param["type"] in ["dipole", "polar"]:
+                    item_fitting_param["embedding_width"] = (
+                        self.descrpt.get_dim_rot_mat_1()
+                    )
                 fitting_dict[item] = Fitting(
-                    **item_fitting_param, descrpt=self.descrpt, spin=self.spin
+                    **item_fitting_param,
+                    descrpt=self.descrpt,
+                    spin=self.spin,
+                    ntypes=self.descrpt.get_ntypes(),
+                    dim_descrpt=self.descrpt.get_dim_out(),
                 )
 
+        self.ntypes = self.descrpt.get_ntypes()
         # type embedding
         if type_embedding is not None and isinstance(type_embedding, TypeEmbedNet):
             self.typeebd = type_embedding
         elif type_embedding is not None:
             self.typeebd = TypeEmbedNet(
+                ntypes=self.ntypes,
                 **type_embedding,
                 padding=self.descrpt.explicit_ntypes,
             )
@@ -150,6 +161,7 @@ def __init__(
             default_args_dict = {i.name: i.default for i in default_args}
             default_args_dict["activation_function"] = None
             self.typeebd = TypeEmbedNet(
+                ntypes=self.ntypes,
                 **default_args_dict,
                 padding=True,
             )
@@ -158,7 +170,6 @@ def __init__(
 
         # descriptor
         self.rcut = self.descrpt.get_rcut()
-        self.ntypes = self.descrpt.get_ntypes()
         # fitting
         self.fitting_dict = fitting_dict
         self.numb_fparam_dict = {
diff --git a/deepmd/model/pairtab.py b/deepmd/tf/model/pairtab.py
similarity index 96%
rename from deepmd/model/pairtab.py
rename to deepmd/tf/model/pairtab.py
index 38934818e6..3cc1114f81 100644
--- a/deepmd/model/pairtab.py
+++ b/deepmd/tf/model/pairtab.py
@@ -10,27 +10,31 @@
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     MODEL_VERSION,
     global_cvt_2_ener_float,
     op_module,
     tf,
 )
-from deepmd.fit.fitting import (
+from deepmd.tf.fit.fitting import (
     Fitting,
 )
-from deepmd.loss.loss import (
+from deepmd.tf.loss.loss import (
     Loss,
 )
-from deepmd.model.model import (
+from deepmd.tf.model.model import (
     Model,
 )
-from deepmd.utils.pair_tab import (
+from deepmd.tf.utils.pair_tab import (
     PairTab,
 )
+from deepmd.tf.utils.update_sel import (
+    UpdateSel,
+)
 
 
+@Model.register("pairtab")
 class PairTabModel(Model):
     """Pairwise tabulation energy model.
 
@@ -280,9 +284,5 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict) -> dict:
         dict
             The updated local data
         """
-        from deepmd.entrypoints.train import (
-            update_one_sel,
-        )
-
         local_jdata_cpy = local_jdata.copy()
-        return update_one_sel(global_jdata, local_jdata_cpy, True)
+        return UpdateSel().update_one_sel(global_jdata, local_jdata_cpy, True)
diff --git a/deepmd/model/pairwise_dprc.py b/deepmd/tf/model/pairwise_dprc.py
similarity index 96%
rename from deepmd/model/pairwise_dprc.py
rename to deepmd/tf/model/pairwise_dprc.py
index f74571febb..92e943d486 100644
--- a/deepmd/model/pairwise_dprc.py
+++ b/deepmd/tf/model/pairwise_dprc.py
@@ -6,33 +6,37 @@
     Union,
 )
 
-from deepmd.common import (
+from deepmd.tf.common import (
     add_data_requirement,
     make_default_mesh,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     MODEL_VERSION,
     op_module,
     tf,
 )
-from deepmd.loss.loss import (
+from deepmd.tf.loss.loss import (
     Loss,
 )
-from deepmd.model.model import (
+from deepmd.tf.model.model import (
     Model,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     load_graph_def,
 )
-from deepmd.utils.spin import (
+from deepmd.tf.utils.spin import (
     Spin,
 )
-from deepmd.utils.type_embed import (
+from deepmd.tf.utils.type_embed import (
     TypeEmbedNet,
 )
+from deepmd.tf.utils.update_sel import (
+    UpdateSel,
+)
 
 
+@Model.register("pairwise_dprc")
 class PairwiseDPRc(Model):
     """Pairwise Deep Potential - Range Correction."""
 
@@ -73,11 +77,13 @@ def __init__(
             compress=compress,
             **kwargs,
         )
+        self.ntypes = len(type_map)
         # type embedding
         if isinstance(type_embedding, TypeEmbedNet):
             self.typeebd = type_embedding
         else:
             self.typeebd = TypeEmbedNet(
+                ntypes=self.ntypes,
                 **type_embedding,
                 # must use se_atten, so it must be True
                 padding=True,
@@ -96,7 +102,6 @@ def __init__(
             compress=compress,
         )
         add_data_requirement("aparam", 1, atomic=True, must=True, high_prec=False)
-        self.ntypes = len(type_map)
         self.rcut = max(self.qm_model.get_rcut(), self.qmmm_model.get_rcut())
 
     def build(
@@ -377,7 +382,7 @@ def get_feed_dict(
             natoms[1]: total number of atoms held by this processor
             natoms[i]: 2 <= i < Ntypes+2, number of type i atoms
         box : tf.Tensor
-            The box. Can be generated by deepmd.model.make_stat_input
+            The box. Can be generated by deepmd.tf.model.make_stat_input
         mesh : tf.Tensor
             For historical reasons, only the length of the Tensor matters.
             if size of mesh == 6, pbc is assumed.
@@ -412,13 +417,9 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict):
         local_jdata : dict
             The local data refer to the current class
         """
-        from deepmd.entrypoints.train import (
-            get_min_nbor_dist,
-        )
-
         # do not update sel; only find min distance
         # rcut is not important here
-        get_min_nbor_dist(global_jdata, 6.0)
+        UpdateSel().get_min_nbor_dist(global_jdata, 6.0)
         return local_jdata
 
 
diff --git a/deepmd/model/tensor.py b/deepmd/tf/model/tensor.py
similarity index 98%
rename from deepmd/model/tensor.py
rename to deepmd/tf/model/tensor.py
index 6a21e085f3..b232f40b13 100644
--- a/deepmd/model/tensor.py
+++ b/deepmd/tf/model/tensor.py
@@ -5,11 +5,11 @@
     Union,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     MODEL_VERSION,
     tf,
 )
-from deepmd.utils.type_embed import (
+from deepmd.tf.utils.type_embed import (
     TypeEmbedNet,
 )
 
@@ -86,7 +86,7 @@ def data_stat(self, data):
         all_stat = make_stat_input(data, self.data_stat_nbatch, merge_sys=False)
         m_all_stat = merge_sys_stat(all_stat)
         self._compute_input_stat(m_all_stat, protection=self.data_stat_protect)
-        self._compute_output_stat(all_stat)
+        self._compute_output_stat(m_all_stat)
 
     def _compute_input_stat(self, all_stat, protection=1e-2):
         self.descrpt.compute_input_stats(
diff --git a/deepmd/nvnmd/__init__.py b/deepmd/tf/nvnmd/__init__.py
similarity index 100%
rename from deepmd/nvnmd/__init__.py
rename to deepmd/tf/nvnmd/__init__.py
diff --git a/deepmd/nvnmd/data/__init__.py b/deepmd/tf/nvnmd/data/__init__.py
similarity index 100%
rename from deepmd/nvnmd/data/__init__.py
rename to deepmd/tf/nvnmd/data/__init__.py
diff --git a/deepmd/nvnmd/data/data.py b/deepmd/tf/nvnmd/data/data.py
similarity index 100%
rename from deepmd/nvnmd/data/data.py
rename to deepmd/tf/nvnmd/data/data.py
diff --git a/deepmd/nvnmd/descriptor/__init__.py b/deepmd/tf/nvnmd/descriptor/__init__.py
similarity index 100%
rename from deepmd/nvnmd/descriptor/__init__.py
rename to deepmd/tf/nvnmd/descriptor/__init__.py
diff --git a/deepmd/nvnmd/descriptor/se_a.py b/deepmd/tf/nvnmd/descriptor/se_a.py
similarity index 98%
rename from deepmd/nvnmd/descriptor/se_a.py
rename to deepmd/tf/nvnmd/descriptor/se_a.py
index 816f17cfa3..cc90df7a5c 100644
--- a/deepmd/nvnmd/descriptor/se_a.py
+++ b/deepmd/tf/nvnmd/descriptor/se_a.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     op_module,
@@ -11,16 +11,16 @@
 )
 
 #
-from deepmd.nvnmd.utils.config import (
+from deepmd.tf.nvnmd.utils.config import (
     nvnmd_cfg,
 )
-from deepmd.nvnmd.utils.weight import (
+from deepmd.tf.nvnmd.utils.weight import (
     get_normalize,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_tensor_by_name_from_graph,
 )
-from deepmd.utils.network import (
+from deepmd.tf.utils.network import (
     embedding_net,
 )
 
diff --git a/deepmd/nvnmd/descriptor/se_atten.py b/deepmd/tf/nvnmd/descriptor/se_atten.py
similarity index 98%
rename from deepmd/nvnmd/descriptor/se_atten.py
rename to deepmd/tf/nvnmd/descriptor/se_atten.py
index cfffb8a90b..474f6995cf 100644
--- a/deepmd/nvnmd/descriptor/se_atten.py
+++ b/deepmd/tf/nvnmd/descriptor/se_atten.py
@@ -3,20 +3,20 @@
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     op_module,
     tf,
 )
 
 #
-from deepmd.nvnmd.utils.config import (
+from deepmd.tf.nvnmd.utils.config import (
     nvnmd_cfg,
 )
-from deepmd.nvnmd.utils.weight import (
+from deepmd.tf.nvnmd.utils.weight import (
     get_normalize,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_tensor_by_name_from_graph,
 )
 
diff --git a/deepmd/nvnmd/entrypoints/__init__.py b/deepmd/tf/nvnmd/entrypoints/__init__.py
similarity index 100%
rename from deepmd/nvnmd/entrypoints/__init__.py
rename to deepmd/tf/nvnmd/entrypoints/__init__.py
diff --git a/deepmd/nvnmd/entrypoints/freeze.py b/deepmd/tf/nvnmd/entrypoints/freeze.py
similarity index 96%
rename from deepmd/nvnmd/entrypoints/freeze.py
rename to deepmd/tf/nvnmd/entrypoints/freeze.py
index e56a0c2130..2a2b8d9179 100644
--- a/deepmd/nvnmd/entrypoints/freeze.py
+++ b/deepmd/tf/nvnmd/entrypoints/freeze.py
@@ -1,13 +1,13 @@
 #!/usr/bin/env python3
 
 # SPDX-License-Identifier: LGPL-3.0-or-later
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.nvnmd.utils.fio import (
+from deepmd.tf.nvnmd.utils.fio import (
     FioDic,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_tensor_by_name_from_graph,
 )
 
diff --git a/deepmd/nvnmd/entrypoints/mapt.py b/deepmd/tf/nvnmd/entrypoints/mapt.py
similarity index 98%
rename from deepmd/nvnmd/entrypoints/mapt.py
rename to deepmd/tf/nvnmd/entrypoints/mapt.py
index 1299d7a74e..7401234e35 100644
--- a/deepmd/nvnmd/entrypoints/mapt.py
+++ b/deepmd/tf/nvnmd/entrypoints/mapt.py
@@ -6,30 +6,30 @@
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_module,
     tf,
 )
-from deepmd.nvnmd.data.data import (
+from deepmd.tf.nvnmd.data.data import (
     jdata_deepmd_input_v0,
     jdata_sys,
 )
-from deepmd.nvnmd.utils.config import (
+from deepmd.tf.nvnmd.utils.config import (
     nvnmd_cfg,
 )
-from deepmd.nvnmd.utils.fio import (
+from deepmd.tf.nvnmd.utils.fio import (
     FioDic,
 )
-from deepmd.nvnmd.utils.network import (
+from deepmd.tf.nvnmd.utils.network import (
     get_sess,
 )
-from deepmd.nvnmd.utils.weight import (
+from deepmd.tf.nvnmd.utils.weight import (
     get_filter_type_weight,
     get_filter_weight,
     get_normalize,
     get_type_embedding_weight,
 )
-from deepmd.utils.sess import (
+from deepmd.tf.utils.sess import (
     run_sess,
 )
 
diff --git a/deepmd/nvnmd/entrypoints/train.py b/deepmd/tf/nvnmd/entrypoints/train.py
similarity index 94%
rename from deepmd/nvnmd/entrypoints/train.py
rename to deepmd/tf/nvnmd/entrypoints/train.py
index 6e14b6f865..18c644a7f6 100644
--- a/deepmd/nvnmd/entrypoints/train.py
+++ b/deepmd/tf/nvnmd/entrypoints/train.py
@@ -5,28 +5,28 @@
     Optional,
 )
 
-from deepmd.entrypoints.freeze import (
+from deepmd.tf.entrypoints.freeze import (
     freeze,
 )
-from deepmd.entrypoints.train import (
+from deepmd.tf.entrypoints.train import (
     train,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.nvnmd.data.data import (
+from deepmd.tf.nvnmd.data.data import (
     jdata_deepmd_input_v0,
 )
-from deepmd.nvnmd.entrypoints.mapt import (
+from deepmd.tf.nvnmd.entrypoints.mapt import (
     mapt,
 )
-from deepmd.nvnmd.entrypoints.wrap import (
+from deepmd.tf.nvnmd.entrypoints.wrap import (
     wrap,
 )
-from deepmd.nvnmd.utils.config import (
+from deepmd.tf.nvnmd.utils.config import (
     nvnmd_cfg,
 )
-from deepmd.nvnmd.utils.fio import (
+from deepmd.tf.nvnmd.utils.fio import (
     FioDic,
 )
 
diff --git a/deepmd/nvnmd/entrypoints/wrap.py b/deepmd/tf/nvnmd/entrypoints/wrap.py
similarity index 98%
rename from deepmd/nvnmd/entrypoints/wrap.py
rename to deepmd/tf/nvnmd/entrypoints/wrap.py
index 1ba2ed7384..f2be8352e2 100644
--- a/deepmd/nvnmd/entrypoints/wrap.py
+++ b/deepmd/tf/nvnmd/entrypoints/wrap.py
@@ -6,32 +6,32 @@
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_module,
     tf,
 )
-from deepmd.nvnmd.data.data import (
+from deepmd.tf.nvnmd.data.data import (
     jdata_deepmd_input_v0,
     jdata_sys,
 )
-from deepmd.nvnmd.utils.config import (
+from deepmd.tf.nvnmd.utils.config import (
     nvnmd_cfg,
 )
-from deepmd.nvnmd.utils.encode import (
+from deepmd.tf.nvnmd.utils.encode import (
     Encode,
 )
-from deepmd.nvnmd.utils.fio import (
+from deepmd.tf.nvnmd.utils.fio import (
     FioBin,
     FioTxt,
 )
-from deepmd.nvnmd.utils.network import (
+from deepmd.tf.nvnmd.utils.network import (
     get_sess,
 )
-from deepmd.nvnmd.utils.weight import (
+from deepmd.tf.nvnmd.utils.weight import (
     get_fitnet_weight,
     get_type_weight,
 )
-from deepmd.utils.sess import (
+from deepmd.tf.utils.sess import (
     run_sess,
 )
 
diff --git a/deepmd/nvnmd/fit/__init__.py b/deepmd/tf/nvnmd/fit/__init__.py
similarity index 100%
rename from deepmd/nvnmd/fit/__init__.py
rename to deepmd/tf/nvnmd/fit/__init__.py
diff --git a/deepmd/nvnmd/fit/ener.py b/deepmd/tf/nvnmd/fit/ener.py
similarity index 58%
rename from deepmd/nvnmd/fit/ener.py
rename to deepmd/tf/nvnmd/fit/ener.py
index 1f316a2145..20adda395c 100644
--- a/deepmd/nvnmd/fit/ener.py
+++ b/deepmd/tf/nvnmd/fit/ener.py
@@ -1,12 +1,12 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     tf,
 )
-from deepmd.nvnmd.utils.config import (
+from deepmd.tf.nvnmd.utils.config import (
     nvnmd_cfg,
 )
-from deepmd.nvnmd.utils.network import one_layer as one_layer_nvnmd
+from deepmd.tf.nvnmd.utils.network import one_layer as one_layer_nvnmd
 
 __all__ = [
     "GLOBAL_TF_FLOAT_PRECISION",
diff --git a/deepmd/nvnmd/utils/__init__.py b/deepmd/tf/nvnmd/utils/__init__.py
similarity index 100%
rename from deepmd/nvnmd/utils/__init__.py
rename to deepmd/tf/nvnmd/utils/__init__.py
diff --git a/deepmd/nvnmd/utils/argcheck.py b/deepmd/tf/nvnmd/utils/argcheck.py
similarity index 73%
rename from deepmd/nvnmd/utils/argcheck.py
rename to deepmd/tf/nvnmd/utils/argcheck.py
index 2b9362efb0..1f10a1c03e 100644
--- a/deepmd/nvnmd/utils/argcheck.py
+++ b/deepmd/tf/nvnmd/utils/argcheck.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 """Alias for backward compatibility."""
-from deepmd_utils.utils.argcheck_nvnmd import (
+
+from deepmd.utils.argcheck_nvnmd import (
     nvnmd_args,
 )
 
diff --git a/deepmd/nvnmd/utils/config.py b/deepmd/tf/nvnmd/utils/config.py
similarity index 99%
rename from deepmd/nvnmd/utils/config.py
rename to deepmd/tf/nvnmd/utils/config.py
index 5bfd9ea54f..15998069b3 100644
--- a/deepmd/nvnmd/utils/config.py
+++ b/deepmd/tf/nvnmd/utils/config.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 
-from deepmd.nvnmd.data.data import (
+from deepmd.tf.nvnmd.data.data import (
     NVNMD_CITATION,
     NVNMD_WELCOME,
     jdata_config_v0,
@@ -17,10 +17,10 @@
     jdata_deepmd_input_v1_ni128,
     jdata_deepmd_input_v1_ni256,
 )
-from deepmd.nvnmd.utils.fio import (
+from deepmd.tf.nvnmd.utils.fio import (
     FioDic,
 )
-from deepmd.nvnmd.utils.op import (
+from deepmd.tf.nvnmd.utils.op import (
     r2s,
 )
 
diff --git a/deepmd/nvnmd/utils/encode.py b/deepmd/tf/nvnmd/utils/encode.py
similarity index 99%
rename from deepmd/nvnmd/utils/encode.py
rename to deepmd/tf/nvnmd/utils/encode.py
index 55f4efd52e..21398fbf23 100644
--- a/deepmd/nvnmd/utils/encode.py
+++ b/deepmd/tf/nvnmd/utils/encode.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 
-from deepmd.nvnmd.data.data import (
+from deepmd.tf.nvnmd.data.data import (
     jdata_sys,
 )
 
diff --git a/deepmd/nvnmd/utils/fio.py b/deepmd/tf/nvnmd/utils/fio.py
similarity index 100%
rename from deepmd/nvnmd/utils/fio.py
rename to deepmd/tf/nvnmd/utils/fio.py
diff --git a/deepmd/nvnmd/utils/network.py b/deepmd/tf/nvnmd/utils/network.py
similarity index 98%
rename from deepmd/nvnmd/utils/network.py
rename to deepmd/tf/nvnmd/utils/network.py
index f0c357eabe..76c80ed4e7 100644
--- a/deepmd/nvnmd/utils/network.py
+++ b/deepmd/tf/nvnmd/utils/network.py
@@ -3,18 +3,18 @@
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     op_module,
     tf,
 )
-from deepmd.nvnmd.utils.config import (
+from deepmd.tf.nvnmd.utils.config import (
     nvnmd_cfg,
 )
-from deepmd.nvnmd.utils.weight import (
+from deepmd.tf.nvnmd.utils.weight import (
     get_constant_initializer,
 )
-from deepmd.utils.network import (
+from deepmd.tf.utils.network import (
     variable_summaries,
 )
 
diff --git a/deepmd/nvnmd/utils/op.py b/deepmd/tf/nvnmd/utils/op.py
similarity index 100%
rename from deepmd/nvnmd/utils/op.py
rename to deepmd/tf/nvnmd/utils/op.py
diff --git a/deepmd/nvnmd/utils/weight.py b/deepmd/tf/nvnmd/utils/weight.py
similarity index 98%
rename from deepmd/nvnmd/utils/weight.py
rename to deepmd/tf/nvnmd/utils/weight.py
index cc5ab15219..7a60712455 100644
--- a/deepmd/nvnmd/utils/weight.py
+++ b/deepmd/tf/nvnmd/utils/weight.py
@@ -1,10 +1,10 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import logging
 
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.nvnmd.utils.config import (
+from deepmd.tf.nvnmd.utils.config import (
     nvnmd_cfg,
 )
 
diff --git a/deepmd/op/__init__.py b/deepmd/tf/op/__init__.py
similarity index 96%
rename from deepmd/op/__init__.py
rename to deepmd/tf/op/__init__.py
index 9cdfec70cc..421ef0b123 100644
--- a/deepmd/op/__init__.py
+++ b/deepmd/tf/op/__init__.py
@@ -8,7 +8,7 @@
 )
 
 NOT_LOADABLE = ("__init__.py",)
-PACKAGE_BASE = "deepmd.op"
+PACKAGE_BASE = "deepmd.tf.op"
 
 log = logging.getLogger(__name__)
 
diff --git a/deepmd/op/_add_flt_nvnmd_grad.py b/deepmd/tf/op/_add_flt_nvnmd_grad.py
similarity index 90%
rename from deepmd/op/_add_flt_nvnmd_grad.py
rename to deepmd/tf/op/_add_flt_nvnmd_grad.py
index 105ec1ec6d..3bea39fcec 100644
--- a/deepmd/op/_add_flt_nvnmd_grad.py
+++ b/deepmd/tf/op/_add_flt_nvnmd_grad.py
@@ -5,7 +5,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_module,
 )
 
diff --git a/deepmd/op/_copy_flt_nvnmd_grad.py b/deepmd/tf/op/_copy_flt_nvnmd_grad.py
similarity index 91%
rename from deepmd/op/_copy_flt_nvnmd_grad.py
rename to deepmd/tf/op/_copy_flt_nvnmd_grad.py
index 09c4a72324..401acba22c 100644
--- a/deepmd/op/_copy_flt_nvnmd_grad.py
+++ b/deepmd/tf/op/_copy_flt_nvnmd_grad.py
@@ -5,7 +5,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_module,
 )
 
diff --git a/deepmd/op/_dotmul_flt_nvnmd_grad.py b/deepmd/tf/op/_dotmul_flt_nvnmd_grad.py
similarity index 95%
rename from deepmd/op/_dotmul_flt_nvnmd_grad.py
rename to deepmd/tf/op/_dotmul_flt_nvnmd_grad.py
index 0f786a6d38..8a4ffb2d0c 100644
--- a/deepmd/op/_dotmul_flt_nvnmd_grad.py
+++ b/deepmd/tf/op/_dotmul_flt_nvnmd_grad.py
@@ -5,7 +5,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_module,
     tf,
 )
diff --git a/deepmd/op/_flt_nvnmd_grad.py b/deepmd/tf/op/_flt_nvnmd_grad.py
similarity index 90%
rename from deepmd/op/_flt_nvnmd_grad.py
rename to deepmd/tf/op/_flt_nvnmd_grad.py
index 0dd67c2c57..b0fbaea11d 100644
--- a/deepmd/op/_flt_nvnmd_grad.py
+++ b/deepmd/tf/op/_flt_nvnmd_grad.py
@@ -5,7 +5,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_module,
 )
 
diff --git a/deepmd/op/_gelu.py b/deepmd/tf/op/_gelu.py
similarity index 97%
rename from deepmd/op/_gelu.py
rename to deepmd/tf/op/_gelu.py
index 6768ac10b3..04ae124f70 100644
--- a/deepmd/op/_gelu.py
+++ b/deepmd/tf/op/_gelu.py
@@ -1,12 +1,13 @@
 #!/usr/bin/env python3
 # SPDX-License-Identifier: LGPL-3.0-or-later
 """First-order derivatives and second-order derivatives for gelu function."""
+
 import tensorflow
 from tensorflow.python.framework import (
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_module,
 )
 
diff --git a/deepmd/op/_map_flt_nvnmd_grad.py b/deepmd/tf/op/_map_flt_nvnmd_grad.py
similarity index 97%
rename from deepmd/op/_map_flt_nvnmd_grad.py
rename to deepmd/tf/op/_map_flt_nvnmd_grad.py
index 3e5749e74c..46f258cafe 100644
--- a/deepmd/op/_map_flt_nvnmd_grad.py
+++ b/deepmd/tf/op/_map_flt_nvnmd_grad.py
@@ -5,7 +5,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_module,
     tf,
 )
diff --git a/deepmd/op/_matmul_fitnet_nvnmd_grad.py b/deepmd/tf/op/_matmul_fitnet_nvnmd_grad.py
similarity index 94%
rename from deepmd/op/_matmul_fitnet_nvnmd_grad.py
rename to deepmd/tf/op/_matmul_fitnet_nvnmd_grad.py
index bab3905c5a..f8d566bd39 100644
--- a/deepmd/op/_matmul_fitnet_nvnmd_grad.py
+++ b/deepmd/tf/op/_matmul_fitnet_nvnmd_grad.py
@@ -5,7 +5,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_module,
     tf,
 )
diff --git a/deepmd/op/_matmul_flt2fix_nvnmd.py b/deepmd/tf/op/_matmul_flt2fix_nvnmd.py
similarity index 97%
rename from deepmd/op/_matmul_flt2fix_nvnmd.py
rename to deepmd/tf/op/_matmul_flt2fix_nvnmd.py
index db9af761de..319fb90ec8 100644
--- a/deepmd/op/_matmul_flt2fix_nvnmd.py
+++ b/deepmd/tf/op/_matmul_flt2fix_nvnmd.py
@@ -5,7 +5,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_module,
     tf,
 )
diff --git a/deepmd/op/_matmul_flt_nvnmd_grad.py b/deepmd/tf/op/_matmul_flt_nvnmd_grad.py
similarity index 97%
rename from deepmd/op/_matmul_flt_nvnmd_grad.py
rename to deepmd/tf/op/_matmul_flt_nvnmd_grad.py
index 1e3ed74c91..6493794b00 100644
--- a/deepmd/op/_matmul_flt_nvnmd_grad.py
+++ b/deepmd/tf/op/_matmul_flt_nvnmd_grad.py
@@ -5,7 +5,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_module,
     tf,
 )
diff --git a/deepmd/op/_mul_flt_nvnmd_grad.py b/deepmd/tf/op/_mul_flt_nvnmd_grad.py
similarity index 96%
rename from deepmd/op/_mul_flt_nvnmd_grad.py
rename to deepmd/tf/op/_mul_flt_nvnmd_grad.py
index c50baf8c12..d05daa7dfa 100644
--- a/deepmd/op/_mul_flt_nvnmd_grad.py
+++ b/deepmd/tf/op/_mul_flt_nvnmd_grad.py
@@ -5,7 +5,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_module,
     tf,
 )
diff --git a/deepmd/op/_prod_force_grad.py b/deepmd/tf/op/_prod_force_grad.py
similarity index 95%
rename from deepmd/op/_prod_force_grad.py
rename to deepmd/tf/op/_prod_force_grad.py
index ffa34a8126..449901c137 100644
--- a/deepmd/op/_prod_force_grad.py
+++ b/deepmd/tf/op/_prod_force_grad.py
@@ -6,7 +6,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_grads_module,
 )
 
diff --git a/deepmd/op/_prod_force_se_a_grad.py b/deepmd/tf/op/_prod_force_se_a_grad.py
similarity index 95%
rename from deepmd/op/_prod_force_se_a_grad.py
rename to deepmd/tf/op/_prod_force_se_a_grad.py
index b58b819ee1..d732803bad 100644
--- a/deepmd/op/_prod_force_se_a_grad.py
+++ b/deepmd/tf/op/_prod_force_se_a_grad.py
@@ -6,7 +6,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_grads_module,
 )
 
diff --git a/deepmd/op/_prod_force_se_a_mask_grad.py b/deepmd/tf/op/_prod_force_se_a_mask_grad.py
similarity index 95%
rename from deepmd/op/_prod_force_se_a_mask_grad.py
rename to deepmd/tf/op/_prod_force_se_a_mask_grad.py
index d5ef829da2..a7f2d72b16 100644
--- a/deepmd/op/_prod_force_se_a_mask_grad.py
+++ b/deepmd/tf/op/_prod_force_se_a_mask_grad.py
@@ -6,7 +6,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_grads_module,
 )
 
diff --git a/deepmd/op/_prod_force_se_r_grad.py b/deepmd/tf/op/_prod_force_se_r_grad.py
similarity index 93%
rename from deepmd/op/_prod_force_se_r_grad.py
rename to deepmd/tf/op/_prod_force_se_r_grad.py
index 254e2e331a..4ec65b31f2 100644
--- a/deepmd/op/_prod_force_se_r_grad.py
+++ b/deepmd/tf/op/_prod_force_se_r_grad.py
@@ -6,7 +6,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_grads_module,
 )
 
diff --git a/deepmd/op/_prod_virial_grad.py b/deepmd/tf/op/_prod_virial_grad.py
similarity index 95%
rename from deepmd/op/_prod_virial_grad.py
rename to deepmd/tf/op/_prod_virial_grad.py
index 4a946f3ba8..7fe245ed6b 100644
--- a/deepmd/op/_prod_virial_grad.py
+++ b/deepmd/tf/op/_prod_virial_grad.py
@@ -6,7 +6,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_grads_module,
 )
 
diff --git a/deepmd/op/_prod_virial_se_a_grad.py b/deepmd/tf/op/_prod_virial_se_a_grad.py
similarity index 95%
rename from deepmd/op/_prod_virial_se_a_grad.py
rename to deepmd/tf/op/_prod_virial_se_a_grad.py
index 0e738f86b3..c95d3b58e2 100644
--- a/deepmd/op/_prod_virial_se_a_grad.py
+++ b/deepmd/tf/op/_prod_virial_se_a_grad.py
@@ -6,7 +6,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_grads_module,
 )
 
diff --git a/deepmd/op/_prod_virial_se_r_grad.py b/deepmd/tf/op/_prod_virial_se_r_grad.py
similarity index 94%
rename from deepmd/op/_prod_virial_se_r_grad.py
rename to deepmd/tf/op/_prod_virial_se_r_grad.py
index a943b35670..8f51310c8c 100644
--- a/deepmd/op/_prod_virial_se_r_grad.py
+++ b/deepmd/tf/op/_prod_virial_se_r_grad.py
@@ -6,7 +6,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_grads_module,
 )
 
diff --git a/deepmd/op/_quantize_nvnmd_grad.py b/deepmd/tf/op/_quantize_nvnmd_grad.py
similarity index 93%
rename from deepmd/op/_quantize_nvnmd_grad.py
rename to deepmd/tf/op/_quantize_nvnmd_grad.py
index 2ef282fa78..f1d99dc18d 100644
--- a/deepmd/op/_quantize_nvnmd_grad.py
+++ b/deepmd/tf/op/_quantize_nvnmd_grad.py
@@ -5,7 +5,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_module,
 )
 
diff --git a/deepmd/op/_soft_min_force_grad.py b/deepmd/tf/op/_soft_min_force_grad.py
similarity index 95%
rename from deepmd/op/_soft_min_force_grad.py
rename to deepmd/tf/op/_soft_min_force_grad.py
index ae9cf882c8..cd18f3e186 100644
--- a/deepmd/op/_soft_min_force_grad.py
+++ b/deepmd/tf/op/_soft_min_force_grad.py
@@ -6,7 +6,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_grads_module,
 )
 
diff --git a/deepmd/op/_soft_min_virial_grad.py b/deepmd/tf/op/_soft_min_virial_grad.py
similarity index 95%
rename from deepmd/op/_soft_min_virial_grad.py
rename to deepmd/tf/op/_soft_min_virial_grad.py
index 56b828b12c..4d4f4790dd 100644
--- a/deepmd/op/_soft_min_virial_grad.py
+++ b/deepmd/tf/op/_soft_min_virial_grad.py
@@ -6,7 +6,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_grads_module,
 )
 
diff --git a/deepmd/op/_tabulate_grad.py b/deepmd/tf/op/_tabulate_grad.py
similarity index 97%
rename from deepmd/op/_tabulate_grad.py
rename to deepmd/tf/op/_tabulate_grad.py
index 8ad8908d7e..667981ef9f 100644
--- a/deepmd/op/_tabulate_grad.py
+++ b/deepmd/tf/op/_tabulate_grad.py
@@ -6,11 +6,11 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_module,
 )
 
-# from deepmd.DescrptSeATabulate import last_layer_size
+# from deepmd.tf.DescrptSeATabulate import last_layer_size
 
 
 @ops.RegisterGradient("TabulateFusion")
diff --git a/deepmd/op/_tanh4_flt_nvnmd_grad.py b/deepmd/tf/op/_tanh4_flt_nvnmd_grad.py
similarity index 97%
rename from deepmd/op/_tanh4_flt_nvnmd_grad.py
rename to deepmd/tf/op/_tanh4_flt_nvnmd_grad.py
index 45d7366545..04d1724d0b 100644
--- a/deepmd/op/_tanh4_flt_nvnmd_grad.py
+++ b/deepmd/tf/op/_tanh4_flt_nvnmd_grad.py
@@ -5,7 +5,7 @@
     ops,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
 
diff --git a/deepmd/tf/train/__init__.py b/deepmd/tf/train/__init__.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/deepmd/tf/train/__init__.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/deepmd/train/run_options.py b/deepmd/tf/train/run_options.py
similarity index 69%
rename from deepmd/train/run_options.py
rename to deepmd/tf/train/run_options.py
index 451632949e..b835d63852 100644
--- a/deepmd/train/run_options.py
+++ b/deepmd/tf/train/run_options.py
@@ -16,63 +16,63 @@
     Version,
 )
 
-from deepmd.cluster import (
+from deepmd.tf.cluster import (
     get_resource,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_CONFIG,
     TF_VERSION,
-    get_tf_default_nthreads,
-    global_float_prec,
     tf,
 )
-from deepmd.loggers import (
+from deepmd.tf.loggers import (
     set_log_handles,
 )
+from deepmd.utils.summary import SummaryPrinter as BaseSummaryPrinter
 
 if TYPE_CHECKING:
     import horovod.tensorflow as HVD
 
 
 __all__ = [
-    "WELCOME",
-    "CITATION",
-    "BUILD",
     "RunOptions",
 ]
 
 log = logging.getLogger(__name__)
 
 
-# http://patorjk.com/software/taag. Font:Big"
-WELCOME = (
-    r" _____               _____   __  __  _____           _     _  _   ",
-    r"|  __ \             |  __ \ |  \/  ||  __ \         | |   (_)| |  ",
-    r"| |  | |  ___   ___ | |__) || \  / || |  | | ______ | | __ _ | |_ ",
-    r"| |  | | / _ \ / _ \|  ___/ | |\/| || |  | ||______|| |/ /| || __|",
-    r"| |__| ||  __/|  __/| |     | |  | || |__| |        |   < | || |_ ",
-    r"|_____/  \___| \___||_|     |_|  |_||_____/         |_|\_\|_| \__|",
-)
+class SummaryPrinter(BaseSummaryPrinter):
+    """Summary printer for TensorFlow."""
 
-CITATION = (
-    "Please read and cite:",
-    "Wang, Zhang, Han and E, Comput.Phys.Comm. 228, 178-184 (2018)",
-    "Zeng et al, J. Chem. Phys., 159, 054801 (2023)",
-    "See https://deepmd.rtfd.io/credits/ for details.",
-)
+    def __init__(self, compute_device: str, ngpus: int) -> None:
+        super().__init__()
+        self.compute_device = compute_device
+        self.ngpus = ngpus
 
-_sep = "\n                      "
-BUILD = (
-    f"installed to:         {GLOBAL_CONFIG['install_prefix']}",
-    f"source :              {GLOBAL_CONFIG['git_summ']}",
-    f"source brach:         {GLOBAL_CONFIG['git_branch']}",
-    f"source commit:        {GLOBAL_CONFIG['git_hash']}",
-    f"source commit at:     {GLOBAL_CONFIG['git_date']}",
-    f"build float prec:     {global_float_prec}",
-    f"build variant:        {GLOBAL_CONFIG['dp_variant']}",
-    f"build with tf inc:    {GLOBAL_CONFIG['tf_include_dir']}",
-    f"build with tf lib:    {GLOBAL_CONFIG['tf_libs'].replace(';', _sep)}",
-)
+    def is_built_with_cuda(self) -> bool:
+        """Check if the backend is built with CUDA."""
+        return tf.test.is_built_with_cuda()
+
+    def is_built_with_rocm(self) -> bool:
+        """Check if the backend is built with ROCm."""
+        return tf.test.is_built_with_rocm()
+
+    def get_compute_device(self) -> str:
+        """Get Compute device."""
+        return self.compute_device
+
+    def get_ngpus(self) -> int:
+        """Get the number of GPUs."""
+        return self.ngpus
+
+    def get_backend_info(self) -> dict:
+        """Get backend information."""
+        return {
+            "Backend": "TensorFlow",
+            "TF ver": tf.version.GIT_VERSION,
+            "build with TF ver": TF_VERSION,
+            "build with TF inc": GLOBAL_CONFIG["tf_include_dir"].replace(";", "\n"),
+            "build with TF lib": GLOBAL_CONFIG["tf_libs"].replace(";", "\n"),
+        }
 
 
 class RunOptions:
@@ -148,25 +148,7 @@ def is_chief(self):
 
     def print_resource_summary(self):
         """Print build and current running cluster configuration summary."""
-        log.info("---Summary of the training---------------------------------------")
-        if self.is_distrib:
-            log.info("distributed")
-            log.info(f"world size:           {self.world_size}")
-            log.info(f"my rank:              {self.my_rank}")
-            log.info(f"node list:            {self.nodelist}")
-        log.info(f"running on:           {self.nodename}")
-        log.info(f"computing device:     {self.my_device}")
-        if tf.test.is_built_with_cuda():
-            env_value = os.environ.get("CUDA_VISIBLE_DEVICES", "unset")
-            log.info(f"CUDA_VISIBLE_DEVICES: {env_value}")
-        if hasattr(tf.test, "is_built_with_rocm") and tf.test.is_built_with_rocm():
-            env_value = os.environ.get("HIP_VISIBLE_DEVICES", "unset")
-            log.info(f"HIP_VISIBLE_DEVICES:  {env_value}")
-        log.info(f"Count of visible GPU: {len(self.gpus or [])}")
-        intra, inter = get_tf_default_nthreads()
-        log.info(f"num_intra_threads:    {intra:d}")
-        log.info(f"num_inter_threads:    {inter:d}")
-        log.info("-----------------------------------------------------------------")
+        SummaryPrinter(self.my_device, len(self.gpus or []))()
 
     def _setup_logger(
         self,
diff --git a/deepmd/train/trainer.py b/deepmd/tf/train/trainer.py
similarity index 93%
rename from deepmd/train/trainer.py
rename to deepmd/tf/train/trainer.py
index 3b81740a93..931cf87246 100644
--- a/deepmd/train/trainer.py
+++ b/deepmd/tf/train/trainer.py
@@ -1,9 +1,7 @@
 #!/usr/bin/env python3
 # SPDX-License-Identifier: LGPL-3.0-or-later
-import glob
 import logging
 import os
-import platform
 import shutil
 import time
 from typing import (
@@ -21,13 +19,20 @@
 )
 
 # load grad of force module
-import deepmd.op  # noqa: F401
+import deepmd.tf.op  # noqa: F401
 from deepmd.common import (
+    symlink_prefix_files,
+)
+from deepmd.loggers.training import (
+    format_training_message,
+    format_training_message_per_task,
+)
+from deepmd.tf.common import (
     data_requirement,
     get_precision,
     j_must_have,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_ENER_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     TF_VERSION,
@@ -35,38 +40,38 @@
     tf,
     tfv2,
 )
-from deepmd.fit.ener import (
+from deepmd.tf.fit.ener import (
     EnerFitting,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     MultiModel,
 )
-from deepmd.model.model import (
+from deepmd.tf.model.model import (
     Model,
 )
-from deepmd.utils import random as dp_random
-from deepmd.utils.data_system import (
+from deepmd.tf.utils import random as dp_random
+from deepmd.tf.utils.data_system import (
     DeepmdDataSystem,
 )
-from deepmd.utils.errors import (
+from deepmd.tf.utils.errors import (
     GraphTooLargeError,
     GraphWithoutTensorError,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_tensor_by_name_from_graph,
     load_graph_def,
 )
-from deepmd.utils.learning_rate import (
+from deepmd.tf.utils.learning_rate import (
     LearningRateExp,
 )
-from deepmd.utils.sess import (
+from deepmd.tf.utils.sess import (
     run_sess,
 )
 
 log = logging.getLogger(__name__)
 
 # nvnmd
-from deepmd.nvnmd.utils.config import (
+from deepmd.tf.nvnmd.utils.config import (
     nvnmd_cfg,
 )
 
@@ -159,6 +164,7 @@ def get_lr_and_coef(lr_param):
         self.disp_freq = tr_data.get("disp_freq", 1000)
         self.save_freq = tr_data.get("save_freq", 1000)
         self.save_ckpt = tr_data.get("save_ckpt", "model.ckpt")
+        self.max_ckpt_keep = tr_data.get("max_ckpt_keep", 5)
         self.display_in_training = tr_data.get("disp_training", True)
         self.timing_in_training = tr_data.get("time_training", True)
         self.profiling = self.run_opt.is_chief and tr_data.get("profiling", False)
@@ -230,9 +236,7 @@ def build(self, data=None, stop_batch=0, origin_type_map=None, suffix=""):
                 if data[fitting_key].mixed_type:
                     assert isinstance(
                         self.fitting[fitting_key], EnerFitting
-                    ), "Data for fitting net {} in mixed_type format must use ener fitting!".format(
-                        fitting_key
-                    )
+                    ), f"Data for fitting net {fitting_key} in mixed_type format must use ener fitting!"
                 if self.numb_fparam_dict[fitting_key] > 0:
                     log.info(
                         "fitting net %s training with %d frame parameter(s)"
@@ -292,8 +296,6 @@ def build(self, data=None, stop_batch=0, origin_type_map=None, suffix=""):
                 )
 
             # neighbor_stat is moved to train.py as duplicated
-            # TODO: this is a simple fix but we should have a clear
-            #       architecture to call neighbor stat
         else:
             self.model.enable_compression()
 
@@ -493,7 +495,9 @@ def _init_session(self):
         # Initializes or restore global variables
         init_op = tf.global_variables_initializer()
         if self.run_opt.is_chief:
-            self.saver = tf.train.Saver(save_relative_paths=True)
+            self.saver = tf.train.Saver(
+                save_relative_paths=True, max_to_keep=self.max_ckpt_keep
+            )
             if self.run_opt.init_mode == "init_from_scratch":
                 log.info("initialize model from scratch")
                 run_sess(self.sess, init_op)
@@ -773,8 +777,10 @@ def train(self, train_data=None, valid_data=None):
                     test_time = toc - tic
                     wall_time = toc - wall_time_tic
                     log.info(
-                        "batch %7d training time %.2f s, testing time %.2f s, total wall time %.2f s"
-                        % (cur_batch, train_time, test_time, wall_time)
+                        format_training_message(
+                            batch=cur_batch,
+                            wall_time=wall_time,
+                        )
                     )
                     # the first training time is not accurate
                     if cur_batch > self.disp_freq or stop_batch < 2 * self.disp_freq:
@@ -830,19 +836,7 @@ def save_checkpoint(self, cur_batch: int):
             ) from e
         # make symlinks from prefix with step to that without step to break nothing
         # get all checkpoint files
-        original_files = glob.glob(ckpt_prefix + ".*")
-        for ori_ff in original_files:
-            new_ff = self.save_ckpt + ori_ff[len(ckpt_prefix) :]
-            try:
-                # remove old one
-                os.remove(new_ff)
-            except OSError:
-                pass
-            if platform.system() != "Windows":
-                # by default one does not have access to create symlink on Windows
-                os.symlink(os.path.relpath(ori_ff, os.path.dirname(new_ff)), new_ff)
-            else:
-                shutil.copyfile(ori_ff, new_ff)
+        symlink_prefix_files(ckpt_prefix, self.save_ckpt)
         log.info("saved checkpoint %s" % self.save_ckpt)
 
     def get_feed_dict(self, batch, is_training):
@@ -970,6 +964,23 @@ def print_on_training(
                 for k in train_results.keys():
                     print_str += prop_fmt % (train_results[k])
             print_str += "   %8.1e\n" % cur_lr
+            log.info(
+                format_training_message_per_task(
+                    batch=cur_batch,
+                    task_name="trn",
+                    rmse=train_results,
+                    learning_rate=cur_lr,
+                )
+            )
+            if valid_results is not None:
+                log.info(
+                    format_training_message_per_task(
+                        batch=cur_batch,
+                        task_name="val",
+                        rmse=valid_results,
+                        learning_rate=None,
+                    )
+                )
         else:
             for fitting_key in train_results:
                 if valid_results[fitting_key] is not None:
@@ -985,6 +996,23 @@ def print_on_training(
                     for k in train_results[fitting_key].keys():
                         print_str += prop_fmt % (train_results[fitting_key][k])
                 print_str += "   %8.1e\n" % cur_lr_dict[fitting_key]
+                log.info(
+                    format_training_message_per_task(
+                        batch=cur_batch,
+                        task_name=f"{fitting_key}_trn",
+                        rmse=train_results[fitting_key],
+                        learning_rate=cur_lr_dict[fitting_key],
+                    )
+                )
+                if valid_results is not None:
+                    log.info(
+                        format_training_message_per_task(
+                            batch=cur_batch,
+                            task_name=f"{fitting_key}_val",
+                            rmse=valid_results[fitting_key],
+                            learning_rate=None,
+                        )
+                    )
         fp.write(print_str)
         fp.flush()
 
@@ -1054,10 +1082,7 @@ def _init_from_frz_model(self):
         except FileNotFoundError as e:
             # throw runtime error if there's no frozen model
             raise RuntimeError(
-                "The input frozen model {} ({}) does not exist! Please check the path of the frozen model. ".format(
-                    self.run_opt.init_frz_model,
-                    os.path.abspath(self.run_opt.init_frz_model),
-                )
+                f"The input frozen model {self.run_opt.init_frz_model} ({os.path.abspath(self.run_opt.init_frz_model)}) does not exist! Please check the path of the frozen model. "
             ) from e
         # get the model type from the frozen model(self.run_opt.init_frz_model)
         try:
@@ -1089,7 +1114,7 @@ def _init_from_ckpt(self, ckpt_meta: str):
             self.ckpt_meta = ckpt_meta
 
     def _init_from_pretrained_model(
-        self, data, origin_type_map=None, bias_shift="delta"
+        self, data, origin_type_map=None, bias_adjust_mode="change-by-statistic"
     ):
         """Init the embedding net variables with the given frozen model.
 
@@ -1099,21 +1124,19 @@ def _init_from_pretrained_model(
             The training data.
         origin_type_map : list
             The original type_map in dataset, they are targets to change the energy bias.
-        bias_shift : str
-            The mode for changing energy bias : ['delta', 'statistic']
-            'delta' : perform predictions on energies of target dataset,
+        bias_adjust_mode : str
+            The mode for changing energy bias : ['change-by-statistic', 'set-by-statistic']
+            'change-by-statistic' : perform predictions on energies of target dataset,
                     and do least sqaure on the errors to obtain the target shift as bias.
-            'statistic' : directly use the statistic energy bias in the target dataset.
+            'set-by-statistic' : directly use the statistic energy bias in the target dataset.
         """
         try:
             graph, graph_def = load_graph_def(self.run_opt.finetune)
         except FileNotFoundError as e:
             # throw runtime error if there's no frozen model
             raise RuntimeError(
-                "The input frozen pretrained model {} ({}) does not exist! "
-                "Please check the path of the frozen pretrained model. ".format(
-                    self.run_opt.finetune, os.path.abspath(self.run_opt.finetune)
-                )
+                f"The input frozen pretrained model {self.run_opt.finetune} ({os.path.abspath(self.run_opt.finetune)}) does not exist! "
+                "Please check the path of the frozen pretrained model. "
             ) from e
         # get the model type from the frozen model(self.run_opt.finetune)
         try:
@@ -1132,15 +1155,19 @@ def _init_from_pretrained_model(
         ), "Compressed models are not supported for finetuning!"
         self.model.init_variables(graph, graph_def, model_type=self.model_type)
         log.info(
-            "Changing energy bias in pretrained model for types {}... "
-            "(this step may take long time)".format(str(origin_type_map))
+            f"Changing energy bias in pretrained model for types {origin_type_map!s}... "
+            "(this step may take long time)"
         )
         self._change_energy_bias(
-            data, self.run_opt.finetune, origin_type_map, bias_shift
+            data, self.run_opt.finetune, origin_type_map, bias_adjust_mode
         )
 
     def _change_energy_bias(
-        self, data, frozen_model, origin_type_map, bias_shift="delta"
+        self,
+        data,
+        frozen_model,
+        origin_type_map,
+        bias_adjust_mode="change-by-statistic",
     ):
         full_type_map = data.get_type_map()
         self.model.change_energy_bias(
@@ -1148,7 +1175,7 @@ def _change_energy_bias(
             frozen_model,
             origin_type_map,
             full_type_map,
-            bias_shift=bias_shift,
+            bias_adjust_mode=bias_adjust_mode,
         )
 
 
diff --git a/deepmd/tf/utils/__init__.py b/deepmd/tf/utils/__init__.py
new file mode 100644
index 0000000000..7d1e7e67d0
--- /dev/null
+++ b/deepmd/tf/utils/__init__.py
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+#
+from .data import (
+    DeepmdData,
+)
+from .data_system import (
+    DeepmdDataSystem,
+)
+from .learning_rate import (
+    LearningRateExp,
+)
+from .pair_tab import (
+    PairTab,
+)
+from .plugin import (
+    Plugin,
+    PluginVariant,
+)
+
+__all__ = [
+    "DeepmdData",
+    "DeepmdDataSystem",
+    "LearningRateExp",
+    "PairTab",
+    "Plugin",
+    "PluginVariant",
+]
diff --git a/deepmd/tf/utils/argcheck.py b/deepmd/tf/utils/argcheck.py
new file mode 100644
index 0000000000..caec33c319
--- /dev/null
+++ b/deepmd/tf/utils/argcheck.py
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Alias for backward compatibility."""
+
+from deepmd.utils.argcheck import (
+    gen_args,
+    gen_doc,
+    gen_json,
+    list_to_doc,
+    normalize,
+    type_embedding_args,
+)
+
+__all__ = [
+    "list_to_doc",
+    "normalize",
+    "gen_doc",
+    "gen_json",
+    "gen_args",
+    "type_embedding_args",
+]
diff --git a/deepmd/tf/utils/batch_size.py b/deepmd/tf/utils/batch_size.py
new file mode 100644
index 0000000000..33f1ec0da0
--- /dev/null
+++ b/deepmd/tf/utils/batch_size.py
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from packaging.version import (
+    Version,
+)
+
+from deepmd.tf.env import (
+    TF_VERSION,
+    tf,
+)
+from deepmd.tf.utils.errors import (
+    OutOfMemoryError,
+)
+from deepmd.utils.batch_size import AutoBatchSize as AutoBatchSizeBase
+
+
+class AutoBatchSize(AutoBatchSizeBase):
+    def is_gpu_available(self) -> bool:
+        """Check if GPU is available.
+
+        Returns
+        -------
+        bool
+            True if GPU is available
+        """
+        return (
+            Version(TF_VERSION) >= Version("1.14")
+            and tf.config.experimental.get_visible_devices("GPU")
+        ) or tf.test.is_gpu_available()
+
+    def is_oom_error(self, e: Exception) -> bool:
+        """Check if the exception is an OOM error.
+
+        Parameters
+        ----------
+        e : Exception
+            Exception
+        """
+        return isinstance(e, (tf.errors.ResourceExhaustedError, OutOfMemoryError))
diff --git a/deepmd/tf/utils/compat.py b/deepmd/tf/utils/compat.py
new file mode 100644
index 0000000000..e80a366b83
--- /dev/null
+++ b/deepmd/tf/utils/compat.py
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Alias for backward compatibility."""
+
+from deepmd.utils.compat import (
+    convert_input_v0_v1,
+    convert_input_v1_v2,
+    deprecate_numb_test,
+    update_deepmd_input,
+)
+
+__all__ = [
+    "convert_input_v0_v1",
+    "convert_input_v1_v2",
+    "deprecate_numb_test",
+    "update_deepmd_input",
+]
diff --git a/deepmd/utils/compress.py b/deepmd/tf/utils/compress.py
similarity index 98%
rename from deepmd/utils/compress.py
rename to deepmd/tf/utils/compress.py
index 7a79dec520..0bce633573 100644
--- a/deepmd/utils/compress.py
+++ b/deepmd/tf/utils/compress.py
@@ -3,10 +3,10 @@
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_pattern_nodes_from_graph_def,
     get_tensor_by_name_from_graph,
 )
diff --git a/deepmd/utils/convert.py b/deepmd/tf/utils/convert.py
similarity index 99%
rename from deepmd/utils/convert.py
rename to deepmd/tf/utils/convert.py
index 13e07f0885..625f54a9a0 100644
--- a/deepmd/utils/convert.py
+++ b/deepmd/tf/utils/convert.py
@@ -14,10 +14,10 @@
 )
 from packaging.version import parse as parse_version
 
-from deepmd import (
+from deepmd.tf import (
     __version__,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
 
diff --git a/deepmd/tf/utils/data.py b/deepmd/tf/utils/data.py
new file mode 100644
index 0000000000..54130c18f4
--- /dev/null
+++ b/deepmd/tf/utils/data.py
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Alias for backward compatibility."""
+
+from deepmd.utils.data import (
+    DeepmdData,
+)
+
+__all__ = [
+    "DeepmdData",
+]
diff --git a/deepmd/tf/utils/data_system.py b/deepmd/tf/utils/data_system.py
new file mode 100644
index 0000000000..da0cce28e8
--- /dev/null
+++ b/deepmd/tf/utils/data_system.py
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Alias for backward compatibility."""
+
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+    prob_sys_size_ext,
+    process_sys_probs,
+)
+
+__all__ = [
+    "DeepmdDataSystem",
+    "process_sys_probs",
+    "prob_sys_size_ext",
+]
diff --git a/deepmd/tf/utils/errors.py b/deepmd/tf/utils/errors.py
new file mode 100644
index 0000000000..5f7291c7ce
--- /dev/null
+++ b/deepmd/tf/utils/errors.py
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.utils.errors import (
+    OutOfMemoryError,
+)
+
+
+class GraphTooLargeError(Exception):
+    """The graph is too large, exceeding protobuf's hard limit of 2GB."""
+
+
+class GraphWithoutTensorError(Exception):
+    pass
+
+
+__all__ = [
+    "OutOfMemoryError",
+    "GraphTooLargeError",
+    "GraphWithoutTensorError",
+]
diff --git a/deepmd/tf/utils/finetune.py b/deepmd/tf/utils/finetune.py
new file mode 100644
index 0000000000..3d11130ba7
--- /dev/null
+++ b/deepmd/tf/utils/finetune.py
@@ -0,0 +1,109 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import logging
+from typing import (
+    Any,
+    Dict,
+)
+
+from deepmd.tf.utils.errors import (
+    GraphWithoutTensorError,
+)
+from deepmd.tf.utils.graph import (
+    get_tensor_by_name,
+)
+
+log = logging.getLogger(__name__)
+
+
+def replace_model_params_with_pretrained_model(
+    jdata: Dict[str, Any], pretrained_model: str
+):
+    """Replace the model params in input script according to pretrained model.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        input script
+    pretrained_model : str
+        filename of the pretrained model
+    """
+    # Get the input script from the pretrained model
+    try:
+        t_jdata = get_tensor_by_name(pretrained_model, "train_attr/training_script")
+    except GraphWithoutTensorError as e:
+        raise RuntimeError(
+            "The input frozen pretrained model: %s has no training script, "
+            "which is not supported to perform finetuning. "
+            "Please use the model pretrained with v2.1.5 or higher version of DeePMD-kit."
+            % input
+        ) from e
+    pretrained_jdata = json.loads(t_jdata)
+
+    # Check the model type
+    assert (
+        pretrained_jdata["model"]["descriptor"]["type"]
+        in [
+            "se_atten",
+            "se_atten_v2",
+        ]
+        and pretrained_jdata["model"]["fitting_net"]["type"] in ["ener"]
+    ), "The finetune process only supports models pretrained with 'se_atten' or 'se_atten_v2' descriptor and 'ener' fitting_net!"
+
+    # Check the type map
+    pretrained_type_map = pretrained_jdata["model"]["type_map"]
+    cur_type_map = jdata["model"].get("type_map", [])
+    out_line_type = []
+    for i in cur_type_map:
+        if i not in pretrained_type_map:
+            out_line_type.append(i)
+    assert not out_line_type, (
+        f"{out_line_type!s} type(s) not contained in the pretrained model! "
+        "Please choose another suitable one."
+    )
+    if cur_type_map != pretrained_type_map:
+        log.info(
+            f"Change the type_map from {cur_type_map!s} to {pretrained_type_map!s}."
+        )
+        jdata["model"]["type_map"] = pretrained_type_map
+
+    # Change model configurations
+    log.info("Change the model configurations according to the pretrained one...")
+    for config_key in ["type_embedding", "descriptor", "fitting_net"]:
+        if (
+            config_key not in jdata["model"].keys()
+            and config_key in pretrained_jdata["model"].keys()
+        ):
+            log.info(
+                "Add the '{}' from pretrained model: {}.".format(
+                    config_key, str(pretrained_jdata["model"][config_key])
+                )
+            )
+            jdata["model"][config_key] = pretrained_jdata["model"][config_key]
+        elif (
+            config_key == "type_embedding"
+            and config_key in jdata["model"].keys()
+            and config_key not in pretrained_jdata["model"].keys()
+        ):
+            # 'type_embedding' can be omitted using 'se_atten' descriptor, and the activation_function will be None.
+            cur_para = jdata["model"].pop(config_key)
+            if "trainable" in cur_para and not cur_para["trainable"]:
+                jdata["model"][config_key] = {
+                    "trainable": False,
+                    "activation_function": "None",
+                }
+                log.info("The type_embeddings from pretrained model will be frozen.")
+        elif (
+            config_key in jdata["model"].keys()
+            and config_key in pretrained_jdata["model"].keys()
+            and jdata["model"][config_key] != pretrained_jdata["model"][config_key]
+        ):
+            target_para = pretrained_jdata["model"][config_key]
+            cur_para = jdata["model"][config_key]
+            # TODO: keep some params that are irrelevant to model structures (need to discuss)
+            if "trainable" in cur_para.keys():
+                target_para["trainable"] = cur_para["trainable"]
+            log.info(f"Change the '{config_key}' from {cur_para!s} to {target_para!s}.")
+            jdata["model"][config_key] = target_para
+
+    return jdata, cur_type_map
diff --git a/deepmd/utils/graph.py b/deepmd/tf/utils/graph.py
similarity index 73%
rename from deepmd/utils/graph.py
rename to deepmd/tf/utils/graph.py
index ad4ee0224a..a6e2ab7422 100644
--- a/deepmd/utils/graph.py
+++ b/deepmd/tf/utils/graph.py
@@ -7,22 +7,21 @@
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     ATTENTION_LAYER_PATTERN,
     EMBEDDING_NET_PATTERN,
     FITTING_NET_PATTERN,
     TYPE_EMBEDDING_PATTERN,
     tf,
 )
-from deepmd.utils.errors import (
+from deepmd.tf.utils.errors import (
     GraphWithoutTensorError,
 )
-from deepmd.utils.sess import (
+from deepmd.tf.utils.sess import (
     run_sess,
 )
 
 
-# TODO (JZ): I think in this file we can merge some duplicated lines into one method...
 def load_graph_def(model_file: str) -> Tuple[tf.Graph, tf.GraphDef]:
     """Load graph as well as the graph_def from the frozen model(model_file).
 
@@ -99,30 +98,6 @@ def get_tensor_by_name(model_file: str, tensor_name: str) -> tf.Tensor:
     return get_tensor_by_name_from_graph(graph, tensor_name)
 
 
-def get_tensor_by_type(node, data_type: np.dtype) -> tf.Tensor:
-    """Get the tensor value within the given node according to the input data_type.
-
-    Parameters
-    ----------
-    node
-        The given tensorflow graph node
-    data_type
-        The data type of the node
-
-    Returns
-    -------
-    tf.Tensor
-        The tensor value of the given node
-    """
-    if data_type == np.float64:
-        tensor = np.array(node.double_val)
-    elif data_type == np.float32:
-        tensor = np.array(node.float_val)
-    else:
-        raise RuntimeError("model compression does not support the half precision")
-    return tensor
-
-
 def get_pattern_nodes_from_graph_def(graph_def: tf.GraphDef, pattern: str) -> Dict:
     """Get the pattern nodes with the given tf.GraphDef object.
 
@@ -166,9 +141,9 @@ def get_embedding_net_nodes_from_graph_def(
     # embedding_net_pattern = f"filter_type_\d+{suffix}/matrix_\d+_\d+|filter_type_\d+{suffix}/bias_\d+_\d+|filter_type_\d+{suffix}/idt_\d+_\d+|filter_type_all{suffix}/matrix_\d+_\d+|filter_type_all{suffix}/matrix_\d+_\d+_\d+|filter_type_all{suffix}/bias_\d+_\d+|filter_type_all{suffix}/bias_\d+_\d+_\d+|filter_type_all{suffix}/idt_\d+_\d+"
     if suffix != "":
         embedding_net_pattern = (
-            EMBEDDING_NET_PATTERN.replace("/idt", suffix + "/idt")
-            .replace("/bias", suffix + "/bias")
-            .replace("/matrix", suffix + "/matrix")
+            EMBEDDING_NET_PATTERN.replace("/(idt)", suffix + "/(idt)")
+            .replace("/(bias)", suffix + "/(bias)")
+            .replace("/(matrix)", suffix + "/(matrix)")
         )
     else:
         embedding_net_pattern = EMBEDDING_NET_PATTERN
@@ -176,10 +151,6 @@ def get_embedding_net_nodes_from_graph_def(
     embedding_net_nodes = get_pattern_nodes_from_graph_def(
         graph_def, embedding_net_pattern
     )
-    for key in embedding_net_nodes.keys():
-        assert (
-            key.find("bias") > 0 or key.find("matrix") > 0
-        ), "currently, only support weight matrix and bias matrix at the tabulation op!"
     return embedding_net_nodes
 
 
@@ -219,22 +190,10 @@ def get_embedding_net_variables_from_graph_def(
     Dict
         The embedding net variables within the given tf.GraphDef object
     """
-    embedding_net_variables = {}
     embedding_net_nodes = get_embedding_net_nodes_from_graph_def(
         graph_def, suffix=suffix
     )
-    for item in embedding_net_nodes:
-        node = embedding_net_nodes[item]
-        dtype = tf.as_dtype(node.dtype).as_numpy_dtype
-        tensor_shape = tf.TensorShape(node.tensor_shape).as_list()
-        if (len(tensor_shape) != 1) or (tensor_shape[0] != 1):
-            tensor_value = np.frombuffer(
-                node.tensor_content, dtype=tf.as_dtype(node.dtype).as_numpy_dtype
-            )
-        else:
-            tensor_value = get_tensor_by_type(node, dtype)
-        embedding_net_variables[item] = np.reshape(tensor_value, tensor_shape)
-    return embedding_net_variables
+    return convert_tensor_to_ndarray_in_dict(embedding_net_nodes)
 
 
 def get_extra_embedding_net_suffix(type_one_side: bool):
@@ -273,16 +232,7 @@ def get_variables_from_graph_def_as_numpy_array(graph_def: tf.GraphDef, pattern:
         The numpy array of the variable
     """
     node = get_pattern_nodes_from_graph_def(graph_def, pattern)[pattern]
-    dtype = tf.as_dtype(node.dtype).as_numpy_dtype
-    tensor_shape = tf.TensorShape(node.tensor_shape).as_list()
-    if (len(tensor_shape) != 1) or (tensor_shape[0] != 1):
-        tensor_value = np.frombuffer(
-            node.tensor_content,
-            dtype=tf.as_dtype(node.dtype).as_numpy_dtype,
-        )
-    else:
-        tensor_value = get_tensor_by_type(node, dtype)
-    return np.reshape(tensor_value, tensor_shape)
+    return tf.make_ndarray(node)
 
 
 def get_extra_embedding_net_variables_from_graph_def(
@@ -312,13 +262,13 @@ def get_extra_embedding_net_variables_from_graph_def(
     extra_embedding_net_variables = {}
     for i in range(1, layer_size + 1):
         matrix_pattern = f"filter_type_all{suffix}/matrix_{i}{extra_suffix}"
-        extra_embedding_net_variables[
-            matrix_pattern
-        ] = get_variables_from_graph_def_as_numpy_array(graph_def, matrix_pattern)
+        extra_embedding_net_variables[matrix_pattern] = (
+            get_variables_from_graph_def_as_numpy_array(graph_def, matrix_pattern)
+        )
         bias_pattern = f"filter_type_all{suffix}/bias_{i}{extra_suffix}"
-        extra_embedding_net_variables[
-            bias_pattern
-        ] = get_variables_from_graph_def_as_numpy_array(graph_def, bias_pattern)
+        extra_embedding_net_variables[bias_pattern] = (
+            get_variables_from_graph_def_as_numpy_array(graph_def, bias_pattern)
+        )
     return extra_embedding_net_variables
 
 
@@ -360,9 +310,9 @@ def get_fitting_net_nodes_from_graph_def(
     """
     if suffix != "":
         fitting_net_pattern = (
-            FITTING_NET_PATTERN.replace("/idt", suffix + "/idt")
-            .replace("/bias", suffix + "/bias")
-            .replace("/matrix", suffix + "/matrix")
+            FITTING_NET_PATTERN.replace("/(idt)", suffix + "/(idt)")
+            .replace("/(bias)", suffix + "/(bias)")
+            .replace("/(matrix)", suffix + "/(matrix)")
         )
     else:
         fitting_net_pattern = FITTING_NET_PATTERN
@@ -408,20 +358,8 @@ def get_fitting_net_variables_from_graph_def(
     Dict
         The fitting net variables within the given tf.GraphDef object
     """
-    fitting_net_variables = {}
     fitting_net_nodes = get_fitting_net_nodes_from_graph_def(graph_def, suffix=suffix)
-    for item in fitting_net_nodes:
-        node = fitting_net_nodes[item]
-        dtype = tf.as_dtype(node.dtype).as_numpy_dtype
-        tensor_shape = tf.TensorShape(node.tensor_shape).as_list()
-        if (len(tensor_shape) != 1) or (tensor_shape[0] != 1):
-            tensor_value = np.frombuffer(
-                node.tensor_content, dtype=tf.as_dtype(node.dtype).as_numpy_dtype
-            )
-        else:
-            tensor_value = get_tensor_by_type(node, dtype)
-        fitting_net_variables[item] = np.reshape(tensor_value, tensor_shape)
-    return fitting_net_variables
+    return convert_tensor_to_ndarray_in_dict(fitting_net_nodes)
 
 
 def get_fitting_net_variables(model_file: str, suffix: str = "") -> Dict:
@@ -462,9 +400,9 @@ def get_type_embedding_net_nodes_from_graph_def(
     """
     if suffix != "":
         type_embedding_net_pattern = (
-            TYPE_EMBEDDING_PATTERN.replace("/idt", suffix + "/idt")
-            .replace("/bias", suffix + "/bias")
-            .replace("/matrix", suffix + "/matrix")
+            TYPE_EMBEDDING_PATTERN.replace("/(idt)", suffix + "/(idt)")
+            .replace("/(bias)", suffix + "/(bias)")
+            .replace("/(matrix)", suffix + "/(matrix)")
         )
     else:
         type_embedding_net_pattern = TYPE_EMBEDDING_PATTERN
@@ -492,22 +430,10 @@ def get_type_embedding_net_variables_from_graph_def(
     Dict
         The embedding net variables within the given tf.GraphDef object
     """
-    type_embedding_net_variables = {}
     type_embedding_net_nodes = get_type_embedding_net_nodes_from_graph_def(
         graph_def, suffix=suffix
     )
-    for item in type_embedding_net_nodes:
-        node = type_embedding_net_nodes[item]
-        dtype = tf.as_dtype(node.dtype).as_numpy_dtype
-        tensor_shape = tf.TensorShape(node.tensor_shape).as_list()
-        if (len(tensor_shape) != 1) or (tensor_shape[0] != 1):
-            tensor_value = np.frombuffer(
-                node.tensor_content, dtype=tf.as_dtype(node.dtype).as_numpy_dtype
-            )
-        else:
-            tensor_value = get_tensor_by_type(node, dtype)
-        type_embedding_net_variables[item] = np.reshape(tensor_value, tensor_shape)
-    return type_embedding_net_variables
+    return convert_tensor_to_ndarray_in_dict(type_embedding_net_nodes)
 
 
 def get_attention_layer_nodes_from_graph_def(
@@ -561,19 +487,27 @@ def get_attention_layer_variables_from_graph_def(
     Dict
         The attention layer variables within the given tf.GraphDef object
     """
-    attention_layer_variables = {}
     attention_layer_net_nodes = get_attention_layer_nodes_from_graph_def(
         graph_def, suffix=suffix
     )
-    for item in attention_layer_net_nodes:
-        node = attention_layer_net_nodes[item]
-        dtype = tf.as_dtype(node.dtype).as_numpy_dtype
-        tensor_shape = tf.TensorShape(node.tensor_shape).as_list()
-        if (len(tensor_shape) != 1) or (tensor_shape[0] != 1):
-            tensor_value = np.frombuffer(
-                node.tensor_content, dtype=tf.as_dtype(node.dtype).as_numpy_dtype
-            )
-        else:
-            tensor_value = get_tensor_by_type(node, dtype)
-        attention_layer_variables[item] = np.reshape(tensor_value, tensor_shape)
-    return attention_layer_variables
+    return convert_tensor_to_ndarray_in_dict(attention_layer_net_nodes)
+
+
+def convert_tensor_to_ndarray_in_dict(
+    tensor_dict: Dict[str, tf.Tensor],
+) -> Dict[str, np.ndarray]:
+    """Convert tensor to ndarray in dict.
+
+    Parameters
+    ----------
+    tensor_dict : Dict[str, tf.Tensor]
+        The input tensor dict
+
+    Returns
+    -------
+    Dict[str, np.ndarray]
+        The converted tensor dict
+    """
+    for key in tensor_dict:
+        tensor_dict[key] = tf.make_ndarray(tensor_dict[key])
+    return tensor_dict
diff --git a/deepmd/utils/learning_rate.py b/deepmd/tf/utils/learning_rate.py
similarity index 99%
rename from deepmd/utils/learning_rate.py
rename to deepmd/tf/utils/learning_rate.py
index 5bec5120cd..519bf20bd0 100644
--- a/deepmd/utils/learning_rate.py
+++ b/deepmd/tf/utils/learning_rate.py
@@ -5,7 +5,7 @@
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
 
diff --git a/deepmd/utils/multi_init.py b/deepmd/tf/utils/multi_init.py
similarity index 95%
rename from deepmd/utils/multi_init.py
rename to deepmd/tf/utils/multi_init.py
index 6c070dc67e..aafa9461b0 100644
--- a/deepmd/utils/multi_init.py
+++ b/deepmd/tf/utils/multi_init.py
@@ -6,10 +6,10 @@
     Dict,
 )
 
-from deepmd.utils.errors import (
+from deepmd.tf.utils.errors import (
     GraphWithoutTensorError,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_tensor_by_name,
 )
 
@@ -59,9 +59,7 @@ def replace_model_params_with_frz_multi_model(
     )
     if cur_type_map != pretrained_type_map:
         log.info(
-            "Change the type_map from {} to {}.".format(
-                str(cur_type_map), str(pretrained_type_map)
-            )
+            f"Change the type_map from {cur_type_map!s} to {pretrained_type_map!s}."
         )
         jdata["model"]["type_map"] = pretrained_type_map
 
@@ -166,7 +164,7 @@ def replace_model_params_with_frz_multi_model(
 def _change_sub_config(jdata: Dict[str, Any], src_jdata: Dict[str, Any], sub_key: str):
     target_para = src_jdata[sub_key]
     cur_para = jdata[sub_key]
-    # keep some params that are irrelevant to model structures (need to discuss) TODO
+    # TODO: keep some params that are irrelevant to model structures (need to discuss)
     if "trainable" in cur_para.keys():
         target_para["trainable"] = cur_para["trainable"]
     log.info(f"Change the '{sub_key}' from {cur_para!s} to {target_para!s}.")
diff --git a/deepmd/tf/utils/neighbor_stat.py b/deepmd/tf/utils/neighbor_stat.py
new file mode 100644
index 0000000000..f668d4a4da
--- /dev/null
+++ b/deepmd/tf/utils/neighbor_stat.py
@@ -0,0 +1,277 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
+from typing import (
+    Iterator,
+    Optional,
+    Tuple,
+)
+
+import numpy as np
+
+from deepmd.tf.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+    GLOBAL_TF_FLOAT_PRECISION,
+    default_tf_session_config,
+    tf,
+)
+from deepmd.tf.utils.batch_size import (
+    AutoBatchSize,
+)
+from deepmd.tf.utils.data_system import (
+    DeepmdDataSystem,
+)
+from deepmd.tf.utils.nlist import (
+    extend_coord_with_ghosts,
+)
+from deepmd.tf.utils.sess import (
+    run_sess,
+)
+from deepmd.utils.neighbor_stat import NeighborStat as BaseNeighborStat
+
+log = logging.getLogger(__name__)
+
+
+class NeighborStatOP:
+    """Class for getting neighbor statics data information.
+
+    Parameters
+    ----------
+    ntypes
+        The num of atom types
+    rcut
+        The cut-off radius
+    mixed_types : bool, optional
+        If True, treat neighbors of all types as a single type.
+    """
+
+    def __init__(
+        self,
+        ntypes: int,
+        rcut: float,
+        mixed_types: bool,
+    ) -> None:
+        super().__init__()
+        self.rcut = rcut
+        self.ntypes = ntypes
+        self.mixed_types = mixed_types
+
+    def build(
+        self,
+        coord: tf.Tensor,
+        atype: tf.Tensor,
+        cell: tf.Tensor,
+        pbc: tf.Tensor,
+    ) -> Tuple[tf.Tensor, tf.Tensor]:
+        """Calculate the nearest neighbor distance between atoms, maximum nbor size of
+        atoms and the output data range of the environment matrix.
+
+        Parameters
+        ----------
+        coord
+            The coordinates of atoms.
+        atype
+            The atom types.
+        cell
+            The cell.
+
+        Returns
+        -------
+        tf.Tensor
+            The minimal squared distance between two atoms, in the shape of (nframes,)
+        tf.Tensor
+            The maximal number of neighbors
+        """
+        # generated by GitHub Copilot, converted from PT codes
+        nframes = tf.shape(coord)[0]
+        coord = tf.reshape(coord, [nframes, -1, 3])
+        nloc = tf.shape(coord)[1]
+        coord = tf.reshape(coord, [nframes, nloc * 3])
+        extend_coord, extend_atype, _ = extend_coord_with_ghosts(
+            coord, atype, cell, self.rcut, pbc
+        )
+
+        coord1 = tf.reshape(extend_coord, [nframes, -1])
+        nall = tf.shape(coord1)[1] // 3
+        coord0 = coord1[:, : nloc * 3]
+        diff = (
+            tf.reshape(coord1, [nframes, -1, 3])[:, None, :, :]
+            - tf.reshape(coord0, [nframes, -1, 3])[:, :, None, :]
+        )
+        # shape of diff: nframes, nloc, nall, 3
+        # remove the diagonal elements
+        mask = tf.eye(nloc, nall, dtype=tf.bool)
+        # expand mask
+        mask = tf.tile(mask[None, :, :], [nframes, 1, 1])
+        # expand inf
+        inf_mask = tf.constant(
+            float("inf"), dtype=GLOBAL_TF_FLOAT_PRECISION, shape=[1, 1, 1]
+        )
+        inf_mask = tf.tile(inf_mask, [nframes, nloc, nall])
+        # virtual type (<0) are not counted
+        virtual_type_mask_i = tf.tile(tf.less(atype, 0)[:, :, None], [1, 1, nall])
+        virtual_type_mask_j = tf.tile(
+            tf.less(extend_atype, 0)[:, None, :], [1, nloc, 1]
+        )
+        mask = mask | virtual_type_mask_i | virtual_type_mask_j
+        rr2 = tf.reduce_sum(tf.square(diff), axis=-1)
+        rr2 = tf.where(mask, inf_mask, rr2)
+        min_rr2 = tf.reduce_min(rr2, axis=(1, 2))
+        # count the number of neighbors
+        if not self.mixed_types:
+            mask = rr2 < self.rcut**2
+            nnei = []
+            for ii in range(self.ntypes):
+                nnei.append(
+                    tf.reduce_sum(
+                        tf.cast(
+                            mask & (tf.equal(extend_atype, ii))[:, None, :], tf.int32
+                        ),
+                        axis=-1,
+                    )
+                )
+            # shape: nframes, nloc, ntypes
+            nnei = tf.stack(nnei, axis=-1)
+        else:
+            mask = rr2 < self.rcut**2
+            # virtual types (<0) are not counted
+            nnei = tf.reshape(
+                tf.reduce_sum(
+                    tf.cast(
+                        mask & tf.greater_equal(extend_atype, 0)[:, None, :], tf.int32
+                    ),
+                    axis=-1,
+                ),
+                [nframes, nloc, 1],
+            )
+        # nnei: nframes, nloc, ntypes
+        # virtual type i (<0) are not counted
+        nnei = tf.where(
+            tf.tile(
+                tf.less(atype, 0)[:, :, None],
+                [1, 1, self.ntypes if not self.mixed_types else 1],
+            ),
+            tf.zeros_like(nnei, dtype=tf.int32),
+            nnei,
+        )
+        max_nnei = tf.reduce_max(nnei, axis=1)
+        return min_rr2, max_nnei
+
+
+class NeighborStat(BaseNeighborStat):
+    """Class for getting training data information.
+
+    It loads data from DeepmdData object, and measures the data info, including neareest nbor distance between atoms, max nbor size of atoms and the output data range of the environment matrix.
+
+    Parameters
+    ----------
+    ntypes
+            The num of atom types
+    rcut
+            The cut-off radius
+    mixed_type : bool, optional, default=False
+        Treat all types as a single type.
+    """
+
+    def __init__(
+        self,
+        ntypes: int,
+        rcut: float,
+        mixed_type: bool = False,
+    ) -> None:
+        """Constructor."""
+        super().__init__(ntypes, rcut, mixed_type)
+        self.auto_batch_size = AutoBatchSize()
+        self.neighbor_stat = NeighborStatOP(ntypes, rcut, mixed_type)
+        self.place_holders = {}
+        with tf.Graph().as_default() as sub_graph:
+            self.op = self.build()
+        self.sub_sess = tf.Session(graph=sub_graph, config=default_tf_session_config)
+
+    def build(self) -> Tuple[tf.Tensor, tf.Tensor]:
+        """Build the graph.
+
+        Returns
+        -------
+        tf.Tensor
+            The minimal squared distance between two atoms, in the shape of (nframes,)
+        tf.Tensor
+            The maximal number of neighbors
+        """
+        for ii in ["coord", "box"]:
+            self.place_holders[ii] = tf.placeholder(
+                GLOBAL_NP_FLOAT_PRECISION, [None, None], name="t_" + ii
+            )
+        self.place_holders["type"] = tf.placeholder(
+            tf.int32, [None, None], name="t_type"
+        )
+        self.place_holders["pbc"] = tf.placeholder(tf.bool, [], name="t_pbc")
+        ret = self.neighbor_stat.build(
+            self.place_holders["coord"],
+            self.place_holders["type"],
+            self.place_holders["box"],
+            self.place_holders["pbc"],
+        )
+        return ret
+
+    def iterator(
+        self, data: DeepmdDataSystem
+    ) -> Iterator[Tuple[np.ndarray, float, str]]:
+        """Produce data.
+
+        Parameters
+        ----------
+        data
+            The data system
+
+        Yields
+        ------
+        np.ndarray
+            The maximal number of neighbors
+        float
+            The squared minimal distance between two atoms
+        str
+            The directory of the data system
+        """
+        for ii in range(len(data.system_dirs)):
+            for jj in data.data_systems[ii].dirs:
+                data_set = data.data_systems[ii]
+                data_set_data = data_set._load_set(jj)
+                minrr2, max_nnei = self.auto_batch_size.execute_all(
+                    self._execute,
+                    data_set_data["coord"].shape[0],
+                    data_set.get_natoms(),
+                    data_set_data["coord"],
+                    data_set_data["type"],
+                    data_set_data["box"],
+                    data_set.pbc,
+                )
+                yield np.max(max_nnei, axis=0), np.min(minrr2), jj
+
+    def _execute(
+        self,
+        coord: np.ndarray,
+        atype: np.ndarray,
+        box: Optional[np.ndarray],
+        pbc: bool,
+    ):
+        """Execute the operation.
+
+        Parameters
+        ----------
+        coord
+            The coordinates of atoms.
+        atype
+            The atom types.
+        box
+            The box.
+        pbc
+            Whether the box is periodic.
+        """
+        feed_dict = {
+            self.place_holders["coord"]: coord,
+            self.place_holders["type"]: atype,
+            self.place_holders["box"]: box,
+            self.place_holders["pbc"]: pbc,
+        }
+        minrr2, max_nnei = run_sess(self.sub_sess, self.op, feed_dict=feed_dict)
+        return minrr2, max_nnei
diff --git a/deepmd/utils/network.py b/deepmd/tf/utils/network.py
similarity index 99%
rename from deepmd/utils/network.py
rename to deepmd/tf/utils/network.py
index 36d8c42f82..fb8e89c737 100644
--- a/deepmd/utils/network.py
+++ b/deepmd/tf/utils/network.py
@@ -1,10 +1,10 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
 
-from deepmd.common import (
+from deepmd.tf.common import (
     get_precision,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     tf,
 )
diff --git a/deepmd/tf/utils/nlist.py b/deepmd/tf/utils/nlist.py
new file mode 100644
index 0000000000..87032c3e1d
--- /dev/null
+++ b/deepmd/tf/utils/nlist.py
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.tf.env import (
+    GLOBAL_TF_FLOAT_PRECISION,
+    tf,
+)
+from deepmd.tf.utils.region import (
+    to_face_distance,
+)
+
+
+def extend_coord_with_ghosts(
+    coord: tf.Tensor,
+    atype: tf.Tensor,
+    cell: tf.Tensor,
+    rcut: float,
+    pbc: tf.Tensor,
+):
+    """Extend the coordinates of the atoms by appending peridoc images.
+    The number of images is large enough to ensure all the neighbors
+    within rcut are appended.
+
+    Parameters
+    ----------
+    coord : tf.Tensor
+        original coordinates of shape [-1, nloc*3].
+    atype : tf.Tensor
+        atom type of shape [-1, nloc].
+    cell : tf.Tensor
+        simulation cell tensor of shape [-1, 9].
+    rcut : float
+        the cutoff radius
+    pbc : tf.Tensor
+        whether the simulation cell is periodic or not
+
+    Returns
+    -------
+    extended_coord: tf.Tensor
+        extended coordinates of shape [-1, nall*3].
+    extended_atype: tf.Tensor
+        extended atom type of shape [-1, nall].
+    index_mapping: tf.Tensor
+        maping extended index to the local index
+
+    """
+    # generated by GitHub Copilot, converted from PT codes
+    nf = tf.shape(atype)[0]
+    nloc = tf.shape(atype)[1]
+    aidx = tf.tile(tf.expand_dims(tf.range(nloc), 0), [nf, 1])
+
+    def extend_coord_with_ghosts_nopbc(coord, atype, cell):
+        return coord, atype, aidx, nloc
+
+    def extend_coord_with_ghosts_pbc(coord, atype, cell):
+        coord = tf.reshape(coord, [nf, nloc, 3])
+        cell = tf.reshape(cell, [nf, 3, 3])
+        # nf x 3
+        to_face = to_face_distance(cell)
+        # nf x 3
+        # *2: ghost copies on + and - directions
+        # +1: central cell
+        nbuff = tf.cast(tf.math.ceil(rcut / to_face), tf.int32)
+        # 3
+        nbuff = tf.reduce_max(nbuff, axis=0)
+        xi = tf.range(-nbuff[0], nbuff[0] + 1, 1)
+        yi = tf.range(-nbuff[1], nbuff[1] + 1, 1)
+        zi = tf.range(-nbuff[2], nbuff[2] + 1, 1)
+        xyz = tf.reshape(xi, [-1, 1, 1, 1]) * tf.constant([1, 0, 0], dtype=tf.int32)
+        xyz = xyz + tf.reshape(yi, [1, -1, 1, 1]) * tf.constant(
+            [0, 1, 0], dtype=tf.int32
+        )
+        xyz = xyz + tf.reshape(zi, [1, 1, -1, 1]) * tf.constant(
+            [0, 0, 1], dtype=tf.int32
+        )
+        xyz = tf.reshape(xyz, [-1, 3])
+        # ns x 3
+        shift_idx = tf.gather(
+            xyz, tf.argsort(tf.norm(tf.cast(xyz, GLOBAL_TF_FLOAT_PRECISION), axis=1))
+        )
+        ns = tf.shape(shift_idx)[0]
+        nall = ns * nloc
+        # nf x ns x 3
+        shift_vec = tf.einsum(
+            "sd,fdk->fsk", tf.cast(shift_idx, GLOBAL_TF_FLOAT_PRECISION), cell
+        )
+        # nf x ns x nloc x 3
+        extend_coord = coord[:, None, :, :] + shift_vec[:, :, None, :]
+        # nf x ns x nloc
+        extend_atype = tf.tile(tf.expand_dims(atype, -2), [1, ns, 1])
+        # nf x ns x nloc
+        extend_aidx = tf.tile(tf.expand_dims(aidx, -2), [1, ns, 1])
+        return extend_coord, extend_atype, extend_aidx, nall
+
+    extend_coord, extend_atype, extend_aidx, nall = tf.cond(
+        pbc,
+        lambda: extend_coord_with_ghosts_pbc(coord, atype, cell),
+        lambda: extend_coord_with_ghosts_nopbc(coord, atype, cell),
+    )
+
+    return (
+        tf.reshape(extend_coord, [nf, nall * 3]),
+        tf.reshape(extend_atype, [nf, nall]),
+        tf.reshape(extend_aidx, [nf, nall]),
+    )
diff --git a/deepmd/tf/utils/pair_tab.py b/deepmd/tf/utils/pair_tab.py
new file mode 100644
index 0000000000..a9747c4367
--- /dev/null
+++ b/deepmd/tf/utils/pair_tab.py
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Alias for backward compatibility."""
+
+from deepmd.utils.pair_tab import (
+    PairTab,
+)
+
+__all__ = [
+    "PairTab",
+]
diff --git a/deepmd/utils/parallel_op.py b/deepmd/tf/utils/parallel_op.py
similarity index 94%
rename from deepmd/utils/parallel_op.py
rename to deepmd/tf/utils/parallel_op.py
index 9ef68bbd84..5eeb1fab7f 100644
--- a/deepmd/utils/parallel_op.py
+++ b/deepmd/tf/utils/parallel_op.py
@@ -8,10 +8,10 @@
     Tuple,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.utils.sess import (
+from deepmd.tf.utils.sess import (
     run_sess,
 )
 
@@ -30,17 +30,15 @@ class ParallelOp:
 
     Examples
     --------
-    >>> from deepmd.env import tf
-    >>> from deepmd.utils.parallel_op import ParallelOp
+    >>> from deepmd.tf.env import tf
+    >>> from deepmd.tf.utils.parallel_op import ParallelOp
     >>> def builder():
     ...     x = tf.placeholder(tf.int32, [1])
     ...     return {"x": x}, (x + 1)
-    ...
     >>> p = ParallelOp(builder, nthreads=4)
     >>> def feed():
     ...     for ii in range(10):
     ...         yield {"x": [ii]}
-    ...
     >>> print(*p.generate(tf.Session(), feed()))
     [1] [2] [3] [4] [5] [6] [7] [8] [9] [10]
     """
diff --git a/deepmd/tf/utils/path.py b/deepmd/tf/utils/path.py
new file mode 100644
index 0000000000..67990543ae
--- /dev/null
+++ b/deepmd/tf/utils/path.py
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Alias for backward compatibility."""
+
+from deepmd.utils.path import (
+    DPH5Path,
+    DPOSPath,
+    DPPath,
+)
+
+__all__ = [
+    "DPPath",
+    "DPOSPath",
+    "DPH5Path",
+]
diff --git a/deepmd/tf/utils/plugin.py b/deepmd/tf/utils/plugin.py
new file mode 100644
index 0000000000..f2f0336691
--- /dev/null
+++ b/deepmd/tf/utils/plugin.py
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Alias for backward compatibility."""
+
+from deepmd.utils.plugin import (
+    Plugin,
+    PluginVariant,
+    VariantABCMeta,
+    VariantMeta,
+)
+
+__all__ = [
+    "Plugin",
+    "VariantMeta",
+    "VariantABCMeta",
+    "PluginVariant",
+]
diff --git a/deepmd/tf/utils/random.py b/deepmd/tf/utils/random.py
new file mode 100644
index 0000000000..55b8eba91e
--- /dev/null
+++ b/deepmd/tf/utils/random.py
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Alias for backward compatibility."""
+
+from deepmd.utils.random import (
+    choice,
+    random,
+    seed,
+    shuffle,
+)
+
+__all__ = [
+    "choice",
+    "random",
+    "seed",
+    "shuffle",
+]
diff --git a/deepmd/tf/utils/region.py b/deepmd/tf/utils/region.py
new file mode 100644
index 0000000000..82183a0413
--- /dev/null
+++ b/deepmd/tf/utils/region.py
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from deepmd.tf.env import (
+    tf,
+)
+
+
+def to_face_distance(cell):
+    """Compute the to-face-distance of the simulation cell.
+
+    Parameters
+    ----------
+    cell : tf.Tensor
+        simulation cell tensor of shape [*, 3, 3].
+
+    Returns
+    -------
+    dist: tf.Tensor
+        the to face distances of shape [*, 3]
+    """
+    # generated by GitHub Copilot, converted from PT codes
+    cshape = tf.shape(cell)
+    cell_reshaped = tf.reshape(cell, [-1, 3, 3])
+    dist = b_to_face_distance(cell_reshaped)
+    return tf.reshape(dist, tf.concat([cshape[:-2], [3]], 0))
+
+
+def b_to_face_distance(cell):
+    # generated by GitHub Copilot, converted from PT codes
+    volume = tf.linalg.det(cell)
+    c_yz = tf.linalg.cross(cell[:, 1], cell[:, 2])
+    _h2yz = tf.divide(volume, tf.norm(c_yz, axis=-1))
+    c_zx = tf.linalg.cross(cell[:, 2], cell[:, 0])
+    _h2zx = tf.divide(volume, tf.norm(c_zx, axis=-1))
+    c_xy = tf.linalg.cross(cell[:, 0], cell[:, 1])
+    _h2xy = tf.divide(volume, tf.norm(c_xy, axis=-1))
+    return tf.stack([_h2yz, _h2zx, _h2xy], axis=1)
diff --git a/deepmd/tf/utils/serialization.py b/deepmd/tf/utils/serialization.py
new file mode 100644
index 0000000000..7cf596f5bd
--- /dev/null
+++ b/deepmd/tf/utils/serialization.py
@@ -0,0 +1,132 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import tempfile
+
+from deepmd.tf.entrypoints import (
+    freeze,
+)
+from deepmd.tf.env import (
+    GLOBAL_TF_FLOAT_PRECISION,
+    tf,
+)
+from deepmd.tf.model.model import (
+    Model,
+)
+from deepmd.tf.utils.errors import (
+    GraphWithoutTensorError,
+)
+from deepmd.tf.utils.graph import (
+    get_tensor_by_name_from_graph,
+    load_graph_def,
+)
+from deepmd.tf.utils.sess import (
+    run_sess,
+)
+
+
+def serialize_from_file(model_file: str) -> dict:
+    """Serialize the model file to a dictionary.
+
+    Parameters
+    ----------
+    model_file : str
+        The model file to be serialized.
+
+    Returns
+    -------
+    dict
+        The serialized model data.
+    """
+    graph, graph_def = load_graph_def(model_file)
+    t_jdata = get_tensor_by_name_from_graph(graph, "train_attr/training_script")
+    jdata = json.loads(t_jdata)
+    model = Model(**jdata["model"])
+    # important! must be called before serialize
+    model.init_variables(graph=graph, graph_def=graph_def)
+    model_dict = model.serialize()
+    data = {
+        "backend": "TensorFlow",
+        "tf_version": tf.__version__,
+        "model": model_dict,
+        "model_def_script": jdata["model"],
+    }
+    # neighbor stat information
+    try:
+        t_min_nbor_dist = get_tensor_by_name_from_graph(
+            graph, "train_attr/min_nbor_dist"
+        )
+    except GraphWithoutTensorError as e:
+        pass
+    else:
+        data.setdefault("@variables", {})
+        data["@variables"]["min_nbor_dist"] = t_min_nbor_dist
+    return data
+
+
+def deserialize_to_file(model_file: str, data: dict) -> None:
+    """Deserialize the dictionary to a model file.
+
+    Parameters
+    ----------
+    model_file : str
+        The model file to be saved.
+    data : dict
+        The dictionary to be deserialized.
+    """
+    model = Model.deserialize(data["model"])
+    with tf.Graph().as_default() as graph, tf.Session(graph=graph) as sess:
+        place_holders = {}
+        for ii in ["coord", "box"]:
+            place_holders[ii] = tf.placeholder(
+                GLOBAL_TF_FLOAT_PRECISION, [None], name="t_" + ii
+            )
+        place_holders["type"] = tf.placeholder(tf.int32, [None], name="t_type")
+        place_holders["natoms_vec"] = tf.placeholder(
+            tf.int32, [model.get_ntypes() + 2], name="t_natoms"
+        )
+        place_holders["default_mesh"] = tf.placeholder(tf.int32, [None], name="t_mesh")
+        inputs = {}
+        # fparam, aparam
+        if model.get_numb_fparam() > 0:
+            inputs["fparam"] = tf.placeholder(
+                GLOBAL_TF_FLOAT_PRECISION,
+                [None, model.get_numb_fparam()],
+                name="t_fparam",
+            )
+        if model.get_numb_aparam() > 0:
+            inputs["aparam"] = tf.placeholder(
+                GLOBAL_TF_FLOAT_PRECISION,
+                [None, model.get_numb_aparam()],
+                name="t_aparam",
+            )
+        model.build(
+            place_holders["coord"],
+            place_holders["type"],
+            place_holders["natoms_vec"],
+            place_holders["box"],
+            place_holders["default_mesh"],
+            inputs,
+            reuse=False,
+        )
+        init = tf.global_variables_initializer()
+        tf.constant(
+            json.dumps({"model": data["model_def_script"]}, separators=(",", ":")),
+            name="train_attr/training_script",
+            dtype=tf.string,
+        )
+        if "min_nbor_dist" in data.get("@variables", {}):
+            tf.constant(
+                data["@variables"]["min_nbor_dist"],
+                name="train_attr/min_nbor_dist",
+                dtype=GLOBAL_TF_FLOAT_PRECISION,
+            )
+        run_sess(sess, init)
+        saver = tf.train.Saver()
+        with tempfile.TemporaryDirectory() as nt:
+            saver.save(
+                sess,
+                os.path.join(nt, "model.ckpt"),
+                global_step=0,
+            )
+            freeze(checkpoint_folder=nt, output=model_file, node_names=None)
diff --git a/deepmd/utils/sess.py b/deepmd/tf/utils/sess.py
similarity index 95%
rename from deepmd/utils/sess.py
rename to deepmd/tf/utils/sess.py
index a87adffd91..ca98980f89 100644
--- a/deepmd/utils/sess.py
+++ b/deepmd/tf/utils/sess.py
@@ -1,10 +1,10 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import os
 
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.utils.errors import (
+from deepmd.tf.utils.errors import (
     OutOfMemoryError,
 )
 
diff --git a/deepmd/tf/utils/spin.py b/deepmd/tf/utils/spin.py
new file mode 100644
index 0000000000..c20d4dcc7b
--- /dev/null
+++ b/deepmd/tf/utils/spin.py
@@ -0,0 +1,87 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    List,
+    Optional,
+)
+
+from deepmd.tf.env import (
+    GLOBAL_TF_FLOAT_PRECISION,
+    tf,
+)
+
+
+class Spin:
+    """Class for spin.
+
+    Parameters
+    ----------
+    use_spin
+                Whether to use atomic spin model for each atom type
+    spin_norm
+                The magnitude of atomic spin for each atom type with spin
+    virtual_len
+                The distance between virtual atom representing spin and its corresponding real atom for each atom type with spin
+    """
+
+    def __init__(
+        self,
+        use_spin: Optional[List[bool]] = None,
+        spin_norm: Optional[List[float]] = None,
+        virtual_len: Optional[List[float]] = None,
+    ) -> None:
+        """Constructor."""
+        self.use_spin = use_spin
+        self.spin_norm = spin_norm
+        self.virtual_len = virtual_len
+        self.ntypes_spin = self.use_spin.count(True)
+
+    def build(
+        self,
+        reuse=None,
+        suffix="",
+    ):
+        """Build the computational graph for the spin.
+
+        Parameters
+        ----------
+        reuse
+            The weights in the networks should be reused when get the variable.
+        suffix
+            Name suffix to identify this descriptor
+
+        Returns
+        -------
+        embedded_types
+            The computational graph for embedded types
+        """
+        name = "spin_attr" + suffix
+        with tf.variable_scope(name, reuse=reuse):
+            t_ntypes_spin = tf.constant(
+                self.ntypes_spin, name="ntypes_spin", dtype=tf.int32
+            )
+            t_virtual_len = tf.constant(
+                self.virtual_len,
+                name="virtual_len",
+                dtype=GLOBAL_TF_FLOAT_PRECISION,
+            )
+            t_spin_norm = tf.constant(
+                self.spin_norm,
+                name="spin_norm",
+                dtype=GLOBAL_TF_FLOAT_PRECISION,
+            )
+
+    def get_ntypes_spin(self) -> int:
+        """Returns the number of atom types which contain spin."""
+        return self.ntypes_spin
+
+    def get_use_spin(self) -> List[bool]:
+        """Returns the list of whether to use spin for each atom type."""
+        return self.use_spin
+
+    def get_spin_norm(self) -> List[float]:
+        """Returns the list of magnitude of atomic spin for each atom type."""
+        return self.spin_norm
+
+    def get_virtual_len(self) -> List[float]:
+        """Returns the list of distance between real atom and virtual atom for each atom type."""
+        return self.virtual_len
diff --git a/deepmd/utils/tabulate.py b/deepmd/tf/utils/tabulate.py
similarity index 91%
rename from deepmd/utils/tabulate.py
rename to deepmd/tf/utils/tabulate.py
index 2b270b1dbc..958e08dd86 100644
--- a/deepmd/utils/tabulate.py
+++ b/deepmd/tf/utils/tabulate.py
@@ -16,17 +16,17 @@
 )
 
 import deepmd
-from deepmd.common import (
+from deepmd.tf.common import (
     ACTIVATION_FN_DICT,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     Descriptor,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     op_module,
     tf,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_embedding_net_nodes_from_graph_def,
     get_tensor_by_name_from_graph,
 )
@@ -107,15 +107,15 @@ def __init__(
         self.sub_graph, self.sub_graph_def = self._load_sub_graph()
         self.sub_sess = tf.Session(graph=self.sub_graph)
 
-        if isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
+        if isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeR):
             self.sel_a = self.descrpt.sel_r
             self.rcut = self.descrpt.rcut
             self.rcut_smth = self.descrpt.rcut_smth
-        elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeA):
+        elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeA):
             self.sel_a = self.descrpt.sel_a
             self.rcut = self.descrpt.rcut_r
             self.rcut_smth = self.descrpt.rcut_r_smth
-        elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT):
+        elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeT):
             self.sel_a = self.descrpt.sel_a
             self.rcut = self.descrpt.rcut_r
             self.rcut_smth = self.descrpt.rcut_r_smth
@@ -133,6 +133,10 @@ def __init__(
         self.embedding_net_nodes = get_embedding_net_nodes_from_graph_def(
             self.graph_def, suffix=self.suffix
         )
+        for key in self.embedding_net_nodes.keys():
+            assert (
+                key.find("bias") > 0 or key.find("matrix") > 0
+            ), "currently, only support weight matrix and bias matrix at the tabulation op!"
 
         # move it to the descriptor class
         # for tt in self.exclude_types:
@@ -179,8 +183,8 @@ def build(
         """
         # tabulate range [lower, upper] with stride0 'stride0'
         lower, upper = self._get_env_mat_range(min_nbor_dist)
-        if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten) or isinstance(
-            self.descrpt, deepmd.descriptor.DescrptSeAEbdV2
+        if isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeAtten) or isinstance(
+            self.descrpt, deepmd.tf.descriptor.DescrptSeAEbdV2
         ):
             uu = np.max(upper)
             ll = np.min(lower)
@@ -196,7 +200,7 @@ def build(
             self._build_lower(
                 "filter_net", xx, 0, uu, ll, stride0, stride1, extrapolate, nspline
             )
-        elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeA):
+        elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeA):
             for ii in range(self.table_size):
                 if (self.type_one_side and not self._all_excluded(ii)) or (
                     not self.type_one_side
@@ -233,7 +237,7 @@ def build(
                     self._build_lower(
                         net, xx, ii, uu, ll, stride0, stride1, extrapolate, nspline
                     )
-        elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT):
+        elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeT):
             xx_all = []
             for ii in range(self.ntypes):
                 xx = np.arange(
@@ -275,7 +279,7 @@ def build(
                         nspline[ii],
                     )
                     idx += 1
-        elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
+        elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeR):
             for ii in range(self.table_size):
                 if (self.type_one_side and not self._all_excluded(ii)) or (
                     not self.type_one_side
@@ -327,10 +331,10 @@ def _build_lower(
         )
 
         # tt.shape: [nspline, self.last_layer_size]
-        if isinstance(self.descrpt, deepmd.descriptor.DescrptSeA):
+        if isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeA):
             tt = np.full((nspline, self.last_layer_size), stride1)
             tt[: int((upper - lower) / stride0), :] = stride0
-        elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT):
+        elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeT):
             tt = np.full((nspline, self.last_layer_size), stride1)
             tt[
                 int((lower - extrapolate * lower) / stride1) + 1 : (
@@ -339,7 +343,7 @@ def _build_lower(
                 ),
                 :,
             ] = stride0
-        elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
+        elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeR):
             tt = np.full((nspline, self.last_layer_size), stride1)
             tt[: int((upper - lower) / stride0), :] = stride0
         else:
@@ -423,14 +427,14 @@ def _get_bias(self):
         bias = {}
         for layer in range(1, self.layer_size + 1):
             bias["layer_" + str(layer)] = []
-            if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten) or isinstance(
-                self.descrpt, deepmd.descriptor.DescrptSeAEbdV2
-            ):
+            if isinstance(
+                self.descrpt, deepmd.tf.descriptor.DescrptSeAtten
+            ) or isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeAEbdV2):
                 node = self.embedding_net_nodes[
                     f"filter_type_all{self.suffix}/bias_{layer}"
                 ]
                 bias["layer_" + str(layer)].append(tf.make_ndarray(node))
-            elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeA):
+            elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeA):
                 if self.type_one_side:
                     for ii in range(0, self.ntypes):
                         if not self._all_excluded(ii):
@@ -452,14 +456,14 @@ def _get_bias(self):
                             bias["layer_" + str(layer)].append(tf.make_ndarray(node))
                         else:
                             bias["layer_" + str(layer)].append(np.array([]))
-            elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT):
+            elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeT):
                 for ii in range(self.ntypes):
                     for jj in range(ii, self.ntypes):
                         node = self.embedding_net_nodes[
                             f"filter_type_all{self.suffix}/bias_{layer}_{ii}_{jj}"
                         ]
                         bias["layer_" + str(layer)].append(tf.make_ndarray(node))
-            elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
+            elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeR):
                 if self.type_one_side:
                     for ii in range(0, self.ntypes):
                         if not self._all_excluded(ii):
@@ -489,14 +493,14 @@ def _get_matrix(self):
         matrix = {}
         for layer in range(1, self.layer_size + 1):
             matrix["layer_" + str(layer)] = []
-            if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten) or isinstance(
-                self.descrpt, deepmd.descriptor.DescrptSeAEbdV2
-            ):
+            if isinstance(
+                self.descrpt, deepmd.tf.descriptor.DescrptSeAtten
+            ) or isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeAEbdV2):
                 node = self.embedding_net_nodes[
                     f"filter_type_all{self.suffix}/matrix_{layer}"
                 ]
                 matrix["layer_" + str(layer)].append(tf.make_ndarray(node))
-            elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeA):
+            elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeA):
                 if self.type_one_side:
                     for ii in range(0, self.ntypes):
                         if not self._all_excluded(ii):
@@ -518,14 +522,14 @@ def _get_matrix(self):
                             matrix["layer_" + str(layer)].append(tf.make_ndarray(node))
                         else:
                             matrix["layer_" + str(layer)].append(np.array([]))
-            elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT):
+            elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeT):
                 for ii in range(self.ntypes):
                     for jj in range(ii, self.ntypes):
                         node = self.embedding_net_nodes[
                             f"filter_type_all{self.suffix}/matrix_{layer}_{ii}_{jj}"
                         ]
                         matrix["layer_" + str(layer)].append(tf.make_ndarray(node))
-            elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
+            elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeR):
                 if self.type_one_side:
                     for ii in range(0, self.ntypes):
                         if not self._all_excluded(ii):
@@ -712,14 +716,14 @@ def _layer_1(self, x, w, b):
     # Change the embedding net range to sw / min_nbor_dist
     def _get_env_mat_range(self, min_nbor_dist):
         sw = self._spline5_switch(min_nbor_dist, self.rcut_smth, self.rcut)
-        if isinstance(self.descrpt, deepmd.descriptor.DescrptSeA):
+        if isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeA):
             lower = -self.davg[:, 0] / self.dstd[:, 0]
             upper = ((1 / min_nbor_dist) * sw - self.davg[:, 0]) / self.dstd[:, 0]
-        elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT):
+        elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeT):
             var = np.square(sw / (min_nbor_dist * self.dstd[:, 1:4]))
             lower = np.min(-var, axis=1)
             upper = np.max(var, axis=1)
-        elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
+        elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeR):
             lower = -self.davg[:, 0] / self.dstd[:, 0]
             upper = ((1 / min_nbor_dist) * sw - self.davg[:, 0]) / self.dstd[:, 0]
         else:
@@ -741,11 +745,11 @@ def _spline5_switch(self, xx, rmin, rmax):
 
     def _get_layer_size(self):
         layer_size = 0
-        if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten) or isinstance(
-            self.descrpt, deepmd.descriptor.DescrptSeAEbdV2
+        if isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeAtten) or isinstance(
+            self.descrpt, deepmd.tf.descriptor.DescrptSeAEbdV2
         ):
             layer_size = len(self.embedding_net_nodes) // 2
-        elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeA):
+        elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeA):
             layer_size = len(self.embedding_net_nodes) // (
                 (self.ntypes * self.ntypes - len(self.exclude_types)) * 2
             )
@@ -753,11 +757,11 @@ def _get_layer_size(self):
                 layer_size = len(self.embedding_net_nodes) // (
                     (self.ntypes - self._n_all_excluded) * 2
                 )
-        elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT):
+        elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeT):
             layer_size = len(self.embedding_net_nodes) // int(
                 comb(self.ntypes + 1, 2) * 2
             )
-        elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
+        elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeR):
             layer_size = len(self.embedding_net_nodes) // (
                 (self.ntypes * self.ntypes - len(self.exclude_types)) * 2
             )
@@ -770,12 +774,12 @@ def _get_layer_size(self):
         return layer_size
 
     @property
-    @lru_cache()
+    @lru_cache
     def _n_all_excluded(self) -> int:
         """Then number of types excluding all types."""
         return sum(int(self._all_excluded(ii)) for ii in range(0, self.ntypes))
 
-    @lru_cache()
+    @lru_cache
     def _all_excluded(self, ii: int) -> bool:
         """Check if type ii excluds all types.
 
@@ -793,17 +797,17 @@ def _all_excluded(self, ii: int) -> bool:
 
     def _get_table_size(self):
         table_size = 0
-        if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten) or isinstance(
-            self.descrpt, deepmd.descriptor.DescrptSeAEbdV2
+        if isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeAtten) or isinstance(
+            self.descrpt, deepmd.tf.descriptor.DescrptSeAEbdV2
         ):
             table_size = 1
-        elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeA):
+        elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeA):
             table_size = self.ntypes * self.ntypes
             if self.type_one_side:
                 table_size = self.ntypes
-        elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT):
+        elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeT):
             table_size = int(comb(self.ntypes + 1, 2))
-        elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
+        elif isinstance(self.descrpt, deepmd.tf.descriptor.DescrptSeR):
             table_size = self.ntypes * self.ntypes
             if self.type_one_side:
                 table_size = self.ntypes
diff --git a/deepmd/utils/type_embed.py b/deepmd/tf/utils/type_embed.py
similarity index 60%
rename from deepmd/utils/type_embed.py
rename to deepmd/tf/utils/type_embed.py
index c8ab01f7f5..0f566027c1 100644
--- a/deepmd/utils/type_embed.py
+++ b/deepmd/tf/utils/type_embed.py
@@ -1,26 +1,34 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import re
 from typing import (
     List,
     Optional,
     Union,
 )
 
-from deepmd.common import (
+from deepmd.dpmodel.utils.network import (
+    EmbeddingNet,
+)
+from deepmd.tf.common import (
     get_activation_func,
     get_precision,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
+    TYPE_EMBEDDING_PATTERN,
     tf,
 )
-from deepmd.nvnmd.utils.config import (
+from deepmd.tf.nvnmd.utils.config import (
     nvnmd_cfg,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_type_embedding_net_variables_from_graph_def,
 )
-from deepmd.utils.network import (
+from deepmd.tf.utils.network import (
     embedding_net,
 )
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
 
 
 def embed_atom_type(
@@ -68,6 +76,8 @@ class TypeEmbedNet:
 
     Parameters
     ----------
+    ntypes : int
+        Number of atom types
     neuron : list[int]
             Number of neurons in each hidden layers of the embedding net
     resnet_dt
@@ -89,7 +99,9 @@ class TypeEmbedNet:
 
     def __init__(
         self,
-        neuron: List[int] = [],
+        *,
+        ntypes: int,
+        neuron: List[int],
         resnet_dt: bool = False,
         activation_function: Union[str, None] = "tanh",
         precision: str = "default",
@@ -100,10 +112,12 @@ def __init__(
         **kwargs,
     ) -> None:
         """Constructor."""
+        self.ntypes = ntypes
         self.neuron = neuron
         self.seed = seed
         self.filter_resnet_dt = resnet_dt
         self.filter_precision = get_precision(precision)
+        self.filter_activation_fn_name = str(activation_function)
         self.filter_activation_fn = get_activation_func(activation_function)
         self.trainable = trainable
         self.uniform_seed = uniform_seed
@@ -133,6 +147,7 @@ def build(
         embedded_types
             The computational graph for embedded types
         """
+        assert ntypes == self.ntypes
         types = tf.convert_to_tensor(list(range(ntypes)), dtype=tf.int32)
         ebd_type = tf.cast(
             tf.one_hot(tf.cast(types, dtype=tf.int32), int(ntypes)),
@@ -189,3 +204,98 @@ def init_variables(
         self.type_embedding_net_variables = (
             get_type_embedding_net_variables_from_graph_def(graph_def, suffix=suffix)
         )
+
+    @classmethod
+    def deserialize(cls, data: dict, suffix: str = ""):
+        """Deserialize the model.
+
+        Parameters
+        ----------
+        data : dict
+            The serialized data
+        suffix : str, optional
+            The suffix of the scope
+
+        Returns
+        -------
+        Model
+            The deserialized model
+        """
+        data = data.copy()
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data_cls = data.pop("@class")
+        assert data_cls == "TypeEmbedNet", f"Invalid class {data_cls}"
+
+        embedding_net = EmbeddingNet.deserialize(data.pop("embedding"))
+        embedding_net_variables = {}
+        for layer_idx, layer in enumerate(embedding_net.layers):
+            embedding_net_variables[
+                f"type_embed_net{suffix}/matrix_{layer_idx + 1}"
+            ] = layer.w
+            embedding_net_variables[f"type_embed_net{suffix}/bias_{layer_idx + 1}"] = (
+                layer.b
+            )
+            if layer.idt is not None:
+                embedding_net_variables[
+                    f"type_embed_net{suffix}/idt_{layer_idx + 1}"
+                ] = layer.idt.reshape(1, -1)
+            else:
+                # prevent keyError
+                embedding_net_variables[
+                    f"type_embed_net{suffix}/idt_{layer_idx + 1}"
+                ] = 0.0
+
+        type_embedding_net = cls(**data)
+        type_embedding_net.type_embedding_net_variables = embedding_net_variables
+        return type_embedding_net
+
+    def serialize(self, suffix: str = "") -> dict:
+        """Serialize the model.
+
+        Parameters
+        ----------
+        suffix : str, optional
+            The suffix of the scope
+
+        Returns
+        -------
+        dict
+            The serialized data
+        """
+        if suffix != "":
+            type_embedding_pattern = (
+                TYPE_EMBEDDING_PATTERN.replace("/(idt)", suffix + "/(idt)")
+                .replace("/(bias)", suffix + "/(bias)")
+                .replace("/(matrix)", suffix + "/(matrix)")
+            )
+        else:
+            type_embedding_pattern = TYPE_EMBEDDING_PATTERN
+        assert self.type_embedding_net_variables is not None
+        embedding_net = EmbeddingNet(
+            in_dim=self.ntypes,
+            neuron=self.neuron,
+            activation_function=self.filter_activation_fn_name,
+            resnet_dt=self.filter_resnet_dt,
+            precision=self.filter_precision.name,
+        )
+        for key, value in self.type_embedding_net_variables.items():
+            m = re.search(type_embedding_pattern, key)
+            m = [mm for mm in m.groups() if mm is not None]
+            layer_idx = int(m[1]) - 1
+            weight_name = m[0]
+            if weight_name == "idt":
+                value = value.ravel()
+            embedding_net[layer_idx][weight_name] = value
+
+        return {
+            "@class": "TypeEmbedNet",
+            "@version": 1,
+            "ntypes": self.ntypes,
+            "neuron": self.neuron,
+            "resnet_dt": self.filter_resnet_dt,
+            "precision": self.filter_precision.name,
+            "activation_function": self.filter_activation_fn_name,
+            "trainable": self.trainable,
+            "padding": self.padding,
+            "embedding": embedding_net.serialize(),
+        }
diff --git a/deepmd/tf/utils/update_sel.py b/deepmd/tf/utils/update_sel.py
new file mode 100644
index 0000000000..db0420dde8
--- /dev/null
+++ b/deepmd/tf/utils/update_sel.py
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Type,
+)
+
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+)
+from deepmd.tf.env import (
+    tf,
+)
+from deepmd.tf.utils.neighbor_stat import (
+    NeighborStat,
+)
+from deepmd.utils.update_sel import (
+    BaseUpdateSel,
+)
+
+
+class UpdateSel(BaseUpdateSel):
+    @property
+    def neighbor_stat(self) -> Type[NeighborStat]:
+        return NeighborStat
+
+    def hook(self, min_nbor_dist, max_nbor_size):
+        # moved from traier.py as duplicated
+        tf.constant(
+            min_nbor_dist,
+            name="train_attr/min_nbor_dist",
+            dtype=GLOBAL_ENER_FLOAT_PRECISION,
+        )
+        tf.constant(max_nbor_size, name="train_attr/max_nbor_size", dtype=tf.int32)
diff --git a/deepmd/tf/utils/weight_avg.py b/deepmd/tf/utils/weight_avg.py
new file mode 100644
index 0000000000..fb3ae27934
--- /dev/null
+++ b/deepmd/tf/utils/weight_avg.py
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Alias for backward compatibility."""
+
+from deepmd.utils.weight_avg import (
+    weighted_average,
+)
+
+__all__ = [
+    "weighted_average",
+]
diff --git a/deepmd/utils/__init__.py b/deepmd/utils/__init__.py
index 7d1e7e67d0..bac6924ac1 100644
--- a/deepmd/utils/__init__.py
+++ b/deepmd/utils/__init__.py
@@ -1,27 +1,3 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-#
-from .data import (
-    DeepmdData,
-)
-from .data_system import (
-    DeepmdDataSystem,
-)
-from .learning_rate import (
-    LearningRateExp,
-)
-from .pair_tab import (
-    PairTab,
-)
-from .plugin import (
-    Plugin,
-    PluginVariant,
-)
-
-__all__ = [
-    "DeepmdData",
-    "DeepmdDataSystem",
-    "LearningRateExp",
-    "PairTab",
-    "Plugin",
-    "PluginVariant",
-]
+# For performance, do not add things to this file
+# import submodules instead
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 05e7c767b8..2a98bee6fe 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -1,19 +1,2543 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Alias for backward compatibility."""
-from deepmd_utils.utils.argcheck import (
-    gen_args,
-    gen_doc,
-    gen_json,
-    list_to_doc,
-    normalize,
-    type_embedding_args,
+import json
+import logging
+from typing import (
+    Callable,
+    List,
+    Optional,
 )
 
-__all__ = [
-    "list_to_doc",
-    "normalize",
-    "gen_doc",
-    "gen_json",
-    "gen_args",
-    "type_embedding_args",
-]
+from dargs import (
+    Argument,
+    ArgumentEncoder,
+    Variant,
+    dargs,
+)
+
+from deepmd.common import (
+    VALID_ACTIVATION,
+    VALID_PRECISION,
+)
+from deepmd.utils.argcheck_nvnmd import (
+    nvnmd_args,
+)
+from deepmd.utils.plugin import (
+    Plugin,
+)
+
+log = logging.getLogger(__name__)
+
+
+ACTIVATION_FN_DICT = dict.fromkeys(VALID_ACTIVATION)
+PRECISION_DICT = dict.fromkeys(VALID_PRECISION)
+
+doc_only_tf_supported = "(Supported Backend: TensorFlow) "
+doc_only_pt_supported = "(Supported Backend: PyTorch) "
+
+
+def list_to_doc(xx):
+    items = []
+    for ii in xx:
+        if len(items) == 0:
+            items.append(f'"{ii}"')
+        else:
+            items.append(f', "{ii}"')
+    items.append(".")
+    return "".join(items)
+
+
+def make_link(content, ref_key):
+    return (
+        f"`{content} <{ref_key}_>`_"
+        if not dargs.RAW_ANCHOR
+        else f"`{content} <#{ref_key}>`_"
+    )
+
+
+def type_embedding_args():
+    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_seed = "Random seed for parameter initialization"
+    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_trainable = "If the parameters in the embedding net are trainable"
+
+    return [
+        Argument("neuron", List[int], optional=True, default=[8], doc=doc_neuron),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
+        Argument("precision", str, optional=True, default="default", doc=doc_precision),
+        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
+        Argument("seed", [int, None], optional=True, default=None, doc=doc_seed),
+    ]
+
+
+def spin_args():
+    doc_use_spin = (
+        "Whether to use atomic spin model for each atom type. "
+        "List of boolean values with the shape of [ntypes] to specify which types use spin, "
+        f"or a list of integer values {doc_only_pt_supported} "
+        "to indicate the index of the type that uses spin."
+    )
+    doc_spin_norm = "The magnitude of atomic spin for each atom type with spin"
+    doc_virtual_len = "The distance between virtual atom representing spin and its corresponding real atom for each atom type with spin"
+    doc_virtual_scale = (
+        "The scaling factor to determine the virtual distance between a virtual atom "
+        "representing spin and its corresponding real atom for each atom type with spin. "
+        "This factor is defined as the virtual distance divided by the magnitude of atomic spin "
+        "for each atom type with spin. The virtual coordinate is defined as the real coordinate "
+        "plus spin * virtual_scale. List of float values with shape of [ntypes] or [ntypes_spin] "
+        "or one single float value for all types, only used when use_spin is True for each atom type."
+    )
+
+    return [
+        Argument("use_spin", [List[bool], List[int]], doc=doc_use_spin),
+        Argument(
+            "spin_norm",
+            List[float],
+            optional=True,
+            doc=doc_only_tf_supported + doc_spin_norm,
+        ),
+        Argument(
+            "virtual_len",
+            List[float],
+            optional=True,
+            doc=doc_only_tf_supported + doc_virtual_len,
+        ),
+        Argument(
+            "virtual_scale",
+            [List[float], float],
+            optional=True,
+            doc=doc_only_pt_supported + doc_virtual_scale,
+        ),
+    ]
+
+
+#  --- Descriptor configurations: --- #
+
+
+class ArgsPlugin:
+    def __init__(self) -> None:
+        self.__plugin = Plugin()
+
+    def register(
+        self, name: str, alias: Optional[List[str]] = None, doc: str = ""
+    ) -> Callable[[], List[Argument]]:
+        """Register a descriptor argument plugin.
+
+        Parameters
+        ----------
+        name : str
+            the name of a descriptor
+        alias : List[str], optional
+            the list of aliases of this descriptor
+
+        Returns
+        -------
+        Callable[[], List[Argument]]
+            the registered descriptor argument method
+
+        Examples
+        --------
+        >>> some_plugin = ArgsPlugin()
+        >>> @some_plugin.register("some_descrpt")
+            def descrpt_some_descrpt_args():
+                return []
+        """
+        # convert alias to hashed item
+        if isinstance(alias, list):
+            alias = tuple(alias)
+        return self.__plugin.register((name, alias, doc))
+
+    def get_all_argument(self, exclude_hybrid: bool = False) -> List[Argument]:
+        """Get all arguments.
+
+        Parameters
+        ----------
+        exclude_hybrid : bool
+            exclude hybrid descriptor to prevent circular calls
+
+        Returns
+        -------
+        List[Argument]
+            all arguments
+        """
+        arguments = []
+        for (name, alias, doc), metd in self.__plugin.plugins.items():
+            if exclude_hybrid and name == "hybrid":
+                continue
+            arguments.append(
+                Argument(name=name, dtype=dict, sub_fields=metd(), alias=alias, doc=doc)
+            )
+        return arguments
+
+
+descrpt_args_plugin = ArgsPlugin()
+
+
+@descrpt_args_plugin.register("loc_frame", doc=doc_only_tf_supported)
+def descrpt_local_frame_args():
+    doc_sel_a = "A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_a[i]` gives the selected number of type-i neighbors. The full relative coordinates of the neighbors are used by the descriptor."
+    doc_sel_r = "A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_r[i]` gives the selected number of type-i neighbors. Only relative distance of the neighbors are used by the descriptor. sel_a[i] + sel_r[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius."
+    doc_rcut = "The cut-off radius. The default value is 6.0"
+    doc_axis_rule = "A list of integers. The length should be 6 times of the number of types. \n\n\
+- axis_rule[i*6+0]: class of the atom defining the first axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\
+- axis_rule[i*6+1]: type of the atom defining the first axis of type-i atom.\n\n\
+- axis_rule[i*6+2]: index of the axis atom defining the first axis. Note that the neighbors with the same class and type are sorted according to their relative distance.\n\n\
+- axis_rule[i*6+3]: class of the atom defining the second axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\
+- axis_rule[i*6+4]: type of the atom defining the second axis of type-i atom.\n\n\
+- axis_rule[i*6+5]: index of the axis atom defining the second axis. Note that the neighbors with the same class and type are sorted according to their relative distance."
+
+    return [
+        Argument("sel_a", List[int], optional=False, doc=doc_sel_a),
+        Argument("sel_r", List[int], optional=False, doc=doc_sel_r),
+        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
+        Argument("axis_rule", List[int], optional=False, doc=doc_axis_rule),
+    ]
+
+
+@descrpt_args_plugin.register("se_e2_a", alias=["se_a"])
+def descrpt_se_a_args():
+    doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\
+    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\
+    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
+    doc_rcut = "The cut-off radius."
+    doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`"
+    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
+    doc_axis_neuron = "Size of the submatrix of G (embedding matrix)."
+    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters."
+    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_trainable = "If the parameters in the embedding net is trainable"
+    doc_seed = "Random seed for parameter initialization"
+    doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1."
+    doc_env_protection = "Protection parameter to prevent division by zero errors during environment matrix calculations. For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection."
+    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used"
+
+    return [
+        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
+        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
+        Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
+        Argument(
+            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
+        ),
+        Argument(
+            "axis_neuron",
+            int,
+            optional=True,
+            default=4,
+            alias=["n_axis_neuron"],
+            doc=doc_axis_neuron,
+        ),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
+        Argument(
+            "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side
+        ),
+        Argument("precision", str, optional=True, default="default", doc=doc_precision),
+        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+        Argument(
+            "exclude_types",
+            List[List[int]],
+            optional=True,
+            default=[],
+            doc=doc_exclude_types,
+        ),
+        Argument(
+            "env_protection",
+            float,
+            optional=True,
+            default=0.0,
+            doc=doc_only_tf_supported + doc_env_protection,
+        ),
+        Argument(
+            "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
+        ),
+    ]
+
+
+@descrpt_args_plugin.register(
+    "se_e3", alias=["se_at", "se_a_3be", "se_t"], doc=doc_only_tf_supported
+)
+def descrpt_se_t_args():
+    doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\
+    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\
+    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
+    doc_rcut = "The cut-off radius."
+    doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`"
+    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
+    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_trainable = "If the parameters in the embedding net are trainable"
+    doc_seed = "Random seed for parameter initialization"
+    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used"
+
+    return [
+        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
+        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
+        Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
+        Argument(
+            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
+        ),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
+        Argument("precision", str, optional=True, default="default", doc=doc_precision),
+        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+        Argument(
+            "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
+        ),
+    ]
+
+
+@descrpt_args_plugin.register("se_a_tpe", alias=["se_a_ebd"], doc=doc_only_tf_supported)
+def descrpt_se_a_tpe_args():
+    doc_type_nchanl = "number of channels for type embedding"
+    doc_type_nlayer = "number of hidden layers of type embedding net"
+    doc_numb_aparam = "dimension of atomic parameter. if set to a value > 0, the atomic parameters are embedded."
+
+    return [
+        *descrpt_se_a_args(),
+        Argument("type_nchanl", int, optional=True, default=4, doc=doc_type_nchanl),
+        Argument("type_nlayer", int, optional=True, default=2, doc=doc_type_nlayer),
+        Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
+    ]
+
+
+@descrpt_args_plugin.register("se_e2_r", alias=["se_r"])
+def descrpt_se_r_args():
+    doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\
+    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\
+    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
+    doc_rcut = "The cut-off radius."
+    doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`"
+    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
+    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters."
+    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_trainable = "If the parameters in the embedding net are trainable"
+    doc_seed = "Random seed for parameter initialization"
+    doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1."
+    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used"
+
+    return [
+        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
+        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
+        Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
+        Argument(
+            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
+        ),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
+        Argument(
+            "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side
+        ),
+        Argument("precision", str, optional=True, default="default", doc=doc_precision),
+        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+        Argument(
+            "exclude_types",
+            List[List[int]],
+            optional=True,
+            default=[],
+            doc=doc_exclude_types,
+        ),
+        Argument(
+            "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
+        ),
+    ]
+
+
+@descrpt_args_plugin.register("hybrid")
+def descrpt_hybrid_args():
+    doc_list = "A list of descriptor definitions"
+
+    return [
+        Argument(
+            "list",
+            list,
+            optional=False,
+            doc=doc_list,
+            repeat=True,
+            sub_fields=[],
+            sub_variants=[descrpt_variant_type_args(exclude_hybrid=True)],
+            fold_subdoc=True,
+        )
+    ]
+
+
+def descrpt_se_atten_common_args():
+    doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\
+    - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\
+    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\
+    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
+    doc_rcut = "The cut-off radius."
+    doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`"
+    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
+    doc_axis_neuron = "Size of the submatrix of G (embedding matrix)."
+    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_resnet_dt = (
+        doc_only_tf_supported + 'Whether to use a "Timestep" in the skip connection'
+    )
+    doc_type_one_side = (
+        doc_only_tf_supported
+        + r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters."
+    )
+    doc_precision = (
+        doc_only_tf_supported
+        + f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    )
+    doc_trainable = (
+        doc_only_tf_supported + "If the parameters in the embedding net is trainable"
+    )
+    doc_seed = "Random seed for parameter initialization"
+    doc_exclude_types = (
+        doc_only_tf_supported
+        + "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1."
+    )
+    doc_attn = "The length of hidden vectors in attention layers"
+    doc_attn_layer = "The number of attention layers. Note that model compression of `se_atten` is only enabled when attn_layer==0 and stripped_type_embedding is True"
+    doc_attn_dotr = "Whether to do dot product with the normalized relative coordinates"
+    doc_attn_mask = "Whether to do mask on the diagonal in the attention matrix"
+
+    return [
+        Argument(
+            "sel", [int, List[int], str], optional=True, default="auto", doc=doc_sel
+        ),
+        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
+        Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
+        Argument(
+            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
+        ),
+        Argument(
+            "axis_neuron",
+            int,
+            optional=True,
+            default=4,
+            alias=["n_axis_neuron"],
+            doc=doc_axis_neuron,
+        ),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
+        Argument(
+            "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side
+        ),
+        Argument("precision", str, optional=True, default="default", doc=doc_precision),
+        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+        Argument(
+            "exclude_types",
+            List[List[int]],
+            optional=True,
+            default=[],
+            doc=doc_exclude_types,
+        ),
+        Argument("attn", int, optional=True, default=128, doc=doc_attn),
+        Argument("attn_layer", int, optional=True, default=2, doc=doc_attn_layer),
+        Argument("attn_dotr", bool, optional=True, default=True, doc=doc_attn_dotr),
+        Argument("attn_mask", bool, optional=True, default=False, doc=doc_attn_mask),
+    ]
+
+
+@descrpt_args_plugin.register("se_atten", alias=["dpa1"])
+def descrpt_se_atten_args():
+    doc_stripped_type_embedding = "Whether to strip the type embedding into a separated embedding network. Setting it to `False` will fall back to the previous version of `se_atten` which is non-compressible."
+    doc_smooth_type_embdding = "When using stripped type embedding, whether to dot smooth factor on the network output of type embedding to keep the network smooth, instead of setting `set_davg_zero` to be True."
+    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `se_atten` descriptor or `atom_ener` in the energy fitting is used"
+    doc_tebd_dim = "The dimension of atom type embedding."
+    doc_temperature = "The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K)."
+    doc_scaling_factor = (
+        "The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K). "
+        "If `temperature` is None, the scaling of attention weights is (N_hidden_dim * scaling_factor)**0.5. "
+        "Else, the scaling of attention weights is setting to `temperature`."
+    )
+    doc_normalize = (
+        "Whether to normalize the hidden vectors during attention calculation."
+    )
+    doc_concat_output_tebd = (
+        "Whether to concat type embedding at the output of the descriptor."
+    )
+    doc_deprecated = "This feature will be removed in a future release."
+
+    return [
+        *descrpt_se_atten_common_args(),
+        Argument(
+            "stripped_type_embedding",
+            bool,
+            optional=True,
+            default=False,
+            doc=doc_only_tf_supported + doc_stripped_type_embedding,
+        ),
+        Argument(
+            "smooth_type_embdding",
+            bool,
+            optional=True,
+            default=False,
+            doc=doc_only_tf_supported + doc_smooth_type_embdding,
+        ),
+        Argument(
+            "set_davg_zero", bool, optional=True, default=True, doc=doc_set_davg_zero
+        ),
+        # pt only
+        Argument(
+            "tebd_dim",
+            int,
+            optional=True,
+            default=8,
+            doc=doc_only_pt_supported + doc_tebd_dim,
+        ),
+        Argument(
+            "tebd_input_mode",
+            str,
+            optional=True,
+            default="concat",
+            doc=doc_only_pt_supported + doc_deprecated,
+        ),
+        Argument(
+            "post_ln",
+            bool,
+            optional=True,
+            default=True,
+            doc=doc_only_pt_supported + doc_deprecated,
+        ),
+        Argument(
+            "ffn",
+            bool,
+            optional=True,
+            default=False,
+            doc=doc_only_pt_supported + doc_deprecated,
+        ),
+        Argument(
+            "ffn_embed_dim",
+            int,
+            optional=True,
+            default=1024,
+            doc=doc_only_pt_supported + doc_deprecated,
+        ),
+        Argument(
+            "scaling_factor",
+            float,
+            optional=True,
+            default=1.0,
+            doc=doc_only_pt_supported + doc_scaling_factor,
+        ),
+        Argument(
+            "head_num",
+            int,
+            optional=True,
+            default=1,
+            doc=doc_only_pt_supported + doc_deprecated,
+        ),
+        Argument(
+            "normalize",
+            bool,
+            optional=True,
+            default=True,
+            doc=doc_only_pt_supported + doc_normalize,
+        ),
+        Argument(
+            "temperature",
+            float,
+            optional=True,
+            doc=doc_only_pt_supported + doc_temperature,
+        ),
+        Argument(
+            "return_rot",
+            bool,
+            optional=True,
+            default=False,
+            doc=doc_only_pt_supported + doc_deprecated,
+        ),
+        Argument(
+            "concat_output_tebd",
+            bool,
+            optional=True,
+            default=True,
+            doc=doc_only_pt_supported + doc_concat_output_tebd,
+        ),
+    ]
+
+
+@descrpt_args_plugin.register("se_atten_v2", doc=doc_only_tf_supported)
+def descrpt_se_atten_v2_args():
+    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `se_atten` descriptor or `atom_ener` in the energy fitting is used"
+
+    return [
+        *descrpt_se_atten_common_args(),
+        Argument(
+            "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
+        ),
+    ]
+
+
+@descrpt_args_plugin.register("dpa2", doc=doc_only_pt_supported)
+def descrpt_dpa2_args():
+    # Generate by GitHub Copilot
+    doc_repinit_rcut = "The cut-off radius of the repinit block"
+    doc_repinit_rcut_smth = "From this position the inverse distance smoothly decays to 0 at the cut-off. Use in the repinit block."
+    doc_repinit_nsel = "Maximally possible number of neighbors for repinit block."
+    doc_repformer_rcut = "The cut-off radius of the repformer block"
+    doc_repformer_rcut_smth = "From this position the inverse distance smoothly decays to 0 at the cut-off. Use in the repformer block."
+    doc_repformer_nsel = "Maximally possible number of neighbors for repformer block."
+    doc_tebd_dim = "The dimension of atom type embedding"
+    doc_concat_output_tebd = (
+        "Whether to concat type embedding at the output of the descriptor."
+    )
+    doc_repinit_neuron = "repinit block: the number of neurons in the embedding net."
+    doc_repinit_axis_neuron = (
+        "repinit block: the number of dimension of split in the symmetrization op."
+    )
+    doc_repinit_activation = (
+        "repinit block: the activation function in the embedding net"
+    )
+    doc_repformer_nlayers = "repformers block: the number of repformer layers"
+    doc_repformer_g1_dim = "repformers block: the dimension of single-atom rep"
+    doc_repformer_g2_dim = "repformers block: the dimension of invariant pair-atom rep"
+    doc_repformer_axis_dim = (
+        "repformers block: the number of dimension of split in the symmetrization ops."
+    )
+    doc_repformer_do_bn_mode = "repformers block: do batch norm in the repformer layers"
+    doc_repformer_bn_momentum = "repformers block: moment in the batch normalization"
+    doc_repformer_update_g1_has_conv = (
+        "repformers block: update the g1 rep with convolution term"
+    )
+    doc_repformer_update_g1_has_drrd = (
+        "repformers block: update the g1 rep with the drrd term"
+    )
+    doc_repformer_update_g1_has_grrg = (
+        "repformers block: update the g1 rep with the grrg term"
+    )
+    doc_repformer_update_g1_has_attn = (
+        "repformers block: update the g1 rep with the localized self-attention"
+    )
+    doc_repformer_update_g2_has_g1g1 = (
+        "repformers block: update the g2 rep with the g1xg1 term"
+    )
+    doc_repformer_update_g2_has_attn = (
+        "repformers block: update the g2 rep with the gated self-attention"
+    )
+    doc_repformer_update_h2 = "repformers block: update the h2 rep"
+    doc_repformer_attn1_hidden = (
+        "repformers block: the hidden dimension of localized self-attention"
+    )
+    doc_repformer_attn1_nhead = (
+        "repformers block: the number of heads in localized self-attention"
+    )
+    doc_repformer_attn2_hidden = (
+        "repformers block: the hidden dimension of gated self-attention"
+    )
+    doc_repformer_attn2_nhead = (
+        "repformers block: the number of heads in gated self-attention"
+    )
+    doc_repformer_attn2_has_gate = (
+        "repformers block: has gate in the gated self-attention"
+    )
+    doc_repformer_activation = "repformers block: the activation function in the MLPs."
+    doc_repformer_update_style = "repformers block: style of update a rep. can be res_avg or res_incr. res_avg updates a rep `u` with: u = 1/\\sqrt{n+1} (u + u_1 + u_2 + ... + u_n) res_incr updates a rep `u` with: u = u + 1/\\sqrt{n} (u_1 + u_2 + ... + u_n)"
+    doc_repformer_set_davg_zero = "repformers block: set the avg to zero in statistics"
+    doc_repformer_add_type_ebd_to_seq = (
+        "repformers block: concatenate the type embedding at the output"
+    )
+    return [
+        Argument("repinit_rcut", float, doc=doc_repinit_rcut),
+        Argument("repinit_rcut_smth", float, doc=doc_repinit_rcut_smth),
+        Argument("repinit_nsel", int, doc=doc_repinit_nsel),
+        Argument("repformer_rcut", float, doc=doc_repformer_rcut),
+        Argument("repformer_rcut_smth", float, doc=doc_repformer_rcut_smth),
+        Argument("repformer_nsel", int, doc=doc_repformer_nsel),
+        Argument("tebd_dim", int, optional=True, default=8, doc=doc_tebd_dim),
+        Argument(
+            "concat_output_tebd",
+            bool,
+            optional=True,
+            default=True,
+            doc=doc_concat_output_tebd,
+        ),
+        Argument(
+            "repinit_neuron",
+            list,
+            optional=True,
+            default=[25, 50, 100],
+            doc=doc_repinit_neuron,
+        ),
+        Argument(
+            "repinit_axis_neuron",
+            int,
+            optional=True,
+            default=16,
+            doc=doc_repinit_axis_neuron,
+        ),
+        Argument("repinit_set_davg_zero", bool, optional=True, default=True),
+        Argument(
+            "repinit_activation",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_repinit_activation,
+        ),
+        Argument(
+            "repformer_nlayers",
+            int,
+            optional=True,
+            default=3,
+            doc=doc_repformer_nlayers,
+        ),
+        Argument(
+            "repformer_g1_dim",
+            int,
+            optional=True,
+            default=128,
+            doc=doc_repformer_g1_dim,
+        ),
+        Argument(
+            "repformer_g2_dim", int, optional=True, default=16, doc=doc_repformer_g2_dim
+        ),
+        Argument(
+            "repformer_axis_dim",
+            int,
+            optional=True,
+            default=4,
+            doc=doc_repformer_axis_dim,
+        ),
+        Argument(
+            "repformer_do_bn_mode",
+            str,
+            optional=True,
+            default="no",
+            doc=doc_repformer_do_bn_mode,
+        ),
+        Argument(
+            "repformer_bn_momentum",
+            float,
+            optional=True,
+            default=0.1,
+            doc=doc_repformer_bn_momentum,
+        ),
+        Argument(
+            "repformer_update_g1_has_conv",
+            bool,
+            optional=True,
+            default=True,
+            doc=doc_repformer_update_g1_has_conv,
+        ),
+        Argument(
+            "repformer_update_g1_has_drrd",
+            bool,
+            optional=True,
+            default=True,
+            doc=doc_repformer_update_g1_has_drrd,
+        ),
+        Argument(
+            "repformer_update_g1_has_grrg",
+            bool,
+            optional=True,
+            default=True,
+            doc=doc_repformer_update_g1_has_grrg,
+        ),
+        Argument(
+            "repformer_update_g1_has_attn",
+            bool,
+            optional=True,
+            default=True,
+            doc=doc_repformer_update_g1_has_attn,
+        ),
+        Argument(
+            "repformer_update_g2_has_g1g1",
+            bool,
+            optional=True,
+            default=True,
+            doc=doc_repformer_update_g2_has_g1g1,
+        ),
+        Argument(
+            "repformer_update_g2_has_attn",
+            bool,
+            optional=True,
+            default=True,
+            doc=doc_repformer_update_g2_has_attn,
+        ),
+        Argument(
+            "repformer_update_h2",
+            bool,
+            optional=True,
+            default=False,
+            doc=doc_repformer_update_h2,
+        ),
+        Argument(
+            "repformer_attn1_hidden",
+            int,
+            optional=True,
+            default=64,
+            doc=doc_repformer_attn1_hidden,
+        ),
+        Argument(
+            "repformer_attn1_nhead",
+            int,
+            optional=True,
+            default=4,
+            doc=doc_repformer_attn1_nhead,
+        ),
+        Argument(
+            "repformer_attn2_hidden",
+            int,
+            optional=True,
+            default=16,
+            doc=doc_repformer_attn2_hidden,
+        ),
+        Argument(
+            "repformer_attn2_nhead",
+            int,
+            optional=True,
+            default=4,
+            doc=doc_repformer_attn2_nhead,
+        ),
+        Argument(
+            "repformer_attn2_has_gate",
+            bool,
+            optional=True,
+            default=False,
+            doc=doc_repformer_attn2_has_gate,
+        ),
+        Argument(
+            "repformer_activation",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_repformer_activation,
+        ),
+        Argument(
+            "repformer_update_style",
+            str,
+            optional=True,
+            default="res_avg",
+            doc=doc_repformer_update_style,
+        ),
+        Argument(
+            "repformer_set_davg_zero",
+            bool,
+            optional=True,
+            default=True,
+            doc=doc_repformer_set_davg_zero,
+        ),
+        Argument(
+            "repformer_add_type_ebd_to_seq",
+            bool,
+            optional=True,
+            default=False,
+            doc=doc_repformer_add_type_ebd_to_seq,
+        ),
+    ]
+
+
+@descrpt_args_plugin.register(
+    "se_a_ebd_v2", alias=["se_a_tpe_v2"], doc=doc_only_tf_supported
+)
+def descrpt_se_a_ebd_v2_args():
+    return descrpt_se_a_args()
+
+
+@descrpt_args_plugin.register("se_a_mask", doc=doc_only_tf_supported)
+def descrpt_se_a_mask_args():
+    doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\
+    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\
+    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
+
+    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
+    doc_axis_neuron = "Size of the submatrix of G (embedding matrix)."
+    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters."
+    doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1."
+    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_trainable = "If the parameters in the embedding net is trainable"
+    doc_seed = "Random seed for parameter initialization"
+
+    return [
+        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
+        Argument(
+            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
+        ),
+        Argument(
+            "axis_neuron",
+            int,
+            optional=True,
+            default=4,
+            alias=["n_axis_neuron"],
+            doc=doc_axis_neuron,
+        ),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
+        Argument(
+            "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side
+        ),
+        Argument(
+            "exclude_types",
+            List[List[int]],
+            optional=True,
+            default=[],
+            doc=doc_exclude_types,
+        ),
+        Argument("precision", str, optional=True, default="default", doc=doc_precision),
+        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+    ]
+
+
+def descrpt_variant_type_args(exclude_hybrid: bool = False) -> Variant:
+    link_lf = make_link("loc_frame", "model/descriptor[loc_frame]")
+    link_se_e2_a = make_link("se_e2_a", "model/descriptor[se_e2_a]")
+    link_se_e2_r = make_link("se_e2_r", "model/descriptor[se_e2_r]")
+    link_se_e3 = make_link("se_e3", "model/descriptor[se_e3]")
+    link_se_a_tpe = make_link("se_a_tpe", "model/descriptor[se_a_tpe]")
+    link_hybrid = make_link("hybrid", "model/descriptor[hybrid]")
+    link_se_atten = make_link("se_atten", "model/descriptor[se_atten]")
+    link_se_atten_v2 = make_link("se_atten_v2", "model/descriptor[se_atten_v2]")
+    doc_descrpt_type = "The type of the descritpor. See explanation below. \n\n\
+- `loc_frame`: Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame.\n\n\
+- `se_e2_a`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.\n\n\
+- `se_e2_r`: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor.\n\n\
+- `se_e3`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor.\n\n\
+- `se_a_tpe`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Type embedding will be used by this descriptor.\n\n\
+- `se_atten`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism will be used by this descriptor.\n\n\
+- `se_atten_v2`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism with new modifications will be used by this descriptor.\n\n\
+- `se_a_mask`: Used by the smooth edition of Deep Potential. It can accept a variable number of atoms in a frame (Non-PBC system). *aparam* are required as an indicator matrix for the real/virtual sign of input atoms. \n\n\
+- `hybrid`: Concatenate of a list of descriptors as a new descriptor."
+
+    return Variant(
+        "type",
+        descrpt_args_plugin.get_all_argument(exclude_hybrid=exclude_hybrid),
+        doc=doc_descrpt_type,
+    )
+
+
+#  --- Fitting net configurations: --- #
+fitting_args_plugin = ArgsPlugin()
+
+
+@fitting_args_plugin.register("ener")
+def fitting_ener():
+    doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams."
+    doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams."
+    doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
+    doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_trainable = f"Whether the parameters in the fitting net are trainable. This option can be\n\n\
+- bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\
+- list of bool{doc_only_tf_supported}: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of this list should be equal to len(`neuron`)+1."
+    doc_rcond = "The condition number used to determine the inital energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details."
+    doc_seed = "Random seed for parameter initialization of the fitting net"
+    doc_atom_ener = "Specify the atomic energy in vacuum for each type"
+    doc_layer_name = (
+        "The name of the each layer. The length of this list should be equal to n_neuron + 1. "
+        "If two layers, either in the same fitting or different fittings, "
+        "have the same name, they will share the same neural network parameters. "
+        "The shape of these layers should be the same. "
+        "If null is given for a layer, parameters will not be shared."
+    )
+    doc_use_aparam_as_mask = (
+        "Whether to use the aparam as a mask in input."
+        "If True, the aparam will not be used in fitting net for embedding."
+        "When descrpt is se_a_mask, the aparam will be used as a mask to indicate the input atom is real/virtual. And use_aparam_as_mask should be set to True."
+    )
+
+    return [
+        Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam),
+        Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
+        Argument(
+            "neuron",
+            List[int],
+            optional=True,
+            default=[120, 120, 120],
+            alias=["n_neuron"],
+            doc=doc_neuron,
+        ),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("precision", str, optional=True, default="default", doc=doc_precision),
+        Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
+        Argument(
+            "trainable",
+            [List[bool], bool],
+            optional=True,
+            default=True,
+            doc=doc_trainable,
+        ),
+        Argument(
+            "rcond", [float, type(None)], optional=True, default=None, doc=doc_rcond
+        ),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+        Argument(
+            "atom_ener",
+            List[Optional[float]],
+            optional=True,
+            default=[],
+            doc=doc_atom_ener,
+        ),
+        Argument("layer_name", List[str], optional=True, doc=doc_layer_name),
+        Argument(
+            "use_aparam_as_mask",
+            bool,
+            optional=True,
+            default=False,
+            doc=doc_use_aparam_as_mask,
+        ),
+    ]
+
+
+@fitting_args_plugin.register("dos", doc=doc_only_tf_supported)
+def fitting_dos():
+    doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams."
+    doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams."
+    doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
+    doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_trainable = "Whether the parameters in the fitting net are trainable. This option can be\n\n\
+- bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\
+- list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of tihs list should be equal to len(`neuron`)+1."
+    doc_rcond = "The condition number used to determine the inital energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details."
+    doc_seed = "Random seed for parameter initialization of the fitting net"
+    doc_numb_dos = (
+        "The number of gridpoints on which the DOS is evaluated (NEDOS in VASP)"
+    )
+
+    return [
+        Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam),
+        Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
+        Argument(
+            "neuron", List[int], optional=True, default=[120, 120, 120], doc=doc_neuron
+        ),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("precision", str, optional=True, default="float64", doc=doc_precision),
+        Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
+        Argument(
+            "trainable",
+            [List[bool], bool],
+            optional=True,
+            default=True,
+            doc=doc_trainable,
+        ),
+        Argument(
+            "rcond", [float, type(None)], optional=True, default=None, doc=doc_rcond
+        ),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+        Argument("numb_dos", int, optional=True, default=300, doc=doc_numb_dos),
+    ]
+
+
+@fitting_args_plugin.register("polar")
+def fitting_polar():
+    doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
+    doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_scale = "The output of the fitting net (polarizability matrix) will be scaled by ``scale``"
+    # doc_diag_shift = 'The diagonal part of the polarizability matrix  will be shifted by ``diag_shift``. The shift operation is carried out after ``scale``.'
+    doc_fit_diag = "Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to normal polarizability matrix by contracting with the rotation matrix."
+    doc_sel_type = "The atom types for which the atomic polarizability will be provided. If not set, all types will be selected."
+    doc_seed = "Random seed for parameter initialization of the fitting net"
+
+    # YWolfeee: user can decide whether to use shift diag
+    doc_shift_diag = "Whether to shift the diagonal of polar, which is beneficial to training. Default is true."
+
+    return [
+        Argument(
+            "neuron",
+            List[int],
+            optional=True,
+            default=[120, 120, 120],
+            alias=["n_neuron"],
+            doc=doc_neuron,
+        ),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
+        Argument("precision", str, optional=True, default="default", doc=doc_precision),
+        Argument("fit_diag", bool, optional=True, default=True, doc=doc_fit_diag),
+        Argument(
+            "scale", [List[float], float], optional=True, default=1.0, doc=doc_scale
+        ),
+        # Argument("diag_shift", [list,float], optional = True, default = 0.0, doc = doc_diag_shift),
+        Argument("shift_diag", bool, optional=True, default=True, doc=doc_shift_diag),
+        Argument(
+            "sel_type",
+            [List[int], int, None],
+            optional=True,
+            alias=["pol_type"],
+            doc=doc_sel_type + doc_only_tf_supported,
+        ),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+    ]
+
+
+# def fitting_global_polar():
+#    return fitting_polar()
+
+
+@fitting_args_plugin.register("dipole")
+def fitting_dipole():
+    doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
+    doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_sel_type = "The atom types for which the atomic dipole will be provided. If not set, all types will be selected."
+    doc_seed = "Random seed for parameter initialization of the fitting net"
+    return [
+        Argument(
+            "neuron",
+            List[int],
+            optional=True,
+            default=[120, 120, 120],
+            alias=["n_neuron"],
+            doc=doc_neuron,
+        ),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
+        Argument("precision", str, optional=True, default="default", doc=doc_precision),
+        Argument(
+            "sel_type",
+            [List[int], int, None],
+            optional=True,
+            alias=["dipole_type"],
+            doc=doc_sel_type + doc_only_tf_supported,
+        ),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+    ]
+
+
+#   YWolfeee: Delete global polar mode, merge it into polar mode and use loss setting to support.
+def fitting_variant_type_args():
+    doc_descrpt_type = "The type of the fitting. See explanation below. \n\n\
+- `ener`: Fit an energy model (potential energy surface).\n\n\
+- `dos` : Fit a density of states model. The total density of states / site-projected density of states labels should be provided by `dos.npy` or `atom_dos.npy` in each data system. The file has number of frames lines and number of energy grid columns (times number of atoms in `atom_dos.npy`). See `loss` parameter. \n\n\
+- `dipole`: Fit an atomic dipole model. Global dipole labels or atomic dipole labels for all the selected atoms (see `sel_type`) should be provided by `dipole.npy` in each data system. The file either has number of frames lines and 3 times of number of selected atoms columns, or has number of frames lines and 3 columns. See `loss` parameter.\n\n\
+- `polar`: Fit an atomic polarizability model. Global polarizazbility labels or atomic polarizability labels for all the selected atoms (see `sel_type`) should be provided by `polarizability.npy` in each data system. The file eith has number of frames lines and 9 times of number of selected atoms columns, or has number of frames lines and 9 columns. See `loss` parameter.\n\n"
+
+    return Variant(
+        "type",
+        fitting_args_plugin.get_all_argument(),
+        optional=True,
+        default_tag="ener",
+        doc=doc_descrpt_type,
+    )
+
+
+#  --- Modifier configurations: --- #
+def modifier_dipole_charge():
+    doc_model_name = "The name of the frozen dipole model file."
+    doc_model_charge_map = f"The charge of the WFCC. The list length should be the same as the {make_link('sel_type', 'model/fitting_net[dipole]/sel_type')}. "
+    doc_sys_charge_map = f"The charge of real atoms. The list length should be the same as the {make_link('type_map', 'model/type_map')}"
+    doc_ewald_h = "The grid spacing of the FFT grid. Unit is A"
+    doc_ewald_beta = f"The splitting parameter of Ewald sum. Unit is A^{-1}"
+
+    return [
+        Argument("model_name", str, optional=False, doc=doc_model_name),
+        Argument(
+            "model_charge_map", List[float], optional=False, doc=doc_model_charge_map
+        ),
+        Argument("sys_charge_map", List[float], optional=False, doc=doc_sys_charge_map),
+        Argument("ewald_beta", float, optional=True, default=0.4, doc=doc_ewald_beta),
+        Argument("ewald_h", float, optional=True, default=1.0, doc=doc_ewald_h),
+    ]
+
+
+def modifier_variant_type_args():
+    doc_modifier_type = "The type of modifier. See explanation below.\n\n\
+-`dipole_charge`: Use WFCC to model the electronic structure of the system. Correct the long-range interaction"
+    return Variant(
+        "type",
+        [
+            Argument("dipole_charge", dict, modifier_dipole_charge()),
+        ],
+        optional=False,
+        doc=doc_modifier_type,
+    )
+
+
+#  --- model compression configurations: --- #
+def model_compression():
+    doc_model_file = "The input model file, which will be compressed by the DeePMD-kit."
+    doc_table_config = "The arguments of model compression, including extrapolate(scale of model extrapolation), stride(uniform stride of tabulation's first and second table), and frequency(frequency of tabulation overflow check)."
+    doc_min_nbor_dist = (
+        "The nearest distance between neighbor atoms saved in the frozen model."
+    )
+
+    return [
+        Argument("model_file", str, optional=False, doc=doc_model_file),
+        Argument("table_config", List[float], optional=False, doc=doc_table_config),
+        Argument("min_nbor_dist", float, optional=False, doc=doc_min_nbor_dist),
+    ]
+
+
+#  --- model compression configurations: --- #
+def model_compression_type_args():
+    doc_compress_type = "The type of model compression, which should be consistent with the descriptor type."
+
+    return Variant(
+        "type",
+        [Argument("se_e2_a", dict, model_compression(), alias=["se_a"])],
+        optional=True,
+        default_tag="se_e2_a",
+        doc=doc_compress_type,
+    )
+
+
+def model_args(exclude_hybrid=False):
+    doc_type_map = "A list of strings. Give the name to each type of atoms. It is noted that the number of atom type of training system must be less than 128 in a GPU environment. If not given, type.raw in each system should use the same type indexes, and type_map.raw will take no effect."
+    doc_data_stat_nbatch = "The model determines the normalization from the statistics of the data. This key specifies the number of `frames` in each `system` used for statistics."
+    doc_data_stat_protect = "Protect parameter for atomic energy regression."
+    doc_data_bias_nsample = "The number of training samples in a system to compute and change the energy bias."
+    doc_type_embedding = "The type embedding."
+    doc_modifier = "The modifier of model output."
+    doc_use_srtab = "The table for the short-range pairwise interaction added on top of DP. The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. The first colume is the distance between atoms. The second to the last columes are energies for pairs of certain types. For example we have two atom types, 0 and 1. The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly."
+    doc_smin_alpha = "The short-range tabulated interaction will be swithed according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided."
+    doc_sw_rmin = "The lower boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided."
+    doc_sw_rmax = "The upper boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided."
+    doc_srtab_add_bias = "Whether add energy bias from the statistics of the data to short-range tabulated atomic energy. It only takes effect when `use_srtab` is provided."
+    doc_compress_config = "Model compression configurations"
+    doc_spin = "The settings for systems with spin."
+    doc_atom_exclude_types = "Exclude the atomic contribution of the listed atom types"
+    doc_pair_exclude_types = "The atom pairs of the listed types are not treated to be neighbors, i.e. they do not see each other."
+
+    hybrid_models = []
+    if not exclude_hybrid:
+        hybrid_models.extend(
+            [
+                pairwise_dprc(),
+                linear_ener_model_args(),
+            ]
+        )
+    return Argument(
+        "model",
+        dict,
+        [
+            Argument("type_map", List[str], optional=True, doc=doc_type_map),
+            Argument(
+                "data_stat_nbatch",
+                int,
+                optional=True,
+                default=10,
+                doc=doc_data_stat_nbatch,
+            ),
+            Argument(
+                "data_stat_protect",
+                float,
+                optional=True,
+                default=1e-2,
+                doc=doc_data_stat_protect,
+            ),
+            Argument(
+                "data_bias_nsample",
+                int,
+                optional=True,
+                default=10,
+                doc=doc_data_bias_nsample,
+            ),
+            Argument(
+                "use_srtab",
+                str,
+                optional=True,
+                doc=doc_only_tf_supported + doc_use_srtab,
+            ),
+            Argument(
+                "smin_alpha",
+                float,
+                optional=True,
+                doc=doc_only_tf_supported + doc_smin_alpha,
+            ),
+            Argument(
+                "sw_rmin", float, optional=True, doc=doc_only_tf_supported + doc_sw_rmin
+            ),
+            Argument(
+                "sw_rmax", float, optional=True, doc=doc_only_tf_supported + doc_sw_rmax
+            ),
+            Argument(
+                "pair_exclude_types",
+                list,
+                optional=True,
+                default=[],
+                doc=doc_only_pt_supported + doc_pair_exclude_types,
+            ),
+            Argument(
+                "atom_exclude_types",
+                list,
+                optional=True,
+                default=[],
+                doc=doc_only_pt_supported + doc_atom_exclude_types,
+            ),
+            Argument(
+                "srtab_add_bias",
+                bool,
+                optional=True,
+                default=True,
+                doc=doc_only_tf_supported + doc_srtab_add_bias,
+            ),
+            Argument(
+                "type_embedding",
+                dict,
+                type_embedding_args(),
+                [],
+                optional=True,
+                doc=doc_only_tf_supported + doc_type_embedding,
+            ),
+            Argument(
+                "modifier",
+                dict,
+                [],
+                [modifier_variant_type_args()],
+                optional=True,
+                doc=doc_only_tf_supported + doc_modifier,
+            ),
+            Argument(
+                "compress",
+                dict,
+                [],
+                [model_compression_type_args()],
+                optional=True,
+                doc=doc_only_tf_supported + doc_compress_config,
+                fold_subdoc=True,
+            ),
+            Argument("spin", dict, spin_args(), [], optional=True, doc=doc_spin),
+        ],
+        [
+            Variant(
+                "type",
+                [
+                    standard_model_args(),
+                    multi_model_args(),
+                    frozen_model_args(),
+                    pairtab_model_args(),
+                    *hybrid_models,
+                ],
+                optional=True,
+                default_tag="standard",
+            ),
+        ],
+    )
+
+
+def standard_model_args() -> Argument:
+    doc_descrpt = "The descriptor of atomic environment."
+    doc_fitting = "The fitting of physical properties."
+
+    ca = Argument(
+        "standard",
+        dict,
+        [
+            Argument(
+                "descriptor", dict, [], [descrpt_variant_type_args()], doc=doc_descrpt
+            ),
+            Argument(
+                "fitting_net",
+                dict,
+                [],
+                [fitting_variant_type_args()],
+                doc=doc_fitting,
+            ),
+        ],
+        doc="Stardard model, which contains a descriptor and a fitting.",
+    )
+    return ca
+
+
+def multi_model_args() -> Argument:
+    doc_descrpt = "The descriptor of atomic environment. See model[standard]/descriptor for details."
+    doc_fitting_net_dict = "The dictionary of multiple fitting nets in multi-task mode. Each fitting_net_dict[fitting_key] is the single definition of fitting of physical properties with user-defined name `fitting_key`."
+
+    ca = Argument(
+        "multi",
+        dict,
+        [
+            Argument(
+                "descriptor",
+                dict,
+                [],
+                [descrpt_variant_type_args()],
+                doc=doc_descrpt,
+                fold_subdoc=True,
+            ),
+            Argument("fitting_net_dict", dict, doc=doc_fitting_net_dict),
+        ],
+        doc=doc_only_tf_supported + "Multiple-task model.",
+    )
+    return ca
+
+
+def pairwise_dprc() -> Argument:
+    qm_model_args = model_args(exclude_hybrid=True)
+    qm_model_args.name = "qm_model"
+    qm_model_args.fold_subdoc = True
+    qmmm_model_args = model_args(exclude_hybrid=True)
+    qmmm_model_args.name = "qmmm_model"
+    qmmm_model_args.fold_subdoc = True
+    ca = Argument(
+        "pairwise_dprc",
+        dict,
+        [
+            qm_model_args,
+            qmmm_model_args,
+        ],
+        doc=doc_only_tf_supported,
+    )
+    return ca
+
+
+def frozen_model_args() -> Argument:
+    doc_model_file = "Path to the frozen model file."
+    ca = Argument(
+        "frozen",
+        dict,
+        [
+            Argument("model_file", str, optional=False, doc=doc_model_file),
+        ],
+    )
+    return ca
+
+
+def pairtab_model_args() -> Argument:
+    doc_tab_file = "Path to the tabulation file."
+    doc_rcut = "The cut-off radius."
+    doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\
+    - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\
+    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\
+    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
+    ca = Argument(
+        "pairtab",
+        dict,
+        [
+            Argument("tab_file", str, optional=False, doc=doc_tab_file),
+            Argument("rcut", float, optional=False, doc=doc_rcut),
+            Argument("sel", [int, List[int], str], optional=False, doc=doc_sel),
+        ],
+        doc=doc_only_tf_supported + "Pairwise tabulation energy model.",
+    )
+    return ca
+
+
+def linear_ener_model_args() -> Argument:
+    doc_weights = (
+        "If the type is list of float, a list of weights for each model. "
+        'If "mean", the weights are set to be 1 / len(models). '
+        'If "sum", the weights are set to be 1.'
+    )
+    models_args = model_args(exclude_hybrid=True)
+    models_args.name = "models"
+    models_args.fold_subdoc = True
+    models_args.set_dtype(list)
+    models_args.set_repeat(True)
+    models_args.doc = "The sub-models."
+    ca = Argument(
+        "linear_ener",
+        dict,
+        [
+            models_args,
+            Argument(
+                "weights",
+                [list, str],
+                optional=False,
+                doc=doc_weights,
+            ),
+        ],
+        doc=doc_only_tf_supported,
+    )
+    return ca
+
+
+#  --- Learning rate configurations: --- #
+def learning_rate_exp():
+    doc_start_lr = "The learning rate at the start of the training."
+    doc_stop_lr = (
+        "The desired learning rate at the end of the training. "
+        f"When decay_rate {doc_only_pt_supported}is explicitly set, "
+        "this value will serve as the minimum learning rate during training. "
+        "In other words, if the learning rate decays below stop_lr, stop_lr will be applied instead."
+    )
+    doc_decay_steps = (
+        "The learning rate is decaying every this number of training steps."
+    )
+    doc_decay_rate = (
+        "The decay rate for the learning rate. "
+        "If this is provided, it will be used directly as the decay rate for learning rate "
+        "instead of calculating it through interpolation between start_lr and stop_lr."
+    )
+
+    args = [
+        Argument("start_lr", float, optional=True, default=1e-3, doc=doc_start_lr),
+        Argument("stop_lr", float, optional=True, default=1e-8, doc=doc_stop_lr),
+        Argument("decay_steps", int, optional=True, default=5000, doc=doc_decay_steps),
+        Argument(
+            "decay_rate",
+            float,
+            optional=True,
+            default=None,
+            doc=doc_only_pt_supported + doc_decay_rate,
+        ),
+    ]
+    return args
+
+
+def learning_rate_variant_type_args():
+    doc_lr = "The type of the learning rate."
+
+    return Variant(
+        "type",
+        [Argument("exp", dict, learning_rate_exp())],
+        optional=True,
+        default_tag="exp",
+        doc=doc_lr,
+    )
+
+
+def learning_rate_args():
+    doc_scale_by_worker = "When parallel training or batch size scaled, how to alter learning rate. Valid values are `linear`(default), `sqrt` or `none`."
+    doc_lr = "The definitio of learning rate"
+    return Argument(
+        "learning_rate",
+        dict,
+        [
+            Argument(
+                "scale_by_worker",
+                str,
+                optional=True,
+                default="linear",
+                doc=doc_scale_by_worker,
+            )
+        ],
+        [learning_rate_variant_type_args()],
+        optional=True,
+        doc=doc_lr,
+    )
+
+
+def learning_rate_dict_args():
+    doc_learning_rate_dict = (
+        "The dictionary of definitions of learning rates in multi-task mode. "
+        "Each learning_rate_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, is the single definition of learning rate.\n"
+    )
+    ca = Argument(
+        "learning_rate_dict", dict, [], [], optional=True, doc=doc_learning_rate_dict
+    )
+    return ca
+
+
+#  --- Loss configurations: --- #
+def start_pref(item, label=None, abbr=None):
+    if label is None:
+        label = item
+    if abbr is None:
+        abbr = item
+    return f"The prefactor of {item} loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the {label} label should be provided by file {label}.npy in each data system. If both start_pref_{abbr} and limit_pref_{abbr} are set to 0, then the {item} will be ignored."
+
+
+def limit_pref(item):
+    return f"The prefactor of {item} loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity."
+
+
+loss_args_plugin = ArgsPlugin()
+
+
+@loss_args_plugin.register("ener")
+def loss_ener():
+    doc_start_pref_e = start_pref("energy", abbr="e")
+    doc_limit_pref_e = limit_pref("energy")
+    doc_start_pref_f = start_pref("force", abbr="f")
+    doc_limit_pref_f = limit_pref("force")
+    doc_start_pref_v = start_pref("virial", abbr="v")
+    doc_limit_pref_v = limit_pref("virial")
+    doc_start_pref_ae = start_pref("atomic energy", label="atom_ener", abbr="ae")
+    doc_limit_pref_ae = limit_pref("atomic energy")
+    doc_start_pref_pf = start_pref(
+        "atomic prefactor force", label="atom_pref", abbr="pf"
+    )
+    doc_limit_pref_pf = limit_pref("atomic prefactor force")
+    doc_start_pref_gf = start_pref("generalized force", label="drdq", abbr="gf")
+    doc_limit_pref_gf = limit_pref("generalized force")
+    doc_numb_generalized_coord = "The dimension of generalized coordinates. Required when generalized force loss is used."
+    doc_relative_f = "If provided, relative force error will be used in the loss. The difference of force will be normalized by the magnitude of the force in the label with a shift given by `relative_f`, i.e. DF_i / ( || F || + relative_f ) with DF denoting the difference between prediction and label and || F || denoting the L2 norm of the label."
+    doc_enable_atom_ener_coeff = "If true, the energy will be computed as \\sum_i c_i E_i. c_i should be provided by file atom_ener_coeff.npy in each data system, otherwise it's 1."
+    return [
+        Argument(
+            "start_pref_e",
+            [float, int],
+            optional=True,
+            default=0.02,
+            doc=doc_start_pref_e,
+        ),
+        Argument(
+            "limit_pref_e",
+            [float, int],
+            optional=True,
+            default=1.00,
+            doc=doc_limit_pref_e,
+        ),
+        Argument(
+            "start_pref_f",
+            [float, int],
+            optional=True,
+            default=1000,
+            doc=doc_start_pref_f,
+        ),
+        Argument(
+            "limit_pref_f",
+            [float, int],
+            optional=True,
+            default=1.00,
+            doc=doc_limit_pref_f,
+        ),
+        Argument(
+            "start_pref_v",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_start_pref_v,
+        ),
+        Argument(
+            "limit_pref_v",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_limit_pref_v,
+        ),
+        Argument(
+            "start_pref_ae",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_start_pref_ae,
+        ),
+        Argument(
+            "limit_pref_ae",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_limit_pref_ae,
+        ),
+        Argument(
+            "start_pref_pf",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_start_pref_pf,
+        ),
+        Argument(
+            "limit_pref_pf",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_limit_pref_pf,
+        ),
+        Argument("relative_f", [float, None], optional=True, doc=doc_relative_f),
+        Argument(
+            "enable_atom_ener_coeff",
+            [bool],
+            optional=True,
+            default=False,
+            doc=doc_enable_atom_ener_coeff,
+        ),
+        Argument(
+            "start_pref_gf",
+            float,
+            optional=True,
+            default=0.0,
+            doc=doc_start_pref_gf,
+        ),
+        Argument(
+            "limit_pref_gf",
+            float,
+            optional=True,
+            default=0.0,
+            doc=doc_limit_pref_gf,
+        ),
+        Argument(
+            "numb_generalized_coord",
+            int,
+            optional=True,
+            default=0,
+            doc=doc_numb_generalized_coord,
+        ),
+    ]
+
+
+@loss_args_plugin.register("ener_spin")
+def loss_ener_spin():
+    doc_start_pref_e = start_pref("energy")
+    doc_limit_pref_e = limit_pref("energy")
+    doc_start_pref_fr = start_pref("force_real_atom")
+    doc_limit_pref_fr = limit_pref("force_real_atom")
+    doc_start_pref_fm = start_pref("force_magnetic")
+    doc_limit_pref_fm = limit_pref("force_magnetic")
+    doc_start_pref_v = start_pref("virial")
+    doc_limit_pref_v = limit_pref("virial")
+    doc_start_pref_ae = start_pref("atom_ener")
+    doc_limit_pref_ae = limit_pref("atom_ener")
+    doc_start_pref_pf = start_pref("atom_pref")
+    doc_limit_pref_pf = limit_pref("atom_pref")
+    doc_relative_f = "If provided, relative force error will be used in the loss. The difference of force will be normalized by the magnitude of the force in the label with a shift given by `relative_f`, i.e. DF_i / ( || F || + relative_f ) with DF denoting the difference between prediction and label and || F || denoting the L2 norm of the label."
+    doc_enable_atom_ener_coeff = r"If true, the energy will be computed as \sum_i c_i E_i. c_i should be provided by file atom_ener_coeff.npy in each data system, otherwise it's 1."
+    return [
+        Argument(
+            "start_pref_e",
+            [float, int],
+            optional=True,
+            default=0.02,
+            doc=doc_start_pref_e,
+        ),
+        Argument(
+            "limit_pref_e",
+            [float, int],
+            optional=True,
+            default=1.00,
+            doc=doc_limit_pref_e,
+        ),
+        Argument(
+            "start_pref_fr",
+            [float, int],
+            optional=True,
+            default=1000,
+            doc=doc_start_pref_fr,
+        ),
+        Argument(
+            "limit_pref_fr",
+            [float, int],
+            optional=True,
+            default=1.00,
+            doc=doc_limit_pref_fr,
+        ),
+        Argument(
+            "start_pref_fm",
+            [float, int],
+            optional=True,
+            default=10000,
+            doc=doc_start_pref_fm,
+        ),
+        Argument(
+            "limit_pref_fm",
+            [float, int],
+            optional=True,
+            default=10.0,
+            doc=doc_limit_pref_fm,
+        ),
+        Argument(
+            "start_pref_v",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_start_pref_v,
+        ),
+        Argument(
+            "limit_pref_v",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_limit_pref_v,
+        ),
+        Argument(
+            "start_pref_ae",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_start_pref_ae,
+        ),
+        Argument(
+            "limit_pref_ae",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_limit_pref_ae,
+        ),
+        Argument(
+            "start_pref_pf",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_start_pref_pf,
+        ),
+        Argument(
+            "limit_pref_pf",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_limit_pref_pf,
+        ),
+        Argument("relative_f", [float, None], optional=True, doc=doc_relative_f),
+        Argument(
+            "enable_atom_ener_coeff",
+            [bool],
+            optional=True,
+            default=False,
+            doc=doc_enable_atom_ener_coeff,
+        ),
+    ]
+
+
+@loss_args_plugin.register("dos", doc=doc_only_tf_supported)
+def loss_dos():
+    doc_start_pref_dos = start_pref("Density of State (DOS)")
+    doc_limit_pref_dos = limit_pref("Density of State (DOS)")
+    doc_start_pref_cdf = start_pref(
+        "Cumulative Distribution Function (cumulative intergral of DOS)"
+    )
+    doc_limit_pref_cdf = limit_pref(
+        "Cumulative Distribution Function (cumulative intergral of DOS)"
+    )
+    doc_start_pref_ados = start_pref("atomic DOS (site-projected DOS)")
+    doc_limit_pref_ados = limit_pref("atomic DOS (site-projected DOS)")
+    doc_start_pref_acdf = start_pref("Cumulative integral of atomic DOS")
+    doc_limit_pref_acdf = limit_pref("Cumulative integral of atomic DOS")
+    return [
+        Argument(
+            "start_pref_dos",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_start_pref_dos,
+        ),
+        Argument(
+            "limit_pref_dos",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_limit_pref_dos,
+        ),
+        Argument(
+            "start_pref_cdf",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_start_pref_cdf,
+        ),
+        Argument(
+            "limit_pref_cdf",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_limit_pref_cdf,
+        ),
+        Argument(
+            "start_pref_ados",
+            [float, int],
+            optional=True,
+            default=1.00,
+            doc=doc_start_pref_ados,
+        ),
+        Argument(
+            "limit_pref_ados",
+            [float, int],
+            optional=True,
+            default=1.00,
+            doc=doc_limit_pref_ados,
+        ),
+        Argument(
+            "start_pref_acdf",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_start_pref_acdf,
+        ),
+        Argument(
+            "limit_pref_acdf",
+            [float, int],
+            optional=True,
+            default=0.00,
+            doc=doc_limit_pref_acdf,
+        ),
+    ]
+
+
+# YWolfeee: Modified to support tensor type of loss args.
+@loss_args_plugin.register("tensor", doc=doc_only_tf_supported)
+def loss_tensor():
+    # doc_global_weight = "The prefactor of the weight of global loss. It should be larger than or equal to 0. If only `pref` is provided or both are not provided, training will be global mode, i.e. the shape of 'polarizability.npy` or `dipole.npy` should be #frams x [9 or 3]."
+    # doc_local_weight =  "The prefactor of the weight of atomic loss. It should be larger than or equal to 0. If only `pref_atomic` is provided, training will be atomic mode, i.e. the shape of `polarizability.npy` or `dipole.npy` should be #frames x ([9 or 3] x #selected atoms). If both `pref` and `pref_atomic` are provided, training will be combined mode, and atomic label should be provided as well."
+    doc_global_weight = "The prefactor of the weight of global loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to global label, i.e. 'polarizability.npy` or `dipole.npy`, whose shape should be #frames x [9 or 3]. If it's larger than 0.0, this npy should be included."
+    doc_local_weight = "The prefactor of the weight of atomic loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to atomic label, i.e. `atomic_polarizability.npy` or `atomic_dipole.npy`, whose shape should be #frames x ([9 or 3] x #selected atoms). If it's larger than 0.0, this npy should be included. Both `pref` and `pref_atomic` should be provided, and either can be set to 0.0."
+    return [
+        Argument(
+            "pref", [float, int], optional=False, default=None, doc=doc_global_weight
+        ),
+        Argument(
+            "pref_atomic",
+            [float, int],
+            optional=False,
+            default=None,
+            doc=doc_local_weight,
+        ),
+    ]
+
+
+def loss_variant_type_args():
+    doc_loss = "The type of the loss. When the fitting type is `ener`, the loss type should be set to `ener` or left unset. When the fitting type is `dipole` or `polar`, the loss type should be set to `tensor`."
+
+    return Variant(
+        "type",
+        loss_args_plugin.get_all_argument(),
+        optional=True,
+        default_tag="ener",
+        doc=doc_loss,
+    )
+
+
+def loss_args():
+    doc_loss = "The definition of loss function. The loss type should be set to `tensor`, `ener` or left unset."
+    ca = Argument(
+        "loss", dict, [], [loss_variant_type_args()], optional=True, doc=doc_loss
+    )
+    return ca
+
+
+def loss_dict_args():
+    doc_loss_dict = (
+        "The dictionary of definitions of multiple loss functions in multi-task mode. "
+        "Each loss_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, is the single definition of loss function, whose type should be set to `tensor`, `ener` or left unset.\n"
+    )
+    ca = Argument("loss_dict", dict, [], [], optional=True, doc=doc_loss_dict)
+    return ca
+
+
+#  --- Training configurations: --- #
+def training_data_args():  # ! added by Ziyao: new specification style for data systems.
+    link_sys = make_link("systems", "training/training_data/systems")
+    doc_systems = (
+        "The data systems for training. "
+        "This key can be provided with a list that specifies the systems, or be provided with a string "
+        "by which the prefix of all systems are given and the list of the systems is automatically generated."
+    )
+    doc_set_prefix = f"The prefix of the sets in the {link_sys}."
+    doc_batch_size = f'This key can be \n\n\
+- list: the length of which is the same as the {link_sys}. The batch size of each system is given by the elements of the list.\n\n\
+- int: all {link_sys} use the same batch size.\n\n\
+- string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.\n\n\
+- string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.\n\n\
+- string "mixed:N": the batch data will be sampled from all systems and merged into a mixed system with the batch size N. Only support the se_atten descriptor.\n\n\
+If MPI is used, the value should be considered as the batch size per task.'
+    doc_auto_prob_style = 'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n\
+- "prob_uniform"  : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\
+- "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\
+- "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.'
+    doc_sys_probs = (
+        "A list of float if specified. "
+        "Should be of the same length as `systems`, "
+        "specifying the probability of each system."
+    )
+
+    args = [
+        Argument(
+            "systems", [List[str], str], optional=False, default=".", doc=doc_systems
+        ),
+        Argument("set_prefix", str, optional=True, default="set", doc=doc_set_prefix),
+        Argument(
+            "batch_size",
+            [List[int], int, str],
+            optional=True,
+            default="auto",
+            doc=doc_batch_size,
+        ),
+        Argument(
+            "auto_prob",
+            str,
+            optional=True,
+            default="prob_sys_size",
+            doc=doc_auto_prob_style,
+            alias=[
+                "auto_prob_style",
+            ],
+        ),
+        Argument(
+            "sys_probs",
+            List[float],
+            optional=True,
+            default=None,
+            doc=doc_sys_probs,
+            alias=["sys_weights"],
+        ),
+    ]
+
+    doc_training_data = "Configurations of training data."
+    return Argument(
+        "training_data",
+        dict,
+        optional=True,
+        sub_fields=args,
+        sub_variants=[],
+        doc=doc_training_data,
+    )
+
+
+def validation_data_args():  # ! added by Ziyao: new specification style for data systems.
+    link_sys = make_link("systems", "training/validation_data/systems")
+    doc_systems = (
+        "The data systems for validation. "
+        "This key can be provided with a list that specifies the systems, or be provided with a string "
+        "by which the prefix of all systems are given and the list of the systems is automatically generated."
+    )
+    doc_set_prefix = f"The prefix of the sets in the {link_sys}."
+    doc_batch_size = f'This key can be \n\n\
+- list: the length of which is the same as the {link_sys}. The batch size of each system is given by the elements of the list.\n\n\
+- int: all {link_sys} use the same batch size.\n\n\
+- string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.\n\n\
+- string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.'
+    doc_auto_prob_style = 'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n\
+- "prob_uniform"  : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\
+- "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\
+- "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.'
+    doc_sys_probs = (
+        "A list of float if specified. "
+        "Should be of the same length as `systems`, "
+        "specifying the probability of each system."
+    )
+    doc_numb_btch = "An integer that specifies the number of batches to be sampled for each validation period."
+
+    args = [
+        Argument(
+            "systems", [List[str], str], optional=False, default=".", doc=doc_systems
+        ),
+        Argument("set_prefix", str, optional=True, default="set", doc=doc_set_prefix),
+        Argument(
+            "batch_size",
+            [List[int], int, str],
+            optional=True,
+            default="auto",
+            doc=doc_batch_size,
+        ),
+        Argument(
+            "auto_prob",
+            str,
+            optional=True,
+            default="prob_sys_size",
+            doc=doc_auto_prob_style,
+            alias=[
+                "auto_prob_style",
+            ],
+        ),
+        Argument(
+            "sys_probs",
+            List[float],
+            optional=True,
+            default=None,
+            doc=doc_sys_probs,
+            alias=["sys_weights"],
+        ),
+        Argument(
+            "numb_btch",
+            int,
+            optional=True,
+            default=1,
+            doc=doc_numb_btch,
+            alias=[
+                "numb_batch",
+            ],
+        ),
+    ]
+
+    doc_validation_data = (
+        "Configurations of validation data. Similar to that of training data, "
+        "except that a `numb_btch` argument may be configured"
+    )
+    return Argument(
+        "validation_data",
+        dict,
+        optional=True,
+        default=None,
+        sub_fields=args,
+        sub_variants=[],
+        doc=doc_validation_data,
+    )
+
+
+def mixed_precision_args():  # ! added by Denghui.
+    doc_output_prec = 'The precision for mixed precision params. " \
+        "The trainable variables precision during the mixed precision training process, " \
+        "supported options are float32 only currently.'
+    doc_compute_prec = 'The precision for mixed precision compute. " \
+        "The compute precision during the mixed precision training process, "" \
+        "supported options are float16 and bfloat16 currently.'
+
+    args = [
+        Argument(
+            "output_prec", str, optional=True, default="float32", doc=doc_output_prec
+        ),
+        Argument(
+            "compute_prec", str, optional=False, default="float16", doc=doc_compute_prec
+        ),
+    ]
+
+    doc_mixed_precision = "Configurations of mixed precision."
+    return Argument(
+        "mixed_precision",
+        dict,
+        optional=True,
+        sub_fields=args,
+        sub_variants=[],
+        doc=doc_mixed_precision,
+    )
+
+
+def training_args():  # ! modified by Ziyao: data configuration isolated.
+    doc_numb_steps = "Number of training batch. Each training uses one batch of data."
+    doc_seed = "The random seed for getting frames from the training data set."
+    doc_disp_file = "The file for printing learning curve."
+    doc_disp_freq = "The frequency of printing learning curve."
+    doc_save_freq = "The frequency of saving check point."
+    doc_save_ckpt = "The path prefix of saving check point files."
+    doc_max_ckpt_keep = (
+        "The maximum number of checkpoints to keep. "
+        "The oldest checkpoints will be deleted once the number of checkpoints exceeds max_ckpt_keep. "
+        "Defaults to 5."
+    )
+    doc_disp_training = "Displaying verbose information during training."
+    doc_time_training = "Timing durining training."
+    doc_profiling = "Profiling during training."
+    doc_profiling_file = "Output file for profiling."
+    doc_enable_profiler = "Enable TensorFlow Profiler (available in TensorFlow 2.3) or PyTorch Profiler to analyze performance. The log will be saved to `tensorboard_log_dir`."
+    doc_tensorboard = "Enable tensorboard"
+    doc_tensorboard_log_dir = "The log directory of tensorboard outputs"
+    doc_tensorboard_freq = "The frequency of writing tensorboard events."
+    doc_data_dict = (
+        "The dictionary of multi DataSystems in multi-task mode. "
+        "Each data_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, "
+        "contains training data and optional validation data definitions."
+    )
+    doc_fitting_weight = (
+        "Each fitting_weight[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, "
+        "is the training weight of fitting net `fitting_key`. "
+        "Fitting nets with higher weights will be selected with higher probabilities to be trained in one step. "
+        "Weights will be normalized and minus ones will be ignored. "
+        "If not set, each fitting net will be equally selected when training."
+    )
+    doc_warmup_steps = (
+        "The number of steps for learning rate warmup. During warmup, "
+        "the learning rate begins at zero and progressively increases linearly to `start_lr`, "
+        "rather than starting directly from `start_lr`"
+    )
+    doc_gradient_max_norm = (
+        "Clips the gradient norm to a maximum value. "
+        "If the gradient norm exceeds this value, it will be clipped to this limit. "
+        "No gradient clipping will occur if set to 0."
+    )
+    doc_stat_file = (
+        "The file path for saving the data statistics results. "
+        "If set, the results will be saved and directly loaded during the next training session, "
+        "avoiding the need to recalculate the statistics"
+    )
+    doc_opt_type = "The type of optimizer to use."
+    doc_kf_blocksize = "The blocksize for the Kalman filter."
+
+    arg_training_data = training_data_args()
+    arg_validation_data = validation_data_args()
+    mixed_precision_data = mixed_precision_args()
+
+    args = [
+        arg_training_data,
+        arg_validation_data,
+        mixed_precision_data,
+        Argument(
+            "numb_steps", int, optional=False, doc=doc_numb_steps, alias=["stop_batch"]
+        ),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+        Argument(
+            "disp_file", str, optional=True, default="lcurve.out", doc=doc_disp_file
+        ),
+        Argument("disp_freq", int, optional=True, default=1000, doc=doc_disp_freq),
+        Argument("save_freq", int, optional=True, default=1000, doc=doc_save_freq),
+        Argument(
+            "save_ckpt", str, optional=True, default="model.ckpt", doc=doc_save_ckpt
+        ),
+        Argument("max_ckpt_keep", int, optional=True, default=5, doc=doc_max_ckpt_keep),
+        Argument(
+            "disp_training", bool, optional=True, default=True, doc=doc_disp_training
+        ),
+        Argument(
+            "time_training", bool, optional=True, default=True, doc=doc_time_training
+        ),
+        Argument(
+            "profiling",
+            bool,
+            optional=True,
+            default=False,
+            doc=doc_only_tf_supported + doc_profiling,
+        ),
+        Argument(
+            "profiling_file",
+            str,
+            optional=True,
+            default="timeline.json",
+            doc=doc_only_tf_supported + doc_profiling_file,
+        ),
+        Argument(
+            "enable_profiler",
+            bool,
+            optional=True,
+            default=False,
+            doc=doc_enable_profiler,
+        ),
+        Argument(
+            "tensorboard", bool, optional=True, default=False, doc=doc_tensorboard
+        ),
+        Argument(
+            "tensorboard_log_dir",
+            str,
+            optional=True,
+            default="log",
+            doc=doc_tensorboard_log_dir,
+        ),
+        Argument(
+            "tensorboard_freq", int, optional=True, default=1, doc=doc_tensorboard_freq
+        ),
+        Argument("data_dict", dict, optional=True, doc=doc_data_dict),
+        Argument("fitting_weight", dict, optional=True, doc=doc_fitting_weight),
+        Argument(
+            "warmup_steps",
+            int,
+            optional=True,
+            doc=doc_only_pt_supported + doc_warmup_steps,
+        ),
+        Argument(
+            "gradient_max_norm",
+            float,
+            optional=True,
+            doc=doc_only_pt_supported + doc_gradient_max_norm,
+        ),
+        Argument(
+            "stat_file", str, optional=True, doc=doc_only_pt_supported + doc_stat_file
+        ),
+    ]
+    variants = [
+        Variant(
+            "opt_type",
+            choices=[
+                Argument("Adam", dict, [], [], optional=True),
+                Argument(
+                    "LKF",
+                    dict,
+                    [
+                        Argument(
+                            "kf_blocksize",
+                            int,
+                            optional=True,
+                            doc=doc_only_pt_supported + doc_kf_blocksize,
+                        ),
+                    ],
+                    [],
+                    optional=True,
+                ),
+            ],
+            optional=True,
+            default_tag="Adam",
+            doc=doc_only_pt_supported + doc_opt_type,
+        )
+    ]
+
+    doc_training = "The training options."
+    return Argument("training", dict, args, variants, doc=doc_training)
+
+
+def make_index(keys):
+    ret = []
+    for ii in keys:
+        ret.append(make_link(ii, ii))
+    return ", ".join(ret)
+
+
+def gen_doc(*, make_anchor=True, make_link=True, **kwargs):
+    if make_link:
+        make_anchor = True
+    ptr = []
+    for ii in gen_args():
+        ptr.append(ii.gen_doc(make_anchor=make_anchor, make_link=make_link, **kwargs))
+
+    key_words = []
+    for ii in "\n\n".join(ptr).split("\n"):
+        if "argument path" in ii:
+            key_words.append(ii.split(":")[1].replace("`", "").strip())
+    # ptr.insert(0, make_index(key_words))
+
+    return "\n\n".join(ptr)
+
+
+def gen_json(**kwargs):
+    return json.dumps(
+        tuple(gen_args()),
+        cls=ArgumentEncoder,
+    )
+
+
+def gen_args(**kwargs) -> List[Argument]:
+    return [
+        model_args(),
+        learning_rate_args(),
+        learning_rate_dict_args(),
+        loss_args(),
+        loss_dict_args(),
+        training_args(),
+        nvnmd_args(),
+    ]
+
+
+def normalize_multi_task(data):
+    # single-task or multi-task mode
+    if data["model"].get("type", "standard") not in ("standard", "multi"):
+        return data
+    single_fitting_net = "fitting_net" in data["model"].keys()
+    single_training_data = "training_data" in data["training"].keys()
+    single_valid_data = "validation_data" in data["training"].keys()
+    single_loss = "loss" in data.keys()
+    single_learning_rate = "learning_rate" in data.keys()
+    multi_fitting_net = "fitting_net_dict" in data["model"].keys()
+    multi_training_data = "data_dict" in data["training"].keys()
+    multi_loss = "loss_dict" in data.keys()
+    multi_fitting_weight = "fitting_weight" in data["training"].keys()
+    multi_learning_rate = "learning_rate_dict" in data.keys()
+    assert (single_fitting_net == single_training_data) and (
+        multi_fitting_net == multi_training_data
+    ), (
+        "In single-task mode, 'model/fitting_net' and 'training/training_data' must be defined at the same time! "
+        "While in multi-task mode, 'model/fitting_net_dict', 'training/data_dict' "
+        "must be defined at the same time! Please check your input script. "
+    )
+    assert not (single_fitting_net and multi_fitting_net), (
+        "Single-task mode and multi-task mode can not be performed together. "
+        "Please check your input script and choose just one format! "
+    )
+    assert (
+        single_fitting_net or multi_fitting_net
+    ), "Please define your fitting net and training data! "
+    if multi_fitting_net:
+        assert not single_valid_data, (
+            "In multi-task mode, 'training/validation_data' should not appear "
+            "outside 'training/data_dict'! Please check your input script."
+        )
+        assert (
+            not single_loss
+        ), "In multi-task mode, please use 'model/loss_dict' in stead of 'model/loss'! "
+        assert (
+            "type_map" in data["model"]
+        ), "In multi-task mode, 'model/type_map' must be defined! "
+        data["model"]["type"] = "multi"
+        data["model"]["fitting_net_dict"] = normalize_fitting_net_dict(
+            data["model"]["fitting_net_dict"]
+        )
+        data["training"]["data_dict"] = normalize_data_dict(
+            data["training"]["data_dict"]
+        )
+        data["loss_dict"] = (
+            normalize_loss_dict(
+                data["model"]["fitting_net_dict"].keys(), data["loss_dict"]
+            )
+            if multi_loss
+            else {}
+        )
+        if multi_learning_rate:
+            data["learning_rate_dict"] = normalize_learning_rate_dict(
+                data["model"]["fitting_net_dict"].keys(), data["learning_rate_dict"]
+            )
+        elif single_learning_rate:
+            data["learning_rate_dict"] = (
+                normalize_learning_rate_dict_with_single_learning_rate(
+                    data["model"]["fitting_net_dict"].keys(), data["learning_rate"]
+                )
+            )
+        fitting_weight = (
+            data["training"]["fitting_weight"] if multi_fitting_weight else None
+        )
+        data["training"]["fitting_weight"] = normalize_fitting_weight(
+            data["model"]["fitting_net_dict"].keys(),
+            data["training"]["data_dict"].keys(),
+            fitting_weight=fitting_weight,
+        )
+    else:
+        assert not multi_loss, "In single-task mode, please use 'model/loss' in stead of 'model/loss_dict'! "
+        assert not multi_learning_rate, "In single-task mode, please use 'model/learning_rate' in stead of 'model/learning_rate_dict'! "
+    return data
+
+
+def normalize_fitting_net_dict(fitting_net_dict):
+    new_dict = {}
+    base = Argument("base", dict, [], [fitting_variant_type_args()], doc="")
+    for fitting_key_item in fitting_net_dict:
+        data = base.normalize_value(
+            fitting_net_dict[fitting_key_item], trim_pattern="_*"
+        )
+        base.check_value(data, strict=True)
+        new_dict[fitting_key_item] = data
+    return new_dict
+
+
+def normalize_data_dict(data_dict):
+    new_dict = {}
+    base = Argument(
+        "base", dict, [training_data_args(), validation_data_args()], [], doc=""
+    )
+    for data_system_key_item in data_dict:
+        data = base.normalize_value(data_dict[data_system_key_item], trim_pattern="_*")
+        base.check_value(data, strict=True)
+        new_dict[data_system_key_item] = data
+    return new_dict
+
+
+def normalize_loss_dict(fitting_keys, loss_dict):
+    # check the loss dict
+    failed_loss_keys = [item for item in loss_dict if item not in fitting_keys]
+    assert not failed_loss_keys, f"Loss dict key(s) {failed_loss_keys!s} not have corresponding fitting keys in {list(fitting_keys)!s}! "
+    new_dict = {}
+    base = Argument("base", dict, [], [loss_variant_type_args()], doc="")
+    for item in loss_dict:
+        data = base.normalize_value(loss_dict[item], trim_pattern="_*")
+        base.check_value(data, strict=True)
+        new_dict[item] = data
+    return new_dict
+
+
+def normalize_learning_rate_dict(fitting_keys, learning_rate_dict):
+    # check the learning_rate dict
+    failed_learning_rate_keys = [
+        item for item in learning_rate_dict if item not in fitting_keys
+    ]
+    assert not failed_learning_rate_keys, f"Learning rate dict key(s) {failed_learning_rate_keys!s} not have corresponding fitting keys in {list(fitting_keys)!s}! "
+    new_dict = {}
+    base = Argument("base", dict, [], [learning_rate_variant_type_args()], doc="")
+    for item in learning_rate_dict:
+        data = base.normalize_value(learning_rate_dict[item], trim_pattern="_*")
+        base.check_value(data, strict=True)
+        new_dict[item] = data
+    return new_dict
+
+
+def normalize_learning_rate_dict_with_single_learning_rate(fitting_keys, learning_rate):
+    new_dict = {}
+    base = Argument("base", dict, [], [learning_rate_variant_type_args()], doc="")
+    data = base.normalize_value(learning_rate, trim_pattern="_*")
+    base.check_value(data, strict=True)
+    for fitting_key in fitting_keys:
+        new_dict[fitting_key] = data
+    return new_dict
+
+
+def normalize_fitting_weight(fitting_keys, data_keys, fitting_weight=None):
+    # check the mapping
+    failed_data_keys = [item for item in data_keys if item not in fitting_keys]
+    assert not failed_data_keys, f"Data dict key(s) {failed_data_keys!s} not have corresponding fitting keys in {list(fitting_keys)!s}! "
+    empty_fitting_keys = []
+    valid_fitting_keys = []
+    for item in fitting_keys:
+        if item not in data_keys:
+            empty_fitting_keys.append(item)
+        else:
+            valid_fitting_keys.append(item)
+    if empty_fitting_keys:
+        log.warning(
+            f"Fitting net(s) {empty_fitting_keys!s} have no data and will not be used in training."
+        )
+    num_pair = len(valid_fitting_keys)
+    assert num_pair > 0, "No valid training data systems for fitting nets!"
+
+    # check and normalize the fitting weight
+    new_weight = {}
+    if fitting_weight is None:
+        equal_weight = 1.0 / num_pair
+        for item in fitting_keys:
+            new_weight[item] = equal_weight if item in valid_fitting_keys else 0.0
+    else:
+        failed_weight_keys = [
+            item for item in fitting_weight if item not in fitting_keys
+        ]
+        assert not failed_weight_keys, f"Fitting weight key(s) {failed_weight_keys!s} not have corresponding fitting keys in {list(fitting_keys)!s}! "
+        sum_prob = 0.0
+        for item in fitting_keys:
+            if item in valid_fitting_keys:
+                if (
+                    item in fitting_weight
+                    and isinstance(fitting_weight[item], (int, float))
+                    and fitting_weight[item] > 0.0
+                ):
+                    sum_prob += fitting_weight[item]
+                    new_weight[item] = fitting_weight[item]
+                else:
+                    valid_fitting_keys.remove(item)
+                    log.warning(
+                        f"Fitting net '{item}' has zero or invalid weight "
+                        "and will not be used in training."
+                    )
+                    new_weight[item] = 0.0
+            else:
+                new_weight[item] = 0.0
+        assert sum_prob > 0.0, "No valid training weight for fitting nets!"
+        # normalize
+        for item in new_weight:
+            new_weight[item] /= sum_prob
+    return new_weight
+
+
+def normalize(data):
+    data = normalize_multi_task(data)
+
+    base = Argument("base", dict, gen_args())
+    data = base.normalize_value(data, trim_pattern="_*")
+    base.check_value(data, strict=True)
+
+    return data
+
+
+if __name__ == "__main__":
+    gen_doc()
diff --git a/deepmd_utils/utils/argcheck_nvnmd.py b/deepmd/utils/argcheck_nvnmd.py
similarity index 100%
rename from deepmd_utils/utils/argcheck_nvnmd.py
rename to deepmd/utils/argcheck_nvnmd.py
diff --git a/deepmd/utils/batch_size.py b/deepmd/utils/batch_size.py
index 863520b3f4..b35d9833d5 100644
--- a/deepmd/utils/batch_size.py
+++ b/deepmd/utils/batch_size.py
@@ -1,19 +1,212 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-from packaging.version import (
-    Version,
+import logging
+import os
+from abc import (
+    ABC,
+    abstractmethod,
 )
-
-from deepmd.env import (
-    TF_VERSION,
-    tf,
+from typing import (
+    Callable,
+    Tuple,
 )
+
+import numpy as np
+
 from deepmd.utils.errors import (
     OutOfMemoryError,
 )
-from deepmd_utils.utils.batch_size import AutoBatchSize as AutoBatchSizeBase
 
+log = logging.getLogger(__name__)
+
+
+class AutoBatchSize(ABC):
+    """This class allows DeePMD-kit to automatically decide the maximum
+    batch size that will not cause an OOM error.
+
+    Notes
+    -----
+    In some CPU environments, the program may be directly killed when OOM. In
+    this case, by default the batch size will not be increased for CPUs. The
+    environment variable `DP_INFER_BATCH_SIZE` can be set as the batch size.
+
+    In other cases, we assume all OOM error will raise :class:`OutOfMemoryError`.
+
+    Parameters
+    ----------
+    initial_batch_size : int, default: 1024
+        initial batch size (number of total atoms) when DP_INFER_BATCH_SIZE
+        is not set
+    factor : float, default: 2.
+        increased factor
+
+    Attributes
+    ----------
+    current_batch_size : int
+        current batch size (number of total atoms)
+    maximum_working_batch_size : int
+        maximum working batch size
+    minimal_not_working_batch_size : int
+        minimal not working batch size
+    """
 
-class AutoBatchSize(AutoBatchSizeBase):
+    def __init__(self, initial_batch_size: int = 1024, factor: float = 2.0) -> None:
+        # See also PyTorchLightning/pytorch-lightning#1638
+        self.current_batch_size = initial_batch_size
+        DP_INFER_BATCH_SIZE = int(os.environ.get("DP_INFER_BATCH_SIZE", 0))
+        if DP_INFER_BATCH_SIZE > 0:
+            self.current_batch_size = DP_INFER_BATCH_SIZE
+            self.maximum_working_batch_size = DP_INFER_BATCH_SIZE
+            self.minimal_not_working_batch_size = self.maximum_working_batch_size + 1
+        else:
+            self.maximum_working_batch_size = initial_batch_size
+            if self.is_gpu_available():
+                self.minimal_not_working_batch_size = 2**31
+            else:
+                self.minimal_not_working_batch_size = (
+                    self.maximum_working_batch_size + 1
+                )
+                log.warning(
+                    "You can use the environment variable DP_INFER_BATCH_SIZE to"
+                    "control the inference batch size (nframes * natoms). "
+                    "The default value is %d." % initial_batch_size
+                )
+
+        self.factor = factor
+
+    def execute(
+        self, callable: Callable, start_index: int, natoms: int
+    ) -> Tuple[int, tuple]:
+        """Excuate a method with given batch size.
+
+        Parameters
+        ----------
+        callable : Callable
+            The method should accept the batch size and start_index as parameters,
+            and returns executed batch size and data.
+        start_index : int
+            start index
+        natoms : int
+            natoms
+
+        Returns
+        -------
+        int
+            executed batch size * number of atoms
+        tuple
+            result from callable, None if failing to execute
+
+        Raises
+        ------
+        OutOfMemoryError
+            OOM when batch size is 1
+        """
+        if natoms > 0:
+            batch_nframes = self.current_batch_size // natoms
+        else:
+            batch_nframes = self.current_batch_size
+        try:
+            n_batch, result = callable(max(batch_nframes, 1), start_index)
+        except Exception as e:
+            if not self.is_oom_error(e):
+                raise e
+            self.minimal_not_working_batch_size = min(
+                self.minimal_not_working_batch_size, self.current_batch_size
+            )
+            if self.maximum_working_batch_size >= self.minimal_not_working_batch_size:
+                self.maximum_working_batch_size = int(
+                    self.minimal_not_working_batch_size / self.factor
+                )
+            if self.minimal_not_working_batch_size <= natoms:
+                raise OutOfMemoryError(
+                    "The callable still throws an out-of-memory (OOM) error even when batch size is 1!"
+                ) from e
+            # adjust the next batch size
+            self._adjust_batch_size(1.0 / self.factor)
+            return 0, None
+        else:
+            n_tot = n_batch * natoms
+            self.maximum_working_batch_size = max(
+                self.maximum_working_batch_size, n_tot
+            )
+            # adjust the next batch size
+            if (
+                n_tot + natoms > self.current_batch_size
+                and self.current_batch_size * self.factor
+                < self.minimal_not_working_batch_size
+            ):
+                self._adjust_batch_size(self.factor)
+            return n_batch, result
+
+    def _adjust_batch_size(self, factor: float):
+        old_batch_size = self.current_batch_size
+        self.current_batch_size = int(self.current_batch_size * factor)
+        log.info(
+            "Adjust batch size from %d to %d"
+            % (old_batch_size, self.current_batch_size)
+        )
+
+    def execute_all(
+        self, callable: Callable, total_size: int, natoms: int, *args, **kwargs
+    ) -> Tuple[np.ndarray]:
+        """Excuate a method with all given data.
+
+        Parameters
+        ----------
+        callable : Callable
+            The method should accept *args and **kwargs as input and return the similiar array.
+        total_size : int
+            Total size
+        natoms : int
+            The number of atoms
+        *args
+            Variable length argument list.
+        **kwargs
+            If 2D np.ndarray, assume the first axis is batch; otherwise do nothing.
+        """
+
+        def execute_with_batch_size(
+            batch_size: int, start_index: int
+        ) -> Tuple[int, Tuple[np.ndarray]]:
+            end_index = start_index + batch_size
+            end_index = min(end_index, total_size)
+            return (end_index - start_index), callable(
+                *[
+                    (
+                        vv[start_index:end_index]
+                        if isinstance(vv, np.ndarray) and vv.ndim > 1
+                        else vv
+                    )
+                    for vv in args
+                ],
+                **{
+                    kk: (
+                        vv[start_index:end_index]
+                        if isinstance(vv, np.ndarray) and vv.ndim > 1
+                        else vv
+                    )
+                    for kk, vv in kwargs.items()
+                },
+            )
+
+        index = 0
+        results = []
+        while index < total_size:
+            n_batch, result = self.execute(execute_with_batch_size, index, natoms)
+            if not isinstance(result, tuple):
+                result = (result,)
+            index += n_batch
+            if n_batch:
+                for rr in result:
+                    rr.reshape((n_batch, -1))
+                results.append(result)
+
+        r = tuple([np.concatenate(r, axis=0) for r in zip(*results)])
+        if len(r) == 1:
+            # avoid returning tuple if callable doesn't return tuple
+            r = r[0]
+        return r
+
+    @abstractmethod
     def is_gpu_available(self) -> bool:
         """Check if GPU is available.
 
@@ -22,11 +215,8 @@ def is_gpu_available(self) -> bool:
         bool
             True if GPU is available
         """
-        return (
-            Version(TF_VERSION) >= Version("1.14")
-            and tf.config.experimental.get_visible_devices("GPU")
-        ) or tf.test.is_gpu_available()
 
+    @abstractmethod
     def is_oom_error(self, e: Exception) -> bool:
         """Check if the exception is an OOM error.
 
@@ -34,7 +224,9 @@ def is_oom_error(self, e: Exception) -> bool:
         ----------
         e : Exception
             Exception
+
+        Returns
+        -------
+        bool
+            True if the exception is an OOM error
         """
-        # TODO: it's very slow to catch OOM error; I don't know what TF is doing here
-        # but luckily we only need to catch once
-        return isinstance(e, (tf.errors.ResourceExhaustedError, OutOfMemoryError))
diff --git a/deepmd/utils/compat.py b/deepmd/utils/compat.py
index 91bf4021ee..5f9c14e6d8 100644
--- a/deepmd/utils/compat.py
+++ b/deepmd/utils/compat.py
@@ -1,15 +1,392 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Alias for backward compatibility."""
-from deepmd_utils.utils.compat import (
-    convert_input_v0_v1,
-    convert_input_v1_v2,
-    deprecate_numb_test,
-    update_deepmd_input,
+"""Module providing compatibility between `0.x.x` and `1.x.x` input versions."""
+
+import json
+import warnings
+from pathlib import (
+    Path,
+)
+from typing import (
+    Any,
+    Dict,
+    Optional,
+    Sequence,
+    Union,
+)
+
+import numpy as np
+
+from deepmd.common import (
+    j_must_have,
 )
 
-__all__ = [
-    "convert_input_v0_v1",
-    "convert_input_v1_v2",
-    "deprecate_numb_test",
-    "update_deepmd_input",
-]
+
+def convert_input_v0_v1(
+    jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None
+) -> Dict[str, Any]:
+    """Convert input from v0 format to v1.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        loaded json/yaml file
+    warning : bool, optional
+        whether to show deprecation warning, by default True
+    dump : Optional[Union[str, Path]], optional
+        whether to dump converted file, by default None
+
+    Returns
+    -------
+    Dict[str, Any]
+        converted output
+    """
+    output = {}
+    output["model"] = _model(jdata, jdata["use_smooth"])
+    output["learning_rate"] = _learning_rate(jdata)
+    output["loss"] = _loss(jdata)
+    output["training"] = _training(jdata)
+    if warning:
+        _warning_input_v0_v1(dump)
+    if dump is not None:
+        with open(dump, "w") as fp:
+            json.dump(output, fp, indent=4)
+    return output
+
+
+def _warning_input_v0_v1(fname: Optional[Union[str, Path]]):
+    msg = (
+        "It seems that you are using a deepmd-kit input of version 0.x.x, "
+        "which is deprecated. we have converted the input to >2.0.0 compatible"
+    )
+    if fname is not None:
+        msg += f", and output it to file {fname}"
+    warnings.warn(msg)
+
+
+def _model(jdata: Dict[str, Any], smooth: bool) -> Dict[str, Dict[str, Any]]:
+    """Convert data to v1 input for non-smooth model.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        parsed input json/yaml data
+    smooth : bool
+        whether to use smooth or non-smooth descriptor version
+
+    Returns
+    -------
+    Dict[str, Dict[str, Any]]
+        dictionary with model input parameters and sub-dictionaries for descriptor and
+        fitting net
+    """
+    model = {}
+    model["descriptor"] = (
+        _smth_descriptor(jdata) if smooth else _nonsmth_descriptor(jdata)
+    )
+    model["fitting_net"] = _fitting_net(jdata)
+    return model
+
+
+def _nonsmth_descriptor(jdata: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert data to v1 input for non-smooth descriptor.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        parsed input json/yaml data
+
+    Returns
+    -------
+    Dict[str, Any]
+        dict with descriptor parameters
+    """
+    descriptor = {}
+    descriptor["type"] = "loc_frame"
+    _jcopy(jdata, descriptor, ("sel_a", "sel_r", "rcut", "axis_rule"))
+    return descriptor
+
+
+def _smth_descriptor(jdata: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert data to v1 input for smooth descriptor.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        parsed input json/yaml data
+
+    Returns
+    -------
+    Dict[str, Any]
+        dict with descriptor parameters
+    """
+    descriptor = {}
+    seed = jdata.get("seed", None)
+    if seed is not None:
+        descriptor["seed"] = seed
+    descriptor["type"] = "se_a"
+    descriptor["sel"] = jdata["sel_a"]
+    _jcopy(jdata, descriptor, ("rcut",))
+    descriptor["rcut_smth"] = jdata.get("rcut_smth", descriptor["rcut"])
+    descriptor["neuron"] = j_must_have(jdata, "filter_neuron")
+    descriptor["axis_neuron"] = j_must_have(jdata, "axis_neuron", ["n_axis_neuron"])
+    descriptor["resnet_dt"] = False
+    if "resnet_dt" in jdata:
+        descriptor["resnet_dt"] = jdata["filter_resnet_dt"]
+
+    return descriptor
+
+
+def _fitting_net(jdata: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert data to v1 input for fitting net.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        parsed input json/yaml data
+
+    Returns
+    -------
+    Dict[str, Any]
+        dict with fitting net parameters
+    """
+    fitting_net = {}
+
+    seed = jdata.get("seed", None)
+    if seed is not None:
+        fitting_net["seed"] = seed
+    fitting_net["neuron"] = j_must_have(jdata, "fitting_neuron", ["n_neuron"])
+    fitting_net["resnet_dt"] = True
+    if "resnet_dt" in jdata:
+        fitting_net["resnet_dt"] = jdata["resnet_dt"]
+    if "fitting_resnet_dt" in jdata:
+        fitting_net["resnet_dt"] = jdata["fitting_resnet_dt"]
+    return fitting_net
+
+
+def _learning_rate(jdata: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert data to v1 input for learning rate section.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        parsed input json/yaml data
+
+    Returns
+    -------
+    Dict[str, Any]
+        dict with learning rate parameters
+    """
+    learning_rate = {}
+    learning_rate["type"] = "exp"
+    _jcopy(jdata, learning_rate, ("decay_steps", "decay_rate", "start_lr"))
+    return learning_rate
+
+
+def _loss(jdata: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert data to v1 input for loss function.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        parsed input json/yaml data
+
+    Returns
+    -------
+    Dict[str, Any]
+        dict with loss function parameters
+    """
+    loss: Dict[str, Any] = {}
+    _jcopy(
+        jdata,
+        loss,
+        (
+            "start_pref_e",
+            "limit_pref_e",
+            "start_pref_f",
+            "limit_pref_f",
+            "start_pref_v",
+            "limit_pref_v",
+        ),
+    )
+    if "start_pref_ae" in jdata:
+        loss["start_pref_ae"] = jdata["start_pref_ae"]
+    if "limit_pref_ae" in jdata:
+        loss["limit_pref_ae"] = jdata["limit_pref_ae"]
+    return loss
+
+
+def _training(jdata: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert data to v1 input for training.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        parsed input json/yaml data
+
+    Returns
+    -------
+    Dict[str, Any]
+        dict with training parameters
+    """
+    training = {}
+    seed = jdata.get("seed", None)
+    if seed is not None:
+        training["seed"] = seed
+
+    _jcopy(jdata, training, ("systems", "set_prefix", "stop_batch", "batch_size"))
+    training["disp_file"] = "lcurve.out"
+    if "disp_file" in jdata:
+        training["disp_file"] = jdata["disp_file"]
+    training["disp_freq"] = j_must_have(jdata, "disp_freq")
+    training["numb_test"] = j_must_have(jdata, "numb_test")
+    training["save_freq"] = j_must_have(jdata, "save_freq")
+    training["save_ckpt"] = j_must_have(jdata, "save_ckpt")
+    training["disp_training"] = j_must_have(jdata, "disp_training")
+    training["time_training"] = j_must_have(jdata, "time_training")
+    if "profiling" in jdata:
+        training["profiling"] = jdata["profiling"]
+        if training["profiling"]:
+            training["profiling_file"] = j_must_have(jdata, "profiling_file")
+    return training
+
+
+def _jcopy(src: Dict[str, Any], dst: Dict[str, Any], keys: Sequence[str]):
+    """Copy specified keys from one dict to another.
+
+    Parameters
+    ----------
+    src : Dict[str, Any]
+        source dictionary
+    dst : Dict[str, Any]
+        destination dictionary, will be modified in place
+    keys : Sequence[str]
+        list of keys to copy
+    """
+    for k in keys:
+        dst[k] = src[k]
+
+
+def remove_decay_rate(jdata: Dict[str, Any]):
+    """Convert decay_rate to stop_lr.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        input data
+    """
+    lr = jdata["learning_rate"]
+    if "decay_rate" in lr:
+        decay_rate = lr["decay_rate"]
+        start_lr = lr["start_lr"]
+        stop_step = jdata["training"]["stop_batch"]
+        decay_steps = lr["decay_steps"]
+        stop_lr = np.exp(np.log(decay_rate) * (stop_step / decay_steps)) * start_lr
+        lr["stop_lr"] = stop_lr
+        lr.pop("decay_rate")
+
+
+def convert_input_v1_v2(
+    jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None
+) -> Dict[str, Any]:
+    tr_cfg = jdata["training"]
+    tr_data_keys = {
+        "systems",
+        "set_prefix",
+        "batch_size",
+        "sys_prob",
+        "auto_prob",
+        # alias included
+        "sys_weights",
+        "auto_prob_style",
+    }
+
+    tr_data_cfg = {k: v for k, v in tr_cfg.items() if k in tr_data_keys}
+    new_tr_cfg = {k: v for k, v in tr_cfg.items() if k not in tr_data_keys}
+    new_tr_cfg["training_data"] = tr_data_cfg
+    if "training_data" in tr_cfg:
+        raise RuntimeError(
+            "Both v1 (training/systems) and v2 (training/training_data) parameters are given."
+        )
+
+    jdata["training"] = new_tr_cfg
+
+    # remove deprecated arguments
+    remove_decay_rate(jdata)
+
+    if warning:
+        _warning_input_v1_v2(dump)
+    if dump is not None:
+        with open(dump, "w") as fp:
+            json.dump(jdata, fp, indent=4)
+
+    return jdata
+
+
+def _warning_input_v1_v2(fname: Optional[Union[str, Path]]):
+    msg = (
+        "It seems that you are using a deepmd-kit input of version 1.x.x, "
+        "which is deprecated. we have converted the input to >2.0.0 compatible"
+    )
+    if fname is not None:
+        msg += f", and output it to file {fname}"
+    warnings.warn(msg)
+
+
+def deprecate_numb_test(
+    jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None
+) -> Dict[str, Any]:
+    """Deprecate `numb_test` since v2.1. It has taken no effect since v2.0.
+
+    See `#1243 <https://github.com/deepmodeling/deepmd-kit/discussions/1243>`_.
+
+    Parameters
+    ----------
+    jdata : Dict[str, Any]
+        loaded json/yaml file
+    warning : bool, optional
+        whether to show deprecation warning, by default True
+    dump : Optional[Union[str, Path]], optional
+        whether to dump converted file, by default None
+
+    Returns
+    -------
+    Dict[str, Any]
+        converted output
+    """
+    try:
+        jdata.get("training", {}).pop("numb_test")
+    except KeyError:
+        pass
+    else:
+        if warning:
+            warnings.warn(
+                "The argument training->numb_test has been deprecated since v2.0.0. "
+                "Use training->validation_data->batch_size instead."
+            )
+
+    if dump is not None:
+        with open(dump, "w") as fp:
+            json.dump(jdata, fp, indent=4)
+    return jdata
+
+
+def update_deepmd_input(
+    jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None
+) -> Dict[str, Any]:
+    def is_deepmd_v0_input(jdata):
+        return "model" not in jdata.keys()
+
+    def is_deepmd_v1_input(jdata):
+        return "systems" in j_must_have(jdata, "training").keys()
+
+    if is_deepmd_v0_input(jdata):
+        jdata = convert_input_v0_v1(jdata, warning, None)
+        jdata = convert_input_v1_v2(jdata, False, None)
+        jdata = deprecate_numb_test(jdata, False, dump)
+    elif is_deepmd_v1_input(jdata):
+        jdata = convert_input_v1_v2(jdata, warning, None)
+        jdata = deprecate_numb_test(jdata, False, dump)
+    else:
+        jdata = deprecate_numb_test(jdata, warning, dump)
+
+    return jdata
diff --git a/deepmd/utils/data.py b/deepmd/utils/data.py
index a6f888beac..3cf73dc093 100644
--- a/deepmd/utils/data.py
+++ b/deepmd/utils/data.py
@@ -1,9 +1,787 @@
+#!/usr/bin/env python3
+
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Alias for backward compatibility."""
-from deepmd_utils.utils.data import (
-    DeepmdData,
+import bisect
+import logging
+from typing import (
+    List,
+    Optional,
+)
+
+import numpy as np
+
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
 )
+from deepmd.utils import random as dp_random
+from deepmd.utils.path import (
+    DPPath,
+)
+
+log = logging.getLogger(__name__)
+
+
+class DeepmdData:
+    """Class for a data system.
+
+    It loads data from hard disk, and mantains the data as a `data_dict`
+
+    Parameters
+    ----------
+    sys_path
+            Path to the data system
+    set_prefix
+            Prefix for the directories of different sets
+    shuffle_test
+            If the test data are shuffled
+    type_map
+            Gives the name of different atom types
+    optional_type_map
+            If the type_map.raw in each system is optional
+    modifier
+            Data modifier that has the method `modify_data`
+    trn_all_set
+            Use all sets as training dataset. Otherwise, if the number of sets is more than 1, the last set is left for test.
+    sort_atoms : bool
+            Sort atoms by atom types. Required to enable when the data is directly feeded to
+            descriptors except mixed types.
+    """
+
+    def __init__(
+        self,
+        sys_path: str,
+        set_prefix: str = "set",
+        shuffle_test: bool = True,
+        type_map: Optional[List[str]] = None,
+        optional_type_map: bool = True,
+        modifier=None,
+        trn_all_set: bool = False,
+        sort_atoms: bool = True,
+    ):
+        """Constructor."""
+        root = DPPath(sys_path)
+        self.dirs = root.glob(set_prefix + ".*")
+        if not len(self.dirs):
+            raise FileNotFoundError(f"No {set_prefix}.* is found in {sys_path}")
+        self.dirs.sort()
+        # check mix_type format
+        error_format_msg = (
+            "if one of the set is of mixed_type format, "
+            "then all of the sets in this system should be of mixed_type format!"
+        )
+        self.mixed_type = self._check_mode(self.dirs[0])
+        for set_item in self.dirs[1:]:
+            assert self._check_mode(set_item) == self.mixed_type, error_format_msg
+        # load atom type
+        self.atom_type = self._load_type(root)
+        self.natoms = len(self.atom_type)
+        # load atom type map
+        self.type_map = self._load_type_map(root)
+        assert (
+            optional_type_map or self.type_map is not None
+        ), f"System {sys_path} must have type_map.raw in this mode! "
+        if self.type_map is not None:
+            assert len(self.type_map) >= max(self.atom_type) + 1
+        # check pbc
+        self.pbc = self._check_pbc(root)
+        # enforce type_map if necessary
+        self.enforce_type_map = False
+        if type_map is not None and self.type_map is not None and len(type_map):
+            if not self.mixed_type:
+                atom_type_ = [
+                    type_map.index(self.type_map[ii]) for ii in self.atom_type
+                ]
+                self.atom_type = np.array(atom_type_, dtype=np.int32)
+            else:
+                self.enforce_type_map = True
+                sorter = np.argsort(type_map)
+                self.type_idx_map = np.array(
+                    sorter[np.searchsorted(type_map, self.type_map, sorter=sorter)]
+                )
+                # padding for virtual atom
+                self.type_idx_map = np.append(
+                    self.type_idx_map, np.array([-1], dtype=np.int32)
+                )
+            self.type_map = type_map
+        if type_map is None and self.type_map is None and self.mixed_type:
+            raise RuntimeError("mixed_type format must have type_map!")
+        # make idx map
+        self.sort_atoms = sort_atoms
+        self.idx_map = self._make_idx_map(self.atom_type)
+        # train dirs
+        self.test_dir = self.dirs[-1]
+        if trn_all_set:
+            self.train_dirs = self.dirs
+        else:
+            if len(self.dirs) == 1:
+                self.train_dirs = self.dirs
+            else:
+                self.train_dirs = self.dirs[:-1]
+        self.data_dict = {}
+        # add box and coord
+        self.add("box", 9, must=self.pbc)
+        self.add("coord", 3, atomic=True, must=True)
+        # the training times of each frame
+        self.add("numb_copy", 1, must=False, default=1, dtype=int)
+        # set counters
+        self.set_count = 0
+        self.iterator = 0
+        self.shuffle_test = shuffle_test
+        # set modifier
+        self.modifier = modifier
+        # calculate prefix sum for get_item method
+        frames_list = [self._get_nframes(item) for item in self.dirs]
+        self.nframes = np.sum(frames_list)
+        # The prefix sum stores the range of indices contained in each directory, which is needed by get_item method
+        self.prefix_sum = np.cumsum(frames_list).tolist()
+
+    def add(
+        self,
+        key: str,
+        ndof: int,
+        atomic: bool = False,
+        must: bool = False,
+        high_prec: bool = False,
+        type_sel: Optional[List[int]] = None,
+        repeat: int = 1,
+        default: float = 0.0,
+        dtype: Optional[np.dtype] = None,
+        output_natoms_for_type_sel: bool = False,
+    ):
+        """Add a data item that to be loaded.
+
+        Parameters
+        ----------
+        key
+            The key of the item. The corresponding data is stored in `sys_path/set.*/key.npy`
+        ndof
+            The number of dof
+        atomic
+            The item is an atomic property.
+            If False, the size of the data should be nframes x ndof
+            If True, the size of data should be nframes x natoms x ndof
+        must
+            The data file `sys_path/set.*/key.npy` must exist.
+            If must is False and the data file does not exist, the `data_dict[find_key]` is set to 0.0
+        high_prec
+            Load the data and store in float64, otherwise in float32
+        type_sel
+            Select certain type of atoms
+        repeat
+            The data will be repeated `repeat` times.
+        default : float, default=0.
+            default value of data
+        dtype : np.dtype, optional
+            the dtype of data, overwrites `high_prec` if provided
+        output_natoms_for_type_sel : bool, optional
+            if True and type_sel is True, the atomic dimension will be natoms instead of nsel
+        """
+        self.data_dict[key] = {
+            "ndof": ndof,
+            "atomic": atomic,
+            "must": must,
+            "high_prec": high_prec,
+            "type_sel": type_sel,
+            "repeat": repeat,
+            "reduce": None,
+            "default": default,
+            "dtype": dtype,
+            "output_natoms_for_type_sel": output_natoms_for_type_sel,
+        }
+        return self
+
+    def reduce(self, key_out: str, key_in: str):
+        """Generate a new item from the reduction of another atom.
+
+        Parameters
+        ----------
+        key_out
+            The name of the reduced item
+        key_in
+            The name of the data item to be reduced
+        """
+        assert key_in in self.data_dict, "cannot find input key"
+        assert self.data_dict[key_in]["atomic"], "reduced property should be atomic"
+        assert key_out not in self.data_dict, "output key should not have been added"
+        assert (
+            self.data_dict[key_in]["repeat"] == 1
+        ), "reduced proerties should not have been repeated"
+
+        self.data_dict[key_out] = {
+            "ndof": self.data_dict[key_in]["ndof"],
+            "atomic": False,
+            "must": True,
+            "high_prec": True,
+            "type_sel": None,
+            "repeat": 1,
+            "reduce": key_in,
+        }
+        return self
+
+    def get_data_dict(self) -> dict:
+        """Get the `data_dict`."""
+        return self.data_dict
+
+    def check_batch_size(self, batch_size):
+        """Check if the system can get a batch of data with `batch_size` frames."""
+        for ii in self.train_dirs:
+            if self.data_dict["coord"]["high_prec"]:
+                tmpe = (
+                    (ii / "coord.npy").load_numpy().astype(GLOBAL_ENER_FLOAT_PRECISION)
+                )
+            else:
+                tmpe = (ii / "coord.npy").load_numpy().astype(GLOBAL_NP_FLOAT_PRECISION)
+            if tmpe.ndim == 1:
+                tmpe = tmpe.reshape([1, -1])
+            if tmpe.shape[0] < batch_size:
+                return ii, tmpe.shape[0]
+        return None
+
+    def check_test_size(self, test_size):
+        """Check if the system can get a test dataset with `test_size` frames."""
+        if self.data_dict["coord"]["high_prec"]:
+            tmpe = (
+                (self.test_dir / "coord.npy")
+                .load_numpy()
+                .astype(GLOBAL_ENER_FLOAT_PRECISION)
+            )
+        else:
+            tmpe = (
+                (self.test_dir / "coord.npy")
+                .load_numpy()
+                .astype(GLOBAL_NP_FLOAT_PRECISION)
+            )
+        if tmpe.ndim == 1:
+            tmpe = tmpe.reshape([1, -1])
+        if tmpe.shape[0] < test_size:
+            return self.test_dir, tmpe.shape[0]
+        else:
+            return None
+
+    def get_item_torch(self, index: int) -> dict:
+        """Get a single frame data . The frame is picked from the data system by index. The index is coded across all the sets.
+
+        Parameters
+        ----------
+        index
+            index of the frame
+        """
+        i = bisect.bisect_right(self.prefix_sum, index)
+        frames = self._load_set(self.dirs[i])
+        frame = self._get_subdata(frames, index - self.prefix_sum[i])
+        frame = self.reformat_data_torch(frame)
+        frame["fid"] = index
+        return frame
+
+    def get_batch(self, batch_size: int) -> dict:
+        """Get a batch of data with `batch_size` frames. The frames are randomly picked from the data system.
+
+        Parameters
+        ----------
+        batch_size
+            size of the batch
+        """
+        if hasattr(self, "batch_set"):
+            set_size = self.batch_set["coord"].shape[0]
+        else:
+            set_size = 0
+        if self.iterator + batch_size > set_size:
+            self._load_batch_set(self.train_dirs[self.set_count % self.get_numb_set()])
+            self.set_count += 1
+            set_size = self.batch_set["coord"].shape[0]
+        iterator_1 = self.iterator + batch_size
+        if iterator_1 >= set_size:
+            iterator_1 = set_size
+        idx = np.arange(self.iterator, iterator_1)
+        self.iterator += batch_size
+        ret = self._get_subdata(self.batch_set, idx)
+        return ret
+
+    def get_test(self, ntests: int = -1) -> dict:
+        """Get the test data with `ntests` frames.
+
+        Parameters
+        ----------
+        ntests
+            Size of the test data set. If `ntests` is -1, all test data will be get.
+        """
+        if not hasattr(self, "test_set"):
+            self._load_test_set(self.test_dir, self.shuffle_test)
+        if ntests == -1:
+            idx = None
+        else:
+            ntests_ = (
+                ntests
+                if ntests < self.test_set["type"].shape[0]
+                else self.test_set["type"].shape[0]
+            )
+            # print('ntest', self.test_set['type'].shape[0], ntests, ntests_)
+            idx = np.arange(ntests_)
+        ret = self._get_subdata(self.test_set, idx=idx)
+        if self.modifier is not None:
+            self.modifier.modify_data(ret, self)
+        return ret
+
+    def get_ntypes(self) -> int:
+        """Number of atom types in the system."""
+        if self.type_map is not None:
+            return len(self.type_map)
+        else:
+            return max(self.get_atom_type()) + 1
+
+    def get_type_map(self) -> List[str]:
+        """Get the type map."""
+        return self.type_map
+
+    def get_atom_type(self) -> List[int]:
+        """Get atom types."""
+        return self.atom_type
+
+    def get_numb_set(self) -> int:
+        """Get number of training sets."""
+        return len(self.train_dirs)
+
+    def get_numb_batch(self, batch_size: int, set_idx: int) -> int:
+        """Get the number of batches in a set."""
+        data = self._load_set(self.train_dirs[set_idx])
+        ret = data["coord"].shape[0] // batch_size
+        if ret == 0:
+            ret = 1
+        return ret
+
+    def get_sys_numb_batch(self, batch_size: int) -> int:
+        """Get the number of batches in the data system."""
+        ret = 0
+        for ii in range(len(self.train_dirs)):
+            ret += self.get_numb_batch(batch_size, ii)
+        return ret
+
+    def get_natoms(self):
+        """Get number of atoms."""
+        return len(self.atom_type)
+
+    def get_natoms_vec(self, ntypes: int):
+        """Get number of atoms and number of atoms in different types.
+
+        Parameters
+        ----------
+        ntypes
+            Number of types (may be larger than the actual number of types in the system).
+
+        Returns
+        -------
+        natoms
+            natoms[0]: number of local atoms
+            natoms[1]: total number of atoms held by this processor
+            natoms[i]: 2 <= i < Ntypes+2, number of type i atoms
+        """
+        natoms, natoms_vec = self._get_natoms_2(ntypes)
+        tmp = [natoms, natoms]
+        tmp = np.append(tmp, natoms_vec)
+        return tmp.astype(np.int32)
+
+    def avg(self, key):
+        """Return the average value of an item."""
+        if key not in self.data_dict.keys():
+            raise RuntimeError("key %s has not been added" % key)
+        info = self.data_dict[key]
+        ndof = info["ndof"]
+        eners = []
+        for ii in self.train_dirs:
+            data = self._load_set(ii)
+            ei = data[key].reshape([-1, ndof])
+            eners.append(ei)
+        eners = np.concatenate(eners, axis=0)
+        if eners.size == 0:
+            return 0
+        else:
+            return np.average(eners, axis=0)
+
+    def _idx_map_sel(self, atom_type, type_sel):
+        new_types = []
+        for ii in atom_type:
+            if ii in type_sel:
+                new_types.append(ii)
+        new_types = np.array(new_types, dtype=int)
+        natoms = new_types.shape[0]
+        idx = np.arange(natoms)
+        idx_map = np.lexsort((idx, new_types))
+        return idx_map
+
+    def _get_natoms_2(self, ntypes):
+        sample_type = self.atom_type
+        natoms = len(sample_type)
+        natoms_vec = np.zeros(ntypes).astype(int)
+        for ii in range(ntypes):
+            natoms_vec[ii] = np.count_nonzero(sample_type == ii)
+        return natoms, natoms_vec
+
+    def _get_subdata(self, data, idx=None):
+        new_data = {}
+        for ii in data:
+            dd = data[ii]
+            if "find_" in ii:
+                new_data[ii] = dd
+            else:
+                if idx is not None:
+                    new_data[ii] = dd[idx]
+                else:
+                    new_data[ii] = dd
+        return new_data
+
+    def _load_batch_set(self, set_name: DPPath):
+        if not hasattr(self, "batch_set") or self.get_numb_set() > 1:
+            self.batch_set = self._load_set(set_name)
+            if self.modifier is not None:
+                self.modifier.modify_data(self.batch_set, self)
+        self.batch_set, _ = self._shuffle_data(self.batch_set)
+        self.reset_get_batch()
+
+    def reset_get_batch(self):
+        self.iterator = 0
+
+    def _load_test_set(self, set_name: DPPath, shuffle_test):
+        self.test_set = self._load_set(set_name)
+        if shuffle_test:
+            self.test_set, _ = self._shuffle_data(self.test_set)
+
+    def _shuffle_data(self, data):
+        ret = {}
+        nframes = data["coord"].shape[0]
+        idx = np.arange(nframes)
+        # the training times of each frame
+        idx = np.repeat(idx, np.reshape(data["numb_copy"], (nframes,)))
+        dp_random.shuffle(idx)
+        for kk in data:
+            if (
+                type(data[kk]) == np.ndarray
+                and len(data[kk].shape) == 2
+                and data[kk].shape[0] == nframes
+                and "find_" not in kk
+            ):
+                ret[kk] = data[kk][idx]
+            else:
+                ret[kk] = data[kk]
+        return ret, idx
+
+    def _get_nframes(self, set_name: DPPath):
+        # get nframes
+        if not isinstance(set_name, DPPath):
+            set_name = DPPath(set_name)
+        path = set_name / "coord.npy"
+        if self.data_dict["coord"]["high_prec"]:
+            coord = path.load_numpy().astype(GLOBAL_ENER_FLOAT_PRECISION)
+        else:
+            coord = path.load_numpy().astype(GLOBAL_NP_FLOAT_PRECISION)
+        if coord.ndim == 1:
+            coord = coord.reshape([1, -1])
+        nframes = coord.shape[0]
+        return nframes
+
+    def reformat_data_torch(self, data):
+        """Modify the data format for the requirements of Torch backend.
+
+        Parameters
+        ----------
+        data
+            original data
+        """
+        for kk in self.data_dict.keys():
+            if "find_" in kk:
+                pass
+            else:
+                if kk in data and self.data_dict[kk]["atomic"]:
+                    data[kk] = data[kk].reshape(-1, self.data_dict[kk]["ndof"])
+        data["atype"] = data["type"]
+        if not self.pbc:
+            data["box"] = None
+        return data
+
+    def _load_set(self, set_name: DPPath):
+        # get nframes
+        if not isinstance(set_name, DPPath):
+            set_name = DPPath(set_name)
+        path = set_name / "coord.npy"
+        if self.data_dict["coord"]["high_prec"]:
+            coord = path.load_numpy().astype(GLOBAL_ENER_FLOAT_PRECISION)
+        else:
+            coord = path.load_numpy().astype(GLOBAL_NP_FLOAT_PRECISION)
+        if coord.ndim == 1:
+            coord = coord.reshape([1, -1])
+        nframes = coord.shape[0]
+        assert coord.shape[1] == self.data_dict["coord"]["ndof"] * self.natoms
+        # load keys
+        data = {}
+        for kk in self.data_dict.keys():
+            if self.data_dict[kk]["reduce"] is None:
+                data["find_" + kk], data[kk] = self._load_data(
+                    set_name,
+                    kk,
+                    nframes,
+                    self.data_dict[kk]["ndof"],
+                    atomic=self.data_dict[kk]["atomic"],
+                    high_prec=self.data_dict[kk]["high_prec"],
+                    must=self.data_dict[kk]["must"],
+                    type_sel=self.data_dict[kk]["type_sel"],
+                    repeat=self.data_dict[kk]["repeat"],
+                    default=self.data_dict[kk]["default"],
+                    dtype=self.data_dict[kk]["dtype"],
+                    output_natoms_for_type_sel=self.data_dict[kk][
+                        "output_natoms_for_type_sel"
+                    ],
+                )
+        for kk in self.data_dict.keys():
+            if self.data_dict[kk]["reduce"] is not None:
+                k_in = self.data_dict[kk]["reduce"]
+                ndof = self.data_dict[kk]["ndof"]
+                data["find_" + kk] = data["find_" + k_in]
+                tmp_in = data[k_in].astype(GLOBAL_ENER_FLOAT_PRECISION)
+                data[kk] = np.sum(
+                    np.reshape(tmp_in, [nframes, self.natoms, ndof]), axis=1
+                )
+
+        if self.mixed_type:
+            # nframes x natoms
+            atom_type_mix = self._load_type_mix(set_name)
+            if self.enforce_type_map:
+                try:
+                    atom_type_mix_ = self.type_idx_map[atom_type_mix].astype(np.int32)
+                except IndexError as e:
+                    raise IndexError(
+                        f"some types in 'real_atom_types.npy' of set {set_name} are not contained in {self.get_ntypes()} types!"
+                    ) from e
+                atom_type_mix = atom_type_mix_
+            real_type = atom_type_mix.reshape([nframes, self.natoms])
+            data["type"] = real_type
+            natoms = data["type"].shape[1]
+            # nframes x ntypes
+            atom_type_nums = np.array(
+                [(real_type == i).sum(axis=-1) for i in range(self.get_ntypes())],
+                dtype=np.int32,
+            ).T
+            ghost_nums = np.array(
+                [(real_type == -1).sum(axis=-1)],
+                dtype=np.int32,
+            ).T
+            assert (
+                atom_type_nums.sum(axis=-1) + ghost_nums.sum(axis=-1) == natoms
+            ).all(), f"some types in 'real_atom_types.npy' of set {set_name} are not contained in {self.get_ntypes()} types!"
+            data["real_natoms_vec"] = np.concatenate(
+                (
+                    np.tile(np.array([natoms, natoms], dtype=np.int32), (nframes, 1)),
+                    atom_type_nums,
+                ),
+                axis=-1,
+            )
+        else:
+            data["type"] = np.tile(self.atom_type[self.idx_map], (nframes, 1))
+
+        return data
+
+    def _load_data(
+        self,
+        set_name,
+        key,
+        nframes,
+        ndof_,
+        atomic=False,
+        must=True,
+        repeat=1,
+        high_prec=False,
+        type_sel=None,
+        default: float = 0.0,
+        dtype: Optional[np.dtype] = None,
+        output_natoms_for_type_sel: bool = False,
+    ):
+        if atomic:
+            natoms = self.natoms
+            idx_map = self.idx_map
+            # if type_sel, then revise natoms and idx_map
+            if type_sel is not None:
+                natoms_sel = 0
+                for jj in type_sel:
+                    natoms_sel += np.sum(self.atom_type == jj)
+                idx_map_sel = self._idx_map_sel(self.atom_type, type_sel)
+            else:
+                natoms_sel = natoms
+                idx_map_sel = idx_map
+            ndof = ndof_ * natoms
+        else:
+            ndof = ndof_
+            natoms_sel = 0
+            idx_map_sel = None
+        if dtype is not None:
+            pass
+        elif high_prec:
+            dtype = GLOBAL_ENER_FLOAT_PRECISION
+        else:
+            dtype = GLOBAL_NP_FLOAT_PRECISION
+        path = set_name / (key + ".npy")
+        if path.is_file():
+            data = path.load_numpy().astype(dtype)
+            try:  # YWolfeee: deal with data shape error
+                if atomic:
+                    if type_sel is not None:
+                        # check the data shape is nsel or natoms
+                        if data.size == nframes * natoms_sel * ndof_:
+                            if output_natoms_for_type_sel:
+                                tmp = np.zeros(
+                                    [nframes, natoms, ndof_], dtype=data.dtype
+                                )
+                                sel_mask = np.isin(self.atom_type, type_sel)
+                                tmp[:, sel_mask] = data.reshape(
+                                    [nframes, natoms_sel, ndof_]
+                                )
+                                data = tmp
+                            else:
+                                natoms = natoms_sel
+                                idx_map = idx_map_sel
+                                ndof = ndof_ * natoms
+                        elif data.size == nframes * natoms * ndof_:
+                            if output_natoms_for_type_sel:
+                                pass
+                            else:
+                                sel_mask = np.isin(self.atom_type, type_sel)
+                                data = data.reshape([nframes, natoms, ndof_])
+                                data = data[:, sel_mask]
+                                natoms = natoms_sel
+                                idx_map = idx_map_sel
+                                ndof = ndof_ * natoms
+                        else:
+                            raise ValueError(
+                                f"The shape of the data {key} in {set_name}"
+                                f"is {data.shape}, which doesn't match either"
+                                f"({nframes}, {natoms_sel}, {ndof_}) or"
+                                f"({nframes}, {natoms}, {ndof_})"
+                            )
+                    data = data.reshape([nframes, natoms, -1])
+                    data = data[:, idx_map, :]
+                    data = data.reshape([nframes, -1])
+                data = np.reshape(data, [nframes, ndof])
+            except ValueError as err_message:
+                explanation = "This error may occur when your label mismatch it's name, i.e. you might store global tensor in `atomic_tensor.npy` or atomic tensor in `tensor.npy`."
+                log.error(str(err_message))
+                log.error(explanation)
+                raise ValueError(str(err_message) + ". " + explanation) from err_message
+            if repeat != 1:
+                data = np.repeat(data, repeat).reshape([nframes, -1])
+            return np.float32(1.0), data
+        elif must:
+            raise RuntimeError("%s not found!" % path)
+        else:
+            if atomic and type_sel is not None and not output_natoms_for_type_sel:
+                ndof = ndof_ * natoms_sel
+            data = np.full([nframes, ndof], default, dtype=dtype)
+            if repeat != 1:
+                data = np.repeat(data, repeat).reshape([nframes, -1])
+            return np.float32(0.0), data
+
+    def _load_type(self, sys_path: DPPath):
+        atom_type = (sys_path / "type.raw").load_txt(ndmin=1).astype(np.int32)
+        return atom_type
+
+    def _load_type_mix(self, set_name: DPPath):
+        type_path = set_name / "real_atom_types.npy"
+        real_type = type_path.load_numpy().astype(np.int32).reshape([-1, self.natoms])
+        return real_type
+
+    def _make_idx_map(self, atom_type):
+        natoms = atom_type.shape[0]
+        idx = np.arange(natoms)
+        if self.sort_atoms:
+            idx_map = np.lexsort((idx, atom_type))
+        else:
+            idx_map = idx
+        return idx_map
+
+    def _load_type_map(self, sys_path: DPPath):
+        fname = sys_path / "type_map.raw"
+        if fname.is_file():
+            return fname.load_txt(dtype=str, ndmin=1).tolist()
+        else:
+            return None
+
+    def _check_pbc(self, sys_path: DPPath):
+        pbc = True
+        if (sys_path / "nopbc").is_file():
+            pbc = False
+        return pbc
+
+    def _check_mode(self, set_path: DPPath):
+        return (set_path / "real_atom_types.npy").is_file()
+
+
+class DataRequirementItem:
+    """A class to store the data requirement for data systems.
+
+    Parameters
+    ----------
+    key
+        The key of the item. The corresponding data is stored in `sys_path/set.*/key.npy`
+    ndof
+        The number of dof
+    atomic
+        The item is an atomic property.
+        If False, the size of the data should be nframes x ndof
+        If True, the size of data should be nframes x natoms x ndof
+    must
+        The data file `sys_path/set.*/key.npy` must exist.
+        If must is False and the data file does not exist, the `data_dict[find_key]` is set to 0.0
+    high_prec
+        Load the data and store in float64, otherwise in float32
+    type_sel
+        Select certain type of atoms
+    repeat
+        The data will be repeated `repeat` times.
+    default : float, default=0.
+        default value of data
+    dtype : np.dtype, optional
+        the dtype of data, overwrites `high_prec` if provided
+    output_natoms_for_type_sel : bool, optional
+        if True and type_sel is True, the atomic dimension will be natoms instead of nsel
+    """
+
+    def __init__(
+        self,
+        key: str,
+        ndof: int,
+        atomic: bool = False,
+        must: bool = False,
+        high_prec: bool = False,
+        type_sel: Optional[List[int]] = None,
+        repeat: int = 1,
+        default: float = 0.0,
+        dtype: Optional[np.dtype] = None,
+        output_natoms_for_type_sel: bool = False,
+    ) -> None:
+        self.key = key
+        self.ndof = ndof
+        self.atomic = atomic
+        self.must = must
+        self.high_prec = high_prec
+        self.type_sel = type_sel
+        self.repeat = repeat
+        self.default = default
+        self.dtype = dtype
+        self.output_natoms_for_type_sel = output_natoms_for_type_sel
+        self.dict = self.to_dict()
+
+    def to_dict(self) -> dict:
+        return {
+            "key": self.key,
+            "ndof": self.ndof,
+            "atomic": self.atomic,
+            "must": self.must,
+            "high_prec": self.high_prec,
+            "type_sel": self.type_sel,
+            "repeat": self.repeat,
+            "default": self.default,
+            "dtype": self.dtype,
+            "output_natoms_for_type_sel": self.output_natoms_for_type_sel,
+        }
 
-__all__ = [
-    "DeepmdData",
-]
+    def __getitem__(self, key: str):
+        if key not in self.dict:
+            raise KeyError(key)
+        return self.dict[key]
diff --git a/deepmd/utils/data_system.py b/deepmd/utils/data_system.py
index 65e87d8ebc..640083bc33 100644
--- a/deepmd/utils/data_system.py
+++ b/deepmd/utils/data_system.py
@@ -1,13 +1,812 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Alias for backward compatibility."""
-from deepmd_utils.utils.data_system import (
-    DeepmdDataSystem,
-    prob_sys_size_ext,
-    process_sys_probs,
-)
-
-__all__ = [
-    "DeepmdDataSystem",
-    "process_sys_probs",
-    "prob_sys_size_ext",
-]
+import collections
+import logging
+import warnings
+from functools import (
+    lru_cache,
+)
+from typing import (
+    Any,
+    Dict,
+    List,
+    Optional,
+    Union,
+)
+
+import numpy as np
+
+import deepmd.utils.random as dp_random
+from deepmd.common import (
+    data_requirement,
+    expand_sys_str,
+    j_must_have,
+    make_default_mesh,
+)
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+from deepmd.utils.data import (
+    DeepmdData,
+)
+from deepmd.utils.out_stat import (
+    compute_stats_from_redu,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+
+log = logging.getLogger(__name__)
+
+
+class DeepmdDataSystem:
+    """Class for manipulating many data systems.
+
+    It is implemented with the help of DeepmdData
+    """
+
+    def __init__(
+        self,
+        systems: List[str],
+        batch_size: int,
+        test_size: int,
+        rcut: Optional[float] = None,
+        set_prefix: str = "set",
+        shuffle_test: bool = True,
+        type_map: Optional[List[str]] = None,
+        optional_type_map: bool = True,
+        modifier=None,
+        trn_all_set=False,
+        sys_probs=None,
+        auto_prob_style="prob_sys_size",
+        sort_atoms: bool = True,
+    ):
+        """Constructor.
+
+        Parameters
+        ----------
+        systems
+            Specifying the paths to systems
+        batch_size
+            The batch size
+        test_size
+            The size of test data
+        rcut
+            The cut-off radius. Not used.
+        set_prefix
+            Prefix for the directories of different sets
+        shuffle_test
+            If the test data are shuffled
+        type_map
+            Gives the name of different atom types
+        optional_type_map
+            If the type_map.raw in each system is optional
+        modifier
+            Data modifier that has the method `modify_data`
+        trn_all_set
+            Use all sets as training dataset. Otherwise, if the number of sets is more than 1, the last set is left for test.
+        sys_probs : list of float
+            The probabilitis of systems to get the batch.
+            Summation of positive elements of this list should be no greater than 1.
+            Element of this list can be negative, the probability of the corresponding system is determined
+                automatically by the number of batches in the system.
+        auto_prob_style : str
+            Determine the probability of systems automatically. The method is assigned by this key and can be
+            - "prob_uniform"  : the probability all the systems are equal, namely 1.0/self.get_nsystems()
+            - "prob_sys_size" : the probability of a system is proportional to the number of batches in the system
+            - "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." :
+                                the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`,
+                                where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system,
+                                the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional
+                to the number of batches in the system.
+        sort_atoms : bool
+            Sort atoms by atom types. Required to enable when the data is directly feeded to
+            descriptors except mixed types.
+        """
+        # init data
+        del rcut
+        self.system_dirs = systems
+        self.nsystems = len(self.system_dirs)
+        self.data_systems = []
+        for ii in self.system_dirs:
+            self.data_systems.append(
+                DeepmdData(
+                    ii,
+                    set_prefix=set_prefix,
+                    shuffle_test=shuffle_test,
+                    type_map=type_map,
+                    optional_type_map=optional_type_map,
+                    modifier=modifier,
+                    trn_all_set=trn_all_set,
+                    sort_atoms=sort_atoms,
+                )
+            )
+        # check mix_type format
+        error_format_msg = (
+            "if one of the system is of mixed_type format, "
+            "then all of the systems should be of mixed_type format!"
+        )
+        if self.data_systems[0].mixed_type:
+            for data_sys in self.data_systems[1:]:
+                assert data_sys.mixed_type, error_format_msg
+            self.mixed_type = True
+        else:
+            for data_sys in self.data_systems[1:]:
+                assert not data_sys.mixed_type, error_format_msg
+            self.mixed_type = False
+        # batch size
+        self.batch_size = batch_size
+        is_auto_bs = False
+        self.mixed_systems = False
+        if isinstance(self.batch_size, int):
+            self.batch_size = self.batch_size * np.ones(self.nsystems, dtype=int)
+        elif isinstance(self.batch_size, str):
+            words = self.batch_size.split(":")
+            if "auto" == words[0]:
+                is_auto_bs = True
+                rule = 32
+                if len(words) == 2:
+                    rule = int(words[1])
+                self.batch_size = self._make_auto_bs(rule)
+            elif "mixed" == words[0]:
+                self.mixed_type = True
+                self.mixed_systems = True
+                if len(words) == 2:
+                    rule = int(words[1])
+                else:
+                    raise RuntimeError("batch size must be specified for mixed systems")
+                self.batch_size = rule * np.ones(self.nsystems, dtype=int)
+            else:
+                raise RuntimeError("unknown batch_size rule " + words[0])
+        elif isinstance(self.batch_size, list):
+            pass
+        else:
+            raise RuntimeError("invalid batch_size")
+        assert isinstance(self.batch_size, (list, np.ndarray))
+        assert len(self.batch_size) == self.nsystems
+
+        # natoms, nbatches
+        ntypes = []
+        for ii in self.data_systems:
+            ntypes.append(ii.get_ntypes())
+        self.sys_ntypes = max(ntypes)
+        self.natoms = []
+        self.natoms_vec = []
+        self.nbatches = []
+        type_map_list = []
+        for ii in range(self.nsystems):
+            self.natoms.append(self.data_systems[ii].get_natoms())
+            self.natoms_vec.append(
+                self.data_systems[ii].get_natoms_vec(self.sys_ntypes).astype(int)
+            )
+            self.nbatches.append(
+                self.data_systems[ii].get_sys_numb_batch(self.batch_size[ii])
+            )
+            type_map_list.append(self.data_systems[ii].get_type_map())
+        self.type_map = self._check_type_map_consistency(type_map_list)
+
+        # ! altered by Marián Rynik
+        # test size
+        # now test size can be set as a percentage of systems data or test size
+        # can be set for each system individualy in the same manner as batch
+        # size. This enables one to use systems with diverse number of
+        # structures and different number of atoms.
+        self.test_size = test_size
+        if isinstance(self.test_size, int):
+            self.test_size = self.test_size * np.ones(self.nsystems, dtype=int)
+        elif isinstance(self.test_size, str):
+            words = self.test_size.split("%")
+            try:
+                percent = int(words[0])
+            except ValueError:
+                raise RuntimeError("unknown test_size rule " + words[0])
+            self.test_size = self._make_auto_ts(percent)
+        elif isinstance(self.test_size, list):
+            pass
+        else:
+            raise RuntimeError("invalid test_size")
+        assert isinstance(self.test_size, (list, np.ndarray))
+        assert len(self.test_size) == self.nsystems
+
+        # init pick idx
+        self.pick_idx = 0
+
+        # derive system probabilities
+        self.sys_probs = None
+        self.set_sys_probs(sys_probs, auto_prob_style)
+
+        # check batch and test size
+        for ii in range(self.nsystems):
+            chk_ret = self.data_systems[ii].check_batch_size(self.batch_size[ii])
+            if chk_ret is not None and not is_auto_bs and not self.mixed_systems:
+                warnings.warn(
+                    "system %s required batch size is larger than the size of the dataset %s (%d > %d)"
+                    % (
+                        self.system_dirs[ii],
+                        chk_ret[0],
+                        self.batch_size[ii],
+                        chk_ret[1],
+                    )
+                )
+            chk_ret = self.data_systems[ii].check_test_size(self.test_size[ii])
+            if chk_ret is not None and not is_auto_bs and not self.mixed_systems:
+                warnings.warn(
+                    "system %s required test size is larger than the size of the dataset %s (%d > %d)"
+                    % (self.system_dirs[ii], chk_ret[0], self.test_size[ii], chk_ret[1])
+                )
+
+    def _load_test(self, ntests=-1):
+        self.test_data = collections.defaultdict(list)
+        for ii in range(self.nsystems):
+            test_system_data = self.data_systems[ii].get_test(ntests=ntests)
+            for nn in test_system_data:
+                self.test_data[nn].append(test_system_data[nn])
+
+    @property
+    @lru_cache(maxsize=None)
+    def default_mesh(self) -> List[np.ndarray]:
+        """Mesh for each system."""
+        return [
+            make_default_mesh(
+                self.data_systems[ii].pbc, self.data_systems[ii].mixed_type
+            )
+            for ii in range(self.nsystems)
+        ]
+
+    def compute_energy_shift(self, rcond=None, key="energy"):
+        sys_ener = []
+        for ss in self.data_systems:
+            sys_ener.append(ss.avg(key))
+        sys_ener = np.concatenate(sys_ener)
+        sys_tynatom = np.array(self.natoms_vec, dtype=GLOBAL_NP_FLOAT_PRECISION)
+        sys_tynatom = np.reshape(sys_tynatom, [self.nsystems, -1])
+        sys_tynatom = sys_tynatom[:, 2:]
+        energy_shift, _ = compute_stats_from_redu(
+            sys_ener.reshape(-1, 1),
+            sys_tynatom,
+            rcond=rcond,
+        )
+        return energy_shift.ravel()
+
+    def add_dict(self, adict: dict) -> None:
+        """Add items to the data system by a `dict`.
+        `adict` should have items like
+        .. code-block:: python.
+
+           adict[key] = {
+               "ndof": ndof,
+               "atomic": atomic,
+               "must": must,
+               "high_prec": high_prec,
+               "type_sel": type_sel,
+               "repeat": repeat,
+           }
+
+        For the explaination of the keys see `add`
+        """
+        for kk in adict:
+            self.add(
+                kk,
+                adict[kk]["ndof"],
+                atomic=adict[kk]["atomic"],
+                must=adict[kk]["must"],
+                high_prec=adict[kk]["high_prec"],
+                type_sel=adict[kk]["type_sel"],
+                repeat=adict[kk]["repeat"],
+                default=adict[kk]["default"],
+                dtype=adict[kk].get("dtype"),
+                output_natoms_for_type_sel=adict[kk].get(
+                    "output_natoms_for_type_sel", False
+                ),
+            )
+
+    def add(
+        self,
+        key: str,
+        ndof: int,
+        atomic: bool = False,
+        must: bool = False,
+        high_prec: bool = False,
+        type_sel: Optional[List[int]] = None,
+        repeat: int = 1,
+        default: float = 0.0,
+        dtype: Optional[np.dtype] = None,
+        output_natoms_for_type_sel: bool = False,
+    ):
+        """Add a data item that to be loaded.
+
+        Parameters
+        ----------
+        key
+            The key of the item. The corresponding data is stored in `sys_path/set.*/key.npy`
+        ndof
+            The number of dof
+        atomic
+            The item is an atomic property.
+            If False, the size of the data should be nframes x ndof
+            If True, the size of data should be nframes x natoms x ndof
+        must
+            The data file `sys_path/set.*/key.npy` must exist.
+            If must is False and the data file does not exist, the `data_dict[find_key]` is set to 0.0
+        high_prec
+            Load the data and store in float64, otherwise in float32
+        type_sel
+            Select certain type of atoms
+        repeat
+            The data will be repeated `repeat` times.
+        default, default=0.
+            Default value of data
+        dtype
+            The dtype of data, overwrites `high_prec` if provided
+        output_natoms_for_type_sel : bool
+            If True and type_sel is True, the atomic dimension will be natoms instead of nsel
+        """
+        for ii in self.data_systems:
+            ii.add(
+                key,
+                ndof,
+                atomic=atomic,
+                must=must,
+                high_prec=high_prec,
+                repeat=repeat,
+                type_sel=type_sel,
+                default=default,
+                dtype=dtype,
+                output_natoms_for_type_sel=output_natoms_for_type_sel,
+            )
+
+    def reduce(self, key_out, key_in):
+        """Generate a new item from the reduction of another atom.
+
+        Parameters
+        ----------
+        key_out
+            The name of the reduced item
+        key_in
+            The name of the data item to be reduced
+        """
+        for ii in self.data_systems:
+            ii.reduce(key_out, key_in)
+
+    def get_data_dict(self, ii: int = 0) -> dict:
+        return self.data_systems[ii].get_data_dict()
+
+    def set_sys_probs(self, sys_probs=None, auto_prob_style: str = "prob_sys_size"):
+        if sys_probs is None:
+            if auto_prob_style == "prob_uniform":
+                prob_v = 1.0 / float(self.nsystems)
+                probs = [prob_v for ii in range(self.nsystems)]
+            elif auto_prob_style[:13] == "prob_sys_size":
+                if auto_prob_style == "prob_sys_size":
+                    prob_style = f"prob_sys_size;0:{self.get_nsystems()}:1.0"
+                else:
+                    prob_style = auto_prob_style
+                probs = prob_sys_size_ext(
+                    prob_style, self.get_nsystems(), self.nbatches
+                )
+            else:
+                raise RuntimeError("Unknown auto prob style: " + auto_prob_style)
+        else:
+            probs = process_sys_probs(sys_probs, self.nbatches)
+        self.sys_probs = probs
+
+    def get_batch(self, sys_idx: Optional[int] = None) -> dict:
+        # batch generation style altered by Ziyao Li:
+        # one should specify the "sys_prob" and "auto_prob_style" params
+        # via set_sys_prob() function. The sys_probs this function uses is
+        # defined as a private variable, self.sys_probs, initialized in __init__().
+        # This is to optimize the (vain) efforts in evaluating sys_probs every batch.
+        """Get a batch of data from the data systems.
+
+        Parameters
+        ----------
+        sys_idx : int
+            The index of system from which the batch is get.
+            If sys_idx is not None, `sys_probs` and `auto_prob_style` are ignored
+            If sys_idx is None, automatically determine the system according to `sys_probs` or `auto_prob_style`, see the following.
+            This option does not work for mixed systems.
+
+        Returns
+        -------
+        dict
+            The batch data
+        """
+        if not self.mixed_systems:
+            b_data = self.get_batch_standard(sys_idx)
+        else:
+            b_data = self.get_batch_mixed()
+        return b_data
+
+    def get_batch_standard(self, sys_idx: Optional[int] = None) -> dict:
+        """Get a batch of data from the data systems in the standard way.
+
+        Parameters
+        ----------
+        sys_idx : int
+            The index of system from which the batch is get.
+            If sys_idx is not None, `sys_probs` and `auto_prob_style` are ignored
+            If sys_idx is None, automatically determine the system according to `sys_probs` or `auto_prob_style`, see the following.
+
+        Returns
+        -------
+        dict
+            The batch data
+        """
+        if sys_idx is not None:
+            self.pick_idx = sys_idx
+        else:
+            # prob = self._get_sys_probs(sys_probs, auto_prob_style)
+            self.pick_idx = dp_random.choice(np.arange(self.nsystems), p=self.sys_probs)
+        b_data = self.data_systems[self.pick_idx].get_batch(
+            self.batch_size[self.pick_idx]
+        )
+        b_data["natoms_vec"] = self.natoms_vec[self.pick_idx]
+        b_data["default_mesh"] = self.default_mesh[self.pick_idx]
+        return b_data
+
+    def get_batch_mixed(self) -> dict:
+        """Get a batch of data from the data systems in the mixed way.
+
+        Returns
+        -------
+        dict
+            The batch data
+        """
+        # mixed systems have a global batch size
+        batch_size = self.batch_size[0]
+        batch_data = []
+        for _ in range(batch_size):
+            self.pick_idx = dp_random.choice(np.arange(self.nsystems), p=self.sys_probs)
+            bb_data = self.data_systems[self.pick_idx].get_batch(1)
+            bb_data["natoms_vec"] = self.natoms_vec[self.pick_idx]
+            bb_data["default_mesh"] = self.default_mesh[self.pick_idx]
+            batch_data.append(bb_data)
+        b_data = self._merge_batch_data(batch_data)
+        return b_data
+
+    def _merge_batch_data(self, batch_data: List[dict]) -> dict:
+        """Merge batch data from different systems.
+
+        Parameters
+        ----------
+        batch_data : list of dict
+            A list of batch data from different systems.
+
+        Returns
+        -------
+        dict
+            The merged batch data.
+        """
+        b_data = {}
+        max_natoms = max(bb["natoms_vec"][0] for bb in batch_data)
+        # natoms_vec
+        natoms_vec = np.zeros(2 + self.get_ntypes(), dtype=int)
+        natoms_vec[0:3] = max_natoms
+        b_data["natoms_vec"] = natoms_vec
+        # real_natoms_vec
+        real_natoms_vec = np.vstack([bb["natoms_vec"] for bb in batch_data])
+        b_data["real_natoms_vec"] = real_natoms_vec
+        # type
+        type_vec = np.full((len(batch_data), max_natoms), -1, dtype=int)
+        for ii, bb in enumerate(batch_data):
+            type_vec[ii, : bb["type"].shape[1]] = bb["type"][0]
+        b_data["type"] = type_vec
+        # default_mesh
+        default_mesh = np.mean([bb["default_mesh"] for bb in batch_data], axis=0)
+        b_data["default_mesh"] = default_mesh
+        # other data
+        data_dict = self.get_data_dict(0)
+        for kk, vv in data_dict.items():
+            if kk not in batch_data[0]:
+                continue
+            b_data["find_" + kk] = batch_data[0]["find_" + kk]
+            if not vv["atomic"]:
+                b_data[kk] = np.concatenate([bb[kk] for bb in batch_data], axis=0)
+            else:
+                b_data[kk] = np.zeros(
+                    (len(batch_data), max_natoms * vv["ndof"] * vv["repeat"]),
+                    dtype=batch_data[0][kk].dtype,
+                )
+                for ii, bb in enumerate(batch_data):
+                    b_data[kk][ii, : bb[kk].shape[1]] = bb[kk][0]
+        return b_data
+
+    # ! altered by Marián Rynik
+    def get_test(self, sys_idx: Optional[int] = None, n_test: int = -1):  # depreciated
+        """Get test data from the the data systems.
+
+        Parameters
+        ----------
+        sys_idx
+            The test dat of system with index `sys_idx` will be returned.
+            If is None, the currently selected system will be returned.
+        n_test
+            Number of test data. If set to -1 all test data will be get.
+        """
+        if not hasattr(self, "test_data"):
+            self._load_test(ntests=n_test)
+        if sys_idx is not None:
+            idx = sys_idx
+        else:
+            idx = self.pick_idx
+
+        test_system_data = {}
+        for nn in self.test_data:
+            test_system_data[nn] = self.test_data[nn][idx]
+        test_system_data["natoms_vec"] = self.natoms_vec[idx]
+        test_system_data["default_mesh"] = self.default_mesh[idx]
+        return test_system_data
+
+    def get_sys_ntest(self, sys_idx=None):
+        """Get number of tests for the currently selected system,
+        or one defined by sys_idx.
+        """
+        if sys_idx is not None:
+            return self.test_size[sys_idx]
+        else:
+            return self.test_size[self.pick_idx]
+
+    def get_type_map(self) -> List[str]:
+        """Get the type map."""
+        return self.type_map
+
+    def get_nbatches(self) -> int:
+        """Get the total number of batches."""
+        return self.nbatches
+
+    def get_ntypes(self) -> int:
+        """Get the number of types."""
+        return self.sys_ntypes
+
+    def get_nsystems(self) -> int:
+        """Get the number of data systems."""
+        return self.nsystems
+
+    def get_sys(self, idx: int) -> DeepmdData:
+        """Get a certain data system."""
+        return self.data_systems[idx]
+
+    def get_batch_size(self) -> int:
+        """Get the batch size."""
+        return self.batch_size
+
+    def print_summary(self, name: str):
+        print_summary(
+            name,
+            self.nsystems,
+            self.system_dirs,
+            self.natoms,
+            self.batch_size,
+            self.nbatches,
+            self.sys_probs,
+            [ii.pbc for ii in self.data_systems],
+        )
+
+    def _make_auto_bs(self, rule):
+        bs = []
+        for ii in self.data_systems:
+            ni = ii.get_natoms()
+            bsi = rule // ni
+            if bsi * ni < rule:
+                bsi += 1
+            bs.append(bsi)
+        return bs
+
+    # ! added by Marián Rynik
+    def _make_auto_ts(self, percent):
+        ts = []
+        for ii in range(self.nsystems):
+            ni = self.batch_size[ii] * self.nbatches[ii]
+            tsi = int(ni * percent / 100)
+            ts.append(tsi)
+
+        return ts
+
+    def _check_type_map_consistency(self, type_map_list):
+        ret = []
+        for ii in type_map_list:
+            if ii is not None:
+                min_len = min([len(ii), len(ret)])
+                for idx in range(min_len):
+                    if ii[idx] != ret[idx]:
+                        raise RuntimeError(f"inconsistent type map: {ret!s} {ii!s}")
+                if len(ii) > len(ret):
+                    ret = ii
+        return ret
+
+
+def _format_name_length(name, width):
+    if len(name) <= width:
+        return "{: >{}}".format(name, width)
+    else:
+        name = name[-(width - 3) :]
+        name = "-- " + name
+        return name
+
+
+def print_summary(
+    name: str,
+    nsystems: int,
+    system_dirs: List[str],
+    natoms: List[int],
+    batch_size: List[int],
+    nbatches: List[int],
+    sys_probs: List[float],
+    pbc: List[bool],
+):
+    """Print summary of systems.
+
+    Parameters
+    ----------
+    name : str
+        The name of the system
+    nsystems : int
+        The number of systems
+    system_dirs : list of str
+        The directories of the systems
+    natoms : list of int
+        The number of atoms
+    batch_size : list of int
+        The batch size
+    nbatches : list of int
+        The number of batches
+    sys_probs : list of float
+        The probabilities
+    pbc : list of bool
+        The periodic boundary conditions
+    """
+    # width 65
+    sys_width = 42
+    log.info(
+        f"---Summary of DataSystem: {name:13s}-----------------------------------------------"
+    )
+    log.info("found %d system(s):" % nsystems)
+    log.info(
+        ("%s  " % _format_name_length("system", sys_width))
+        + ("%6s  %6s  %6s  %9s  %3s" % ("natoms", "bch_sz", "n_bch", "prob", "pbc"))
+    )
+    for ii in range(nsystems):
+        log.info(
+            "%s  %6d  %6d  %6d  %9.3e  %3s"
+            % (
+                _format_name_length(system_dirs[ii], sys_width),
+                natoms[ii],
+                batch_size[ii],
+                nbatches[ii],
+                sys_probs[ii],
+                "T" if pbc[ii] else "F",
+            )
+        )
+    log.info(
+        "--------------------------------------------------------------------------------------"
+    )
+
+
+def process_sys_probs(sys_probs, nbatch):
+    sys_probs = np.array(sys_probs)
+    type_filter = sys_probs >= 0
+    assigned_sum_prob = np.sum(type_filter * sys_probs)
+    # 1e-8 is to handle floating point error; See #1917
+    assert (
+        assigned_sum_prob <= 1.0 + 1e-8
+    ), "the sum of assigned probability should be less than 1"
+    rest_sum_prob = 1.0 - assigned_sum_prob
+    if not np.isclose(rest_sum_prob, 0):
+        rest_nbatch = (1 - type_filter) * nbatch
+        rest_prob = rest_sum_prob * rest_nbatch / np.sum(rest_nbatch)
+        ret_prob = rest_prob + type_filter * sys_probs
+    else:
+        ret_prob = sys_probs
+    assert np.isclose(np.sum(ret_prob), 1), "sum of probs should be 1"
+    return ret_prob
+
+
+def prob_sys_size_ext(keywords, nsystems, nbatch):
+    block_str = keywords.split(";")[1:]
+    block_stt = []
+    block_end = []
+    block_weights = []
+    for ii in block_str:
+        stt = int(ii.split(":")[0])
+        end = int(ii.split(":")[1])
+        weight = float(ii.split(":")[2])
+        assert weight >= 0, "the weight of a block should be no less than 0"
+        block_stt.append(stt)
+        block_end.append(end)
+        block_weights.append(weight)
+    nblocks = len(block_str)
+    block_probs = np.array(block_weights) / np.sum(block_weights)
+    sys_probs = np.zeros([nsystems])
+    for ii in range(nblocks):
+        nbatch_block = nbatch[block_stt[ii] : block_end[ii]]
+        tmp_prob = [float(i) for i in nbatch_block] / np.sum(nbatch_block)
+        sys_probs[block_stt[ii] : block_end[ii]] = tmp_prob * block_probs[ii]
+    return sys_probs
+
+
+def process_systems(systems: Union[str, List[str]]) -> List[str]:
+    """Process the user-input systems.
+
+    If it is a single directory, search for all the systems in the directory.
+    Check if the systems are valid.
+
+    Parameters
+    ----------
+    systems : str or list of str
+        The user-input systems
+
+    Returns
+    -------
+    list of str
+        The valid systems
+    """
+    if isinstance(systems, str):
+        systems = expand_sys_str(systems)
+    elif isinstance(systems, list):
+        systems = systems.copy()
+    help_msg = "Please check your setting for data systems"
+    # check length of systems
+    if len(systems) == 0:
+        msg = "cannot find valid a data system"
+        log.fatal(msg)
+        raise OSError(msg, help_msg)
+    # rougly check all items in systems are valid
+    for ii in systems:
+        ii = DPPath(ii)
+        if not ii.is_dir():
+            msg = f"dir {ii} is not a valid dir"
+            log.fatal(msg)
+            raise OSError(msg, help_msg)
+        if not (ii / "type.raw").is_file():
+            msg = f"dir {ii} is not a valid data system dir"
+            log.fatal(msg)
+            raise OSError(msg, help_msg)
+    return systems
+
+
+def get_data(
+    jdata: Dict[str, Any], rcut, type_map, modifier, multi_task_mode=False
+) -> DeepmdDataSystem:
+    """Get the data system.
+
+    Parameters
+    ----------
+    jdata
+        The json data
+    rcut
+        The cut-off radius, not used
+    type_map
+        The type map
+    modifier
+        The data modifier
+    multi_task_mode
+        If in multi task mode
+
+    Returns
+    -------
+    DeepmdDataSystem
+        The data system
+    """
+    systems = j_must_have(jdata, "systems")
+    systems = process_systems(systems)
+
+    batch_size = j_must_have(jdata, "batch_size")
+    sys_probs = jdata.get("sys_probs", None)
+    auto_prob = jdata.get("auto_prob", "prob_sys_size")
+    optional_type_map = not multi_task_mode
+
+    data = DeepmdDataSystem(
+        systems=systems,
+        batch_size=batch_size,
+        test_size=1,  # to satisfy the old api
+        shuffle_test=True,  # to satisfy the old api
+        rcut=rcut,
+        type_map=type_map,
+        optional_type_map=optional_type_map,
+        modifier=modifier,
+        trn_all_set=True,  # sample from all sets
+        sys_probs=sys_probs,
+        auto_prob_style=auto_prob,
+    )
+    data.add_dict(data_requirement)
+
+    return data
diff --git a/deepmd/utils/econf_embd.py b/deepmd/utils/econf_embd.py
new file mode 100644
index 0000000000..3940db65ba
--- /dev/null
+++ b/deepmd/utils/econf_embd.py
@@ -0,0 +1,209 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import numpy as np
+
+try:
+    import dpdata
+    from mendeleev import (
+        element,
+    )
+except ImportError:
+    pass
+
+###
+# made by command
+# ret = make_econf_embedding(type_map, flatten=True)
+# print_econf_embedding(ret)
+###
+# fmt: off
+electronic_configuration_embedding = \
+{ kk: np.array(vv, dtype=np.int32) for kk,vv in {
+  "H"  : [1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "He" : [2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Li" : [2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Be" : [2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "B"  : [2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "C"  : [2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "N"  : [2,2,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "O"  : [2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "F"  : [2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Ne" : [2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Na" : [2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Mg" : [2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Al" : [2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Si" : [2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "P"  : [2,2,2,2,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "S"  : [2,2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Cl" : [2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Ar" : [2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "K"  : [2,2,2,2,2,2,2,2,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Ca" : [2,2,2,2,2,2,2,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Sc" : [2,2,2,2,2,2,2,2,2,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Ti" : [2,2,2,2,2,2,2,2,2,1,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "V"  : [2,2,2,2,2,2,2,2,2,1,1,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Cr" : [2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Mn" : [2,2,2,2,2,2,2,2,2,1,1,1,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Fe" : [2,2,2,2,2,2,2,2,2,2,1,1,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Co" : [2,2,2,2,2,2,2,2,2,2,2,1,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Ni" : [2,2,2,2,2,2,2,2,2,2,2,2,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Cu" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Zn" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Ga" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Ge" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "As" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Se" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Br" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Kr" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Rb" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Sr" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Y"  : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Zr" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Nb" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Mo" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Tc" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Ru" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Rh" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Pd" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Ag" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Cd" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "In" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Sn" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Sb" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Te" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "I"  : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Xe" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
+  "Cs" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0],
+  "Ba" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "La" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Ce" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Pr" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,0,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Nd" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,0,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Pm" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Sm" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Eu" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Gd" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Tb" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Dy" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Ho" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Er" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Tm" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Yb" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Lu" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Hf" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Ta" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "W"  : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Re" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Os" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Ir" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Pt" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0],
+  "Au" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0],
+  "Hg" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],
+  "Tl" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0],
+  "Pb" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,1,1,0,0,0,0,0,0,0],
+  "Bi" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,1,1,1,0,0,0,0,0,0],
+  "Po" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0],
+  "At" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,1,0,0,0,0,0,0],
+  "Rn" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,0,0,0,0,0,0],
+  "Fr" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,0,0,0,0,0,1],
+  "Ra" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,0,0,0,0,0,2],
+  "Ac" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,1,0,0,0,0,2],
+  "Th" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,2,2,2,1,1,0,0,0,2],
+  "Pa" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,0,0,0,0,2,2,2,2,1,0,0,0,0,2],
+  "U"  : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,0,0,0,2,2,2,2,1,0,0,0,0,2],
+  "Np" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,0,0,0,2,2,2,2,1,0,0,0,0,2],
+  "Pu" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,0,2,2,2,2,0,0,0,0,0,2],
+  "Am" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,2,2,2,2,0,0,0,0,0,2],
+  "Cm" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,2,2,2,2,1,0,0,0,0,2],
+  "Bk" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,2,2,2,2,0,0,0,0,0,2],
+  "Cf" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,2,2,2,2,0,0,0,0,0,2],
+  "Es" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,2,2,2,2,0,0,0,0,0,2],
+  "Fm" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,2,2,2,2,0,0,0,0,0,2],
+  "Md" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,0,0,0,0,0,2],
+  "No" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,2],
+  "Lr" : [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,0,0,0,2],
+}.items()}
+# fmt: on
+
+ln_to_lett = {
+    0: "s",
+    1: "p",
+    2: "d",
+    3: "f",
+    4: "g",
+}
+lett_to_ln = {vv: kk for kk, vv in ln_to_lett.items()}
+
+conf_keys = [
+    (1, "s"),
+    (2, "s"),
+    (2, "p"),
+    (3, "s"),
+    (3, "p"),
+    (3, "d"),
+    (4, "s"),
+    (4, "p"),
+    (4, "d"),
+    (4, "f"),
+    (5, "s"),
+    (5, "p"),
+    (5, "d"),
+    (5, "f"),
+    (6, "s"),
+    (6, "p"),
+    (6, "d"),
+    (7, "s"),
+]
+
+maxn = 7
+maxl = maxn
+maxm = 2 * maxl + 1
+
+type_map = dpdata.periodic_table.ELEMENTS
+
+
+def make_empty_list_vec():
+    ret = {}
+    for kk in conf_keys:
+        ll = lett_to_ln[kk[1]]
+        ret[kk] = np.zeros([2 * ll + 1], dtype=np.int32)
+    return ret
+
+
+def flatten_list_vec(lv):
+    ret = np.array([], dtype=np.int32)
+    for kk in conf_keys:
+        ret = np.append(ret, lv[kk])
+    return ret
+
+
+def make_element_embedding_list_vec(
+    ename: str,
+) -> np.ndarray:
+    """Compute the embedding of one element."""
+    ret = make_empty_list_vec()
+    ele = element(ename)
+    ec = ele.ec
+    occ = ec.spin_occupations()
+    for kk, vv in occ.items():
+        assert kk in conf_keys
+        for ip in range(vv["pairs"]):
+            ret[kk][ip] = 2
+        for iu in range(vv["pairs"], vv["pairs"] + vv["unpaired"]):
+            ret[kk][iu] = 1
+    return ret
+
+
+def make_econf_embedding(types, flatten=True):
+    all_ret = {}
+    for ii in types:
+        ir = make_element_embedding_list_vec(ii)
+        if flatten:
+            ir = flatten_list_vec(ir)
+        all_ret[ii] = ir
+    return all_ret
+
+
+def print_econf_embedding(res):
+    for kk, vv in res.items():
+        vvstr = ",".join([str(ii) for ii in vv])
+        space = " " * (2 - len(kk))
+        print(f'"{kk}"{space} : [{vvstr}],')  # noqa: T201
diff --git a/deepmd/utils/env_mat_stat.py b/deepmd/utils/env_mat_stat.py
new file mode 100644
index 0000000000..217c46844b
--- /dev/null
+++ b/deepmd/utils/env_mat_stat.py
@@ -0,0 +1,218 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
+from abc import (
+    ABC,
+    abstractmethod,
+)
+from collections import (
+    defaultdict,
+)
+from typing import (
+    Dict,
+    Iterator,
+    List,
+    Optional,
+)
+
+import numpy as np
+
+from deepmd.utils.path import (
+    DPPath,
+)
+
+log = logging.getLogger(__name__)
+
+
+class StatItem:
+    """A class to store the statistics of the environment matrix.
+
+    Parameters
+    ----------
+    number : int
+        The total size of given array.
+    sum : float
+        The sum value of the matrix.
+    squared_sum : float
+        The sum squared value of the matrix.
+    """
+
+    def __init__(self, number: int = 0, sum: float = 0, squared_sum: float = 0) -> None:
+        self.number = number
+        self.sum = sum
+        self.squared_sum = squared_sum
+
+    def __add__(self, other: "StatItem") -> "StatItem":
+        return StatItem(
+            number=self.number + other.number,
+            sum=self.sum + other.sum,
+            squared_sum=self.squared_sum + other.squared_sum,
+        )
+
+    def compute_avg(self, default: float = 0) -> float:
+        """Compute the average of the environment matrix.
+
+        Parameters
+        ----------
+        default : float, optional
+            The default value of the average, by default 0.
+
+        Returns
+        -------
+        float
+            The average of the environment matrix.
+        """
+        if self.number == 0:
+            return default
+        return self.sum / self.number
+
+    def compute_std(self, default: float = 1e-1, protection: float = 1e-2) -> float:
+        """Compute the standard deviation of the environment matrix.
+
+        Parameters
+        ----------
+        default : float, optional
+            The default value of the standard deviation, by default 1e-1.
+        protection : float, optional
+            The protection value for the standard deviation, by default 1e-2.
+
+        Returns
+        -------
+        float
+            The standard deviation of the environment matrix.
+        """
+        if self.number == 0:
+            return default
+        val = np.sqrt(
+            self.squared_sum / self.number
+            - np.multiply(self.sum / self.number, self.sum / self.number)
+        )
+        if np.abs(val) < protection:
+            val = protection
+        return val
+
+
+class EnvMatStat(ABC):
+    """A base class to store and calculate the statistics of the environment matrix."""
+
+    def __init__(self) -> None:
+        super().__init__()
+        self.stats = defaultdict(StatItem)
+
+    def compute_stats(self, data: List[Dict[str, np.ndarray]]) -> None:
+        """Compute the statistics of the environment matrix.
+
+        Parameters
+        ----------
+        data : List[Dict[str, np.ndarray]]
+            The environment matrix.
+        """
+        if len(self.stats) > 0:
+            raise ValueError("The statistics has already been computed.")
+        for iter_stats in self.iter(data):
+            for kk in iter_stats:
+                self.stats[kk] += iter_stats[kk]
+
+    @abstractmethod
+    def iter(self, data: List[Dict[str, np.ndarray]]) -> Iterator[Dict[str, StatItem]]:
+        """Get the iterator of the environment matrix.
+
+        Parameters
+        ----------
+        data : List[Dict[str, np.ndarray]]
+            The environment matrix.
+
+        Yields
+        ------
+        Dict[str, StatItem]
+            The statistics of the environment matrix.
+        """
+
+    def save_stats(self, path: DPPath) -> None:
+        """Save the statistics of the environment matrix.
+
+        Parameters
+        ----------
+        path : DPH5Path
+            The path to save the statistics of the environment matrix.
+        """
+        if len(self.stats) == 0:
+            raise ValueError("The statistics hasn't been computed.")
+        for kk, vv in self.stats.items():
+            path.mkdir(parents=True, exist_ok=True)
+            (path / kk).save_numpy(np.array([vv.number, vv.sum, vv.squared_sum]))
+
+    def load_stats(self, path: DPPath) -> None:
+        """Load the statistics of the environment matrix.
+
+        Parameters
+        ----------
+        path : DPH5Path
+            The path to load the statistics of the environment matrix.
+        """
+        if len(self.stats) > 0:
+            raise ValueError("The statistics has already been computed.")
+        for kk in path.glob("*"):
+            arr = kk.load_numpy()
+            self.stats[kk.name] = StatItem(
+                number=arr[0],
+                sum=arr[1],
+                squared_sum=arr[2],
+            )
+
+    def load_or_compute_stats(
+        self, data: List[Dict[str, np.ndarray]], path: Optional[DPPath] = None
+    ) -> None:
+        """Load the statistics of the environment matrix if it exists, otherwise compute and save it.
+
+        Parameters
+        ----------
+        path : DPH5Path
+            The path to load the statistics of the environment matrix.
+        data : List[Dict[str, np.ndarray]]
+            The environment matrix.
+        """
+        if path is not None and path.is_dir():
+            self.load_stats(path)
+            log.info(f"Load stats from {path}.")
+        else:
+            self.compute_stats(data)
+            if path is not None:
+                self.save_stats(path)
+                log.info(f"Save stats to {path}.")
+
+    def get_avg(self, default: float = 0) -> Dict[str, float]:
+        """Get the average of the environment matrix.
+
+        Parameters
+        ----------
+        default : float, optional
+            The default value of the average, by default 0.
+
+        Returns
+        -------
+        Dict[str, float]
+            The average of the environment matrix.
+        """
+        return {kk: vv.compute_avg(default=default) for kk, vv in self.stats.items()}
+
+    def get_std(
+        self, default: float = 1e-1, protection: float = 1e-2
+    ) -> Dict[str, float]:
+        """Get the standard deviation of the environment matrix.
+
+        Parameters
+        ----------
+        default : float, optional
+            The default value of the standard deviation, by default 1e-1.
+        protection : float, optional
+            The protection value for the standard deviation, by default 1e-2.
+
+        Returns
+        -------
+        Dict[str, float]
+            The standard deviation of the environment matrix.
+        """
+        return {
+            kk: vv.compute_std(default=default, protection=protection)
+            for kk, vv in self.stats.items()
+        }
diff --git a/deepmd/utils/errors.py b/deepmd/utils/errors.py
index 683131e48a..11f42ede96 100644
--- a/deepmd/utils/errors.py
+++ b/deepmd/utils/errors.py
@@ -1,19 +1,3 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-from deepmd_utils.utils.errors import (
-    OutOfMemoryError,
-)
-
-
-class GraphTooLargeError(Exception):
-    """The graph is too large, exceeding protobuf's hard limit of 2GB."""
-
-
-class GraphWithoutTensorError(Exception):
-    pass
-
-
-__all__ = [
-    "OutOfMemoryError",
-    "GraphTooLargeError",
-    "GraphWithoutTensorError",
-]
+class OutOfMemoryError(Exception):
+    """This error is caused by out-of-memory (OOM)."""
diff --git a/deepmd/utils/finetune.py b/deepmd/utils/finetune.py
index cc6c0224de..1150fe2701 100644
--- a/deepmd/utils/finetune.py
+++ b/deepmd/utils/finetune.py
@@ -1,111 +1,140 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-import json
 import logging
 from typing import (
-    Any,
-    Dict,
+    TYPE_CHECKING,
+    List,
 )
 
-from deepmd.utils.errors import (
-    GraphWithoutTensorError,
+import numpy as np
+
+from deepmd.infer.deep_eval import (
+    DeepEval,
 )
-from deepmd.utils.graph import (
-    get_tensor_by_name,
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
 )
 
+if TYPE_CHECKING:
+    pass
+
 log = logging.getLogger(__name__)
 
 
-def replace_model_params_with_pretrained_model(
-    jdata: Dict[str, Any], pretrained_model: str
+def change_energy_bias_lower(
+    data: DeepmdDataSystem,
+    dp: DeepEval,
+    origin_type_map: List[str],
+    full_type_map: List[str],
+    bias_atom_e: np.ndarray,
+    bias_adjust_mode="change-by-statistic",
+    ntest=10,
 ):
-    """Replace the model params in input script according to pretrained model.
+    """Change the energy bias according to the input data and the pretrained model.
 
     Parameters
     ----------
-    jdata : Dict[str, Any]
-        input script
-    pretrained_model : str
-        filename of the pretrained model
+    data : DeepmdDataSystem
+        The training data.
+    dp : str
+        The DeepEval object.
+    origin_type_map : list
+        The original type_map in dataset, they are targets to change the energy bias.
+    full_type_map : str
+        The full type_map in pretrained model
+    bias_atom_e : np.ndarray
+        The old energy bias in the pretrained model.
+    bias_adjust_mode : str
+        The mode for changing energy bias : ['change-by-statistic', 'set-by-statistic']
+        'change-by-statistic' : perform predictions on energies of target dataset,
+                and do least sqaure on the errors to obtain the target shift as bias.
+        'set-by-statistic' : directly use the statistic energy bias in the target dataset.
+    ntest : int
+        The number of test samples in a system to change the energy bias.
     """
-    # Get the input script from the pretrained model
-    try:
-        t_jdata = get_tensor_by_name(pretrained_model, "train_attr/training_script")
-    except GraphWithoutTensorError as e:
-        raise RuntimeError(
-            "The input frozen pretrained model: %s has no training script, "
-            "which is not supported to perform finetuning. "
-            "Please use the model pretrained with v2.1.5 or higher version of DeePMD-kit."
-            % input
-        ) from e
-    pretrained_jdata = json.loads(t_jdata)
-
-    # Check the model type
-    assert (
-        pretrained_jdata["model"]["descriptor"]["type"]
-        in [
-            "se_atten",
-            "se_atten_v2",
-        ]
-        and pretrained_jdata["model"]["fitting_net"]["type"] in ["ener"]
-    ), "The finetune process only supports models pretrained with 'se_atten' or 'se_atten_v2' descriptor and 'ener' fitting_net!"
-
-    # Check the type map
-    pretrained_type_map = pretrained_jdata["model"]["type_map"]
-    cur_type_map = jdata["model"].get("type_map", [])
-    out_line_type = []
-    for i in cur_type_map:
-        if i not in pretrained_type_map:
-            out_line_type.append(i)
-    assert not out_line_type, (
-        f"{out_line_type!s} type(s) not contained in the pretrained model! "
-        "Please choose another suitable one."
-    )
-    if cur_type_map != pretrained_type_map:
-        log.info(
-            "Change the type_map from {} to {}.".format(
-                str(cur_type_map), str(pretrained_type_map)
-            )
+    type_numbs = []
+    energy_ground_truth = []
+    energy_predict = []
+    sorter = np.argsort(full_type_map)
+    idx_type_map = sorter[
+        np.searchsorted(full_type_map, origin_type_map, sorter=sorter)
+    ]
+    mixed_type = data.mixed_type
+    numb_type = len(full_type_map)
+    for sys in data.data_systems:
+        test_data = sys.get_test()
+        nframes = test_data["box"].shape[0]
+        numb_test = min(nframes, ntest)
+        if mixed_type:
+            atype = test_data["type"][:numb_test].reshape([numb_test, -1])
+        else:
+            atype = test_data["type"][0]
+        assert np.array(
+            [i in idx_type_map for i in list(set(atype.reshape(-1)))]
+        ).all(), "Some types are not in 'type_map'!"
+        energy_ground_truth.append(
+            test_data["energy"][:numb_test].reshape([numb_test, 1])
         )
-        jdata["model"]["type_map"] = pretrained_type_map
-
-    # Change model configurations
-    log.info("Change the model configurations according to the pretrained one...")
-    for config_key in ["type_embedding", "descriptor", "fitting_net"]:
-        if (
-            config_key not in jdata["model"].keys()
-            and config_key in pretrained_jdata["model"].keys()
-        ):
-            log.info(
-                "Add the '{}' from pretrained model: {}.".format(
-                    config_key, str(pretrained_jdata["model"][config_key])
+        if mixed_type:
+            type_numbs.append(
+                np.array(
+                    [(atype == i).sum(axis=-1) for i in idx_type_map],
+                    dtype=np.int32,
+                ).T
+            )
+        else:
+            type_numbs.append(
+                np.tile(
+                    np.bincount(atype, minlength=numb_type)[idx_type_map],
+                    (numb_test, 1),
                 )
             )
-            jdata["model"][config_key] = pretrained_jdata["model"][config_key]
-        elif (
-            config_key == "type_embedding"
-            and config_key in jdata["model"].keys()
-            and config_key not in pretrained_jdata["model"].keys()
-        ):
-            # 'type_embedding' can be omitted using 'se_atten' descriptor, and the activation_function will be None.
-            cur_para = jdata["model"].pop(config_key)
-            if "trainable" in cur_para and not cur_para["trainable"]:
-                jdata["model"][config_key] = {
-                    "trainable": False,
-                    "activation_function": "None",
-                }
-                log.info("The type_embeddings from pretrained model will be frozen.")
-        elif (
-            config_key in jdata["model"].keys()
-            and config_key in pretrained_jdata["model"].keys()
-            and jdata["model"][config_key] != pretrained_jdata["model"][config_key]
-        ):
-            target_para = pretrained_jdata["model"][config_key]
-            cur_para = jdata["model"][config_key]
-            # keep some params that are irrelevant to model structures (need to discuss) TODO
-            if "trainable" in cur_para.keys():
-                target_para["trainable"] = cur_para["trainable"]
-            log.info(f"Change the '{config_key}' from {cur_para!s} to {target_para!s}.")
-            jdata["model"][config_key] = target_para
-
-    return jdata, cur_type_map
+        if bias_adjust_mode == "change-by-statistic":
+            coord = test_data["coord"][:numb_test].reshape([numb_test, -1])
+            if sys.pbc:
+                box = test_data["box"][:numb_test]
+            else:
+                box = None
+            if dp.get_dim_fparam() > 0:
+                fparam = test_data["fparam"][:numb_test]
+            else:
+                fparam = None
+            if dp.get_dim_aparam() > 0:
+                aparam = test_data["aparam"][:numb_test]
+            else:
+                aparam = None
+            ret = dp.eval(
+                coord,
+                box,
+                atype,
+                mixed_type=mixed_type,
+                fparam=fparam,
+                aparam=aparam,
+            )
+            energy_predict.append(ret[0].reshape([numb_test, 1]))
+    type_numbs = np.concatenate(type_numbs)
+    energy_ground_truth = np.concatenate(energy_ground_truth)
+    old_bias = bias_atom_e[idx_type_map]
+    if bias_adjust_mode == "change-by-statistic":
+        energy_predict = np.concatenate(energy_predict)
+        bias_diff = energy_ground_truth - energy_predict
+        delta_bias = np.linalg.lstsq(type_numbs, bias_diff, rcond=None)[0]
+        unbias_e = energy_predict + type_numbs @ delta_bias
+        atom_numbs = type_numbs.sum(-1)
+        rmse_ae = np.sqrt(
+            np.mean(
+                np.square((unbias_e.ravel() - energy_ground_truth.ravel()) / atom_numbs)
+            )
+        )
+        bias_atom_e[idx_type_map] += delta_bias.reshape(-1)
+        log.info(
+            f"RMSE of atomic energy after linear regression is: {rmse_ae} eV/atom."
+        )
+    elif bias_adjust_mode == "set-by-statistic":
+        statistic_bias = np.linalg.lstsq(type_numbs, energy_ground_truth, rcond=None)[0]
+        bias_atom_e[idx_type_map] = statistic_bias.reshape(-1)
+    else:
+        raise RuntimeError("Unknown bias_adjust_mode mode: " + bias_adjust_mode)
+    log.info(
+        f"Change energy bias of {origin_type_map!s} from {old_bias!s} to {bias_atom_e[idx_type_map]!s}."
+    )
+    return bias_atom_e
diff --git a/deepmd/utils/hostlist.py b/deepmd/utils/hostlist.py
new file mode 100644
index 0000000000..c184b04031
--- /dev/null
+++ b/deepmd/utils/hostlist.py
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import socket
+from typing import (
+    List,
+    Tuple,
+)
+
+
+def get_host_names() -> Tuple[str, List[str]]:
+    """Get host names of all nodes in the cluster.
+
+    If mpi4py is not installed or MPI is not used, then the
+    host name of the current node is returned as those of all nodes.
+
+    Returns
+    -------
+    str
+        Host name of the current node
+    List[str]
+        List of host names of all nodes in the cluster
+    """
+    host_name = socket.gethostname()
+    try:
+        from mpi4py import (
+            MPI,
+        )
+    except ImportError:
+        return host_name, [host_name]
+
+    comm = MPI.COMM_WORLD
+    if comm.Get_size() == 1:
+        return host_name, [host_name]
+    host_names = comm.allgather(host_name)
+    return host_name, host_names
diff --git a/deepmd_utils/utils/model_stat.py b/deepmd/utils/model_stat.py
similarity index 100%
rename from deepmd_utils/utils/model_stat.py
rename to deepmd/utils/model_stat.py
diff --git a/deepmd/utils/neighbor_stat.py b/deepmd/utils/neighbor_stat.py
index fa9325937e..34200df007 100644
--- a/deepmd/utils/neighbor_stat.py
+++ b/deepmd/utils/neighbor_stat.py
@@ -1,41 +1,38 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import logging
 import math
+from abc import (
+    ABC,
+    abstractmethod,
+)
 from typing import (
-    List,
+    Iterator,
     Tuple,
 )
 
 import numpy as np
 
-from deepmd.env import (
-    GLOBAL_NP_FLOAT_PRECISION,
-    default_tf_session_config,
-    op_module,
-    tf,
-)
 from deepmd.utils.data_system import (
     DeepmdDataSystem,
 )
-from deepmd.utils.parallel_op import (
-    ParallelOp,
-)
 
 log = logging.getLogger(__name__)
 
 
-class NeighborStat:
-    """Class for getting training data information.
+class NeighborStat(ABC):
+    """Abstract base class for getting training data information.
 
-    It loads data from DeepmdData object, and measures the data info, including neareest nbor distance between atoms, max nbor size of atoms and the output data range of the environment matrix.
+    It loads data from DeepmdData object, and measures the data info, including
+    neareest nbor distance between atoms, max nbor size of atoms and the output
+    data range of the environment matrix.
 
     Parameters
     ----------
-    ntypes
-            The num of atom types
-    rcut
-            The cut-off radius
-    one_type : bool, optional, default=False
+    ntypes : int
+        The num of atom types
+    rcut : float
+        The cut-off radius
+    mixed_type : bool, optional, default=False
         Treat all types as a single type.
     """
 
@@ -43,55 +40,13 @@ def __init__(
         self,
         ntypes: int,
         rcut: float,
-        one_type: bool = False,
+        mixed_type: bool = False,
     ) -> None:
-        """Constructor."""
         self.rcut = rcut
         self.ntypes = ntypes
-        self.one_type = one_type
-        sub_graph = tf.Graph()
-
-        def builder():
-            place_holders = {}
-            for ii in ["coord", "box"]:
-                place_holders[ii] = tf.placeholder(
-                    GLOBAL_NP_FLOAT_PRECISION, [None, None], name="t_" + ii
-                )
-            place_holders["type"] = tf.placeholder(
-                tf.int32, [None, None], name="t_type"
-            )
-            place_holders["natoms_vec"] = tf.placeholder(
-                tf.int32, [self.ntypes + 2], name="t_natoms"
-            )
-            place_holders["default_mesh"] = tf.placeholder(
-                tf.int32, [None], name="t_mesh"
-            )
-            t_type = place_holders["type"]
-            t_natoms = place_holders["natoms_vec"]
-            if self.one_type:
-                # all types = 0, natoms_vec = [natoms, natoms, natoms]
-                t_type = tf.clip_by_value(t_type, -1, 0)
-                t_natoms = tf.tile(t_natoms[0:1], [3])
+        self.mixed_type = mixed_type
 
-            _max_nbor_size, _min_nbor_dist = op_module.neighbor_stat(
-                place_holders["coord"],
-                t_type,
-                t_natoms,
-                place_holders["box"],
-                place_holders["default_mesh"],
-                rcut=self.rcut,
-            )
-            place_holders["dir"] = tf.placeholder(tf.string)
-            _min_nbor_dist = tf.reduce_min(_min_nbor_dist)
-            _max_nbor_size = tf.reduce_max(_max_nbor_size, axis=0)
-            return place_holders, (_max_nbor_size, _min_nbor_dist, place_holders["dir"])
-
-        with sub_graph.as_default():
-            self.p = ParallelOp(builder, config=default_tf_session_config)
-
-        self.sub_sess = tf.Session(graph=sub_graph, config=default_tf_session_config)
-
-    def get_stat(self, data: DeepmdDataSystem) -> Tuple[float, List[int]]:
+    def get_stat(self, data: DeepmdDataSystem) -> Tuple[float, np.ndarray]:
         """Get the data statistics of the training data, including nearest nbor distance between atoms, max nbor size of atoms.
 
         Parameters
@@ -104,38 +59,18 @@ def get_stat(self, data: DeepmdDataSystem) -> Tuple[float, List[int]]:
         min_nbor_dist
             The nearest distance between neighbor atoms
         max_nbor_size
-            A list with ntypes integers, denotes the actual achieved max sel
+            An array with ntypes integers, denotes the actual achieved max sel
         """
-        self.min_nbor_dist = 100.0
-        self.max_nbor_size = [0]
-        if not self.one_type:
-            self.max_nbor_size *= self.ntypes
+        min_nbor_dist = 100.0
+        max_nbor_size = np.zeros(1 if self.mixed_type else self.ntypes, dtype=int)
 
-        def feed():
-            for ii in range(len(data.system_dirs)):
-                for jj in data.data_systems[ii].dirs:
-                    data_set = data.data_systems[ii]._load_set(jj)
-                    for kk in range(np.array(data_set["type"]).shape[0]):
-                        yield {
-                            "coord": np.array(data_set["coord"])[kk].reshape(
-                                [-1, data.natoms[ii] * 3]
-                            ),
-                            "type": np.array(data_set["type"])[kk].reshape(
-                                [-1, data.natoms[ii]]
-                            ),
-                            "natoms_vec": np.array(data.natoms_vec[ii]),
-                            "box": np.array(data_set["box"])[kk].reshape([-1, 9]),
-                            "default_mesh": np.array(data.default_mesh[ii]),
-                            "dir": str(jj),
-                        }
-
-        for mn, dt, jj in self.p.generate(self.sub_sess, feed()):
+        for mn, dt, jj in self.iterator(data):
             if np.isinf(dt):
                 log.warning(
                     "Atoms with no neighbors found in %s. Please make sure it's what you expected."
                     % jj
                 )
-            if dt < self.min_nbor_dist:
+            if dt < min_nbor_dist:
                 if math.isclose(dt, 0.0, rel_tol=1e-6):
                     # it's unexpected that the distance between two atoms is zero
                     # zero distance will cause nan (#874)
@@ -143,11 +78,27 @@ def feed():
                         "Some atoms are overlapping in %s. Please check your"
                         " training data to remove duplicated atoms." % jj
                     )
-                self.min_nbor_dist = dt
-            self.max_nbor_size = np.maximum(mn, self.max_nbor_size)
+                min_nbor_dist = dt
+            max_nbor_size = np.maximum(mn, max_nbor_size)
 
         # do sqrt in the final
-        self.min_nbor_dist = math.sqrt(self.min_nbor_dist)
-        log.info("training data with min nbor dist: " + str(self.min_nbor_dist))
-        log.info("training data with max nbor size: " + str(self.max_nbor_size))
-        return self.min_nbor_dist, self.max_nbor_size
+        min_nbor_dist = math.sqrt(min_nbor_dist)
+        log.info("training data with min nbor dist: " + str(min_nbor_dist))
+        log.info("training data with max nbor size: " + str(max_nbor_size))
+        return min_nbor_dist, max_nbor_size
+
+    @abstractmethod
+    def iterator(
+        self, data: DeepmdDataSystem
+    ) -> Iterator[Tuple[np.ndarray, float, str]]:
+        """Abstract method for producing data.
+
+        Yields
+        ------
+        mn : np.ndarray
+            The maximal number of neighbors
+        dt : float
+            The squared minimal distance between two atoms
+        jj : str
+            The directory of the data system
+        """
diff --git a/deepmd/utils/out_stat.py b/deepmd/utils/out_stat.py
new file mode 100644
index 0000000000..3956dac654
--- /dev/null
+++ b/deepmd/utils/out_stat.py
@@ -0,0 +1,122 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Output statistics."""
+
+from typing import (
+    Optional,
+    Tuple,
+)
+
+import numpy as np
+
+
+def compute_stats_from_redu(
+    output_redu: np.ndarray,
+    natoms: np.ndarray,
+    assigned_bias: Optional[np.ndarray] = None,
+    rcond: Optional[float] = None,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """Compute the output statistics.
+
+    Given the reduced output value and the number of atoms for each atom,
+    compute the least-squares solution as the atomic output bais and std.
+
+    Parameters
+    ----------
+    output_redu
+        The reduced output value, shape is [nframes, ndim].
+    natoms
+        The number of atoms for each atom, shape is [nframes, ntypes].
+    assigned_bias
+        The assigned output bias, shape is [ntypes, ndim]. Set to nan
+        if not assigned.
+    rcond
+        Cut-off ratio for small singular values of a.
+
+    Returns
+    -------
+    np.ndarray
+        The computed output bias, shape is [ntypes, ndim].
+    np.ndarray
+        The computed output std, shape is [ntypes, ndim].
+    """
+    output_redu = np.array(output_redu)
+    natoms = np.array(natoms)
+    # check shape
+    assert output_redu.ndim == 2
+    assert natoms.ndim == 2
+    assert output_redu.shape[0] == natoms.shape[0]  # nframes
+    if assigned_bias is not None:
+        assigned_bias = np.array(assigned_bias).reshape(
+            natoms.shape[1], output_redu.shape[1]
+        )
+    # compute output bias
+    if assigned_bias is not None:
+        # Atomic energies stats are incorrect if atomic energies are assigned.
+        # In this situation, we directly use these assigned energies instead of computing stats.
+        # This will make the loss decrease quickly
+        assigned_bias_atom_mask = ~np.isnan(assigned_bias).any(axis=1)
+        # assigned_bias_masked: nmask, ndim
+        assigned_bias_masked = assigned_bias[assigned_bias_atom_mask]
+        # assigned_bias_natoms: nframes, nmask
+        assigned_bias_natoms = natoms[:, assigned_bias_atom_mask]
+        # output_redu: nframes, ndim
+        output_redu -= np.einsum(
+            "ij,jk->ik", assigned_bias_natoms, assigned_bias_masked
+        )
+        # remove assigned atom
+        natoms[:, assigned_bias_atom_mask] = 0
+
+    # computed_output_bias: ntypes, ndim
+    computed_output_bias, _, _, _ = np.linalg.lstsq(natoms, output_redu, rcond=rcond)
+    if assigned_bias is not None:
+        # add back assigned atom; this might not be required
+        computed_output_bias[assigned_bias_atom_mask] = assigned_bias_masked
+    # rest_redu: nframes, ndim
+    rest_redu = output_redu - np.einsum("ij,jk->ik", natoms, computed_output_bias)
+    output_std = rest_redu.std(axis=0)
+    return computed_output_bias, output_std
+
+
+def compute_stats_from_atomic(
+    output: np.ndarray,
+    atype: np.ndarray,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """Compute the output statistics.
+
+    Given the output value and the type of atoms,
+    compute the atomic output bais and std.
+
+    Parameters
+    ----------
+    output
+        The output value, shape is [nframes, nloc, ndim].
+    atype
+        The type of atoms, shape is [nframes, nloc].
+
+    Returns
+    -------
+    np.ndarray
+        The computed output bias, shape is [ntypes, ndim].
+    np.ndarray
+        The computed output std, shape is [ntypes, ndim].
+    """
+    output = np.array(output)
+    atype = np.array(atype)
+    # check shape
+    assert output.ndim == 3
+    assert atype.ndim == 2
+    assert output.shape[:2] == atype.shape
+    # compute output bias
+    nframes, nloc, ndim = output.shape
+    ntypes = atype.max() + 1
+    output_bias = np.zeros((ntypes, ndim))
+    output_std = np.zeros((ntypes, ndim))
+    for type_i in range(ntypes):
+        mask = atype == type_i
+        output_bias[type_i] = (
+            output[mask].mean(axis=0) if output[mask].size > 0 else np.nan
+        )
+        output_std[type_i] = (
+            output[mask].std(axis=0) if output[mask].size > 0 else np.nan
+        )
+    return output_bias, output_std
diff --git a/deepmd/utils/pair_tab.py b/deepmd/utils/pair_tab.py
index 1a526ac5fc..1b397a3cfa 100644
--- a/deepmd/utils/pair_tab.py
+++ b/deepmd/utils/pair_tab.py
@@ -1,9 +1,279 @@
+#!/usr/bin/env python3
+
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Alias for backward compatibility."""
-from deepmd_utils.utils.pair_tab import (
-    PairTab,
+import logging
+from typing import (
+    Optional,
+    Tuple,
+)
+
+import numpy as np
+from scipy.interpolate import (
+    CubicSpline,
+)
+
+from deepmd.utils.version import (
+    check_version_compatibility,
 )
 
-__all__ = [
-    "PairTab",
-]
+log = logging.getLogger(__name__)
+
+
+class PairTab:
+    """Pairwise tabulated potential.
+
+    Parameters
+    ----------
+    filename
+            File name for the short-range tabulated potential.
+            The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes.
+            The first colume is the distance between atoms.
+            The second to the last columes are energies for pairs of certain types.
+            For example we have two atom types, 0 and 1.
+            The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly.
+    """
+
+    def __init__(self, filename: str, rcut: Optional[float] = None) -> None:
+        """Constructor."""
+        self.reinit(filename, rcut)
+
+    def reinit(self, filename: str, rcut: Optional[float] = None) -> None:
+        """Initialize the tabulated interaction.
+
+        Parameters
+        ----------
+        filename
+            File name for the short-range tabulated potential.
+            The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes.
+            The first colume is the distance between atoms.
+            The second to the last columes are energies for pairs of certain types.
+            For example we have two atom types, 0 and 1.
+            The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly.
+        """
+        if filename is None:
+            self.tab_info, self.tab_data = None, None
+            return
+        self.vdata = np.loadtxt(filename)
+        self.rmin = self.vdata[0][0]
+        self.rmax = self.vdata[-1][0]
+        self.hh = self.vdata[1][0] - self.vdata[0][0]
+        ncol = self.vdata.shape[1] - 1
+        n0 = (-1 + np.sqrt(1 + 8 * ncol)) * 0.5
+        self.ntypes = int(n0 + 0.1)
+        assert self.ntypes * (self.ntypes + 1) // 2 == ncol, (
+            "number of volumes provided in %s does not match guessed number of types %d"
+            % (filename, self.ntypes)
+        )
+
+        # check table data against rcut and update tab_file if needed, table upper boundary is used as rcut if not provided.
+        self.rcut = rcut if rcut is not None else self.rmax
+        self._check_table_upper_boundary()
+        self.nspline = (
+            self.vdata.shape[0] - 1
+        )  # this nspline is updated based on the expanded table.
+        self.tab_info = np.array([self.rmin, self.hh, self.nspline, self.ntypes])
+        self.tab_data = self._make_data()
+
+    def serialize(self) -> dict:
+        return {
+            "@class": "PairTab",
+            "@version": 1,
+            "rmin": self.rmin,
+            "rmax": self.rmax,
+            "hh": self.hh,
+            "ntypes": self.ntypes,
+            "rcut": self.rcut,
+            "nspline": self.nspline,
+            "@variables": {
+                "vdata": self.vdata,
+                "tab_info": self.tab_info,
+                "tab_data": self.tab_data,
+            },
+        }
+
+    @classmethod
+    def deserialize(cls, data) -> "PairTab":
+        data = data.copy()
+        check_version_compatibility(data.pop("@version", 1), 1, 1)
+        data.pop("@class")
+        variables = data.pop("@variables")
+        tab = PairTab(None, None)
+        tab.vdata = variables["vdata"]
+        tab.rmin = data["rmin"]
+        tab.rmax = data["rmax"]
+        tab.hh = data["hh"]
+        tab.ntypes = data["ntypes"]
+        tab.rcut = data["rcut"]
+        tab.nspline = data["nspline"]
+        tab.tab_info = variables["tab_info"]
+        tab.tab_data = variables["tab_data"]
+        return tab
+
+    def _check_table_upper_boundary(self) -> None:
+        """Update User Provided Table Based on `rcut`.
+
+        This function checks the upper boundary provided in the table against rcut.
+        If the table upper boundary values decay to zero before rcut, padding zeros will
+        be added to the table to cover rcut; if the table upper boundary values do not decay to zero
+        before ruct, extrapolation will be performed till rcut.
+
+        Examples
+        --------
+        table = [[0.005 1.    2.    3.   ]
+                [0.01  0.8   1.6   2.4  ]
+                [0.015 0.    1.    1.5  ]]
+
+        rcut = 0.022
+
+        new_table = [[0.005 1.    2.    3.   ]
+                    [0.01  0.8   1.6   2.4  ]
+                    [0.015 0.    1.    1.5  ]
+                    [0.02  0.    0.    0.   ]
+
+        ----------------------------------------------
+
+        table = [[0.005 1.    2.    3.   ]
+                [0.01  0.8   1.6   2.4  ]
+                [0.015 0.5   1.    1.5  ]
+                [0.02  0.25  0.4   0.75 ]
+                [0.025 0.    0.1   0.   ]
+                [0.03  0.    0.    0.   ]]
+
+        rcut = 0.031
+
+        new_table = [[0.005 1.    2.    3.   ]
+                    [0.01  0.8   1.6   2.4  ]
+                    [0.015 0.5   1.    1.5  ]
+                    [0.02  0.25  0.4   0.75 ]
+                    [0.025 0.    0.1   0.   ]
+                    [0.03  0.    0.    0.   ]
+                    [0.035 0.    0.    0.   ]]
+        """
+        upper_val = self.vdata[-1][1:]
+        upper_idx = self.vdata.shape[0] - 1
+        self.ncol = self.vdata.shape[1]
+
+        # the index in table for the grid point of rcut, always give the point after rcut.
+        rcut_idx = int(np.ceil(self.rcut / self.hh - self.rmin / self.hh))
+        if np.all(upper_val == 0):
+            # if table values decay to `0` after rcut
+            if self.rcut < self.rmax and np.any(self.vdata[rcut_idx - 1][1:] != 0):
+                log.warning(
+                    "The energy provided in the table does not decay to 0 at rcut."
+                )
+            # if table values decay to `0` at rcut, do nothing
+
+            # if table values decay to `0` before rcut, pad table with `0`s.
+            elif self.rcut > self.rmax:
+                pad_zero = np.zeros((rcut_idx - upper_idx, self.ncol))
+                pad_zero[:, 0] = np.linspace(
+                    self.rmax + self.hh,
+                    self.rmax + self.hh * (rcut_idx - upper_idx),
+                    rcut_idx - upper_idx,
+                )
+                self.vdata = np.concatenate((self.vdata, pad_zero), axis=0)
+        else:
+            # if table values do not decay to `0` at rcut
+            if self.rcut <= self.rmax:
+                log.warning(
+                    "The energy provided in the table does not decay to 0 at rcut."
+                )
+            # if rcut goes beyond table upper bond, need extrapolation, ensure values decay to `0` before rcut.
+            else:
+                log.warning(
+                    "The rcut goes beyond table upper boundary, performing extrapolation."
+                )
+                pad_extrapolation = np.zeros((rcut_idx - upper_idx, self.ncol))
+
+                pad_extrapolation[:, 0] = np.linspace(
+                    self.rmax + self.hh,
+                    self.rmax + self.hh * (rcut_idx - upper_idx),
+                    rcut_idx - upper_idx,
+                )
+                # need to calculate table values to fill in with cubic spline
+                pad_extrapolation = self._extrapolate_table(pad_extrapolation)
+
+                self.vdata = np.concatenate((self.vdata, pad_extrapolation), axis=0)
+
+    def get(self) -> Tuple[np.array, np.array]:
+        """Get the serialized table."""
+        return self.tab_info, self.tab_data
+
+    def _extrapolate_table(self, pad_extrapolation: np.array) -> np.array:
+        """Soomth extrapolation between table upper boundary and rcut.
+
+        This method should only be used when the table upper boundary `rmax` is smaller than `rcut`, and
+        the table upper boundary values are not zeros. To simplify the problem, we use a single
+        cubic spline between `rmax` and `rcut` for each pair of atom types. One can substitute this extrapolation
+        to higher order polynomials if needed.
+
+        There are two scenarios:
+            1. `ruct` - `rmax` >= hh:
+                Set values at the grid point right before `rcut` to 0, and perform exterapolation between
+                the grid point and `rmax`, this allows smooth decay to 0 at `rcut`.
+            2. `rcut` - `rmax` < hh:
+                Set values at `rmax + hh` to 0, and perform extrapolation between `rmax` and `rmax + hh`.
+
+        Parameters
+        ----------
+        pad_extrapolation : np.array
+            The emepty grid that holds the extrapolation values.
+
+        Returns
+        -------
+        np.array
+            The cubic spline extrapolation.
+        """
+        # in theory we should check if the table has at least two rows.
+        slope = self.vdata[-1, 1:] - self.vdata[-2, 1:]  # shape of (ncol-1, )
+
+        # for extrapolation, we want values decay to `0` prior to `ruct` if possible
+        # here we try to find the grid point prior to `rcut`
+        grid_point = (
+            -2 if pad_extrapolation[-1, 0] / self.hh - self.rmax / self.hh >= 2 else -1
+        )
+        temp_grid = np.stack((self.vdata[-1, :], pad_extrapolation[grid_point, :]))
+        vv = temp_grid[:, 1:]
+        xx = temp_grid[:, 0]
+        cs = CubicSpline(xx, vv, bc_type=((1, slope), (1, np.zeros_like(slope))))
+        xx_grid = pad_extrapolation[:, 0]
+        res = cs(xx_grid)
+
+        pad_extrapolation[:, 1:] = res
+
+        # Note: when doing cubic spline, if we want to ensure values decay to zero prior to `rcut`
+        # this may cause values be positive post `rcut`, we need to overwrite those values to zero
+        pad_extrapolation = (
+            pad_extrapolation if grid_point == -1 else pad_extrapolation[:-1, :]
+        )
+        return pad_extrapolation
+
+    def _make_data(self):
+        data = np.zeros([self.ntypes * self.ntypes * 4 * self.nspline])
+        stride = 4 * self.nspline
+        idx_iter = 0
+        xx = self.vdata[:, 0]
+        for t0 in range(self.ntypes):
+            for t1 in range(t0, self.ntypes):
+                vv = self.vdata[:, 1 + idx_iter]
+                cs = CubicSpline(xx, vv, bc_type="clamped")
+                dd = cs(xx, 1)
+                dd *= self.hh
+                dtmp = np.zeros(stride)
+                for ii in range(self.nspline):
+                    dtmp[ii * 4 + 0] = 2 * vv[ii] - 2 * vv[ii + 1] + dd[ii] + dd[ii + 1]
+                    dtmp[ii * 4 + 1] = (
+                        -3 * vv[ii] + 3 * vv[ii + 1] - 2 * dd[ii] - dd[ii + 1]
+                    )
+                    dtmp[ii * 4 + 2] = dd[ii]
+                    dtmp[ii * 4 + 3] = vv[ii]
+                data[
+                    (t0 * self.ntypes + t1) * stride : (t0 * self.ntypes + t1) * stride
+                    + stride
+                ] = dtmp
+                data[
+                    (t1 * self.ntypes + t0) * stride : (t1 * self.ntypes + t0) * stride
+                    + stride
+                ] = dtmp
+                idx_iter += 1
+        return data
diff --git a/deepmd/utils/path.py b/deepmd/utils/path.py
index 780bc8cabf..858e31a39d 100644
--- a/deepmd/utils/path.py
+++ b/deepmd/utils/path.py
@@ -1,13 +1,476 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Alias for backward compatibility."""
-from deepmd_utils.utils.path import (
-    DPH5Path,
-    DPOSPath,
-    DPPath,
-)
-
-__all__ = [
-    "DPPath",
-    "DPOSPath",
-    "DPH5Path",
-]
+import os
+from abc import (
+    ABC,
+    abstractmethod,
+)
+from functools import (
+    lru_cache,
+)
+from pathlib import (
+    Path,
+)
+from typing import (
+    ClassVar,
+    Dict,
+    List,
+    Optional,
+)
+
+import h5py
+import numpy as np
+from wcmatch.glob import (
+    globfilter,
+)
+
+
+class DPPath(ABC):
+    """The path class to data system (DeepmdData).
+
+    Parameters
+    ----------
+    path : str
+        path
+    mode : str, optional
+        mode, by default "r"
+    """
+
+    def __new__(cls, path: str, mode: str = "r"):
+        if cls is DPPath:
+            if os.path.isdir(path):
+                return super().__new__(DPOSPath)
+            elif os.path.isfile(path.split("#")[0]):
+                # assume h5 if it is not dir
+                return super().__new__(DPH5Path)
+            raise FileNotFoundError("%s not found" % path)
+        return super().__new__(cls)
+
+    @abstractmethod
+    def load_numpy(self) -> np.ndarray:
+        """Load NumPy array.
+
+        Returns
+        -------
+        np.ndarray
+            loaded NumPy array
+        """
+
+    @abstractmethod
+    def load_txt(self, **kwargs) -> np.ndarray:
+        """Load NumPy array from text.
+
+        Returns
+        -------
+        np.ndarray
+            loaded NumPy array
+        """
+
+    @abstractmethod
+    def save_numpy(self, arr: np.ndarray) -> None:
+        """Save NumPy array.
+
+        Parameters
+        ----------
+        arr : np.ndarray
+            NumPy array
+        """
+
+    @abstractmethod
+    def glob(self, pattern: str) -> List["DPPath"]:
+        """Search path using the glob pattern.
+
+        Parameters
+        ----------
+        pattern : str
+            glob pattern
+
+        Returns
+        -------
+        List[DPPath]
+            list of paths
+        """
+
+    @abstractmethod
+    def rglob(self, pattern: str) -> List["DPPath"]:
+        """This is like calling :meth:`DPPath.glob()` with `**/` added in front
+        of the given relative pattern.
+
+        Parameters
+        ----------
+        pattern : str
+            glob pattern
+
+        Returns
+        -------
+        List[DPPath]
+            list of paths
+        """
+
+    @abstractmethod
+    def is_file(self) -> bool:
+        """Check if self is file."""
+
+    @abstractmethod
+    def is_dir(self) -> bool:
+        """Check if self is directory."""
+
+    @abstractmethod
+    def __truediv__(self, key: str) -> "DPPath":
+        """Used for / operator."""
+
+    @abstractmethod
+    def __lt__(self, other: "DPPath") -> bool:
+        """Whether this DPPath is less than other for sorting."""
+
+    @abstractmethod
+    def __str__(self) -> str:
+        """Represent string."""
+
+    def __repr__(self) -> str:
+        return f"{type(self)} ({self!s})"
+
+    def __eq__(self, other) -> bool:
+        return str(self) == str(other)
+
+    def __hash__(self):
+        return hash(str(self))
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Name of the path."""
+
+    @abstractmethod
+    def mkdir(self, parents: bool = False, exist_ok: bool = False) -> None:
+        """Make directory.
+
+        Parameters
+        ----------
+        parents : bool, optional
+            If true, any missing parents of this directory are created as well.
+        exist_ok : bool, optional
+            If true, no error will be raised if the target directory already exists.
+        """
+
+
+class DPOSPath(DPPath):
+    """The OS path class to data system (DeepmdData) for real directories.
+
+    Parameters
+    ----------
+    path : str
+        path
+    mode : str, optional
+        mode, by default "r"
+    """
+
+    def __init__(self, path: str, mode: str = "r") -> None:
+        super().__init__()
+        self.mode = mode
+        if isinstance(path, Path):
+            self.path = path
+        else:
+            self.path = Path(path)
+
+    def load_numpy(self) -> np.ndarray:
+        """Load NumPy array.
+
+        Returns
+        -------
+        np.ndarray
+            loaded NumPy array
+        """
+        return np.load(str(self.path))
+
+    def load_txt(self, **kwargs) -> np.ndarray:
+        """Load NumPy array from text.
+
+        Returns
+        -------
+        np.ndarray
+            loaded NumPy array
+        """
+        return np.loadtxt(str(self.path), **kwargs)
+
+    def save_numpy(self, arr: np.ndarray) -> None:
+        """Save NumPy array.
+
+        Parameters
+        ----------
+        arr : np.ndarray
+            NumPy array
+        """
+        if self.mode == "r":
+            raise ValueError("Cannot save to read-only path")
+        with self.path.open("wb") as f:
+            np.save(f, arr)
+
+    def glob(self, pattern: str) -> List["DPPath"]:
+        """Search path using the glob pattern.
+
+        Parameters
+        ----------
+        pattern : str
+            glob pattern
+
+        Returns
+        -------
+        List[DPPath]
+            list of paths
+        """
+        # currently DPOSPath will only derivative DPOSPath
+        return [type(self)(p, mode=self.mode) for p in self.path.glob(pattern)]
+
+    def rglob(self, pattern: str) -> List["DPPath"]:
+        """This is like calling :meth:`DPPath.glob()` with `**/` added in front
+        of the given relative pattern.
+
+        Parameters
+        ----------
+        pattern : str
+            glob pattern
+
+        Returns
+        -------
+        List[DPPath]
+            list of paths
+        """
+        return [type(self)(p, mode=self.mode) for p in self.path.rglob(pattern)]
+
+    def is_file(self) -> bool:
+        """Check if self is file."""
+        return self.path.is_file()
+
+    def is_dir(self) -> bool:
+        """Check if self is directory."""
+        return self.path.is_dir()
+
+    def __truediv__(self, key: str) -> "DPPath":
+        """Used for / operator."""
+        return type(self)(self.path / key, mode=self.mode)
+
+    def __lt__(self, other: "DPOSPath") -> bool:
+        """Whether this DPPath is less than other for sorting."""
+        return self.path < other.path
+
+    def __str__(self) -> str:
+        """Represent string."""
+        return str(self.path)
+
+    @property
+    def name(self) -> str:
+        """Name of the path."""
+        return self.path.name
+
+    def mkdir(self, parents: bool = False, exist_ok: bool = False) -> None:
+        """Make directory.
+
+        Parameters
+        ----------
+        parents : bool, optional
+            If true, any missing parents of this directory are created as well.
+        exist_ok : bool, optional
+            If true, no error will be raised if the target directory already exists.
+        """
+        if self.mode == "r":
+            raise ValueError("Cannot mkdir to read-only path")
+        self.path.mkdir(parents=parents, exist_ok=exist_ok)
+
+
+class DPH5Path(DPPath):
+    """The path class to data system (DeepmdData) for HDF5 files.
+
+    Notes
+    -----
+    OS - HDF5 relationship:
+        directory - Group
+        file - Dataset
+
+    Parameters
+    ----------
+    path : str
+        path
+    mode : str, optional
+        mode, by default "r"
+    """
+
+    def __init__(self, path: str, mode: str = "r") -> None:
+        super().__init__()
+        self.mode = mode
+        # we use "#" to split path
+        # so we do not support file names containing #...
+        s = path.split("#")
+        self.root_path = s[0]
+        self.root = self._load_h5py(s[0], mode)
+        # h5 path: default is the root path
+        self._name = s[1] if len(s) > 1 else "/"
+
+    @classmethod
+    @lru_cache(None)
+    def _load_h5py(cls, path: str, mode: str = "r") -> h5py.File:
+        """Load hdf5 file.
+
+        Parameters
+        ----------
+        path : str
+            path to hdf5 file
+        mode : str, optional
+            mode, by default 'r'
+        """
+        # this method has cache to avoid duplicated
+        # loading from different DPH5Path
+        # However the file will be never closed?
+        return h5py.File(path, mode)
+
+    def load_numpy(self) -> np.ndarray:
+        """Load NumPy array.
+
+        Returns
+        -------
+        np.ndarray
+            loaded NumPy array
+        """
+        return self.root[self._name][:]
+
+    def load_txt(self, dtype: Optional[np.dtype] = None, **kwargs) -> np.ndarray:
+        """Load NumPy array from text.
+
+        Returns
+        -------
+        np.ndarray
+            loaded NumPy array
+        """
+        arr = self.load_numpy()
+        if dtype:
+            arr = arr.astype(dtype)
+        return arr
+
+    def save_numpy(self, arr: np.ndarray) -> None:
+        """Save NumPy array.
+
+        Parameters
+        ----------
+        arr : np.ndarray
+            NumPy array
+        """
+        if self._name in self._keys:
+            del self.root[self._name]
+        self.root.create_dataset(self._name, data=arr)
+        self.root.flush()
+        self._new_keys.append(self._name)
+
+    def glob(self, pattern: str) -> List["DPPath"]:
+        """Search path using the glob pattern.
+
+        Parameters
+        ----------
+        pattern : str
+            glob pattern
+
+        Returns
+        -------
+        List[DPPath]
+            list of paths
+        """
+        # got paths starts with current path first, which is faster
+        subpaths = [ii for ii in self._keys if ii.startswith(self._name)]
+        return [
+            type(self)(f"{self.root_path}#{pp}", mode=self.mode)
+            for pp in globfilter(subpaths, self._connect_path(pattern))
+        ]
+
+    def rglob(self, pattern: str) -> List["DPPath"]:
+        """This is like calling :meth:`DPPath.glob()` with `**/` added in front
+        of the given relative pattern.
+
+        Parameters
+        ----------
+        pattern : str
+            glob pattern
+
+        Returns
+        -------
+        List[DPPath]
+            list of paths
+        """
+        return self.glob("**" + pattern)
+
+    @property
+    def _keys(self) -> List[str]:
+        """Walk all groups and dataset."""
+        return self._file_keys(self.root)
+
+    __file_new_keys: ClassVar[Dict[h5py.File, List[str]]] = {}
+
+    @property
+    def _new_keys(self):
+        """New keys that haven't been cached."""
+        self.__file_new_keys.setdefault(self.root, [])
+        return self.__file_new_keys[self.root]
+
+    @classmethod
+    @lru_cache(None)
+    def _file_keys(cls, file: h5py.File) -> List[str]:
+        """Walk all groups and dataset."""
+        l = []
+        file.visit(lambda x: l.append("/" + x))
+        return l
+
+    def is_file(self) -> bool:
+        """Check if self is file."""
+        if self._name not in self._keys and self._name not in self._new_keys:
+            return False
+        return isinstance(self.root[self._name], h5py.Dataset)
+
+    def is_dir(self) -> bool:
+        """Check if self is directory."""
+        if self._name == "/":
+            return True
+        if self._name not in self._keys and self._name not in self._new_keys:
+            return False
+        return isinstance(self.root[self._name], h5py.Group)
+
+    def __truediv__(self, key: str) -> "DPPath":
+        """Used for / operator."""
+        return type(self)(f"{self.root_path}#{self._connect_path(key)}", mode=self.mode)
+
+    def _connect_path(self, path: str) -> str:
+        """Connect self with path."""
+        if self._name.endswith("/"):
+            return f"{self._name}{path}"
+        return f"{self._name}/{path}"
+
+    def __lt__(self, other: "DPH5Path") -> bool:
+        """Whether this DPPath is less than other for sorting."""
+        if self.root_path == other.root_path:
+            return self._name < other._name
+        return self.root_path < other.root_path
+
+    def __str__(self) -> str:
+        """Returns path of self."""
+        return f"{self.root_path}#{self._name}"
+
+    @property
+    def name(self) -> str:
+        """Name of the path."""
+        return self._name.split("/")[-1]
+
+    def mkdir(self, parents: bool = False, exist_ok: bool = False) -> None:
+        """Make directory.
+
+        Parameters
+        ----------
+        parents : bool, optional
+            If true, any missing parents of this directory are created as well.
+        exist_ok : bool, optional
+            If true, no error will be raised if the target directory already exists.
+        """
+        if self._name in self._keys:
+            if not exist_ok:
+                raise FileExistsError(f"{self} already exists")
+            return
+        if parents:
+            self.root.require_group(self._name)
+        else:
+            self.root.create_group(self._name)
+        self._new_keys.append(self._name)
diff --git a/deepmd/utils/plugin.py b/deepmd/utils/plugin.py
index 3b5b297304..22f315f63d 100644
--- a/deepmd/utils/plugin.py
+++ b/deepmd/utils/plugin.py
@@ -1,15 +1,161 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Alias for backward compatibility."""
-from deepmd_utils.utils.plugin import (
-    Plugin,
-    PluginVariant,
-    VariantABCMeta,
-    VariantMeta,
+"""Base of plugin systems."""
+# copied from https://github.com/deepmodeling/dpdata/blob/a3e76d75de53f6076254de82d18605a010dc3b00/dpdata/plugin.py
+
+import difflib
+from abc import (
+    ABCMeta,
+)
+from typing import (
+    Callable,
+    Dict,
+    Optional,
+    Type,
 )
 
-__all__ = [
-    "Plugin",
-    "VariantMeta",
-    "VariantABCMeta",
-    "PluginVariant",
-]
+
+class Plugin:
+    """A class to register and restore plugins.
+
+    Attributes
+    ----------
+    plugins : Dict[str, object]
+        plugins
+
+    Examples
+    --------
+    >>> plugin = Plugin()
+    >>> @plugin.register("xx")
+        def xxx():
+            pass
+    >>> print(plugin.plugins["xx"])
+    """
+
+    def __init__(self):
+        self.plugins = {}
+
+    def __add__(self, other) -> "Plugin":
+        self.plugins.update(other.plugins)
+        return self
+
+    def register(self, key: str) -> Callable[[object], object]:
+        """Register a plugin.
+
+        Parameters
+        ----------
+        key : str
+            key of the plugin
+
+        Returns
+        -------
+        Callable[[object], object]
+            decorator
+        """
+
+        def decorator(object: object) -> object:
+            self.plugins[key] = object
+            return object
+
+        return decorator
+
+    def get_plugin(self, key) -> object:
+        """Visit a plugin by key.
+
+        Parameters
+        ----------
+        key : str
+            key of the plugin
+
+        Returns
+        -------
+        object
+            the plugin
+        """
+        return self.plugins[key]
+
+
+class VariantMeta:
+    def __call__(cls, *args, **kwargs):
+        """Remove `type` and keys that starts with underline."""
+        obj = cls.__new__(cls, *args, **kwargs)
+        kwargs.pop("type", None)
+        to_pop = []
+        for kk in kwargs:
+            if kk[0] == "_":
+                to_pop.append(kk)
+        for kk in to_pop:
+            kwargs.pop(kk, None)
+        obj.__init__(*args, **kwargs)
+        return obj
+
+
+class VariantABCMeta(VariantMeta, ABCMeta):
+    pass
+
+
+class PluginVariant(metaclass=VariantABCMeta):
+    """A class to remove `type` from input arguments."""
+
+    pass
+
+
+def make_plugin_registry(name: Optional[str] = None) -> Type[object]:
+    """Make a plugin registry.
+
+    Parameters
+    ----------
+    name : Optional[str]
+        the name of the registry for the error message, e.g. descriptor, backend, etc.
+
+    Examples
+    --------
+    >>> class BaseClass(make_plugin_registry()):
+            pass
+    """
+    if name is None:
+        name = "class"
+
+    class PR:
+        __plugins = Plugin()
+
+        @staticmethod
+        def register(key: str) -> Callable[[object], object]:
+            """Register a descriptor plugin.
+
+            Parameters
+            ----------
+            key : str
+                the key of a descriptor
+
+            Returns
+            -------
+            callable[[object], object]
+                the registered descriptor
+
+            Examples
+            --------
+            >>> @BaseClass.register("some_class")
+                class SomeClass(BaseClass):
+                    pass
+            """
+            return PR.__plugins.register(key)
+
+        @classmethod
+        def get_class_by_type(cls, class_type: str) -> Type[object]:
+            """Get the class by the plugin type."""
+            if class_type in PR.__plugins.plugins:
+                return PR.__plugins.plugins[class_type]
+            else:
+                # did you mean
+                matches = difflib.get_close_matches(
+                    class_type, PR.__plugins.plugins.keys()
+                )
+                dym_message = f"Did you mean: {matches[0]}?" if matches else ""
+                raise RuntimeError(f"Unknown {name} type: {class_type}. {dym_message}")
+
+        @classmethod
+        def get_plugins(cls) -> Dict[str, Type[object]]:
+            """Get all the registered plugins."""
+            return PR.__plugins.plugins
+
+    return PR
diff --git a/deepmd/utils/random.py b/deepmd/utils/random.py
index 09547eeac9..44ea6a1dac 100644
--- a/deepmd/utils/random.py
+++ b/deepmd/utils/random.py
@@ -1,15 +1,82 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Alias for backward compatibility."""
-from deepmd_utils.utils.random import (
-    choice,
-    random,
-    seed,
-    shuffle,
+from typing import (
+    Optional,
+    Tuple,
+    Union,
 )
 
-__all__ = [
-    "choice",
-    "random",
-    "seed",
-    "shuffle",
-]
+import numpy as np
+
+_RANDOM_GENERATOR = np.random.RandomState()
+
+
+def choice(
+    a: Union[np.ndarray, int],
+    size: Optional[Union[int, Tuple[int, ...]]] = None,
+    replace: bool = True,
+    p: Optional[np.ndarray] = None,
+):
+    """Generates a random sample from a given 1-D array.
+
+    Parameters
+    ----------
+    a : 1-D array-like or int
+        If an ndarray, a random sample is generated from its elements. If an int,
+        the random sample is generated as if it were np.arange(a)
+    size : int or tuple of ints, optional
+        Output shape. If the given shape is, e.g., (m, n, k), then m * n * k samples
+        are drawn. Default is None, in which case a single value is returned.
+    replace : boolean, optional
+        Whether the sample is with or without replacement. Default is True, meaning
+        that a value of a can be selected multiple times.
+    p : 1-D array-like, optional
+        The probabilities associated with each entry in a. If not given, the sample
+        assumes a uniform distribution over all entries in a.
+
+    Returns
+    -------
+    np.ndarray
+        arrays with results and their shapes
+    """
+    return _RANDOM_GENERATOR.choice(a, size=size, replace=replace, p=p)
+
+
+def random(size=None):
+    """Return random floats in the half-open interval [0.0, 1.0).
+
+    Parameters
+    ----------
+    size
+        Output shape.
+
+    Returns
+    -------
+    np.ndarray
+        Arrays with results and their shapes.
+    """
+    return _RANDOM_GENERATOR.random_sample(size)
+
+
+def seed(val: Optional[int] = None):
+    """Seed the generator.
+
+    Parameters
+    ----------
+    val : int
+        Seed.
+    """
+    _RANDOM_GENERATOR.seed(val)
+
+
+def shuffle(x: np.ndarray):
+    """Modify a sequence in-place by shuffling its contents.
+
+    Parameters
+    ----------
+    x : np.ndarray
+        The array or list to be shuffled.
+    """
+    _RANDOM_GENERATOR.shuffle(x)
+
+
+__all__ = ["choice", "random", "seed", "shuffle"]
diff --git a/deepmd/utils/spin.py b/deepmd/utils/spin.py
index 7820627649..38e8da48da 100644
--- a/deepmd/utils/spin.py
+++ b/deepmd/utils/spin.py
@@ -1,87 +1,199 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
 from typing import (
     List,
-    Optional,
+    Tuple,
+    Union,
 )
 
-from deepmd.env import (
-    GLOBAL_TF_FLOAT_PRECISION,
-    tf,
-)
+import numpy as np
 
 
 class Spin:
-    """Class for spin.
+    """Class for spin, mainly processes the spin type-related information.
+    Atom types can be split into three kinds:
+    1. Real types: real atom species, "Fe", "H", "O", etc.
+    2. Spin types: atom species with spin, as virtual atoms in input, "Fe_spin", etc.
+    3. Placeholder types: atom species without spin, as placeholders in input without contribution,
+    also name "H_spin", "O_spin", etc.
+    For any types in 2. or 3., the type index is `ntypes` plus index of its corresponding real type.
 
     Parameters
     ----------
-    use_spin
-                Whether to use atomic spin model for each atom type
-    spin_norm
-                The magnitude of atomic spin for each atom type with spin
-    virtual_len
-                The distance between virtual atom representing spin and its corresponding real atom for each atom type with spin
+    use_spin: List[bool]
+                A list of boolean values indicating whether to use atomic spin for each atom type.
+                True for spin and False for not. List of bool values with shape of [ntypes].
+    virtual_scale: List[float], float
+                The scaling factor to determine the virtual distance
+                between a virtual atom representing spin and its corresponding real atom
+                for each atom type with spin. This factor is defined as the virtual distance
+                divided by the magnitude of atomic spin for each atom type with spin.
+                The virtual coordinate is defined as the real coordinate plus spin * virtual_scale.
+                List of float values with shape of [ntypes] or [ntypes_spin] or one single float value for all types,
+                only used when use_spin is True for each atom type.
     """
 
     def __init__(
         self,
-        use_spin: Optional[List[bool]] = None,
-        spin_norm: Optional[List[float]] = None,
-        virtual_len: Optional[List[float]] = None,
+        use_spin: List[bool],
+        virtual_scale: Union[List[float], float],
     ) -> None:
-        """Constructor."""
-        self.use_spin = use_spin
-        self.spin_norm = spin_norm
-        self.virtual_len = virtual_len
-        self.ntypes_spin = self.use_spin.count(True)
+        self.ntypes_real = len(use_spin)
+        self.ntypes_spin = use_spin.count(True)
+        self.use_spin = np.array(use_spin)
+        self.spin_mask = self.use_spin.astype(np.int64)
+        self.ntypes_real_and_spin = self.ntypes_real + self.ntypes_spin
+        self.ntypes_placeholder = self.ntypes_real - self.ntypes_spin
+        self.ntypes_input = 2 * self.ntypes_real  # with placeholder for input types
+        self.real_type = np.arange(self.ntypes_real)
+        self.spin_type = np.arange(self.ntypes_real)[self.use_spin] + self.ntypes_real
+        self.real_and_spin_type = np.concatenate([self.real_type, self.spin_type])
+        self.placeholder_type = (
+            np.arange(self.ntypes_real)[~self.use_spin] + self.ntypes_real
+        )
+        self.spin_placeholder_type = np.arange(self.ntypes_real) + self.ntypes_real
+        self.input_type = np.arange(self.ntypes_real * 2)
+        if isinstance(virtual_scale, list):
+            if len(virtual_scale) == self.ntypes_real:
+                self.virtual_scale = virtual_scale
+            elif len(virtual_scale) == self.ntypes_spin:
+                self.virtual_scale = np.zeros(self.ntypes_real)
+                self.virtual_scale[self.use_spin] = virtual_scale
+            else:
+                raise ValueError(
+                    f"Invalid length of virtual_scale for spin atoms"
+                    f": Expected {self.ntypes_real} or { self.ntypes_spin} but got {len(virtual_scale)}!"
+                )
+        elif isinstance(virtual_scale, float):
+            self.virtual_scale = [virtual_scale for _ in range(self.ntypes_real)]
+        else:
+            raise ValueError(f"Invalid virtual scale type: {type(virtual_scale)}")
+        self.virtual_scale = np.array(self.virtual_scale)
+        self.virtual_scale_mask = (self.virtual_scale * self.use_spin).reshape([-1])
+        self.pair_exclude_types = []
+        self.init_pair_exclude_types_placeholder()
+        self.atom_exclude_types_ps = []
+        self.init_atom_exclude_types_placeholder_spin()
+        self.atom_exclude_types_p = []
+        self.init_atom_exclude_types_placeholder()
 
-    def build(
-        self,
-        reuse=None,
-        suffix="",
-    ):
-        """Build the computational graph for the spin.
-
-        Parameters
-        ----------
-        reuse
-            The weights in the networks should be reused when get the variable.
-        suffix
-            Name suffix to identify this descriptor
-
-        Returns
-        -------
-        embedded_types
-            The computational graph for embedded types
-        """
-        name = "spin_attr" + suffix
-        with tf.variable_scope(name, reuse=reuse):
-            t_ntypes_spin = tf.constant(
-                self.ntypes_spin, name="ntypes_spin", dtype=tf.int32
-            )
-            t_virtual_len = tf.constant(
-                self.virtual_len,
-                name="virtual_len",
-                dtype=GLOBAL_TF_FLOAT_PRECISION,
-            )
-            t_spin_norm = tf.constant(
-                self.spin_norm,
-                name="spin_norm",
-                dtype=GLOBAL_TF_FLOAT_PRECISION,
-            )
+    def get_ntypes_real(self) -> int:
+        """Returns the number of real atom types."""
+        return self.ntypes_real
 
     def get_ntypes_spin(self) -> int:
         """Returns the number of atom types which contain spin."""
         return self.ntypes_spin
 
+    def get_ntypes_real_and_spin(self) -> int:
+        """Returns the number of real atom types and types which contain spin."""
+        return self.ntypes_real_and_spin
+
+    def get_ntypes_input(self) -> int:
+        """Returns the number of double real atom types for input placeholder."""
+        return self.ntypes_input
+
     def get_use_spin(self) -> List[bool]:
         """Returns the list of whether to use spin for each atom type."""
         return self.use_spin
 
-    def get_spin_norm(self) -> List[float]:
+    def get_virtual_scale(self) -> np.ndarray:
         """Returns the list of magnitude of atomic spin for each atom type."""
-        return self.spin_norm
+        return self.virtual_scale
+
+    def init_pair_exclude_types_placeholder(self) -> None:
+        """
+        Initialize the pair-wise exclusion types for descriptor.
+        The placeholder types for those without spin are excluded.
+        """
+        ti_grid, tj_grid = np.meshgrid(
+            self.placeholder_type, self.input_type, indexing="ij"
+        )
+        self.pair_exclude_types = (
+            np.stack((ti_grid, tj_grid), axis=-1).reshape(-1, 2).tolist()
+        )
+
+    def init_atom_exclude_types_placeholder_spin(self) -> None:
+        """
+        Initialize the atom-wise exclusion types for fitting.
+        Both the placeholder types and spin types are excluded.
+        """
+        self.atom_exclude_types_ps = self.spin_placeholder_type.tolist()
+
+    def init_atom_exclude_types_placeholder(self) -> None:
+        """
+        Initialize the atom-wise exclusion types for fitting.
+        The placeholder types for those without spin are excluded.
+        """
+        self.atom_exclude_types_p = self.placeholder_type.tolist()
+
+    def get_pair_exclude_types(self, exclude_types=None) -> List[Tuple[int, int]]:
+        """
+        Return the pair-wise exclusion types for descriptor.
+        The placeholder types for those without spin are excluded.
+        """
+        if exclude_types is None:
+            return self.pair_exclude_types
+        else:
+            _exclude_types: List[Tuple[int, int]] = copy.deepcopy(
+                self.pair_exclude_types
+            )
+            for tt in exclude_types:
+                assert len(tt) == 2
+                _exclude_types.append((tt[0], tt[1]))
+            return _exclude_types
+
+    def get_atom_exclude_types(self, exclude_types=None) -> List[int]:
+        """
+        Return the atom-wise exclusion types for fitting before out_def.
+        Both the placeholder types and spin types are excluded.
+        """
+        if exclude_types is None:
+            return self.atom_exclude_types_ps
+        else:
+            _exclude_types: List[int] = copy.deepcopy(self.atom_exclude_types_ps)
+            _exclude_types += exclude_types
+            _exclude_types = list(set(_exclude_types))
+            return _exclude_types
+
+    def get_atom_exclude_types_placeholder(self, exclude_types=None) -> List[int]:
+        """
+        Return the atom-wise exclusion types for fitting after out_def.
+        The placeholder types for those without spin are excluded.
+        """
+        if exclude_types is None:
+            return self.atom_exclude_types_p
+        else:
+            _exclude_types: List[int] = copy.deepcopy(self.atom_exclude_types_p)
+            _exclude_types += exclude_types
+            _exclude_types = list(set(_exclude_types))
+            return _exclude_types
+
+    def get_spin_mask(self):
+        """
+        Return the spin mask of shape [ntypes],
+        with spin types being 1, and non-spin types being 0.
+        """
+        return self.spin_mask
+
+    def get_virtual_scale_mask(self):
+        """
+        Return the virtual scale mask of shape [ntypes],
+        with spin types being its virtual scale, and non-spin types being 0.
+        """
+        return self.virtual_scale_mask
+
+    def serialize(
+        self,
+    ) -> dict:
+        return {
+            "use_spin": self.use_spin.tolist(),
+            "virtual_scale": self.virtual_scale.tolist(),
+        }
 
-    def get_virtual_len(self) -> List[float]:
-        """Returns the list of distance between real atom and virtual atom for each atom type."""
-        return self.virtual_len
+    @classmethod
+    def deserialize(
+        cls,
+        data: dict,
+    ) -> "Spin":
+        return cls(**data)
diff --git a/deepmd/utils/summary.py b/deepmd/utils/summary.py
new file mode 100644
index 0000000000..e2118bf7e0
--- /dev/null
+++ b/deepmd/utils/summary.py
@@ -0,0 +1,127 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
+import os
+from abc import (
+    ABC,
+    abstractmethod,
+)
+from typing import (
+    ClassVar,
+)
+
+import deepmd
+from deepmd.env import (
+    GLOBAL_CONFIG,
+    get_default_nthreads,
+    global_float_prec,
+)
+from deepmd.utils.hostlist import (
+    get_host_names,
+)
+
+log = logging.getLogger(__name__)
+
+
+class SummaryPrinter(ABC):
+    """Base summary printer.
+
+    Backends should inherit from this class and implement the abstract methods.
+    """
+
+    # http://patorjk.com/software/taag. Font:Big"
+    WELCOME = (
+        r" _____               _____   __  __  _____           _     _  _   ",
+        r"|  __ \             |  __ \ |  \/  ||  __ \         | |   (_)| |  ",
+        r"| |  | |  ___   ___ | |__) || \  / || |  | | ______ | | __ _ | |_ ",
+        r"| |  | | / _ \ / _ \|  ___/ | |\/| || |  | ||______|| |/ /| || __|",
+        r"| |__| ||  __/|  __/| |     | |  | || |__| |        |   < | || |_ ",
+        r"|_____/  \___| \___||_|     |_|  |_||_____/         |_|\_\|_| \__|",
+    )
+
+    CITATION = (
+        "Please read and cite:",
+        "Wang, Zhang, Han and E, Comput.Phys.Comm. 228, 178-184 (2018)",
+        "Zeng et al, J. Chem. Phys., 159, 054801 (2023)",
+        "See https://deepmd.rtfd.io/credits/ for details.",
+    )
+
+    BUILD: ClassVar = {
+        "installed to": "\n".join(deepmd.__path__),
+        "source": GLOBAL_CONFIG["git_summ"],
+        "source brach": GLOBAL_CONFIG["git_branch"],
+        "source commit": GLOBAL_CONFIG["git_hash"],
+        "source commit at": GLOBAL_CONFIG["git_date"],
+        "use float prec": global_float_prec,
+        "build variant": GLOBAL_CONFIG["dp_variant"],
+    }
+
+    def __call__(self):
+        """Print build and current running cluster configuration summary."""
+        nodename, nodelist = get_host_names()
+        build_info = self.BUILD.copy()
+        build_info.update(self.get_backend_info())
+        if len(nodelist) > 1:
+            build_info.update(
+                {
+                    "world size": str(len(nodelist)),
+                    "node list": ", ".join(set(nodelist)),
+                }
+            )
+        build_info.update(
+            {
+                "running on": nodename,
+                "computing device": self.get_compute_device(),
+            }
+        )
+        if self.is_built_with_cuda():
+            env_value = os.environ.get("CUDA_VISIBLE_DEVICES", "unset")
+            build_info["CUDA_VISIBLE_DEVICES"] = env_value
+        if self.is_built_with_rocm():
+            env_value = os.environ.get("HIP_VISIBLE_DEVICES", "unset")
+            build_info["HIP_VISIBLE_DEVICES"] = env_value
+        if self.is_built_with_cuda() or self.is_built_with_rocm():
+            build_info["Count of visible GPUs"] = str(self.get_ngpus())
+
+        intra, inter = get_default_nthreads()
+        build_info.update(
+            {
+                "num_intra_threads": str(intra),
+                "num_inter_threads": str(inter),
+            }
+        )
+        # count the maximum characters in the keys and values
+        max_key_len = max(len(k) for k in build_info) + 2
+        max_val_len = max(
+            len(x) for v in build_info.values() for x in str(v).split("\n")
+        )
+        # print the summary
+        for line in self.WELCOME + self.CITATION:
+            log.info(line)
+        log.info("-" * (max_key_len + max_val_len))
+        for kk, vv in build_info.items():
+            for iline, vline in enumerate(str(vv).split("\n")):
+                if iline == 0:
+                    log.info(f"{kk + ': ':<{max_key_len}}{vline}")
+                else:
+                    log.info(f"{'':<{max_key_len}}{vline}")
+        log.info("-" * (max_key_len + max_val_len))
+
+    @abstractmethod
+    def is_built_with_cuda(self) -> bool:
+        """Check if the backend is built with CUDA."""
+
+    @abstractmethod
+    def is_built_with_rocm(self) -> bool:
+        """Check if the backend is built with ROCm."""
+
+    @abstractmethod
+    def get_compute_device(self) -> str:
+        """Get Compute device."""
+
+    @abstractmethod
+    def get_ngpus(self) -> int:
+        """Get the number of GPUs."""
+
+    def get_backend_info(self) -> dict:
+        """Get backend information."""
+        return {}
diff --git a/deepmd/utils/update_sel.py b/deepmd/utils/update_sel.py
new file mode 100644
index 0000000000..d1be8e8138
--- /dev/null
+++ b/deepmd/utils/update_sel.py
@@ -0,0 +1,170 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
+from abc import (
+    abstractmethod,
+)
+from typing import (
+    Type,
+)
+
+from deepmd.utils.data_system import (
+    get_data,
+)
+from deepmd.utils.neighbor_stat import (
+    NeighborStat,
+)
+
+log = logging.getLogger(__name__)
+
+
+class BaseUpdateSel:
+    """Update the sel field in the descriptor."""
+
+    def update_one_sel(
+        self,
+        jdata,
+        descriptor,
+        mixed_type: bool = False,
+        rcut_key="rcut",
+        sel_key="sel",
+    ):
+        rcut = descriptor[rcut_key]
+        tmp_sel = self.get_sel(
+            jdata,
+            rcut,
+            mixed_type=mixed_type,
+        )
+        sel = descriptor[sel_key]
+        if isinstance(sel, int):
+            # convert to list and finnally convert back to int
+            sel = [sel]
+        if self.parse_auto_sel(descriptor[sel_key]):
+            ratio = self.parse_auto_sel_ratio(descriptor[sel_key])
+            descriptor[sel_key] = sel = [
+                int(self.wrap_up_4(ii * ratio)) for ii in tmp_sel
+            ]
+        else:
+            # sel is set by user
+            for ii, (tt, dd) in enumerate(zip(tmp_sel, sel)):
+                if dd and tt > dd:
+                    # we may skip warning for sel=0, where the user is likely
+                    # to exclude such type in the descriptor
+                    log.warning(
+                        "sel of type %d is not enough! The expected value is "
+                        "not less than %d, but you set it to %d. The accuracy"
+                        " of your model may get worse." % (ii, tt, dd)
+                    )
+        if mixed_type:
+            descriptor[sel_key] = sum(sel)
+        return descriptor
+
+    def parse_auto_sel(self, sel):
+        if not isinstance(sel, str):
+            return False
+        words = sel.split(":")
+        if words[0] == "auto":
+            return True
+        else:
+            return False
+
+    def parse_auto_sel_ratio(self, sel):
+        if not self.parse_auto_sel(sel):
+            raise RuntimeError(f"invalid auto sel format {sel}")
+        else:
+            words = sel.split(":")
+            if len(words) == 1:
+                ratio = 1.1
+            elif len(words) == 2:
+                ratio = float(words[1])
+            else:
+                raise RuntimeError(f"invalid auto sel format {sel}")
+            return ratio
+
+    def wrap_up_4(self, xx):
+        return 4 * ((int(xx) + 3) // 4)
+
+    def get_sel(self, jdata, rcut, mixed_type: bool = False):
+        _, max_nbor_size = self.get_nbor_stat(jdata, rcut, mixed_type=mixed_type)
+        return max_nbor_size
+
+    def get_rcut(self, jdata):
+        if jdata["model"].get("type") == "pairwise_dprc":
+            return max(
+                jdata["model"]["qm_model"]["descriptor"]["rcut"],
+                jdata["model"]["qmmm_model"]["descriptor"]["rcut"],
+            )
+        descrpt_data = jdata["model"]["descriptor"]
+        rcut_list = []
+        if descrpt_data["type"] == "hybrid":
+            for ii in descrpt_data["list"]:
+                rcut_list.append(ii["rcut"])
+        else:
+            rcut_list.append(descrpt_data["rcut"])
+        return max(rcut_list)
+
+    def get_type_map(self, jdata):
+        return jdata["model"].get("type_map", None)
+
+    def get_nbor_stat(self, jdata, rcut, mixed_type: bool = False):
+        # it seems that DeepmdDataSystem does not need rcut
+        # it's not clear why there is an argument...
+        # max_rcut = get_rcut(jdata)
+        max_rcut = rcut
+        type_map = self.get_type_map(jdata)
+
+        if type_map and len(type_map) == 0:
+            type_map = None
+        multi_task_mode = "data_dict" in jdata["training"]
+        if not multi_task_mode:
+            train_data = get_data(
+                jdata["training"]["training_data"], max_rcut, type_map, None
+            )
+            train_data.get_batch()
+        else:
+            assert (
+                type_map is not None
+            ), "Data stat in multi-task mode must have available type_map! "
+            train_data = None
+            for systems in jdata["training"]["data_dict"]:
+                tmp_data = get_data(
+                    jdata["training"]["data_dict"][systems]["training_data"],
+                    max_rcut,
+                    type_map,
+                    None,
+                )
+                tmp_data.get_batch()
+                assert tmp_data.get_type_map(), f"In multi-task mode, 'type_map.raw' must be defined in data systems {systems}! "
+                if train_data is None:
+                    train_data = tmp_data
+                else:
+                    train_data.system_dirs += tmp_data.system_dirs
+                    train_data.data_systems += tmp_data.data_systems
+                    train_data.natoms += tmp_data.natoms
+                    train_data.natoms_vec += tmp_data.natoms_vec
+                    train_data.default_mesh += tmp_data.default_mesh
+        data_ntypes = train_data.get_ntypes()
+        if type_map is not None:
+            map_ntypes = len(type_map)
+        else:
+            map_ntypes = data_ntypes
+        ntypes = max([map_ntypes, data_ntypes])
+
+        neistat = self.neighbor_stat(ntypes, rcut, mixed_type=mixed_type)
+
+        min_nbor_dist, max_nbor_size = neistat.get_stat(train_data)
+        self.hook(min_nbor_dist, max_nbor_size)
+
+        return min_nbor_dist, max_nbor_size
+
+    @property
+    @abstractmethod
+    def neighbor_stat(self) -> Type[NeighborStat]:
+        pass
+
+    @abstractmethod
+    def hook(self, min_nbor_dist, max_nbor_size):
+        pass
+
+    def get_min_nbor_dist(self, jdata, rcut):
+        min_nbor_dist, _ = self.get_nbor_stat(jdata, rcut)
+        return min_nbor_dist
diff --git a/deepmd/utils/version.py b/deepmd/utils/version.py
new file mode 100644
index 0000000000..a0b479778d
--- /dev/null
+++ b/deepmd/utils/version.py
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+def check_version_compatibility(
+    current_version: int,
+    maximum_supported_version: int,
+    minimal_supported_version: int = 1,
+):
+    """Check if the current version is compatible with the supported versions.
+
+    Parameters
+    ----------
+    current_version : int
+        The current version.
+    maximum_supported_version : int
+        The maximum supported version.
+    minimal_supported_version : int, optional
+        The minimal supported version. Default is 1.
+
+    Raises
+    ------
+    ValueError
+        If the current version is not compatible with the supported versions.
+    """
+    if not minimal_supported_version <= current_version <= maximum_supported_version:
+        raise ValueError(
+            f"Current version {current_version} is not compatible with supported versions "
+            f"[{minimal_supported_version}, {maximum_supported_version}]."
+        )
diff --git a/deepmd/utils/weight_avg.py b/deepmd/utils/weight_avg.py
index 267f89ed28..b344d3bb75 100644
--- a/deepmd/utils/weight_avg.py
+++ b/deepmd/utils/weight_avg.py
@@ -1,9 +1,48 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-"""Alias for backward compatibility."""
-from deepmd_utils.utils.weight_avg import (
-    weighted_average,
+from collections import (
+    defaultdict,
 )
+from typing import (
+    Dict,
+    List,
+    Tuple,
+)
+
+import numpy as np
+
+
+def weighted_average(errors: List[Dict[str, Tuple[float, float]]]) -> Dict:
+    """Compute wighted average of prediction errors (MAE or RMSE) for model.
+
+    Parameters
+    ----------
+    errors : List[Dict[str, Tuple[float, float]]]
+        List: the error of systems
+        Dict: the error of quantities, name given by the key
+        str: the name of the quantity, must starts with 'mae' or 'rmse'
+        Tuple: (error, weight)
 
-__all__ = [
-    "weighted_average",
-]
+    Returns
+    -------
+    Dict
+        weighted averages
+    """
+    sum_err = defaultdict(float)
+    sum_siz = defaultdict(int)
+    for err in errors:
+        for kk, (ee, ss) in err.items():
+            if kk.startswith("mae"):
+                sum_err[kk] += ee * ss
+            elif kk.startswith("rmse"):
+                sum_err[kk] += ee * ee * ss
+            else:
+                raise RuntimeError("unknown error type")
+            sum_siz[kk] += ss
+    for kk in sum_err.keys():
+        if kk.startswith("mae"):
+            sum_err[kk] = sum_err[kk] / sum_siz[kk]
+        elif kk.startswith("rmse"):
+            sum_err[kk] = np.sqrt(sum_err[kk] / sum_siz[kk])
+        else:
+            raise RuntimeError("unknown error type")
+    return sum_err
diff --git a/deepmd_utils/__init__.py b/deepmd_utils/__init__.py
deleted file mode 100644
index 1c5314bb7e..0000000000
--- a/deepmd_utils/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-"""Untilization methods for DeePMD-kit.
-
-The __init__ module should not import any modules
-for performance.
-"""
diff --git a/deepmd_utils/common.py b/deepmd_utils/common.py
deleted file mode 100644
index b594c54030..0000000000
--- a/deepmd_utils/common.py
+++ /dev/null
@@ -1,270 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-import json
-import warnings
-from pathlib import (
-    Path,
-)
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Dict,
-    List,
-    Optional,
-    TypeVar,
-    Union,
-)
-
-try:
-    from typing import Literal  # python >=3.8
-except ImportError:
-    from typing_extensions import Literal  # type: ignore
-
-import numpy as np
-import yaml
-
-from deepmd_utils.env import (
-    GLOBAL_NP_FLOAT_PRECISION,
-)
-from deepmd_utils.utils.path import (
-    DPPath,
-)
-
-__all__ = [
-    "data_requirement",
-    "add_data_requirement",
-    "select_idx_map",
-    "make_default_mesh",
-    "j_must_have",
-    "j_loader",
-    "expand_sys_str",
-    "get_np_precision",
-]
-
-
-if TYPE_CHECKING:
-    _DICT_VAL = TypeVar("_DICT_VAL")
-    _PRECISION = Literal["default", "float16", "float32", "float64"]
-    _ACTIVATION = Literal[
-        "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu", "gelu_tf"
-    ]
-    __all__.extend(
-        [
-            "_DICT_VAL",
-            "_PRECISION",
-            "_ACTIVATION",
-        ]
-    )
-
-
-# TODO this is not a good way to do things. This is some global variable to which
-# TODO anyone can write and there is no good way to keep track of the changes
-data_requirement = {}
-
-
-def add_data_requirement(
-    key: str,
-    ndof: int,
-    atomic: bool = False,
-    must: bool = False,
-    high_prec: bool = False,
-    type_sel: Optional[bool] = None,
-    repeat: int = 1,
-    default: float = 0.0,
-    dtype: Optional[np.dtype] = None,
-):
-    """Specify data requirements for training.
-
-    Parameters
-    ----------
-    key : str
-        type of data stored in corresponding `*.npy` file e.g. `forces` or `energy`
-    ndof : int
-        number of the degrees of freedom, this is tied to `atomic` parameter e.g. forces
-        have `atomic=True` and `ndof=3`
-    atomic : bool, optional
-        specifies whwther the `ndof` keyworrd applies to per atom quantity or not,
-        by default False
-    must : bool, optional
-        specifi if the `*.npy` data file must exist, by default False
-    high_prec : bool, optional
-        if true load data to `np.float64` else `np.float32`, by default False
-    type_sel : bool, optional
-        select only certain type of atoms, by default None
-    repeat : int, optional
-        if specify repaeat data `repeat` times, by default 1
-    default : float, optional, default=0.
-        default value of data
-    dtype : np.dtype, optional
-        the dtype of data, overwrites `high_prec` if provided
-    """
-    data_requirement[key] = {
-        "ndof": ndof,
-        "atomic": atomic,
-        "must": must,
-        "high_prec": high_prec,
-        "type_sel": type_sel,
-        "repeat": repeat,
-        "default": default,
-        "dtype": dtype,
-    }
-
-
-def select_idx_map(atom_types: np.ndarray, select_types: np.ndarray) -> np.ndarray:
-    """Build map of indices for element supplied element types from all atoms list.
-
-    Parameters
-    ----------
-    atom_types : np.ndarray
-        array specifing type for each atoms as integer
-    select_types : np.ndarray
-        types of atoms you want to find indices for
-
-    Returns
-    -------
-    np.ndarray
-        indices of types of atoms defined by `select_types` in `atom_types` array
-
-    Warnings
-    --------
-    `select_types` array will be sorted before finding indices in `atom_types`
-    """
-    sort_select_types = np.sort(select_types)
-    idx_map = []
-    for ii in sort_select_types:
-        idx_map.append(np.where(atom_types == ii)[0])
-    return np.concatenate(idx_map)
-
-
-def make_default_mesh(pbc: bool, mixed_type: bool) -> np.ndarray:
-    """Make mesh.
-
-    Only the size of mesh matters, not the values:
-    * 6 for PBC, no mixed types
-    * 0 for no PBC, no mixed types
-    * 7 for PBC, mixed types
-    * 1 for no PBC, mixed types
-
-    Parameters
-    ----------
-    pbc : bool
-        if True, the mesh will be made for periodic boundary conditions
-    mixed_type : bool
-        if True, the mesh will be made for mixed types
-
-    Returns
-    -------
-    np.ndarray
-        mesh
-    """
-    mesh_size = int(pbc) * 6 + int(mixed_type)
-    default_mesh = np.zeros(mesh_size, dtype=np.int32)
-    return default_mesh
-
-
-# TODO maybe rename this to j_deprecated and only warn about deprecated keys,
-# TODO if the deprecated_key argument is left empty function puppose is only custom
-# TODO error since dict[key] already raises KeyError when the key is missing
-def j_must_have(
-    jdata: Dict[str, "_DICT_VAL"], key: str, deprecated_key: List[str] = []
-) -> "_DICT_VAL":
-    """Assert that supplied dictionary conaines specified key.
-
-    Returns
-    -------
-    _DICT_VAL
-        value that was store unde supplied key
-
-    Raises
-    ------
-    RuntimeError
-        if the key is not present
-    """
-    if key not in jdata.keys():
-        for ii in deprecated_key:
-            if ii in jdata.keys():
-                warnings.warn(f"the key {ii} is deprecated, please use {key} instead")
-                return jdata[ii]
-        else:
-            raise RuntimeError(f"json database must provide key {key}")
-    else:
-        return jdata[key]
-
-
-def j_loader(filename: Union[str, Path]) -> Dict[str, Any]:
-    """Load yaml or json settings file.
-
-    Parameters
-    ----------
-    filename : Union[str, Path]
-        path to file
-
-    Returns
-    -------
-    Dict[str, Any]
-        loaded dictionary
-
-    Raises
-    ------
-    TypeError
-        if the supplied file is of unsupported type
-    """
-    filepath = Path(filename)
-    if filepath.suffix.endswith("json"):
-        with filepath.open() as fp:
-            return json.load(fp)
-    elif filepath.suffix.endswith(("yml", "yaml")):
-        with filepath.open() as fp:
-            return yaml.safe_load(fp)
-    else:
-        raise TypeError("config file must be json, or yaml/yml")
-
-
-# TODO port completely to pathlib when all callers are ported
-def expand_sys_str(root_dir: Union[str, Path]) -> List[str]:
-    """Recursively iterate over directories taking those that contain `type.raw` file.
-
-    Parameters
-    ----------
-    root_dir : Union[str, Path]
-        starting directory
-
-    Returns
-    -------
-    List[str]
-        list of string pointing to system directories
-    """
-    root_dir = DPPath(root_dir)
-    matches = [str(d) for d in root_dir.rglob("*") if (d / "type.raw").is_file()]
-    if (root_dir / "type.raw").is_file():
-        matches.append(str(root_dir))
-    return matches
-
-
-def get_np_precision(precision: "_PRECISION") -> np.dtype:
-    """Get numpy precision constant from string.
-
-    Parameters
-    ----------
-    precision : _PRECISION
-        string name of numpy constant or default
-
-    Returns
-    -------
-    np.dtype
-        numpy presicion constant
-
-    Raises
-    ------
-    RuntimeError
-        if string is invalid
-    """
-    if precision == "default":
-        return GLOBAL_NP_FLOAT_PRECISION
-    elif precision == "float16":
-        return np.float16
-    elif precision == "float32":
-        return np.float32
-    elif precision == "float64":
-        return np.float64
-    else:
-        raise RuntimeError(f"{precision} is not a valid precision")
diff --git a/deepmd_utils/entrypoints/doc.py b/deepmd_utils/entrypoints/doc.py
deleted file mode 100644
index 9f1fd39095..0000000000
--- a/deepmd_utils/entrypoints/doc.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-"""Module that prints train input arguments docstrings."""
-
-from deepmd_utils.utils.argcheck import (
-    gen_doc,
-    gen_json,
-)
-
-__all__ = ["doc_train_input"]
-
-
-def doc_train_input(*, out_type: str = "rst", **kwargs):
-    """Print out trining input arguments to console."""
-    if out_type == "rst":
-        doc_str = gen_doc(make_anchor=True)
-    elif out_type == "json":
-        doc_str = gen_json()
-    else:
-        raise RuntimeError("Unsupported out type %s" % out_type)
-    print(doc_str)
diff --git a/deepmd_utils/entrypoints/gui.py b/deepmd_utils/entrypoints/gui.py
deleted file mode 100644
index 8b6b9e0a09..0000000000
--- a/deepmd_utils/entrypoints/gui.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-"""DP-GUI entrypoint."""
-
-
-def start_dpgui(*, port: int, bind_all: bool, **kwargs):
-    """Host DP-GUI server.
-
-    Parameters
-    ----------
-    port : int
-        The port to serve DP-GUI on.
-    bind_all : bool
-        Serve on all public interfaces. This will expose your DP-GUI instance
-        to the network on both IPv4 and IPv6 (where available).
-    **kwargs
-        additional arguments
-
-    Raises
-    ------
-    ModuleNotFoundError
-        The dpgui package is not installed
-    """
-    try:
-        from dpgui import (
-            start_dpgui,
-        )
-    except ModuleNotFoundError as e:
-        raise ModuleNotFoundError(
-            "To use DP-GUI, please install the dpgui package:\npip install dpgui"
-        ) from e
-    start_dpgui(port=port, bind_all=bind_all)
diff --git a/deepmd_utils/env.py b/deepmd_utils/env.py
deleted file mode 100644
index b1d4958ed8..0000000000
--- a/deepmd_utils/env.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-import os
-
-import numpy as np
-
-__all__ = [
-    "GLOBAL_NP_FLOAT_PRECISION",
-    "GLOBAL_ENER_FLOAT_PRECISION",
-    "global_float_prec",
-]
-
-# FLOAT_PREC
-dp_float_prec = os.environ.get("DP_INTERFACE_PREC", "high").lower()
-if dp_float_prec in ("high", ""):
-    # default is high
-    GLOBAL_NP_FLOAT_PRECISION = np.float64
-    GLOBAL_ENER_FLOAT_PRECISION = np.float64
-    global_float_prec = "double"
-elif dp_float_prec == "low":
-    GLOBAL_NP_FLOAT_PRECISION = np.float32
-    GLOBAL_ENER_FLOAT_PRECISION = np.float64
-    global_float_prec = "float"
-else:
-    raise RuntimeError(
-        "Unsupported float precision option: %s. Supported: high,"
-        "low. Please set precision with environmental variable "
-        "DP_INTERFACE_PREC." % dp_float_prec
-    )
diff --git a/deepmd_utils/loggers/loggers.py b/deepmd_utils/loggers/loggers.py
deleted file mode 100644
index 015581f6bd..0000000000
--- a/deepmd_utils/loggers/loggers.py
+++ /dev/null
@@ -1,277 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-"""Logger initialization for package."""
-
-import logging
-import os
-from typing import (
-    TYPE_CHECKING,
-    Optional,
-)
-
-if TYPE_CHECKING:
-    from pathlib import (
-        Path,
-    )
-
-    from mpi4py import (
-        MPI,
-    )
-
-    _MPI_APPEND_MODE = MPI.MODE_CREATE | MPI.MODE_APPEND
-
-logging.getLogger(__name__)
-
-__all__ = ["set_log_handles"]
-
-# logger formater
-FFORMATTER = logging.Formatter(
-    "[%(asctime)s] %(app_name)s %(levelname)-7s %(name)-45s %(message)s"
-)
-CFORMATTER = logging.Formatter(
-    #    "%(app_name)s %(levelname)-7s |-> %(name)-45s %(message)s"
-    "%(app_name)s %(levelname)-7s %(message)s"
-)
-FFORMATTER_MPI = logging.Formatter(
-    "[%(asctime)s] %(app_name)s rank:%(rank)-2s %(levelname)-7s %(name)-45s %(message)s"
-)
-CFORMATTER_MPI = logging.Formatter(
-    #    "%(app_name)s rank:%(rank)-2s %(levelname)-7s |-> %(name)-45s %(message)s"
-    "%(app_name)s rank:%(rank)-2s %(levelname)-7s %(message)s"
-)
-
-
-class _AppFilter(logging.Filter):
-    """Add field `app_name` to log messages."""
-
-    def filter(self, record):
-        record.app_name = "DEEPMD"
-        return True
-
-
-class _MPIRankFilter(logging.Filter):
-    """Add MPI rank number to log messages, adds field `rank`."""
-
-    def __init__(self, rank: int) -> None:
-        super().__init__(name="MPI_rank_id")
-        self.mpi_rank = str(rank)
-
-    def filter(self, record):
-        record.rank = self.mpi_rank
-        return True
-
-
-class _MPIMasterFilter(logging.Filter):
-    """Filter that lets through only messages emited from rank==0."""
-
-    def __init__(self, rank: int) -> None:
-        super().__init__(name="MPI_master_log")
-        self.mpi_rank = rank
-
-    def filter(self, record):
-        if self.mpi_rank == 0:
-            return True
-        else:
-            return False
-
-
-class _MPIFileStream:
-    """Wrap MPI.File` so it has the same API as python file streams.
-
-    Parameters
-    ----------
-    filename : Path
-        disk location of the file stream
-    MPI : MPI
-        MPI communicator object
-    mode : str, optional
-        file write mode, by default _MPI_APPEND_MODE
-    """
-
-    def __init__(
-        self, filename: "Path", MPI: "MPI", mode: str = "_MPI_APPEND_MODE"
-    ) -> None:
-        self.stream = MPI.File.Open(MPI.COMM_WORLD, filename, mode)
-        self.stream.Set_atomicity(True)
-        self.name = "MPIfilestream"
-
-    def write(self, msg: str):
-        """Write to MPI shared file stream.
-
-        Parameters
-        ----------
-        msg : str
-            message to write
-        """
-        b = bytearray()
-        b.extend(map(ord, msg))
-        self.stream.Write_shared(b)
-
-    def close(self):
-        """Synchronize and close MPI file stream."""
-        self.stream.Sync()
-        self.stream.Close()
-
-
-class _MPIHandler(logging.FileHandler):
-    """Emulate `logging.FileHandler` with MPI shared File that all ranks can write to.
-
-    Parameters
-    ----------
-    filename : Path
-        file path
-    MPI : MPI
-        MPI communicator object
-    mode : str, optional
-        file access mode, by default "_MPI_APPEND_MODE"
-    """
-
-    def __init__(
-        self,
-        filename: "Path",
-        MPI: "MPI",
-        mode: str = "_MPI_APPEND_MODE",
-    ) -> None:
-        self.MPI = MPI
-        super().__init__(filename, mode=mode, encoding=None, delay=False)
-
-    def _open(self):
-        return _MPIFileStream(self.baseFilename, self.MPI, self.mode)
-
-    def setStream(self, stream):
-        """Stream canot be reasigned in MPI mode."""
-        raise NotImplementedError("Unable to do for MPI file handler!")
-
-
-def set_log_handles(
-    level: int, log_path: Optional["Path"] = None, mpi_log: Optional[str] = None
-):
-    """Set desired level for package loggers and add file handlers.
-
-    Parameters
-    ----------
-    level : int
-        logging level
-    log_path : Optional[str]
-        path to log file, if None logs will be send only to console. If the parent
-        directory does not exist it will be automatically created, by default None
-    mpi_log : Optional[str], optional
-        mpi log type. Has three options. `master` will output logs to file and console
-        only from rank==0. `collect` will write messages from all ranks to one file
-        opened under rank==0 and to console. `workers` will open one log file for each
-        worker designated by its rank, console behaviour is the same as for `collect`.
-        If this argument is specified, package 'mpi4py' must be already installed.
-        by default None
-
-    Raises
-    ------
-    RuntimeError
-        If the argument `mpi_log` is specified, package `mpi4py` is not installed.
-
-    References
-    ----------
-    https://groups.google.com/g/mpi4py/c/SaNzc8bdj6U
-    https://stackoverflow.com/questions/35869137/avoid-tensorflow-print-on-standard-error
-    https://stackoverflow.com/questions/56085015/suppress-openmp-debug-messages-when-running-tensorflow-on-cpu
-
-    Notes
-    -----
-    Logging levels:
-
-    +---------+--------------+----------------+----------------+----------------+
-    |         | our notation | python logging | tensorflow cpp | OpenMP         |
-    +=========+==============+================+================+================+
-    | debug   | 10           | 10             | 0              | 1/on/true/yes  |
-    +---------+--------------+----------------+----------------+----------------+
-    | info    | 20           | 20             | 1              | 0/off/false/no |
-    +---------+--------------+----------------+----------------+----------------+
-    | warning | 30           | 30             | 2              | 0/off/false/no |
-    +---------+--------------+----------------+----------------+----------------+
-    | error   | 40           | 40             | 3              | 0/off/false/no |
-    +---------+--------------+----------------+----------------+----------------+
-
-    """
-    # silence logging for OpenMP when running on CPU if level is any other than debug
-    if level <= 10:
-        os.environ["KMP_WARNINGS"] = "FALSE"
-
-    # set TF cpp internal logging level
-    os.environ["TF_CPP_MIN_LOG_LEVEL"] = str(int((level / 10) - 1))
-
-    # get root logger
-    root_log = logging.getLogger("deepmd")
-    root_log.propagate = False
-
-    root_log.setLevel(level)
-
-    # check if arguments are present
-    MPI = None
-    if mpi_log:
-        try:
-            from mpi4py import (
-                MPI,
-            )
-        except ImportError as e:
-            raise RuntimeError(
-                "You cannot specify 'mpi_log' when mpi4py not installed"
-            ) from e
-
-    # * add console handler ************************************************************
-    ch = logging.StreamHandler()
-    if MPI:
-        rank = MPI.COMM_WORLD.Get_rank()
-        if mpi_log == "master":
-            ch.setFormatter(CFORMATTER)
-            ch.addFilter(_MPIMasterFilter(rank))
-        else:
-            ch.setFormatter(CFORMATTER_MPI)
-            ch.addFilter(_MPIRankFilter(rank))
-    else:
-        ch.setFormatter(CFORMATTER)
-
-    ch.setLevel(level)
-    ch.addFilter(_AppFilter())
-    # clean old handlers before adding new one
-    root_log.handlers.clear()
-    root_log.addHandler(ch)
-
-    # * add file handler ***************************************************************
-    if log_path:
-        # create directory
-        log_path.parent.mkdir(exist_ok=True, parents=True)
-
-        fh = None
-
-        if mpi_log == "master":
-            rank = MPI.COMM_WORLD.Get_rank()
-            if rank == 0:
-                fh = logging.FileHandler(log_path, mode="w")
-                fh.addFilter(_MPIMasterFilter(rank))
-                fh.setFormatter(FFORMATTER)
-        elif mpi_log == "collect":
-            rank = MPI.COMM_WORLD.Get_rank()
-            fh = _MPIHandler(log_path, MPI, mode=MPI.MODE_WRONLY | MPI.MODE_CREATE)
-            fh.addFilter(_MPIRankFilter(rank))
-            fh.setFormatter(FFORMATTER_MPI)
-        elif mpi_log == "workers":
-            rank = MPI.COMM_WORLD.Get_rank()
-            # if file has suffix than inser rank number before suffix
-            # e.g deepmd.log -> deepmd_<rank>.log
-            # if no suffix is present, insert rank as suffix
-            # e.g. deepmdlog -> deepmdlog.<rank>
-            if log_path.suffix:
-                worker_log = (log_path.parent / f"{log_path.stem}_{rank}").with_suffix(
-                    log_path.suffix
-                )
-            else:
-                worker_log = log_path.with_suffix(f".{rank}")
-
-            fh = logging.FileHandler(worker_log, mode="w")
-            fh.setFormatter(FFORMATTER)
-        else:
-            fh = logging.FileHandler(log_path, mode="w")
-            fh.setFormatter(FFORMATTER)
-
-        if fh:
-            fh.setLevel(level)
-            fh.addFilter(_AppFilter())
-            root_log.addHandler(fh)
diff --git a/deepmd_utils/model_format/common.py b/deepmd_utils/model_format/common.py
deleted file mode 100644
index d032e5d5df..0000000000
--- a/deepmd_utils/model_format/common.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-from abc import (
-    ABC,
-)
-
-import numpy as np
-
-PRECISION_DICT = {
-    "float16": np.float16,
-    "float32": np.float32,
-    "float64": np.float64,
-    "half": np.float16,
-    "single": np.float32,
-    "double": np.float64,
-}
-DEFAULT_PRECISION = "float64"
-
-
-class NativeOP(ABC):
-    """The unit operation of a native model."""
-
-    def call(self, *args, **kwargs):
-        """Forward pass in NumPy implementation."""
-        raise NotImplementedError
-
-    def __call__(self, *args, **kwargs):
-        """Forward pass in NumPy implementation."""
-        return self.call(*args, **kwargs)
diff --git a/deepmd_utils/model_format/output_def.py b/deepmd_utils/model_format/output_def.py
deleted file mode 100644
index 268dc21ea6..0000000000
--- a/deepmd_utils/model_format/output_def.py
+++ /dev/null
@@ -1,281 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-from typing import (
-    Dict,
-    List,
-    Tuple,
-)
-
-
-def check_shape(
-    shape: List[int],
-    def_shape: List[int],
-):
-    """Check if the shape satisfies the defined shape."""
-    assert len(shape) == len(def_shape)
-    if def_shape[-1] == -1:
-        if list(shape[:-1]) != def_shape[:-1]:
-            raise ValueError(f"{shape[:-1]} shape not matching def {def_shape[:-1]}")
-    else:
-        if list(shape) != def_shape:
-            raise ValueError(f"{shape} shape not matching def {def_shape}")
-
-
-def check_var(var, var_def):
-    if var_def.atomic:
-        # var.shape == [nf, nloc, *var_def.shape]
-        if len(var.shape) != len(var_def.shape) + 2:
-            raise ValueError(f"{var.shape[2:]} length not matching def {var_def.shape}")
-        check_shape(list(var.shape[2:]), var_def.shape)
-    else:
-        # var.shape == [nf, *var_def.shape]
-        if len(var.shape) != len(var_def.shape) + 1:
-            raise ValueError(f"{var.shape[1:]} length not matching def {var_def.shape}")
-        check_shape(list(var.shape[1:]), var_def.shape)
-
-
-def model_check_output(cls):
-    """Check if the output of the Model is consistent with the definition.
-
-    Two methods are assumed to be provided by the Model:
-    1. Model.output_def that gives the output definition.
-    2. Model.__call__ that defines the forward path of the model.
-
-    """
-
-    class wrapper(cls):
-        def __init__(
-            self,
-            *args,
-            **kwargs,
-        ):
-            super().__init__(*args, **kwargs)
-            self.md = self.output_def()
-
-        def __call__(
-            self,
-            *args,
-            **kwargs,
-        ):
-            ret = cls.__call__(self, *args, **kwargs)
-            for kk in self.md.keys_outp():
-                dd = self.md[kk]
-                check_var(ret[kk], dd)
-                if dd.reduciable:
-                    rk = get_reduce_name(kk)
-                    check_var(ret[rk], self.md[rk])
-                if dd.differentiable:
-                    dnr, dnc = get_deriv_name(kk)
-                    check_var(ret[dnr], self.md[dnr])
-                    check_var(ret[dnc], self.md[dnc])
-            return ret
-
-    return wrapper
-
-
-def fitting_check_output(cls):
-    """Check if the output of the Fitting is consistent with the definition.
-
-    Two methods are assumed to be provided by the Fitting:
-    1. Fitting.output_def that gives the output definition.
-    2. Fitting.__call__ defines the forward path of the fitting.
-
-    """
-
-    class wrapper(cls):
-        def __init__(
-            self,
-            *args,
-            **kwargs,
-        ):
-            super().__init__(*args, **kwargs)
-            self.md = self.output_def()
-
-        def __call__(
-            self,
-            *args,
-            **kwargs,
-        ):
-            ret = cls.__call__(self, *args, **kwargs)
-            for kk in self.md.keys():
-                dd = self.md[kk]
-                check_var(ret[kk], dd)
-            return ret
-
-    return wrapper
-
-
-class OutputVariableDef:
-    """Defines the shape and other properties of the one output variable.
-
-    It is assume that the fitting network output variables for each
-    local atom. This class defines one output variable, including its
-    name, shape, reducibility and differentiability.
-
-    Parameters
-    ----------
-    name
-          Name of the output variable. Notice that the xxxx_redu,
-          xxxx_derv_c, xxxx_derv_r are reserved names that should
-          not be used to define variables.
-    shape
-          The shape of the variable. e.g. energy should be [1],
-          dipole should be [3], polarizabilty should be [3,3].
-    reduciable
-          If the variable is reduced.
-    differentiable
-          If the variable is differentiated with respect to coordinates
-          of atoms and cell tensor (pbc case). Only reduciable variable
-          are differentiable.
-
-    """
-
-    def __init__(
-        self,
-        name: str,
-        shape: List[int],
-        reduciable: bool = False,
-        differentiable: bool = False,
-        atomic: bool = True,
-    ):
-        self.name = name
-        self.shape = list(shape)
-        self.atomic = atomic
-        self.reduciable = reduciable
-        self.differentiable = differentiable
-        if not self.reduciable and self.differentiable:
-            raise ValueError("only reduciable variable are differentiable")
-
-
-class FittingOutputDef:
-    """Defines the shapes and other properties of the fitting network outputs.
-
-    It is assume that the fitting network output variables for each
-    local atom. This class defines all the outputs.
-
-    Parameters
-    ----------
-    var_defs
-          List of output variable definitions.
-
-    """
-
-    def __init__(
-        self,
-        var_defs: List[OutputVariableDef],
-    ):
-        self.var_defs = {vv.name: vv for vv in var_defs}
-
-    def __getitem__(
-        self,
-        key: str,
-    ) -> OutputVariableDef:
-        return self.var_defs[key]
-
-    def get_data(self) -> Dict[str, OutputVariableDef]:
-        return self.var_defs
-
-    def keys(self):
-        return self.var_defs.keys()
-
-
-class ModelOutputDef:
-    """Defines the shapes and other properties of the model outputs.
-
-    The model reduce and differentiate fitting outputs if applicable.
-    If a variable is named by foo, then the reduced variable is called
-    foo_redu, the derivative w.r.t. coordinates is called foo_derv_r
-    and the derivative w.r.t. cell is called foo_derv_c.
-
-    Parameters
-    ----------
-    fit_defs
-          Definition for the fitting net output
-
-    """
-
-    def __init__(
-        self,
-        fit_defs: FittingOutputDef,
-    ):
-        self.def_outp = fit_defs
-        self.def_redu = do_reduce(self.def_outp)
-        self.def_derv_r, self.def_derv_c = do_derivative(self.def_outp)
-        self.var_defs: Dict[str, OutputVariableDef] = {}
-        for ii in [
-            self.def_outp.get_data(),
-            self.def_redu,
-            self.def_derv_c,
-            self.def_derv_r,
-        ]:
-            self.var_defs.update(ii)
-
-    def __getitem__(
-        self,
-        key: str,
-    ) -> OutputVariableDef:
-        return self.var_defs[key]
-
-    def get_data(
-        self,
-        key: str,
-    ) -> Dict[str, OutputVariableDef]:
-        return self.var_defs
-
-    def keys(self):
-        return self.var_defs.keys()
-
-    def keys_outp(self):
-        return self.def_outp.keys()
-
-    def keys_redu(self):
-        return self.def_redu.keys()
-
-    def keys_derv_r(self):
-        return self.def_derv_r.keys()
-
-    def keys_derv_c(self):
-        return self.def_derv_c.keys()
-
-
-def get_reduce_name(name: str) -> str:
-    return name + "_redu"
-
-
-def get_deriv_name(name: str) -> Tuple[str, str]:
-    return name + "_derv_r", name + "_derv_c"
-
-
-def do_reduce(
-    def_outp: FittingOutputDef,
-) -> Dict[str, OutputVariableDef]:
-    def_redu: Dict[str, OutputVariableDef] = {}
-    for kk, vv in def_outp.get_data().items():
-        if vv.reduciable:
-            rk = get_reduce_name(kk)
-            def_redu[rk] = OutputVariableDef(
-                rk, vv.shape, reduciable=False, differentiable=False, atomic=False
-            )
-    return def_redu
-
-
-def do_derivative(
-    def_outp: FittingOutputDef,
-) -> Tuple[Dict[str, OutputVariableDef], Dict[str, OutputVariableDef]]:
-    def_derv_r: Dict[str, OutputVariableDef] = {}
-    def_derv_c: Dict[str, OutputVariableDef] = {}
-    for kk, vv in def_outp.get_data().items():
-        if vv.differentiable:
-            rkr, rkc = get_deriv_name(kk)
-            def_derv_r[rkr] = OutputVariableDef(
-                rkr,
-                vv.shape + [3],  # noqa: RUF005
-                reduciable=False,
-                differentiable=False,
-            )
-            def_derv_c[rkc] = OutputVariableDef(
-                rkc,
-                vv.shape + [3, 3],  # noqa: RUF005
-                reduciable=True,
-                differentiable=False,
-            )
-    return def_derv_r, def_derv_c
diff --git a/deepmd_utils/utils/__init__.py b/deepmd_utils/utils/__init__.py
deleted file mode 100644
index bac6924ac1..0000000000
--- a/deepmd_utils/utils/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-# For performance, do not add things to this file
-# import submodules instead
diff --git a/deepmd_utils/utils/argcheck.py b/deepmd_utils/utils/argcheck.py
deleted file mode 100644
index 6c51a7b859..0000000000
--- a/deepmd_utils/utils/argcheck.py
+++ /dev/null
@@ -1,2028 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-import json
-import logging
-from typing import (
-    Callable,
-    List,
-    Optional,
-)
-
-from dargs import (
-    Argument,
-    ArgumentEncoder,
-    Variant,
-    dargs,
-)
-
-from deepmd.common import (
-    ACTIVATION_FN_DICT,
-    PRECISION_DICT,
-)
-from deepmd_utils.utils.argcheck_nvnmd import (
-    nvnmd_args,
-)
-from deepmd_utils.utils.plugin import (
-    Plugin,
-)
-
-log = logging.getLogger(__name__)
-
-
-def list_to_doc(xx):
-    items = []
-    for ii in xx:
-        if len(items) == 0:
-            items.append(f'"{ii}"')
-        else:
-            items.append(f', "{ii}"')
-    items.append(".")
-    return "".join(items)
-
-
-def make_link(content, ref_key):
-    return (
-        f"`{content} <{ref_key}_>`_"
-        if not dargs.RAW_ANCHOR
-        else f"`{content} <#{ref_key}>`_"
-    )
-
-
-def type_embedding_args():
-    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_seed = "Random seed for parameter initialization"
-    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_trainable = "If the parameters in the embedding net are trainable"
-
-    return [
-        Argument("neuron", List[int], optional=True, default=[8], doc=doc_neuron),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
-        Argument("precision", str, optional=True, default="default", doc=doc_precision),
-        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
-        Argument("seed", [int, None], optional=True, default=None, doc=doc_seed),
-    ]
-
-
-def spin_args():
-    doc_use_spin = "Whether to use atomic spin model for each atom type"
-    doc_spin_norm = "The magnitude of atomic spin for each atom type with spin"
-    doc_virtual_len = "The distance between virtual atom representing spin and its corresponding real atom for each atom type with spin"
-
-    return [
-        Argument("use_spin", List[bool], doc=doc_use_spin),
-        Argument("spin_norm", List[float], doc=doc_spin_norm),
-        Argument("virtual_len", List[float], doc=doc_virtual_len),
-    ]
-
-
-#  --- Descriptor configurations: --- #
-
-
-class ArgsPlugin:
-    def __init__(self) -> None:
-        self.__plugin = Plugin()
-
-    def register(
-        self, name: str, alias: Optional[List[str]] = None
-    ) -> Callable[[], List[Argument]]:
-        """Register a descriptor argument plugin.
-
-        Parameters
-        ----------
-        name : str
-            the name of a descriptor
-        alias : List[str], optional
-            the list of aliases of this descriptor
-
-        Returns
-        -------
-        Callable[[], List[Argument]]
-            the registered descriptor argument method
-
-        Examples
-        --------
-        >>> some_plugin = ArgsPlugin()
-        >>> @some_plugin.register("some_descrpt")
-            def descrpt_some_descrpt_args():
-                return []
-        """
-        # convert alias to hashed item
-        if isinstance(alias, list):
-            alias = tuple(alias)
-        return self.__plugin.register((name, alias))
-
-    def get_all_argument(self, exclude_hybrid: bool = False) -> List[Argument]:
-        """Get all arguments.
-
-        Parameters
-        ----------
-        exclude_hybrid : bool
-            exclude hybrid descriptor to prevent circular calls
-
-        Returns
-        -------
-        List[Argument]
-            all arguments
-        """
-        arguments = []
-        for (name, alias), metd in self.__plugin.plugins.items():
-            if exclude_hybrid and name == "hybrid":
-                continue
-            arguments.append(
-                Argument(name=name, dtype=dict, sub_fields=metd(), alias=alias)
-            )
-        return arguments
-
-
-descrpt_args_plugin = ArgsPlugin()
-
-
-@descrpt_args_plugin.register("loc_frame")
-def descrpt_local_frame_args():
-    doc_sel_a = "A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_a[i]` gives the selected number of type-i neighbors. The full relative coordinates of the neighbors are used by the descriptor."
-    doc_sel_r = "A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_r[i]` gives the selected number of type-i neighbors. Only relative distance of the neighbors are used by the descriptor. sel_a[i] + sel_r[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius."
-    doc_rcut = "The cut-off radius. The default value is 6.0"
-    doc_axis_rule = "A list of integers. The length should be 6 times of the number of types. \n\n\
-- axis_rule[i*6+0]: class of the atom defining the first axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\
-- axis_rule[i*6+1]: type of the atom defining the first axis of type-i atom.\n\n\
-- axis_rule[i*6+2]: index of the axis atom defining the first axis. Note that the neighbors with the same class and type are sorted according to their relative distance.\n\n\
-- axis_rule[i*6+3]: class of the atom defining the second axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\
-- axis_rule[i*6+4]: type of the atom defining the second axis of type-i atom.\n\n\
-- axis_rule[i*6+5]: index of the axis atom defining the second axis. Note that the neighbors with the same class and type are sorted according to their relative distance."
-
-    return [
-        Argument("sel_a", List[int], optional=False, doc=doc_sel_a),
-        Argument("sel_r", List[int], optional=False, doc=doc_sel_r),
-        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
-        Argument("axis_rule", List[int], optional=False, doc=doc_axis_rule),
-    ]
-
-
-@descrpt_args_plugin.register("se_e2_a", alias=["se_a"])
-def descrpt_se_a_args():
-    doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\
-    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\
-    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
-    doc_rcut = "The cut-off radius."
-    doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`"
-    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
-    doc_axis_neuron = "Size of the submatrix of G (embedding matrix)."
-    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters."
-    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_trainable = "If the parameters in the embedding net is trainable"
-    doc_seed = "Random seed for parameter initialization"
-    doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1."
-    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used"
-
-    return [
-        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
-        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
-        Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
-        Argument(
-            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
-        ),
-        Argument(
-            "axis_neuron",
-            int,
-            optional=True,
-            default=4,
-            alias=["n_axis_neuron"],
-            doc=doc_axis_neuron,
-        ),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
-        Argument(
-            "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side
-        ),
-        Argument("precision", str, optional=True, default="default", doc=doc_precision),
-        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-        Argument(
-            "exclude_types",
-            List[List[int]],
-            optional=True,
-            default=[],
-            doc=doc_exclude_types,
-        ),
-        Argument(
-            "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
-        ),
-    ]
-
-
-@descrpt_args_plugin.register("se_e3", alias=["se_at", "se_a_3be", "se_t"])
-def descrpt_se_t_args():
-    doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\
-    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\
-    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
-    doc_rcut = "The cut-off radius."
-    doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`"
-    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
-    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_trainable = "If the parameters in the embedding net are trainable"
-    doc_seed = "Random seed for parameter initialization"
-    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used"
-
-    return [
-        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
-        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
-        Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
-        Argument(
-            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
-        ),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
-        Argument("precision", str, optional=True, default="default", doc=doc_precision),
-        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-        Argument(
-            "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
-        ),
-    ]
-
-
-@descrpt_args_plugin.register("se_a_tpe", alias=["se_a_ebd"])
-def descrpt_se_a_tpe_args():
-    doc_type_nchanl = "number of channels for type embedding"
-    doc_type_nlayer = "number of hidden layers of type embedding net"
-    doc_numb_aparam = "dimension of atomic parameter. if set to a value > 0, the atomic parameters are embedded."
-
-    return [
-        *descrpt_se_a_args(),
-        Argument("type_nchanl", int, optional=True, default=4, doc=doc_type_nchanl),
-        Argument("type_nlayer", int, optional=True, default=2, doc=doc_type_nlayer),
-        Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
-    ]
-
-
-@descrpt_args_plugin.register("se_e2_r", alias=["se_r"])
-def descrpt_se_r_args():
-    doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\
-    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\
-    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
-    doc_rcut = "The cut-off radius."
-    doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`"
-    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
-    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters."
-    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_trainable = "If the parameters in the embedding net are trainable"
-    doc_seed = "Random seed for parameter initialization"
-    doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1."
-    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used"
-
-    return [
-        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
-        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
-        Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
-        Argument(
-            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
-        ),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
-        Argument(
-            "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side
-        ),
-        Argument("precision", str, optional=True, default="default", doc=doc_precision),
-        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-        Argument(
-            "exclude_types",
-            List[List[int]],
-            optional=True,
-            default=[],
-            doc=doc_exclude_types,
-        ),
-        Argument(
-            "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
-        ),
-    ]
-
-
-@descrpt_args_plugin.register("hybrid")
-def descrpt_hybrid_args():
-    doc_list = "A list of descriptor definitions"
-
-    return [
-        Argument(
-            "list",
-            list,
-            optional=False,
-            doc=doc_list,
-            repeat=True,
-            sub_fields=[],
-            sub_variants=[descrpt_variant_type_args(exclude_hybrid=True)],
-            fold_subdoc=True,
-        )
-    ]
-
-
-def descrpt_se_atten_common_args():
-    doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\
-    - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\
-    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\
-    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
-    doc_rcut = "The cut-off radius."
-    doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`"
-    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
-    doc_axis_neuron = "Size of the submatrix of G (embedding matrix)."
-    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters."
-    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_trainable = "If the parameters in the embedding net is trainable"
-    doc_seed = "Random seed for parameter initialization"
-    doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1."
-    doc_attn = "The length of hidden vectors in attention layers"
-    doc_attn_layer = "The number of attention layers. Note that model compression of `se_atten` is only enabled when attn_layer==0 and stripped_type_embedding is True"
-    doc_attn_dotr = "Whether to do dot product with the normalized relative coordinates"
-    doc_attn_mask = "Whether to do mask on the diagonal in the attention matrix"
-
-    return [
-        Argument(
-            "sel", [int, List[int], str], optional=True, default="auto", doc=doc_sel
-        ),
-        Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
-        Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
-        Argument(
-            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
-        ),
-        Argument(
-            "axis_neuron",
-            int,
-            optional=True,
-            default=4,
-            alias=["n_axis_neuron"],
-            doc=doc_axis_neuron,
-        ),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
-        Argument(
-            "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side
-        ),
-        Argument("precision", str, optional=True, default="default", doc=doc_precision),
-        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-        Argument(
-            "exclude_types",
-            List[List[int]],
-            optional=True,
-            default=[],
-            doc=doc_exclude_types,
-        ),
-        Argument("attn", int, optional=True, default=128, doc=doc_attn),
-        Argument("attn_layer", int, optional=True, default=2, doc=doc_attn_layer),
-        Argument("attn_dotr", bool, optional=True, default=True, doc=doc_attn_dotr),
-        Argument("attn_mask", bool, optional=True, default=False, doc=doc_attn_mask),
-    ]
-
-
-@descrpt_args_plugin.register("se_atten")
-def descrpt_se_atten_args():
-    doc_stripped_type_embedding = "Whether to strip the type embedding into a separated embedding network. Setting it to `False` will fall back to the previous version of `se_atten` which is non-compressible."
-    doc_smooth_type_embdding = "When using stripped type embedding, whether to dot smooth factor on the network output of type embedding to keep the network smooth, instead of setting `set_davg_zero` to be True."
-    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `se_atten` descriptor or `atom_ener` in the energy fitting is used"
-
-    return [
-        *descrpt_se_atten_common_args(),
-        Argument(
-            "stripped_type_embedding",
-            bool,
-            optional=True,
-            default=False,
-            doc=doc_stripped_type_embedding,
-        ),
-        Argument(
-            "smooth_type_embdding",
-            bool,
-            optional=True,
-            default=False,
-            doc=doc_smooth_type_embdding,
-        ),
-        Argument(
-            "set_davg_zero", bool, optional=True, default=True, doc=doc_set_davg_zero
-        ),
-    ]
-
-
-@descrpt_args_plugin.register("se_atten_v2")
-def descrpt_se_atten_v2_args():
-    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `se_atten` descriptor or `atom_ener` in the energy fitting is used"
-
-    return [
-        *descrpt_se_atten_common_args(),
-        Argument(
-            "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
-        ),
-    ]
-
-
-@descrpt_args_plugin.register("se_a_ebd_v2", alias=["se_a_tpe_v2"])
-def descrpt_se_a_ebd_v2_args():
-    return descrpt_se_a_args()
-
-
-@descrpt_args_plugin.register("se_a_mask")
-def descrpt_se_a_mask_args():
-    doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\
-    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\
-    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
-
-    doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
-    doc_axis_neuron = "Size of the submatrix of G (embedding matrix)."
-    doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters."
-    doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1."
-    doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_trainable = "If the parameters in the embedding net is trainable"
-    doc_seed = "Random seed for parameter initialization"
-
-    return [
-        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
-        Argument(
-            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
-        ),
-        Argument(
-            "axis_neuron",
-            int,
-            optional=True,
-            default=4,
-            alias=["n_axis_neuron"],
-            doc=doc_axis_neuron,
-        ),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt),
-        Argument(
-            "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side
-        ),
-        Argument(
-            "exclude_types",
-            List[List[int]],
-            optional=True,
-            default=[],
-            doc=doc_exclude_types,
-        ),
-        Argument("precision", str, optional=True, default="default", doc=doc_precision),
-        Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-    ]
-
-
-def descrpt_variant_type_args(exclude_hybrid: bool = False) -> Variant:
-    link_lf = make_link("loc_frame", "model/descriptor[loc_frame]")
-    link_se_e2_a = make_link("se_e2_a", "model/descriptor[se_e2_a]")
-    link_se_e2_r = make_link("se_e2_r", "model/descriptor[se_e2_r]")
-    link_se_e3 = make_link("se_e3", "model/descriptor[se_e3]")
-    link_se_a_tpe = make_link("se_a_tpe", "model/descriptor[se_a_tpe]")
-    link_hybrid = make_link("hybrid", "model/descriptor[hybrid]")
-    link_se_atten = make_link("se_atten", "model/descriptor[se_atten]")
-    link_se_atten_v2 = make_link("se_atten_v2", "model/descriptor[se_atten_v2]")
-    doc_descrpt_type = "The type of the descritpor. See explanation below. \n\n\
-- `loc_frame`: Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame.\n\n\
-- `se_e2_a`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.\n\n\
-- `se_e2_r`: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor.\n\n\
-- `se_e3`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor.\n\n\
-- `se_a_tpe`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Type embedding will be used by this descriptor.\n\n\
-- `se_atten`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism will be used by this descriptor.\n\n\
-- `se_atten_v2`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism with new modifications will be used by this descriptor.\n\n\
-- `se_a_mask`: Used by the smooth edition of Deep Potential. It can accept a variable number of atoms in a frame (Non-PBC system). *aparam* are required as an indicator matrix for the real/virtual sign of input atoms. \n\n\
-- `hybrid`: Concatenate of a list of descriptors as a new descriptor."
-
-    return Variant(
-        "type",
-        descrpt_args_plugin.get_all_argument(exclude_hybrid=exclude_hybrid),
-        doc=doc_descrpt_type,
-    )
-
-
-#  --- Fitting net configurations: --- #
-fitting_args_plugin = ArgsPlugin()
-
-
-@fitting_args_plugin.register("ener")
-def fitting_ener():
-    doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams."
-    doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams."
-    doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
-    doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_trainable = "Whether the parameters in the fitting net are trainable. This option can be\n\n\
-- bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\
-- list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of this list should be equal to len(`neuron`)+1."
-    doc_rcond = "The condition number used to determine the inital energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details."
-    doc_seed = "Random seed for parameter initialization of the fitting net"
-    doc_atom_ener = "Specify the atomic energy in vacuum for each type"
-    doc_layer_name = (
-        "The name of the each layer. The length of this list should be equal to n_neuron + 1. "
-        "If two layers, either in the same fitting or different fittings, "
-        "have the same name, they will share the same neural network parameters. "
-        "The shape of these layers should be the same. "
-        "If null is given for a layer, parameters will not be shared."
-    )
-    doc_use_aparam_as_mask = (
-        "Whether to use the aparam as a mask in input."
-        "If True, the aparam will not be used in fitting net for embedding."
-        "When descrpt is se_a_mask, the aparam will be used as a mask to indicate the input atom is real/virtual. And use_aparam_as_mask should be set to True."
-    )
-
-    return [
-        Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam),
-        Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
-        Argument(
-            "neuron",
-            List[int],
-            optional=True,
-            default=[120, 120, 120],
-            alias=["n_neuron"],
-            doc=doc_neuron,
-        ),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("precision", str, optional=True, default="default", doc=doc_precision),
-        Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
-        Argument(
-            "trainable",
-            [List[bool], bool],
-            optional=True,
-            default=True,
-            doc=doc_trainable,
-        ),
-        Argument(
-            "rcond", [float, type(None)], optional=True, default=None, doc=doc_rcond
-        ),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-        Argument(
-            "atom_ener",
-            List[Optional[float]],
-            optional=True,
-            default=[],
-            doc=doc_atom_ener,
-        ),
-        Argument("layer_name", List[str], optional=True, doc=doc_layer_name),
-        Argument(
-            "use_aparam_as_mask",
-            bool,
-            optional=True,
-            default=False,
-            doc=doc_use_aparam_as_mask,
-        ),
-    ]
-
-
-@fitting_args_plugin.register("dos")
-def fitting_dos():
-    doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams."
-    doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams."
-    doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
-    doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_trainable = "Whether the parameters in the fitting net are trainable. This option can be\n\n\
-- bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\
-- list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of tihs list should be equal to len(`neuron`)+1."
-    doc_rcond = "The condition number used to determine the inital energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details."
-    doc_seed = "Random seed for parameter initialization of the fitting net"
-    doc_numb_dos = (
-        "The number of gridpoints on which the DOS is evaluated (NEDOS in VASP)"
-    )
-
-    return [
-        Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam),
-        Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
-        Argument(
-            "neuron", List[int], optional=True, default=[120, 120, 120], doc=doc_neuron
-        ),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("precision", str, optional=True, default="float64", doc=doc_precision),
-        Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
-        Argument(
-            "trainable",
-            [List[bool], bool],
-            optional=True,
-            default=True,
-            doc=doc_trainable,
-        ),
-        Argument(
-            "rcond", [float, type(None)], optional=True, default=None, doc=doc_rcond
-        ),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-        Argument("numb_dos", int, optional=True, default=300, doc=doc_numb_dos),
-    ]
-
-
-@fitting_args_plugin.register("polar")
-def fitting_polar():
-    doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
-    doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_scale = "The output of the fitting net (polarizability matrix) will be scaled by ``scale``"
-    # doc_diag_shift = 'The diagonal part of the polarizability matrix  will be shifted by ``diag_shift``. The shift operation is carried out after ``scale``.'
-    doc_fit_diag = "Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to normal polarizability matrix by contracting with the rotation matrix."
-    doc_sel_type = "The atom types for which the atomic polarizability will be provided. If not set, all types will be selected."
-    doc_seed = "Random seed for parameter initialization of the fitting net"
-
-    # YWolfeee: user can decide whether to use shift diag
-    doc_shift_diag = "Whether to shift the diagonal of polar, which is beneficial to training. Default is true."
-
-    return [
-        Argument(
-            "neuron",
-            List[int],
-            optional=True,
-            default=[120, 120, 120],
-            alias=["n_neuron"],
-            doc=doc_neuron,
-        ),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
-        Argument("precision", str, optional=True, default="default", doc=doc_precision),
-        Argument("fit_diag", bool, optional=True, default=True, doc=doc_fit_diag),
-        Argument(
-            "scale", [List[float], float], optional=True, default=1.0, doc=doc_scale
-        ),
-        # Argument("diag_shift", [list,float], optional = True, default = 0.0, doc = doc_diag_shift),
-        Argument("shift_diag", bool, optional=True, default=True, doc=doc_shift_diag),
-        Argument(
-            "sel_type",
-            [List[int], int, None],
-            optional=True,
-            alias=["pol_type"],
-            doc=doc_sel_type,
-        ),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-    ]
-
-
-# def fitting_global_polar():
-#    return fitting_polar()
-
-
-@fitting_args_plugin.register("dipole")
-def fitting_dipole():
-    doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
-    doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
-    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
-    doc_sel_type = "The atom types for which the atomic dipole will be provided. If not set, all types will be selected."
-    doc_seed = "Random seed for parameter initialization of the fitting net"
-    return [
-        Argument(
-            "neuron",
-            List[int],
-            optional=True,
-            default=[120, 120, 120],
-            alias=["n_neuron"],
-            doc=doc_neuron,
-        ),
-        Argument(
-            "activation_function",
-            str,
-            optional=True,
-            default="tanh",
-            doc=doc_activation_function,
-        ),
-        Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
-        Argument("precision", str, optional=True, default="default", doc=doc_precision),
-        Argument(
-            "sel_type",
-            [List[int], int, None],
-            optional=True,
-            alias=["dipole_type"],
-            doc=doc_sel_type,
-        ),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-    ]
-
-
-#   YWolfeee: Delete global polar mode, merge it into polar mode and use loss setting to support.
-def fitting_variant_type_args():
-    doc_descrpt_type = "The type of the fitting. See explanation below. \n\n\
-- `ener`: Fit an energy model (potential energy surface).\n\n\
-- `dos` : Fit a density of states model. The total density of states / site-projected density of states labels should be provided by `dos.npy` or `atom_dos.npy` in each data system. The file has number of frames lines and number of energy grid columns (times number of atoms in `atom_dos.npy`). See `loss` parameter. \n\n\
-- `dipole`: Fit an atomic dipole model. Global dipole labels or atomic dipole labels for all the selected atoms (see `sel_type`) should be provided by `dipole.npy` in each data system. The file either has number of frames lines and 3 times of number of selected atoms columns, or has number of frames lines and 3 columns. See `loss` parameter.\n\n\
-- `polar`: Fit an atomic polarizability model. Global polarizazbility labels or atomic polarizability labels for all the selected atoms (see `sel_type`) should be provided by `polarizability.npy` in each data system. The file eith has number of frames lines and 9 times of number of selected atoms columns, or has number of frames lines and 9 columns. See `loss` parameter.\n\n"
-
-    return Variant(
-        "type",
-        fitting_args_plugin.get_all_argument(),
-        optional=True,
-        default_tag="ener",
-        doc=doc_descrpt_type,
-    )
-
-
-#  --- Modifier configurations: --- #
-def modifier_dipole_charge():
-    doc_model_name = "The name of the frozen dipole model file."
-    doc_model_charge_map = f"The charge of the WFCC. The list length should be the same as the {make_link('sel_type', 'model/fitting_net[dipole]/sel_type')}. "
-    doc_sys_charge_map = f"The charge of real atoms. The list length should be the same as the {make_link('type_map', 'model/type_map')}"
-    doc_ewald_h = "The grid spacing of the FFT grid. Unit is A"
-    doc_ewald_beta = f"The splitting parameter of Ewald sum. Unit is A^{-1}"
-
-    return [
-        Argument("model_name", str, optional=False, doc=doc_model_name),
-        Argument(
-            "model_charge_map", List[float], optional=False, doc=doc_model_charge_map
-        ),
-        Argument("sys_charge_map", List[float], optional=False, doc=doc_sys_charge_map),
-        Argument("ewald_beta", float, optional=True, default=0.4, doc=doc_ewald_beta),
-        Argument("ewald_h", float, optional=True, default=1.0, doc=doc_ewald_h),
-    ]
-
-
-def modifier_variant_type_args():
-    doc_modifier_type = "The type of modifier. See explanation below.\n\n\
--`dipole_charge`: Use WFCC to model the electronic structure of the system. Correct the long-range interaction"
-    return Variant(
-        "type",
-        [
-            Argument("dipole_charge", dict, modifier_dipole_charge()),
-        ],
-        optional=False,
-        doc=doc_modifier_type,
-    )
-
-
-#  --- model compression configurations: --- #
-def model_compression():
-    doc_model_file = "The input model file, which will be compressed by the DeePMD-kit."
-    doc_table_config = "The arguments of model compression, including extrapolate(scale of model extrapolation), stride(uniform stride of tabulation's first and second table), and frequency(frequency of tabulation overflow check)."
-    doc_min_nbor_dist = (
-        "The nearest distance between neighbor atoms saved in the frozen model."
-    )
-
-    return [
-        Argument("model_file", str, optional=False, doc=doc_model_file),
-        Argument("table_config", List[float], optional=False, doc=doc_table_config),
-        Argument("min_nbor_dist", float, optional=False, doc=doc_min_nbor_dist),
-    ]
-
-
-#  --- model compression configurations: --- #
-def model_compression_type_args():
-    doc_compress_type = "The type of model compression, which should be consistent with the descriptor type."
-
-    return Variant(
-        "type",
-        [Argument("se_e2_a", dict, model_compression(), alias=["se_a"])],
-        optional=True,
-        default_tag="se_e2_a",
-        doc=doc_compress_type,
-    )
-
-
-def model_args(exclude_hybrid=False):
-    doc_type_map = "A list of strings. Give the name to each type of atoms. It is noted that the number of atom type of training system must be less than 128 in a GPU environment. If not given, type.raw in each system should use the same type indexes, and type_map.raw will take no effect."
-    doc_data_stat_nbatch = "The model determines the normalization from the statistics of the data. This key specifies the number of `frames` in each `system` used for statistics."
-    doc_data_stat_protect = "Protect parameter for atomic energy regression."
-    doc_data_bias_nsample = "The number of training samples in a system to compute and change the energy bias."
-    doc_type_embedding = "The type embedding."
-    doc_modifier = "The modifier of model output."
-    doc_use_srtab = "The table for the short-range pairwise interaction added on top of DP. The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. The first colume is the distance between atoms. The second to the last columes are energies for pairs of certain types. For example we have two atom types, 0 and 1. The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly."
-    doc_smin_alpha = "The short-range tabulated interaction will be swithed according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided."
-    doc_sw_rmin = "The lower boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided."
-    doc_sw_rmax = "The upper boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided."
-    doc_srtab_add_bias = "Whether add energy bias from the statistics of the data to short-range tabulated atomic energy. It only takes effect when `use_srtab` is provided."
-    doc_compress_config = "Model compression configurations"
-    doc_spin = "The settings for systems with spin."
-    hybrid_models = []
-    if not exclude_hybrid:
-        hybrid_models.extend(
-            [
-                pairwise_dprc(),
-                linear_ener_model_args(),
-            ]
-        )
-    return Argument(
-        "model",
-        dict,
-        [
-            Argument("type_map", List[str], optional=True, doc=doc_type_map),
-            Argument(
-                "data_stat_nbatch",
-                int,
-                optional=True,
-                default=10,
-                doc=doc_data_stat_nbatch,
-            ),
-            Argument(
-                "data_stat_protect",
-                float,
-                optional=True,
-                default=1e-2,
-                doc=doc_data_stat_protect,
-            ),
-            Argument(
-                "data_bias_nsample",
-                int,
-                optional=True,
-                default=10,
-                doc=doc_data_bias_nsample,
-            ),
-            Argument("use_srtab", str, optional=True, doc=doc_use_srtab),
-            Argument("smin_alpha", float, optional=True, doc=doc_smin_alpha),
-            Argument("sw_rmin", float, optional=True, doc=doc_sw_rmin),
-            Argument("sw_rmax", float, optional=True, doc=doc_sw_rmax),
-            Argument(
-                "srtab_add_bias",
-                bool,
-                optional=True,
-                default=True,
-                doc=doc_srtab_add_bias,
-            ),
-            Argument(
-                "type_embedding",
-                dict,
-                type_embedding_args(),
-                [],
-                optional=True,
-                doc=doc_type_embedding,
-            ),
-            Argument(
-                "modifier",
-                dict,
-                [],
-                [modifier_variant_type_args()],
-                optional=True,
-                doc=doc_modifier,
-            ),
-            Argument(
-                "compress",
-                dict,
-                [],
-                [model_compression_type_args()],
-                optional=True,
-                doc=doc_compress_config,
-                fold_subdoc=True,
-            ),
-            Argument("spin", dict, spin_args(), [], optional=True, doc=doc_spin),
-        ],
-        [
-            Variant(
-                "type",
-                [
-                    standard_model_args(),
-                    multi_model_args(),
-                    frozen_model_args(),
-                    pairtab_model_args(),
-                    *hybrid_models,
-                ],
-                optional=True,
-                default_tag="standard",
-            ),
-        ],
-    )
-
-
-def standard_model_args() -> Argument:
-    doc_descrpt = "The descriptor of atomic environment."
-    doc_fitting = "The fitting of physical properties."
-
-    ca = Argument(
-        "standard",
-        dict,
-        [
-            Argument(
-                "descriptor", dict, [], [descrpt_variant_type_args()], doc=doc_descrpt
-            ),
-            Argument(
-                "fitting_net",
-                dict,
-                [],
-                [fitting_variant_type_args()],
-                doc=doc_fitting,
-            ),
-        ],
-        doc="Stardard model, which contains a descriptor and a fitting.",
-    )
-    return ca
-
-
-def multi_model_args() -> Argument:
-    doc_descrpt = "The descriptor of atomic environment. See model[standard]/descriptor for details."
-    doc_fitting_net_dict = "The dictionary of multiple fitting nets in multi-task mode. Each fitting_net_dict[fitting_key] is the single definition of fitting of physical properties with user-defined name `fitting_key`."
-
-    ca = Argument(
-        "multi",
-        dict,
-        [
-            Argument(
-                "descriptor",
-                dict,
-                [],
-                [descrpt_variant_type_args()],
-                doc=doc_descrpt,
-                fold_subdoc=True,
-            ),
-            Argument("fitting_net_dict", dict, doc=doc_fitting_net_dict),
-        ],
-        doc="Multiple-task model.",
-    )
-    return ca
-
-
-def pairwise_dprc() -> Argument:
-    qm_model_args = model_args(exclude_hybrid=True)
-    qm_model_args.name = "qm_model"
-    qm_model_args.fold_subdoc = True
-    qmmm_model_args = model_args(exclude_hybrid=True)
-    qmmm_model_args.name = "qmmm_model"
-    qmmm_model_args.fold_subdoc = True
-    ca = Argument(
-        "pairwise_dprc",
-        dict,
-        [
-            qm_model_args,
-            qmmm_model_args,
-        ],
-    )
-    return ca
-
-
-def frozen_model_args() -> Argument:
-    doc_model_file = "Path to the frozen model file."
-    ca = Argument(
-        "frozen",
-        dict,
-        [
-            Argument("model_file", str, optional=False, doc=doc_model_file),
-        ],
-    )
-    return ca
-
-
-def pairtab_model_args() -> Argument:
-    doc_tab_file = "Path to the tabulation file."
-    doc_rcut = "The cut-off radius."
-    doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\
-    - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\
-    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\
-    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
-    ca = Argument(
-        "pairtab",
-        dict,
-        [
-            Argument("tab_file", str, optional=False, doc=doc_tab_file),
-            Argument("rcut", float, optional=False, doc=doc_rcut),
-            Argument("sel", [int, List[int], str], optional=False, doc=doc_sel),
-        ],
-        doc="Pairwise tabulation energy model.",
-    )
-    return ca
-
-
-def linear_ener_model_args() -> Argument:
-    doc_weights = (
-        "If the type is list of float, a list of weights for each model. "
-        'If "mean", the weights are set to be 1 / len(models). '
-        'If "sum", the weights are set to be 1.'
-    )
-    models_args = model_args(exclude_hybrid=True)
-    models_args.name = "models"
-    models_args.fold_subdoc = True
-    models_args.set_dtype(list)
-    models_args.set_repeat(True)
-    models_args.doc = "The sub-models."
-    ca = Argument(
-        "linear_ener",
-        dict,
-        [
-            models_args,
-            Argument(
-                "weights",
-                [list, str],
-                optional=False,
-                doc=doc_weights,
-            ),
-        ],
-    )
-    return ca
-
-
-#  --- Learning rate configurations: --- #
-def learning_rate_exp():
-    doc_start_lr = "The learning rate at the start of the training."
-    doc_stop_lr = "The desired learning rate at the end of the training."
-    doc_decay_steps = (
-        "The learning rate is decaying every this number of training steps."
-    )
-
-    args = [
-        Argument("start_lr", float, optional=True, default=1e-3, doc=doc_start_lr),
-        Argument("stop_lr", float, optional=True, default=1e-8, doc=doc_stop_lr),
-        Argument("decay_steps", int, optional=True, default=5000, doc=doc_decay_steps),
-    ]
-    return args
-
-
-def learning_rate_variant_type_args():
-    doc_lr = "The type of the learning rate."
-
-    return Variant(
-        "type",
-        [Argument("exp", dict, learning_rate_exp())],
-        optional=True,
-        default_tag="exp",
-        doc=doc_lr,
-    )
-
-
-def learning_rate_args():
-    doc_scale_by_worker = "When parallel training or batch size scaled, how to alter learning rate. Valid values are `linear`(default), `sqrt` or `none`."
-    doc_lr = "The definitio of learning rate"
-    return Argument(
-        "learning_rate",
-        dict,
-        [
-            Argument(
-                "scale_by_worker",
-                str,
-                optional=True,
-                default="linear",
-                doc=doc_scale_by_worker,
-            )
-        ],
-        [learning_rate_variant_type_args()],
-        optional=True,
-        doc=doc_lr,
-    )
-
-
-def learning_rate_dict_args():
-    doc_learning_rate_dict = (
-        "The dictionary of definitions of learning rates in multi-task mode. "
-        "Each learning_rate_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, is the single definition of learning rate.\n"
-    )
-    ca = Argument(
-        "learning_rate_dict", dict, [], [], optional=True, doc=doc_learning_rate_dict
-    )
-    return ca
-
-
-#  --- Loss configurations: --- #
-def start_pref(item, label=None, abbr=None):
-    if label is None:
-        label = item
-    if abbr is None:
-        abbr = item
-    return f"The prefactor of {item} loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the {label} label should be provided by file {label}.npy in each data system. If both start_pref_{abbr} and limit_pref_{abbr} are set to 0, then the {item} will be ignored."
-
-
-def limit_pref(item):
-    return f"The prefactor of {item} loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity."
-
-
-loss_args_plugin = ArgsPlugin()
-
-
-@loss_args_plugin.register("ener")
-def loss_ener():
-    doc_start_pref_e = start_pref("energy", abbr="e")
-    doc_limit_pref_e = limit_pref("energy")
-    doc_start_pref_f = start_pref("force", abbr="f")
-    doc_limit_pref_f = limit_pref("force")
-    doc_start_pref_v = start_pref("virial", abbr="v")
-    doc_limit_pref_v = limit_pref("virial")
-    doc_start_pref_ae = start_pref("atomic energy", label="atom_ener", abbr="ae")
-    doc_limit_pref_ae = limit_pref("atomic energy")
-    doc_start_pref_pf = start_pref(
-        "atomic prefactor force", label="atom_pref", abbr="pf"
-    )
-    doc_limit_pref_pf = limit_pref("atomic prefactor force")
-    doc_start_pref_gf = start_pref("generalized force", label="drdq", abbr="gf")
-    doc_limit_pref_gf = limit_pref("generalized force")
-    doc_numb_generalized_coord = "The dimension of generalized coordinates. Required when generalized force loss is used."
-    doc_relative_f = "If provided, relative force error will be used in the loss. The difference of force will be normalized by the magnitude of the force in the label with a shift given by `relative_f`, i.e. DF_i / ( || F || + relative_f ) with DF denoting the difference between prediction and label and || F || denoting the L2 norm of the label."
-    doc_enable_atom_ener_coeff = "If true, the energy will be computed as \\sum_i c_i E_i. c_i should be provided by file atom_ener_coeff.npy in each data system, otherwise it's 1."
-    return [
-        Argument(
-            "start_pref_e",
-            [float, int],
-            optional=True,
-            default=0.02,
-            doc=doc_start_pref_e,
-        ),
-        Argument(
-            "limit_pref_e",
-            [float, int],
-            optional=True,
-            default=1.00,
-            doc=doc_limit_pref_e,
-        ),
-        Argument(
-            "start_pref_f",
-            [float, int],
-            optional=True,
-            default=1000,
-            doc=doc_start_pref_f,
-        ),
-        Argument(
-            "limit_pref_f",
-            [float, int],
-            optional=True,
-            default=1.00,
-            doc=doc_limit_pref_f,
-        ),
-        Argument(
-            "start_pref_v",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_start_pref_v,
-        ),
-        Argument(
-            "limit_pref_v",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_limit_pref_v,
-        ),
-        Argument(
-            "start_pref_ae",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_start_pref_ae,
-        ),
-        Argument(
-            "limit_pref_ae",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_limit_pref_ae,
-        ),
-        Argument(
-            "start_pref_pf",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_start_pref_pf,
-        ),
-        Argument(
-            "limit_pref_pf",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_limit_pref_pf,
-        ),
-        Argument("relative_f", [float, None], optional=True, doc=doc_relative_f),
-        Argument(
-            "enable_atom_ener_coeff",
-            [bool],
-            optional=True,
-            default=False,
-            doc=doc_enable_atom_ener_coeff,
-        ),
-        Argument(
-            "start_pref_gf",
-            float,
-            optional=True,
-            default=0.0,
-            doc=doc_start_pref_gf,
-        ),
-        Argument(
-            "limit_pref_gf",
-            float,
-            optional=True,
-            default=0.0,
-            doc=doc_limit_pref_gf,
-        ),
-        Argument(
-            "numb_generalized_coord",
-            int,
-            optional=True,
-            default=0,
-            doc=doc_numb_generalized_coord,
-        ),
-    ]
-
-
-@loss_args_plugin.register("ener_spin")
-def loss_ener_spin():
-    doc_start_pref_e = start_pref("energy")
-    doc_limit_pref_e = limit_pref("energy")
-    doc_start_pref_fr = start_pref("force_real_atom")
-    doc_limit_pref_fr = limit_pref("force_real_atom")
-    doc_start_pref_fm = start_pref("force_magnetic")
-    doc_limit_pref_fm = limit_pref("force_magnetic")
-    doc_start_pref_v = start_pref("virial")
-    doc_limit_pref_v = limit_pref("virial")
-    doc_start_pref_ae = start_pref("atom_ener")
-    doc_limit_pref_ae = limit_pref("atom_ener")
-    doc_start_pref_pf = start_pref("atom_pref")
-    doc_limit_pref_pf = limit_pref("atom_pref")
-    doc_relative_f = "If provided, relative force error will be used in the loss. The difference of force will be normalized by the magnitude of the force in the label with a shift given by `relative_f`, i.e. DF_i / ( || F || + relative_f ) with DF denoting the difference between prediction and label and || F || denoting the L2 norm of the label."
-    doc_enable_atom_ener_coeff = r"If true, the energy will be computed as \sum_i c_i E_i. c_i should be provided by file atom_ener_coeff.npy in each data system, otherwise it's 1."
-    return [
-        Argument(
-            "start_pref_e",
-            [float, int],
-            optional=True,
-            default=0.02,
-            doc=doc_start_pref_e,
-        ),
-        Argument(
-            "limit_pref_e",
-            [float, int],
-            optional=True,
-            default=1.00,
-            doc=doc_limit_pref_e,
-        ),
-        Argument(
-            "start_pref_fr",
-            [float, int],
-            optional=True,
-            default=1000,
-            doc=doc_start_pref_fr,
-        ),
-        Argument(
-            "limit_pref_fr",
-            [float, int],
-            optional=True,
-            default=1.00,
-            doc=doc_limit_pref_fr,
-        ),
-        Argument(
-            "start_pref_fm",
-            [float, int],
-            optional=True,
-            default=10000,
-            doc=doc_start_pref_fm,
-        ),
-        Argument(
-            "limit_pref_fm",
-            [float, int],
-            optional=True,
-            default=10.0,
-            doc=doc_limit_pref_fm,
-        ),
-        Argument(
-            "start_pref_v",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_start_pref_v,
-        ),
-        Argument(
-            "limit_pref_v",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_limit_pref_v,
-        ),
-        Argument(
-            "start_pref_ae",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_start_pref_ae,
-        ),
-        Argument(
-            "limit_pref_ae",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_limit_pref_ae,
-        ),
-        Argument(
-            "start_pref_pf",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_start_pref_pf,
-        ),
-        Argument(
-            "limit_pref_pf",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_limit_pref_pf,
-        ),
-        Argument("relative_f", [float, None], optional=True, doc=doc_relative_f),
-        Argument(
-            "enable_atom_ener_coeff",
-            [bool],
-            optional=True,
-            default=False,
-            doc=doc_enable_atom_ener_coeff,
-        ),
-    ]
-
-
-@loss_args_plugin.register("dos")
-def loss_dos():
-    doc_start_pref_dos = start_pref("Density of State (DOS)")
-    doc_limit_pref_dos = limit_pref("Density of State (DOS)")
-    doc_start_pref_cdf = start_pref(
-        "Cumulative Distribution Function (cumulative intergral of DOS)"
-    )
-    doc_limit_pref_cdf = limit_pref(
-        "Cumulative Distribution Function (cumulative intergral of DOS)"
-    )
-    doc_start_pref_ados = start_pref("atomic DOS (site-projected DOS)")
-    doc_limit_pref_ados = limit_pref("atomic DOS (site-projected DOS)")
-    doc_start_pref_acdf = start_pref("Cumulative integral of atomic DOS")
-    doc_limit_pref_acdf = limit_pref("Cumulative integral of atomic DOS")
-    return [
-        Argument(
-            "start_pref_dos",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_start_pref_dos,
-        ),
-        Argument(
-            "limit_pref_dos",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_limit_pref_dos,
-        ),
-        Argument(
-            "start_pref_cdf",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_start_pref_cdf,
-        ),
-        Argument(
-            "limit_pref_cdf",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_limit_pref_cdf,
-        ),
-        Argument(
-            "start_pref_ados",
-            [float, int],
-            optional=True,
-            default=1.00,
-            doc=doc_start_pref_ados,
-        ),
-        Argument(
-            "limit_pref_ados",
-            [float, int],
-            optional=True,
-            default=1.00,
-            doc=doc_limit_pref_ados,
-        ),
-        Argument(
-            "start_pref_acdf",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_start_pref_acdf,
-        ),
-        Argument(
-            "limit_pref_acdf",
-            [float, int],
-            optional=True,
-            default=0.00,
-            doc=doc_limit_pref_acdf,
-        ),
-    ]
-
-
-# YWolfeee: Modified to support tensor type of loss args.
-@loss_args_plugin.register("tensor")
-def loss_tensor():
-    # doc_global_weight = "The prefactor of the weight of global loss. It should be larger than or equal to 0. If only `pref` is provided or both are not provided, training will be global mode, i.e. the shape of 'polarizability.npy` or `dipole.npy` should be #frams x [9 or 3]."
-    # doc_local_weight =  "The prefactor of the weight of atomic loss. It should be larger than or equal to 0. If only `pref_atomic` is provided, training will be atomic mode, i.e. the shape of `polarizability.npy` or `dipole.npy` should be #frames x ([9 or 3] x #selected atoms). If both `pref` and `pref_atomic` are provided, training will be combined mode, and atomic label should be provided as well."
-    doc_global_weight = "The prefactor of the weight of global loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to global label, i.e. 'polarizability.npy` or `dipole.npy`, whose shape should be #frames x [9 or 3]. If it's larger than 0.0, this npy should be included."
-    doc_local_weight = "The prefactor of the weight of atomic loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to atomic label, i.e. `atomic_polarizability.npy` or `atomic_dipole.npy`, whose shape should be #frames x ([9 or 3] x #selected atoms). If it's larger than 0.0, this npy should be included. Both `pref` and `pref_atomic` should be provided, and either can be set to 0.0."
-    return [
-        Argument(
-            "pref", [float, int], optional=False, default=None, doc=doc_global_weight
-        ),
-        Argument(
-            "pref_atomic",
-            [float, int],
-            optional=False,
-            default=None,
-            doc=doc_local_weight,
-        ),
-    ]
-
-
-def loss_variant_type_args():
-    doc_loss = "The type of the loss. When the fitting type is `ener`, the loss type should be set to `ener` or left unset. When the fitting type is `dipole` or `polar`, the loss type should be set to `tensor`."
-
-    return Variant(
-        "type",
-        loss_args_plugin.get_all_argument(),
-        optional=True,
-        default_tag="ener",
-        doc=doc_loss,
-    )
-
-
-def loss_args():
-    doc_loss = "The definition of loss function. The loss type should be set to `tensor`, `ener` or left unset."
-    ca = Argument(
-        "loss", dict, [], [loss_variant_type_args()], optional=True, doc=doc_loss
-    )
-    return ca
-
-
-def loss_dict_args():
-    doc_loss_dict = (
-        "The dictionary of definitions of multiple loss functions in multi-task mode. "
-        "Each loss_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, is the single definition of loss function, whose type should be set to `tensor`, `ener` or left unset.\n"
-    )
-    ca = Argument("loss_dict", dict, [], [], optional=True, doc=doc_loss_dict)
-    return ca
-
-
-#  --- Training configurations: --- #
-def training_data_args():  # ! added by Ziyao: new specification style for data systems.
-    link_sys = make_link("systems", "training/training_data/systems")
-    doc_systems = (
-        "The data systems for training. "
-        "This key can be provided with a list that specifies the systems, or be provided with a string "
-        "by which the prefix of all systems are given and the list of the systems is automatically generated."
-    )
-    doc_set_prefix = f"The prefix of the sets in the {link_sys}."
-    doc_batch_size = f'This key can be \n\n\
-- list: the length of which is the same as the {link_sys}. The batch size of each system is given by the elements of the list.\n\n\
-- int: all {link_sys} use the same batch size.\n\n\
-- string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.\n\n\
-- string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.\n\n\
-- string "mixed:N": the batch data will be sampled from all systems and merged into a mixed system with the batch size N. Only support the se_atten descriptor.\n\n\
-If MPI is used, the value should be considered as the batch size per task.'
-    doc_auto_prob_style = 'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n\
-- "prob_uniform"  : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\
-- "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\
-- "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.'
-    doc_sys_probs = (
-        "A list of float if specified. "
-        "Should be of the same length as `systems`, "
-        "specifying the probability of each system."
-    )
-
-    args = [
-        Argument(
-            "systems", [List[str], str], optional=False, default=".", doc=doc_systems
-        ),
-        Argument("set_prefix", str, optional=True, default="set", doc=doc_set_prefix),
-        Argument(
-            "batch_size",
-            [List[int], int, str],
-            optional=True,
-            default="auto",
-            doc=doc_batch_size,
-        ),
-        Argument(
-            "auto_prob",
-            str,
-            optional=True,
-            default="prob_sys_size",
-            doc=doc_auto_prob_style,
-            alias=[
-                "auto_prob_style",
-            ],
-        ),
-        Argument(
-            "sys_probs",
-            List[float],
-            optional=True,
-            default=None,
-            doc=doc_sys_probs,
-            alias=["sys_weights"],
-        ),
-    ]
-
-    doc_training_data = "Configurations of training data."
-    return Argument(
-        "training_data",
-        dict,
-        optional=True,
-        sub_fields=args,
-        sub_variants=[],
-        doc=doc_training_data,
-    )
-
-
-def validation_data_args():  # ! added by Ziyao: new specification style for data systems.
-    link_sys = make_link("systems", "training/validation_data/systems")
-    doc_systems = (
-        "The data systems for validation. "
-        "This key can be provided with a list that specifies the systems, or be provided with a string "
-        "by which the prefix of all systems are given and the list of the systems is automatically generated."
-    )
-    doc_set_prefix = f"The prefix of the sets in the {link_sys}."
-    doc_batch_size = f'This key can be \n\n\
-- list: the length of which is the same as the {link_sys}. The batch size of each system is given by the elements of the list.\n\n\
-- int: all {link_sys} use the same batch size.\n\n\
-- string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.\n\n\
-- string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.'
-    doc_auto_prob_style = 'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n\
-- "prob_uniform"  : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\
-- "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\
-- "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.'
-    doc_sys_probs = (
-        "A list of float if specified. "
-        "Should be of the same length as `systems`, "
-        "specifying the probability of each system."
-    )
-    doc_numb_btch = "An integer that specifies the number of batches to be sampled for each validation period."
-
-    args = [
-        Argument(
-            "systems", [List[str], str], optional=False, default=".", doc=doc_systems
-        ),
-        Argument("set_prefix", str, optional=True, default="set", doc=doc_set_prefix),
-        Argument(
-            "batch_size",
-            [List[int], int, str],
-            optional=True,
-            default="auto",
-            doc=doc_batch_size,
-        ),
-        Argument(
-            "auto_prob",
-            str,
-            optional=True,
-            default="prob_sys_size",
-            doc=doc_auto_prob_style,
-            alias=[
-                "auto_prob_style",
-            ],
-        ),
-        Argument(
-            "sys_probs",
-            List[float],
-            optional=True,
-            default=None,
-            doc=doc_sys_probs,
-            alias=["sys_weights"],
-        ),
-        Argument(
-            "numb_btch",
-            int,
-            optional=True,
-            default=1,
-            doc=doc_numb_btch,
-            alias=[
-                "numb_batch",
-            ],
-        ),
-    ]
-
-    doc_validation_data = (
-        "Configurations of validation data. Similar to that of training data, "
-        "except that a `numb_btch` argument may be configured"
-    )
-    return Argument(
-        "validation_data",
-        dict,
-        optional=True,
-        default=None,
-        sub_fields=args,
-        sub_variants=[],
-        doc=doc_validation_data,
-    )
-
-
-def mixed_precision_args():  # ! added by Denghui.
-    doc_output_prec = 'The precision for mixed precision params. " \
-        "The trainable variables precision during the mixed precision training process, " \
-        "supported options are float32 only currently.'
-    doc_compute_prec = 'The precision for mixed precision compute. " \
-        "The compute precision during the mixed precision training process, "" \
-        "supported options are float16 and bfloat16 currently.'
-
-    args = [
-        Argument(
-            "output_prec", str, optional=True, default="float32", doc=doc_output_prec
-        ),
-        Argument(
-            "compute_prec", str, optional=False, default="float16", doc=doc_compute_prec
-        ),
-    ]
-
-    doc_mixed_precision = "Configurations of mixed precision."
-    return Argument(
-        "mixed_precision",
-        dict,
-        optional=True,
-        sub_fields=args,
-        sub_variants=[],
-        doc=doc_mixed_precision,
-    )
-
-
-def training_args():  # ! modified by Ziyao: data configuration isolated.
-    doc_numb_steps = "Number of training batch. Each training uses one batch of data."
-    doc_seed = "The random seed for getting frames from the training data set."
-    doc_disp_file = "The file for printing learning curve."
-    doc_disp_freq = "The frequency of printing learning curve."
-    doc_save_freq = "The frequency of saving check point."
-    doc_save_ckpt = "The path prefix of saving check point files."
-    doc_disp_training = "Displaying verbose information during training."
-    doc_time_training = "Timing durining training."
-    doc_profiling = "Profiling during training."
-    doc_profiling_file = "Output file for profiling."
-    doc_enable_profiler = "Enable TensorFlow Profiler (available in TensorFlow 2.3) to analyze performance. The log will be saved to `tensorboard_log_dir`."
-    doc_tensorboard = "Enable tensorboard"
-    doc_tensorboard_log_dir = "The log directory of tensorboard outputs"
-    doc_tensorboard_freq = "The frequency of writing tensorboard events."
-    doc_data_dict = (
-        "The dictionary of multi DataSystems in multi-task mode. "
-        "Each data_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, "
-        "contains training data and optional validation data definitions."
-    )
-    doc_fitting_weight = (
-        "Each fitting_weight[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, "
-        "is the training weight of fitting net `fitting_key`. "
-        "Fitting nets with higher weights will be selected with higher probabilities to be trained in one step. "
-        "Weights will be normalized and minus ones will be ignored. "
-        "If not set, each fitting net will be equally selected when training."
-    )
-
-    arg_training_data = training_data_args()
-    arg_validation_data = validation_data_args()
-    mixed_precision_data = mixed_precision_args()
-
-    args = [
-        arg_training_data,
-        arg_validation_data,
-        mixed_precision_data,
-        Argument(
-            "numb_steps", int, optional=False, doc=doc_numb_steps, alias=["stop_batch"]
-        ),
-        Argument("seed", [int, None], optional=True, doc=doc_seed),
-        Argument(
-            "disp_file", str, optional=True, default="lcurve.out", doc=doc_disp_file
-        ),
-        Argument("disp_freq", int, optional=True, default=1000, doc=doc_disp_freq),
-        Argument("save_freq", int, optional=True, default=1000, doc=doc_save_freq),
-        Argument(
-            "save_ckpt", str, optional=True, default="model.ckpt", doc=doc_save_ckpt
-        ),
-        Argument(
-            "disp_training", bool, optional=True, default=True, doc=doc_disp_training
-        ),
-        Argument(
-            "time_training", bool, optional=True, default=True, doc=doc_time_training
-        ),
-        Argument("profiling", bool, optional=True, default=False, doc=doc_profiling),
-        Argument(
-            "profiling_file",
-            str,
-            optional=True,
-            default="timeline.json",
-            doc=doc_profiling_file,
-        ),
-        Argument(
-            "enable_profiler",
-            bool,
-            optional=True,
-            default=False,
-            doc=doc_enable_profiler,
-        ),
-        Argument(
-            "tensorboard", bool, optional=True, default=False, doc=doc_tensorboard
-        ),
-        Argument(
-            "tensorboard_log_dir",
-            str,
-            optional=True,
-            default="log",
-            doc=doc_tensorboard_log_dir,
-        ),
-        Argument(
-            "tensorboard_freq", int, optional=True, default=1, doc=doc_tensorboard_freq
-        ),
-        Argument("data_dict", dict, optional=True, doc=doc_data_dict),
-        Argument("fitting_weight", dict, optional=True, doc=doc_fitting_weight),
-    ]
-
-    doc_training = "The training options."
-    return Argument("training", dict, args, [], doc=doc_training)
-
-
-def make_index(keys):
-    ret = []
-    for ii in keys:
-        ret.append(make_link(ii, ii))
-    return ", ".join(ret)
-
-
-def gen_doc(*, make_anchor=True, make_link=True, **kwargs):
-    if make_link:
-        make_anchor = True
-    ptr = []
-    for ii in gen_args():
-        ptr.append(ii.gen_doc(make_anchor=make_anchor, make_link=make_link, **kwargs))
-
-    key_words = []
-    for ii in "\n\n".join(ptr).split("\n"):
-        if "argument path" in ii:
-            key_words.append(ii.split(":")[1].replace("`", "").strip())
-    # ptr.insert(0, make_index(key_words))
-
-    return "\n\n".join(ptr)
-
-
-def gen_json(**kwargs):
-    return json.dumps(
-        tuple(gen_args()),
-        cls=ArgumentEncoder,
-    )
-
-
-def gen_args(**kwargs) -> List[Argument]:
-    return [
-        model_args(),
-        learning_rate_args(),
-        learning_rate_dict_args(),
-        loss_args(),
-        loss_dict_args(),
-        training_args(),
-        nvnmd_args(),
-    ]
-
-
-def normalize_multi_task(data):
-    # single-task or multi-task mode
-    if data["model"].get("type", "standard") not in ("standard", "multi"):
-        return data
-    single_fitting_net = "fitting_net" in data["model"].keys()
-    single_training_data = "training_data" in data["training"].keys()
-    single_valid_data = "validation_data" in data["training"].keys()
-    single_loss = "loss" in data.keys()
-    single_learning_rate = "learning_rate" in data.keys()
-    multi_fitting_net = "fitting_net_dict" in data["model"].keys()
-    multi_training_data = "data_dict" in data["training"].keys()
-    multi_loss = "loss_dict" in data.keys()
-    multi_fitting_weight = "fitting_weight" in data["training"].keys()
-    multi_learning_rate = "learning_rate_dict" in data.keys()
-    assert (single_fitting_net == single_training_data) and (
-        multi_fitting_net == multi_training_data
-    ), (
-        "In single-task mode, 'model/fitting_net' and 'training/training_data' must be defined at the same time! "
-        "While in multi-task mode, 'model/fitting_net_dict', 'training/data_dict' "
-        "must be defined at the same time! Please check your input script. "
-    )
-    assert not (single_fitting_net and multi_fitting_net), (
-        "Single-task mode and multi-task mode can not be performed together. "
-        "Please check your input script and choose just one format! "
-    )
-    assert (
-        single_fitting_net or multi_fitting_net
-    ), "Please define your fitting net and training data! "
-    if multi_fitting_net:
-        assert not single_valid_data, (
-            "In multi-task mode, 'training/validation_data' should not appear "
-            "outside 'training/data_dict'! Please check your input script."
-        )
-        assert (
-            not single_loss
-        ), "In multi-task mode, please use 'model/loss_dict' in stead of 'model/loss'! "
-        assert (
-            "type_map" in data["model"]
-        ), "In multi-task mode, 'model/type_map' must be defined! "
-        data["model"]["type"] = "multi"
-        data["model"]["fitting_net_dict"] = normalize_fitting_net_dict(
-            data["model"]["fitting_net_dict"]
-        )
-        data["training"]["data_dict"] = normalize_data_dict(
-            data["training"]["data_dict"]
-        )
-        data["loss_dict"] = (
-            normalize_loss_dict(
-                data["model"]["fitting_net_dict"].keys(), data["loss_dict"]
-            )
-            if multi_loss
-            else {}
-        )
-        if multi_learning_rate:
-            data["learning_rate_dict"] = normalize_learning_rate_dict(
-                data["model"]["fitting_net_dict"].keys(), data["learning_rate_dict"]
-            )
-        elif single_learning_rate:
-            data[
-                "learning_rate_dict"
-            ] = normalize_learning_rate_dict_with_single_learning_rate(
-                data["model"]["fitting_net_dict"].keys(), data["learning_rate"]
-            )
-        fitting_weight = (
-            data["training"]["fitting_weight"] if multi_fitting_weight else None
-        )
-        data["training"]["fitting_weight"] = normalize_fitting_weight(
-            data["model"]["fitting_net_dict"].keys(),
-            data["training"]["data_dict"].keys(),
-            fitting_weight=fitting_weight,
-        )
-    else:
-        assert not multi_loss, "In single-task mode, please use 'model/loss' in stead of 'model/loss_dict'! "
-        assert not multi_learning_rate, "In single-task mode, please use 'model/learning_rate' in stead of 'model/learning_rate_dict'! "
-    return data
-
-
-def normalize_fitting_net_dict(fitting_net_dict):
-    new_dict = {}
-    base = Argument("base", dict, [], [fitting_variant_type_args()], doc="")
-    for fitting_key_item in fitting_net_dict:
-        data = base.normalize_value(
-            fitting_net_dict[fitting_key_item], trim_pattern="_*"
-        )
-        base.check_value(data, strict=True)
-        new_dict[fitting_key_item] = data
-    return new_dict
-
-
-def normalize_data_dict(data_dict):
-    new_dict = {}
-    base = Argument(
-        "base", dict, [training_data_args(), validation_data_args()], [], doc=""
-    )
-    for data_system_key_item in data_dict:
-        data = base.normalize_value(data_dict[data_system_key_item], trim_pattern="_*")
-        base.check_value(data, strict=True)
-        new_dict[data_system_key_item] = data
-    return new_dict
-
-
-def normalize_loss_dict(fitting_keys, loss_dict):
-    # check the loss dict
-    failed_loss_keys = [item for item in loss_dict if item not in fitting_keys]
-    assert (
-        not failed_loss_keys
-    ), "Loss dict key(s) {} not have corresponding fitting keys in {}! ".format(
-        str(failed_loss_keys), str(list(fitting_keys))
-    )
-    new_dict = {}
-    base = Argument("base", dict, [], [loss_variant_type_args()], doc="")
-    for item in loss_dict:
-        data = base.normalize_value(loss_dict[item], trim_pattern="_*")
-        base.check_value(data, strict=True)
-        new_dict[item] = data
-    return new_dict
-
-
-def normalize_learning_rate_dict(fitting_keys, learning_rate_dict):
-    # check the learning_rate dict
-    failed_learning_rate_keys = [
-        item for item in learning_rate_dict if item not in fitting_keys
-    ]
-    assert not failed_learning_rate_keys, "Learning rate dict key(s) {} not have corresponding fitting keys in {}! ".format(
-        str(failed_learning_rate_keys), str(list(fitting_keys))
-    )
-    new_dict = {}
-    base = Argument("base", dict, [], [learning_rate_variant_type_args()], doc="")
-    for item in learning_rate_dict:
-        data = base.normalize_value(learning_rate_dict[item], trim_pattern="_*")
-        base.check_value(data, strict=True)
-        new_dict[item] = data
-    return new_dict
-
-
-def normalize_learning_rate_dict_with_single_learning_rate(fitting_keys, learning_rate):
-    new_dict = {}
-    base = Argument("base", dict, [], [learning_rate_variant_type_args()], doc="")
-    data = base.normalize_value(learning_rate, trim_pattern="_*")
-    base.check_value(data, strict=True)
-    for fitting_key in fitting_keys:
-        new_dict[fitting_key] = data
-    return new_dict
-
-
-def normalize_fitting_weight(fitting_keys, data_keys, fitting_weight=None):
-    # check the mapping
-    failed_data_keys = [item for item in data_keys if item not in fitting_keys]
-    assert (
-        not failed_data_keys
-    ), "Data dict key(s) {} not have corresponding fitting keys in {}! ".format(
-        str(failed_data_keys), str(list(fitting_keys))
-    )
-    empty_fitting_keys = []
-    valid_fitting_keys = []
-    for item in fitting_keys:
-        if item not in data_keys:
-            empty_fitting_keys.append(item)
-        else:
-            valid_fitting_keys.append(item)
-    if empty_fitting_keys:
-        log.warning(
-            "Fitting net(s) {} have no data and will not be used in training.".format(
-                str(empty_fitting_keys)
-            )
-        )
-    num_pair = len(valid_fitting_keys)
-    assert num_pair > 0, "No valid training data systems for fitting nets!"
-
-    # check and normalize the fitting weight
-    new_weight = {}
-    if fitting_weight is None:
-        equal_weight = 1.0 / num_pair
-        for item in fitting_keys:
-            new_weight[item] = equal_weight if item in valid_fitting_keys else 0.0
-    else:
-        failed_weight_keys = [
-            item for item in fitting_weight if item not in fitting_keys
-        ]
-        assert not failed_weight_keys, "Fitting weight key(s) {} not have corresponding fitting keys in {}! ".format(
-            str(failed_weight_keys), str(list(fitting_keys))
-        )
-        sum_prob = 0.0
-        for item in fitting_keys:
-            if item in valid_fitting_keys:
-                if (
-                    item in fitting_weight
-                    and isinstance(fitting_weight[item], (int, float))
-                    and fitting_weight[item] > 0.0
-                ):
-                    sum_prob += fitting_weight[item]
-                    new_weight[item] = fitting_weight[item]
-                else:
-                    valid_fitting_keys.remove(item)
-                    log.warning(
-                        f"Fitting net '{item}' has zero or invalid weight "
-                        "and will not be used in training."
-                    )
-                    new_weight[item] = 0.0
-            else:
-                new_weight[item] = 0.0
-        assert sum_prob > 0.0, "No valid training weight for fitting nets!"
-        # normalize
-        for item in new_weight:
-            new_weight[item] /= sum_prob
-    return new_weight
-
-
-def normalize(data):
-    data = normalize_multi_task(data)
-
-    base = Argument("base", dict, gen_args())
-    data = base.normalize_value(data, trim_pattern="_*")
-    base.check_value(data, strict=True)
-
-    return data
-
-
-if __name__ == "__main__":
-    gen_doc()
diff --git a/deepmd_utils/utils/batch_size.py b/deepmd_utils/utils/batch_size.py
deleted file mode 100644
index 1b93a51242..0000000000
--- a/deepmd_utils/utils/batch_size.py
+++ /dev/null
@@ -1,233 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-import logging
-import os
-from abc import (
-    ABC,
-    abstractmethod,
-)
-from typing import (
-    Callable,
-    Tuple,
-)
-
-import numpy as np
-
-from deepmd_utils.utils.errors import (
-    OutOfMemoryError,
-)
-
-log = logging.getLogger(__name__)
-
-
-class AutoBatchSize(ABC):
-    """This class allows DeePMD-kit to automatically decide the maximum
-    batch size that will not cause an OOM error.
-
-    Notes
-    -----
-    In some CPU environments, the program may be directly killed when OOM. In
-    this case, by default the batch size will not be increased for CPUs. The
-    environment variable `DP_INFER_BATCH_SIZE` can be set as the batch size.
-
-    In other cases, we assume all OOM error will raise :class:`OutOfMemoryError`.
-
-    Parameters
-    ----------
-    initial_batch_size : int, default: 1024
-        initial batch size (number of total atoms) when DP_INFER_BATCH_SIZE
-        is not set
-    factor : float, default: 2.
-        increased factor
-
-    Attributes
-    ----------
-    current_batch_size : int
-        current batch size (number of total atoms)
-    maximum_working_batch_size : int
-        maximum working batch size
-    minimal_not_working_batch_size : int
-        minimal not working batch size
-    """
-
-    def __init__(self, initial_batch_size: int = 1024, factor: float = 2.0) -> None:
-        # See also PyTorchLightning/pytorch-lightning#1638
-        # TODO: discuss a proper initial batch size
-        self.current_batch_size = initial_batch_size
-        DP_INFER_BATCH_SIZE = int(os.environ.get("DP_INFER_BATCH_SIZE", 0))
-        if DP_INFER_BATCH_SIZE > 0:
-            self.current_batch_size = DP_INFER_BATCH_SIZE
-            self.maximum_working_batch_size = DP_INFER_BATCH_SIZE
-            self.minimal_not_working_batch_size = self.maximum_working_batch_size + 1
-        else:
-            self.maximum_working_batch_size = initial_batch_size
-            if self.is_gpu_available():
-                self.minimal_not_working_batch_size = 2**31
-            else:
-                self.minimal_not_working_batch_size = (
-                    self.maximum_working_batch_size + 1
-                )
-                log.warning(
-                    "You can use the environment variable DP_INFER_BATCH_SIZE to"
-                    "control the inference batch size (nframes * natoms). "
-                    "The default value is %d." % initial_batch_size
-                )
-
-        self.factor = factor
-
-    def execute(
-        self, callable: Callable, start_index: int, natoms: int
-    ) -> Tuple[int, tuple]:
-        """Excuate a method with given batch size.
-
-        Parameters
-        ----------
-        callable : Callable
-            The method should accept the batch size and start_index as parameters,
-            and returns executed batch size and data.
-        start_index : int
-            start index
-        natoms : int
-            natoms
-
-        Returns
-        -------
-        int
-            executed batch size * number of atoms
-        tuple
-            result from callable, None if failing to execute
-
-        Raises
-        ------
-        OutOfMemoryError
-            OOM when batch size is 1
-        """
-        if natoms > 0:
-            batch_nframes = self.current_batch_size // natoms
-        else:
-            batch_nframes = self.current_batch_size
-        try:
-            n_batch, result = callable(max(batch_nframes, 1), start_index)
-        except Exception as e:
-            if not self.is_oom_error(e):
-                raise e
-            self.minimal_not_working_batch_size = min(
-                self.minimal_not_working_batch_size, self.current_batch_size
-            )
-            if self.maximum_working_batch_size >= self.minimal_not_working_batch_size:
-                self.maximum_working_batch_size = int(
-                    self.minimal_not_working_batch_size / self.factor
-                )
-            if self.minimal_not_working_batch_size <= natoms:
-                raise OutOfMemoryError(
-                    "The callable still throws an out-of-memory (OOM) error even when batch size is 1!"
-                ) from e
-            # adjust the next batch size
-            self._adjust_batch_size(1.0 / self.factor)
-            return 0, None
-        else:
-            n_tot = n_batch * natoms
-            self.maximum_working_batch_size = max(
-                self.maximum_working_batch_size, n_tot
-            )
-            # adjust the next batch size
-            if (
-                n_tot + natoms > self.current_batch_size
-                and self.current_batch_size * self.factor
-                < self.minimal_not_working_batch_size
-            ):
-                self._adjust_batch_size(self.factor)
-            return n_batch, result
-
-    def _adjust_batch_size(self, factor: float):
-        old_batch_size = self.current_batch_size
-        self.current_batch_size = int(self.current_batch_size * factor)
-        log.info(
-            "Adjust batch size from %d to %d"
-            % (old_batch_size, self.current_batch_size)
-        )
-
-    def execute_all(
-        self, callable: Callable, total_size: int, natoms: int, *args, **kwargs
-    ) -> Tuple[np.ndarray]:
-        """Excuate a method with all given data.
-
-        Parameters
-        ----------
-        callable : Callable
-            The method should accept *args and **kwargs as input and return the similiar array.
-        total_size : int
-            Total size
-        natoms : int
-            The number of atoms
-        *args
-            Variable length argument list.
-        **kwargs
-            If 2D np.ndarray, assume the first axis is batch; otherwise do nothing.
-        """
-
-        def execute_with_batch_size(
-            batch_size: int, start_index: int
-        ) -> Tuple[int, Tuple[np.ndarray]]:
-            end_index = start_index + batch_size
-            end_index = min(end_index, total_size)
-            return (end_index - start_index), callable(
-                *[
-                    (
-                        vv[start_index:end_index]
-                        if isinstance(vv, np.ndarray) and vv.ndim > 1
-                        else vv
-                    )
-                    for vv in args
-                ],
-                **{
-                    kk: (
-                        vv[start_index:end_index]
-                        if isinstance(vv, np.ndarray) and vv.ndim > 1
-                        else vv
-                    )
-                    for kk, vv in kwargs.items()
-                },
-            )
-
-        index = 0
-        results = []
-        while index < total_size:
-            n_batch, result = self.execute(execute_with_batch_size, index, natoms)
-            if not isinstance(result, tuple):
-                result = (result,)
-            index += n_batch
-            if n_batch:
-                for rr in result:
-                    rr.reshape((n_batch, -1))
-                results.append(result)
-
-        r = tuple([np.concatenate(r, axis=0) for r in zip(*results)])
-        if len(r) == 1:
-            # avoid returning tuple if callable doesn't return tuple
-            r = r[0]
-        return r
-
-    @abstractmethod
-    def is_gpu_available(self) -> bool:
-        """Check if GPU is available.
-
-        Returns
-        -------
-        bool
-            True if GPU is available
-        """
-
-    @abstractmethod
-    def is_oom_error(self, e: Exception) -> bool:
-        """Check if the exception is an OOM error.
-
-        Parameters
-        ----------
-        e : Exception
-            Exception
-
-        Returns
-        -------
-        bool
-            True if the exception is an OOM error
-        """
diff --git a/deepmd_utils/utils/compat.py b/deepmd_utils/utils/compat.py
deleted file mode 100644
index 5f9c14e6d8..0000000000
--- a/deepmd_utils/utils/compat.py
+++ /dev/null
@@ -1,392 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-"""Module providing compatibility between `0.x.x` and `1.x.x` input versions."""
-
-import json
-import warnings
-from pathlib import (
-    Path,
-)
-from typing import (
-    Any,
-    Dict,
-    Optional,
-    Sequence,
-    Union,
-)
-
-import numpy as np
-
-from deepmd.common import (
-    j_must_have,
-)
-
-
-def convert_input_v0_v1(
-    jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None
-) -> Dict[str, Any]:
-    """Convert input from v0 format to v1.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        loaded json/yaml file
-    warning : bool, optional
-        whether to show deprecation warning, by default True
-    dump : Optional[Union[str, Path]], optional
-        whether to dump converted file, by default None
-
-    Returns
-    -------
-    Dict[str, Any]
-        converted output
-    """
-    output = {}
-    output["model"] = _model(jdata, jdata["use_smooth"])
-    output["learning_rate"] = _learning_rate(jdata)
-    output["loss"] = _loss(jdata)
-    output["training"] = _training(jdata)
-    if warning:
-        _warning_input_v0_v1(dump)
-    if dump is not None:
-        with open(dump, "w") as fp:
-            json.dump(output, fp, indent=4)
-    return output
-
-
-def _warning_input_v0_v1(fname: Optional[Union[str, Path]]):
-    msg = (
-        "It seems that you are using a deepmd-kit input of version 0.x.x, "
-        "which is deprecated. we have converted the input to >2.0.0 compatible"
-    )
-    if fname is not None:
-        msg += f", and output it to file {fname}"
-    warnings.warn(msg)
-
-
-def _model(jdata: Dict[str, Any], smooth: bool) -> Dict[str, Dict[str, Any]]:
-    """Convert data to v1 input for non-smooth model.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        parsed input json/yaml data
-    smooth : bool
-        whether to use smooth or non-smooth descriptor version
-
-    Returns
-    -------
-    Dict[str, Dict[str, Any]]
-        dictionary with model input parameters and sub-dictionaries for descriptor and
-        fitting net
-    """
-    model = {}
-    model["descriptor"] = (
-        _smth_descriptor(jdata) if smooth else _nonsmth_descriptor(jdata)
-    )
-    model["fitting_net"] = _fitting_net(jdata)
-    return model
-
-
-def _nonsmth_descriptor(jdata: Dict[str, Any]) -> Dict[str, Any]:
-    """Convert data to v1 input for non-smooth descriptor.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        parsed input json/yaml data
-
-    Returns
-    -------
-    Dict[str, Any]
-        dict with descriptor parameters
-    """
-    descriptor = {}
-    descriptor["type"] = "loc_frame"
-    _jcopy(jdata, descriptor, ("sel_a", "sel_r", "rcut", "axis_rule"))
-    return descriptor
-
-
-def _smth_descriptor(jdata: Dict[str, Any]) -> Dict[str, Any]:
-    """Convert data to v1 input for smooth descriptor.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        parsed input json/yaml data
-
-    Returns
-    -------
-    Dict[str, Any]
-        dict with descriptor parameters
-    """
-    descriptor = {}
-    seed = jdata.get("seed", None)
-    if seed is not None:
-        descriptor["seed"] = seed
-    descriptor["type"] = "se_a"
-    descriptor["sel"] = jdata["sel_a"]
-    _jcopy(jdata, descriptor, ("rcut",))
-    descriptor["rcut_smth"] = jdata.get("rcut_smth", descriptor["rcut"])
-    descriptor["neuron"] = j_must_have(jdata, "filter_neuron")
-    descriptor["axis_neuron"] = j_must_have(jdata, "axis_neuron", ["n_axis_neuron"])
-    descriptor["resnet_dt"] = False
-    if "resnet_dt" in jdata:
-        descriptor["resnet_dt"] = jdata["filter_resnet_dt"]
-
-    return descriptor
-
-
-def _fitting_net(jdata: Dict[str, Any]) -> Dict[str, Any]:
-    """Convert data to v1 input for fitting net.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        parsed input json/yaml data
-
-    Returns
-    -------
-    Dict[str, Any]
-        dict with fitting net parameters
-    """
-    fitting_net = {}
-
-    seed = jdata.get("seed", None)
-    if seed is not None:
-        fitting_net["seed"] = seed
-    fitting_net["neuron"] = j_must_have(jdata, "fitting_neuron", ["n_neuron"])
-    fitting_net["resnet_dt"] = True
-    if "resnet_dt" in jdata:
-        fitting_net["resnet_dt"] = jdata["resnet_dt"]
-    if "fitting_resnet_dt" in jdata:
-        fitting_net["resnet_dt"] = jdata["fitting_resnet_dt"]
-    return fitting_net
-
-
-def _learning_rate(jdata: Dict[str, Any]) -> Dict[str, Any]:
-    """Convert data to v1 input for learning rate section.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        parsed input json/yaml data
-
-    Returns
-    -------
-    Dict[str, Any]
-        dict with learning rate parameters
-    """
-    learning_rate = {}
-    learning_rate["type"] = "exp"
-    _jcopy(jdata, learning_rate, ("decay_steps", "decay_rate", "start_lr"))
-    return learning_rate
-
-
-def _loss(jdata: Dict[str, Any]) -> Dict[str, Any]:
-    """Convert data to v1 input for loss function.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        parsed input json/yaml data
-
-    Returns
-    -------
-    Dict[str, Any]
-        dict with loss function parameters
-    """
-    loss: Dict[str, Any] = {}
-    _jcopy(
-        jdata,
-        loss,
-        (
-            "start_pref_e",
-            "limit_pref_e",
-            "start_pref_f",
-            "limit_pref_f",
-            "start_pref_v",
-            "limit_pref_v",
-        ),
-    )
-    if "start_pref_ae" in jdata:
-        loss["start_pref_ae"] = jdata["start_pref_ae"]
-    if "limit_pref_ae" in jdata:
-        loss["limit_pref_ae"] = jdata["limit_pref_ae"]
-    return loss
-
-
-def _training(jdata: Dict[str, Any]) -> Dict[str, Any]:
-    """Convert data to v1 input for training.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        parsed input json/yaml data
-
-    Returns
-    -------
-    Dict[str, Any]
-        dict with training parameters
-    """
-    training = {}
-    seed = jdata.get("seed", None)
-    if seed is not None:
-        training["seed"] = seed
-
-    _jcopy(jdata, training, ("systems", "set_prefix", "stop_batch", "batch_size"))
-    training["disp_file"] = "lcurve.out"
-    if "disp_file" in jdata:
-        training["disp_file"] = jdata["disp_file"]
-    training["disp_freq"] = j_must_have(jdata, "disp_freq")
-    training["numb_test"] = j_must_have(jdata, "numb_test")
-    training["save_freq"] = j_must_have(jdata, "save_freq")
-    training["save_ckpt"] = j_must_have(jdata, "save_ckpt")
-    training["disp_training"] = j_must_have(jdata, "disp_training")
-    training["time_training"] = j_must_have(jdata, "time_training")
-    if "profiling" in jdata:
-        training["profiling"] = jdata["profiling"]
-        if training["profiling"]:
-            training["profiling_file"] = j_must_have(jdata, "profiling_file")
-    return training
-
-
-def _jcopy(src: Dict[str, Any], dst: Dict[str, Any], keys: Sequence[str]):
-    """Copy specified keys from one dict to another.
-
-    Parameters
-    ----------
-    src : Dict[str, Any]
-        source dictionary
-    dst : Dict[str, Any]
-        destination dictionary, will be modified in place
-    keys : Sequence[str]
-        list of keys to copy
-    """
-    for k in keys:
-        dst[k] = src[k]
-
-
-def remove_decay_rate(jdata: Dict[str, Any]):
-    """Convert decay_rate to stop_lr.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        input data
-    """
-    lr = jdata["learning_rate"]
-    if "decay_rate" in lr:
-        decay_rate = lr["decay_rate"]
-        start_lr = lr["start_lr"]
-        stop_step = jdata["training"]["stop_batch"]
-        decay_steps = lr["decay_steps"]
-        stop_lr = np.exp(np.log(decay_rate) * (stop_step / decay_steps)) * start_lr
-        lr["stop_lr"] = stop_lr
-        lr.pop("decay_rate")
-
-
-def convert_input_v1_v2(
-    jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None
-) -> Dict[str, Any]:
-    tr_cfg = jdata["training"]
-    tr_data_keys = {
-        "systems",
-        "set_prefix",
-        "batch_size",
-        "sys_prob",
-        "auto_prob",
-        # alias included
-        "sys_weights",
-        "auto_prob_style",
-    }
-
-    tr_data_cfg = {k: v for k, v in tr_cfg.items() if k in tr_data_keys}
-    new_tr_cfg = {k: v for k, v in tr_cfg.items() if k not in tr_data_keys}
-    new_tr_cfg["training_data"] = tr_data_cfg
-    if "training_data" in tr_cfg:
-        raise RuntimeError(
-            "Both v1 (training/systems) and v2 (training/training_data) parameters are given."
-        )
-
-    jdata["training"] = new_tr_cfg
-
-    # remove deprecated arguments
-    remove_decay_rate(jdata)
-
-    if warning:
-        _warning_input_v1_v2(dump)
-    if dump is not None:
-        with open(dump, "w") as fp:
-            json.dump(jdata, fp, indent=4)
-
-    return jdata
-
-
-def _warning_input_v1_v2(fname: Optional[Union[str, Path]]):
-    msg = (
-        "It seems that you are using a deepmd-kit input of version 1.x.x, "
-        "which is deprecated. we have converted the input to >2.0.0 compatible"
-    )
-    if fname is not None:
-        msg += f", and output it to file {fname}"
-    warnings.warn(msg)
-
-
-def deprecate_numb_test(
-    jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None
-) -> Dict[str, Any]:
-    """Deprecate `numb_test` since v2.1. It has taken no effect since v2.0.
-
-    See `#1243 <https://github.com/deepmodeling/deepmd-kit/discussions/1243>`_.
-
-    Parameters
-    ----------
-    jdata : Dict[str, Any]
-        loaded json/yaml file
-    warning : bool, optional
-        whether to show deprecation warning, by default True
-    dump : Optional[Union[str, Path]], optional
-        whether to dump converted file, by default None
-
-    Returns
-    -------
-    Dict[str, Any]
-        converted output
-    """
-    try:
-        jdata.get("training", {}).pop("numb_test")
-    except KeyError:
-        pass
-    else:
-        if warning:
-            warnings.warn(
-                "The argument training->numb_test has been deprecated since v2.0.0. "
-                "Use training->validation_data->batch_size instead."
-            )
-
-    if dump is not None:
-        with open(dump, "w") as fp:
-            json.dump(jdata, fp, indent=4)
-    return jdata
-
-
-def update_deepmd_input(
-    jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None
-) -> Dict[str, Any]:
-    def is_deepmd_v0_input(jdata):
-        return "model" not in jdata.keys()
-
-    def is_deepmd_v1_input(jdata):
-        return "systems" in j_must_have(jdata, "training").keys()
-
-    if is_deepmd_v0_input(jdata):
-        jdata = convert_input_v0_v1(jdata, warning, None)
-        jdata = convert_input_v1_v2(jdata, False, None)
-        jdata = deprecate_numb_test(jdata, False, dump)
-    elif is_deepmd_v1_input(jdata):
-        jdata = convert_input_v1_v2(jdata, warning, None)
-        jdata = deprecate_numb_test(jdata, False, dump)
-    else:
-        jdata = deprecate_numb_test(jdata, warning, dump)
-
-    return jdata
diff --git a/deepmd_utils/utils/data.py b/deepmd_utils/utils/data.py
deleted file mode 100644
index 2689257e16..0000000000
--- a/deepmd_utils/utils/data.py
+++ /dev/null
@@ -1,614 +0,0 @@
-#!/usr/bin/env python3
-
-# SPDX-License-Identifier: LGPL-3.0-or-later
-import logging
-from typing import (
-    List,
-    Optional,
-)
-
-import numpy as np
-
-from deepmd_utils.env import (
-    GLOBAL_ENER_FLOAT_PRECISION,
-    GLOBAL_NP_FLOAT_PRECISION,
-)
-from deepmd_utils.utils import random as dp_random
-from deepmd_utils.utils.path import (
-    DPPath,
-)
-
-log = logging.getLogger(__name__)
-
-
-class DeepmdData:
-    """Class for a data system.
-
-    It loads data from hard disk, and mantains the data as a `data_dict`
-
-    Parameters
-    ----------
-    sys_path
-            Path to the data system
-    set_prefix
-            Prefix for the directories of different sets
-    shuffle_test
-            If the test data are shuffled
-    type_map
-            Gives the name of different atom types
-    optional_type_map
-            If the type_map.raw in each system is optional
-    modifier
-            Data modifier that has the method `modify_data`
-    trn_all_set
-            Use all sets as training dataset. Otherwise, if the number of sets is more than 1, the last set is left for test.
-    sort_atoms : bool
-            Sort atoms by atom types. Required to enable when the data is directly feeded to
-            descriptors except mixed types.
-    """
-
-    def __init__(
-        self,
-        sys_path: str,
-        set_prefix: str = "set",
-        shuffle_test: bool = True,
-        type_map: Optional[List[str]] = None,
-        optional_type_map: bool = True,
-        modifier=None,
-        trn_all_set: bool = False,
-        sort_atoms: bool = True,
-    ):
-        """Constructor."""
-        root = DPPath(sys_path)
-        self.dirs = root.glob(set_prefix + ".*")
-        if not len(self.dirs):
-            raise FileNotFoundError(f"No {set_prefix}.* is found in {sys_path}")
-        self.dirs.sort()
-        # check mix_type format
-        error_format_msg = (
-            "if one of the set is of mixed_type format, "
-            "then all of the sets in this system should be of mixed_type format!"
-        )
-        self.mixed_type = self._check_mode(self.dirs[0])
-        for set_item in self.dirs[1:]:
-            assert self._check_mode(set_item) == self.mixed_type, error_format_msg
-        # load atom type
-        self.atom_type = self._load_type(root)
-        self.natoms = len(self.atom_type)
-        # load atom type map
-        self.type_map = self._load_type_map(root)
-        assert (
-            optional_type_map or self.type_map is not None
-        ), f"System {sys_path} must have type_map.raw in this mode! "
-        if self.type_map is not None:
-            assert len(self.type_map) >= max(self.atom_type) + 1
-        # check pbc
-        self.pbc = self._check_pbc(root)
-        # enforce type_map if necessary
-        self.enforce_type_map = False
-        if type_map is not None and self.type_map is not None and len(type_map):
-            if not self.mixed_type:
-                atom_type_ = [
-                    type_map.index(self.type_map[ii]) for ii in self.atom_type
-                ]
-                self.atom_type = np.array(atom_type_, dtype=np.int32)
-            else:
-                self.enforce_type_map = True
-                sorter = np.argsort(type_map)
-                self.type_idx_map = np.array(
-                    sorter[np.searchsorted(type_map, self.type_map, sorter=sorter)]
-                )
-                # padding for virtual atom
-                self.type_idx_map = np.append(
-                    self.type_idx_map, np.array([-1], dtype=np.int32)
-                )
-            self.type_map = type_map
-        if type_map is None and self.type_map is None and self.mixed_type:
-            raise RuntimeError("mixed_type format must have type_map!")
-        # make idx map
-        self.sort_atoms = sort_atoms
-        self.idx_map = self._make_idx_map(self.atom_type)
-        # train dirs
-        self.test_dir = self.dirs[-1]
-        if trn_all_set:
-            self.train_dirs = self.dirs
-        else:
-            if len(self.dirs) == 1:
-                self.train_dirs = self.dirs
-            else:
-                self.train_dirs = self.dirs[:-1]
-        self.data_dict = {}
-        # add box and coord
-        self.add("box", 9, must=self.pbc)
-        self.add("coord", 3, atomic=True, must=True)
-        # the training times of each frame
-        self.add("numb_copy", 1, must=False, default=1, dtype=int)
-        # set counters
-        self.set_count = 0
-        self.iterator = 0
-        self.shuffle_test = shuffle_test
-        # set modifier
-        self.modifier = modifier
-
-    def add(
-        self,
-        key: str,
-        ndof: int,
-        atomic: bool = False,
-        must: bool = False,
-        high_prec: bool = False,
-        type_sel: Optional[List[int]] = None,
-        repeat: int = 1,
-        default: float = 0.0,
-        dtype: Optional[np.dtype] = None,
-    ):
-        """Add a data item that to be loaded.
-
-        Parameters
-        ----------
-        key
-            The key of the item. The corresponding data is stored in `sys_path/set.*/key.npy`
-        ndof
-            The number of dof
-        atomic
-            The item is an atomic property.
-            If False, the size of the data should be nframes x ndof
-            If True, the size of data should be nframes x natoms x ndof
-        must
-            The data file `sys_path/set.*/key.npy` must exist.
-            If must is False and the data file does not exist, the `data_dict[find_key]` is set to 0.0
-        high_prec
-            Load the data and store in float64, otherwise in float32
-        type_sel
-            Select certain type of atoms
-        repeat
-            The data will be repeated `repeat` times.
-        default : float, default=0.
-            default value of data
-        dtype : np.dtype, optional
-            the dtype of data, overwrites `high_prec` if provided
-        """
-        self.data_dict[key] = {
-            "ndof": ndof,
-            "atomic": atomic,
-            "must": must,
-            "high_prec": high_prec,
-            "type_sel": type_sel,
-            "repeat": repeat,
-            "reduce": None,
-            "default": default,
-            "dtype": dtype,
-        }
-        return self
-
-    def reduce(self, key_out: str, key_in: str):
-        """Generate a new item from the reduction of another atom.
-
-        Parameters
-        ----------
-        key_out
-            The name of the reduced item
-        key_in
-            The name of the data item to be reduced
-        """
-        assert key_in in self.data_dict, "cannot find input key"
-        assert self.data_dict[key_in]["atomic"], "reduced property should be atomic"
-        assert key_out not in self.data_dict, "output key should not have been added"
-        assert (
-            self.data_dict[key_in]["repeat"] == 1
-        ), "reduced proerties should not have been repeated"
-
-        self.data_dict[key_out] = {
-            "ndof": self.data_dict[key_in]["ndof"],
-            "atomic": False,
-            "must": True,
-            "high_prec": True,
-            "type_sel": None,
-            "repeat": 1,
-            "reduce": key_in,
-        }
-        return self
-
-    def get_data_dict(self) -> dict:
-        """Get the `data_dict`."""
-        return self.data_dict
-
-    def check_batch_size(self, batch_size):
-        """Check if the system can get a batch of data with `batch_size` frames."""
-        for ii in self.train_dirs:
-            if self.data_dict["coord"]["high_prec"]:
-                tmpe = (
-                    (ii / "coord.npy").load_numpy().astype(GLOBAL_ENER_FLOAT_PRECISION)
-                )
-            else:
-                tmpe = (ii / "coord.npy").load_numpy().astype(GLOBAL_NP_FLOAT_PRECISION)
-            if tmpe.ndim == 1:
-                tmpe = tmpe.reshape([1, -1])
-            if tmpe.shape[0] < batch_size:
-                return ii, tmpe.shape[0]
-        return None
-
-    def check_test_size(self, test_size):
-        """Check if the system can get a test dataset with `test_size` frames."""
-        if self.data_dict["coord"]["high_prec"]:
-            tmpe = (
-                (self.test_dir / "coord.npy")
-                .load_numpy()
-                .astype(GLOBAL_ENER_FLOAT_PRECISION)
-            )
-        else:
-            tmpe = (
-                (self.test_dir / "coord.npy")
-                .load_numpy()
-                .astype(GLOBAL_NP_FLOAT_PRECISION)
-            )
-        if tmpe.ndim == 1:
-            tmpe = tmpe.reshape([1, -1])
-        if tmpe.shape[0] < test_size:
-            return self.test_dir, tmpe.shape[0]
-        else:
-            return None
-
-    def get_batch(self, batch_size: int) -> dict:
-        """Get a batch of data with `batch_size` frames. The frames are randomly picked from the data system.
-
-        Parameters
-        ----------
-        batch_size
-            size of the batch
-        """
-        if hasattr(self, "batch_set"):
-            set_size = self.batch_set["coord"].shape[0]
-        else:
-            set_size = 0
-        if self.iterator + batch_size > set_size:
-            self._load_batch_set(self.train_dirs[self.set_count % self.get_numb_set()])
-            self.set_count += 1
-            set_size = self.batch_set["coord"].shape[0]
-        iterator_1 = self.iterator + batch_size
-        if iterator_1 >= set_size:
-            iterator_1 = set_size
-        idx = np.arange(self.iterator, iterator_1)
-        self.iterator += batch_size
-        ret = self._get_subdata(self.batch_set, idx)
-        return ret
-
-    def get_test(self, ntests: int = -1) -> dict:
-        """Get the test data with `ntests` frames.
-
-        Parameters
-        ----------
-        ntests
-            Size of the test data set. If `ntests` is -1, all test data will be get.
-        """
-        if not hasattr(self, "test_set"):
-            self._load_test_set(self.test_dir, self.shuffle_test)
-        if ntests == -1:
-            idx = None
-        else:
-            ntests_ = (
-                ntests
-                if ntests < self.test_set["type"].shape[0]
-                else self.test_set["type"].shape[0]
-            )
-            # print('ntest', self.test_set['type'].shape[0], ntests, ntests_)
-            idx = np.arange(ntests_)
-        ret = self._get_subdata(self.test_set, idx=idx)
-        if self.modifier is not None:
-            self.modifier.modify_data(ret, self)
-        return ret
-
-    def get_ntypes(self) -> int:
-        """Number of atom types in the system."""
-        if self.type_map is not None:
-            return len(self.type_map)
-        else:
-            return max(self.get_atom_type()) + 1
-
-    def get_type_map(self) -> List[str]:
-        """Get the type map."""
-        return self.type_map
-
-    def get_atom_type(self) -> List[int]:
-        """Get atom types."""
-        return self.atom_type
-
-    def get_numb_set(self) -> int:
-        """Get number of training sets."""
-        return len(self.train_dirs)
-
-    def get_numb_batch(self, batch_size: int, set_idx: int) -> int:
-        """Get the number of batches in a set."""
-        data = self._load_set(self.train_dirs[set_idx])
-        ret = data["coord"].shape[0] // batch_size
-        if ret == 0:
-            ret = 1
-        return ret
-
-    def get_sys_numb_batch(self, batch_size: int) -> int:
-        """Get the number of batches in the data system."""
-        ret = 0
-        for ii in range(len(self.train_dirs)):
-            ret += self.get_numb_batch(batch_size, ii)
-        return ret
-
-    def get_natoms(self):
-        """Get number of atoms."""
-        return len(self.atom_type)
-
-    def get_natoms_vec(self, ntypes: int):
-        """Get number of atoms and number of atoms in different types.
-
-        Parameters
-        ----------
-        ntypes
-            Number of types (may be larger than the actual number of types in the system).
-
-        Returns
-        -------
-        natoms
-            natoms[0]: number of local atoms
-            natoms[1]: total number of atoms held by this processor
-            natoms[i]: 2 <= i < Ntypes+2, number of type i atoms
-        """
-        natoms, natoms_vec = self._get_natoms_2(ntypes)
-        tmp = [natoms, natoms]
-        tmp = np.append(tmp, natoms_vec)
-        return tmp.astype(np.int32)
-
-    def avg(self, key):
-        """Return the average value of an item."""
-        if key not in self.data_dict.keys():
-            raise RuntimeError("key %s has not been added" % key)
-        info = self.data_dict[key]
-        ndof = info["ndof"]
-        eners = []
-        for ii in self.train_dirs:
-            data = self._load_set(ii)
-            ei = data[key].reshape([-1, ndof])
-            eners.append(ei)
-        eners = np.concatenate(eners, axis=0)
-        if eners.size == 0:
-            return 0
-        else:
-            return np.average(eners, axis=0)
-
-    def _idx_map_sel(self, atom_type, type_sel):
-        new_types = []
-        for ii in atom_type:
-            if ii in type_sel:
-                new_types.append(ii)
-        new_types = np.array(new_types, dtype=int)
-        natoms = new_types.shape[0]
-        idx = np.arange(natoms)
-        idx_map = np.lexsort((idx, new_types))
-        return idx_map
-
-    def _get_natoms_2(self, ntypes):
-        sample_type = self.atom_type
-        natoms = len(sample_type)
-        natoms_vec = np.zeros(ntypes).astype(int)
-        for ii in range(ntypes):
-            natoms_vec[ii] = np.count_nonzero(sample_type == ii)
-        return natoms, natoms_vec
-
-    def _get_subdata(self, data, idx=None):
-        new_data = {}
-        for ii in data:
-            dd = data[ii]
-            if "find_" in ii:
-                new_data[ii] = dd
-            else:
-                if idx is not None:
-                    new_data[ii] = dd[idx]
-                else:
-                    new_data[ii] = dd
-        return new_data
-
-    def _load_batch_set(self, set_name: DPPath):
-        if not hasattr(self, "batch_set") or self.get_numb_set() > 1:
-            self.batch_set = self._load_set(set_name)
-            if self.modifier is not None:
-                self.modifier.modify_data(self.batch_set, self)
-        self.batch_set, _ = self._shuffle_data(self.batch_set)
-        self.reset_get_batch()
-
-    def reset_get_batch(self):
-        self.iterator = 0
-
-    def _load_test_set(self, set_name: DPPath, shuffle_test):
-        self.test_set = self._load_set(set_name)
-        if shuffle_test:
-            self.test_set, _ = self._shuffle_data(self.test_set)
-
-    def _shuffle_data(self, data):
-        ret = {}
-        nframes = data["coord"].shape[0]
-        idx = np.arange(nframes)
-        # the training times of each frame
-        idx = np.repeat(idx, np.reshape(data["numb_copy"], (nframes,)))
-        dp_random.shuffle(idx)
-        for kk in data:
-            if (
-                type(data[kk]) == np.ndarray
-                and len(data[kk].shape) == 2
-                and data[kk].shape[0] == nframes
-                and "find_" not in kk
-            ):
-                ret[kk] = data[kk][idx]
-            else:
-                ret[kk] = data[kk]
-        return ret, idx
-
-    def _load_set(self, set_name: DPPath):
-        # get nframes
-        if not isinstance(set_name, DPPath):
-            set_name = DPPath(set_name)
-        path = set_name / "coord.npy"
-        if self.data_dict["coord"]["high_prec"]:
-            coord = path.load_numpy().astype(GLOBAL_ENER_FLOAT_PRECISION)
-        else:
-            coord = path.load_numpy().astype(GLOBAL_NP_FLOAT_PRECISION)
-        if coord.ndim == 1:
-            coord = coord.reshape([1, -1])
-        nframes = coord.shape[0]
-        assert coord.shape[1] == self.data_dict["coord"]["ndof"] * self.natoms
-        # load keys
-        data = {}
-        for kk in self.data_dict.keys():
-            if self.data_dict[kk]["reduce"] is None:
-                data["find_" + kk], data[kk] = self._load_data(
-                    set_name,
-                    kk,
-                    nframes,
-                    self.data_dict[kk]["ndof"],
-                    atomic=self.data_dict[kk]["atomic"],
-                    high_prec=self.data_dict[kk]["high_prec"],
-                    must=self.data_dict[kk]["must"],
-                    type_sel=self.data_dict[kk]["type_sel"],
-                    repeat=self.data_dict[kk]["repeat"],
-                    default=self.data_dict[kk]["default"],
-                    dtype=self.data_dict[kk]["dtype"],
-                )
-        for kk in self.data_dict.keys():
-            if self.data_dict[kk]["reduce"] is not None:
-                k_in = self.data_dict[kk]["reduce"]
-                ndof = self.data_dict[kk]["ndof"]
-                data["find_" + kk] = data["find_" + k_in]
-                tmp_in = data[k_in].astype(GLOBAL_ENER_FLOAT_PRECISION)
-                data[kk] = np.sum(
-                    np.reshape(tmp_in, [nframes, self.natoms, ndof]), axis=1
-                )
-
-        if self.mixed_type:
-            # nframes x natoms
-            atom_type_mix = self._load_type_mix(set_name)
-            if self.enforce_type_map:
-                try:
-                    atom_type_mix_ = self.type_idx_map[atom_type_mix].astype(np.int32)
-                except IndexError as e:
-                    raise IndexError(
-                        "some types in 'real_atom_types.npy' of set {} are not contained in {} types!".format(
-                            set_name, self.get_ntypes()
-                        )
-                    ) from e
-                atom_type_mix = atom_type_mix_
-            real_type = atom_type_mix.reshape([nframes, self.natoms])
-            data["type"] = real_type
-            natoms = data["type"].shape[1]
-            # nframes x ntypes
-            atom_type_nums = np.array(
-                [(real_type == i).sum(axis=-1) for i in range(self.get_ntypes())],
-                dtype=np.int32,
-            ).T
-            ghost_nums = np.array(
-                [(real_type == -1).sum(axis=-1)],
-                dtype=np.int32,
-            ).T
-            assert (
-                atom_type_nums.sum(axis=-1) + ghost_nums.sum(axis=-1) == natoms
-            ).all(), "some types in 'real_atom_types.npy' of set {} are not contained in {} types!".format(
-                set_name, self.get_ntypes()
-            )
-            data["real_natoms_vec"] = np.concatenate(
-                (
-                    np.tile(np.array([natoms, natoms], dtype=np.int32), (nframes, 1)),
-                    atom_type_nums,
-                ),
-                axis=-1,
-            )
-        else:
-            data["type"] = np.tile(self.atom_type[self.idx_map], (nframes, 1))
-
-        return data
-
-    def _load_data(
-        self,
-        set_name,
-        key,
-        nframes,
-        ndof_,
-        atomic=False,
-        must=True,
-        repeat=1,
-        high_prec=False,
-        type_sel=None,
-        default: float = 0.0,
-        dtype: Optional[np.dtype] = None,
-    ):
-        if atomic:
-            natoms = self.natoms
-            idx_map = self.idx_map
-            # if type_sel, then revise natoms and idx_map
-            if type_sel is not None:
-                natoms = 0
-                for jj in type_sel:
-                    natoms += np.sum(self.atom_type == jj)
-                idx_map = self._idx_map_sel(self.atom_type, type_sel)
-            ndof = ndof_ * natoms
-        else:
-            ndof = ndof_
-        if dtype is not None:
-            pass
-        elif high_prec:
-            dtype = GLOBAL_ENER_FLOAT_PRECISION
-        else:
-            dtype = GLOBAL_NP_FLOAT_PRECISION
-        path = set_name / (key + ".npy")
-        if path.is_file():
-            data = path.load_numpy().astype(dtype)
-            try:  # YWolfeee: deal with data shape error
-                if atomic:
-                    data = data.reshape([nframes, natoms, -1])
-                    data = data[:, idx_map, :]
-                    data = data.reshape([nframes, -1])
-                data = np.reshape(data, [nframes, ndof])
-            except ValueError as err_message:
-                explanation = "This error may occur when your label mismatch it's name, i.e. you might store global tensor in `atomic_tensor.npy` or atomic tensor in `tensor.npy`."
-                log.error(str(err_message))
-                log.error(explanation)
-                raise ValueError(str(err_message) + ". " + explanation)
-            if repeat != 1:
-                data = np.repeat(data, repeat).reshape([nframes, -1])
-            return np.float32(1.0), data
-        elif must:
-            raise RuntimeError("%s not found!" % path)
-        else:
-            data = np.full([nframes, ndof], default, dtype=dtype)
-            if repeat != 1:
-                data = np.repeat(data, repeat).reshape([nframes, -1])
-            return np.float32(0.0), data
-
-    def _load_type(self, sys_path: DPPath):
-        atom_type = (sys_path / "type.raw").load_txt(ndmin=1).astype(np.int32)
-        return atom_type
-
-    def _load_type_mix(self, set_name: DPPath):
-        type_path = set_name / "real_atom_types.npy"
-        real_type = type_path.load_numpy().astype(np.int32).reshape([-1, self.natoms])
-        return real_type
-
-    def _make_idx_map(self, atom_type):
-        natoms = atom_type.shape[0]
-        idx = np.arange(natoms)
-        if self.sort_atoms:
-            idx_map = np.lexsort((idx, atom_type))
-        else:
-            idx_map = idx
-        return idx_map
-
-    def _load_type_map(self, sys_path: DPPath):
-        fname = sys_path / "type_map.raw"
-        if fname.is_file():
-            return fname.load_txt(dtype=str, ndmin=1).tolist()
-        else:
-            return None
-
-    def _check_pbc(self, sys_path: DPPath):
-        pbc = True
-        if (sys_path / "nopbc").is_file():
-            pbc = False
-        return pbc
-
-    def _check_mode(self, set_path: DPPath):
-        return (set_path / "real_atom_types.npy").is_file()
diff --git a/deepmd_utils/utils/data_system.py b/deepmd_utils/utils/data_system.py
deleted file mode 100644
index f83f587590..0000000000
--- a/deepmd_utils/utils/data_system.py
+++ /dev/null
@@ -1,654 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-import collections
-import logging
-import warnings
-from functools import (
-    lru_cache,
-)
-from typing import (
-    List,
-    Optional,
-)
-
-import numpy as np
-
-import deepmd_utils.utils.random as dp_random
-from deepmd_utils.common import (
-    make_default_mesh,
-)
-from deepmd_utils.env import (
-    GLOBAL_NP_FLOAT_PRECISION,
-)
-from deepmd_utils.utils.data import (
-    DeepmdData,
-)
-
-log = logging.getLogger(__name__)
-
-
-class DeepmdDataSystem:
-    """Class for manipulating many data systems.
-
-    It is implemented with the help of DeepmdData
-    """
-
-    def __init__(
-        self,
-        systems: List[str],
-        batch_size: int,
-        test_size: int,
-        rcut: Optional[float] = None,
-        set_prefix: str = "set",
-        shuffle_test: bool = True,
-        type_map: Optional[List[str]] = None,
-        optional_type_map: bool = True,
-        modifier=None,
-        trn_all_set=False,
-        sys_probs=None,
-        auto_prob_style="prob_sys_size",
-        sort_atoms: bool = True,
-    ):
-        """Constructor.
-
-        Parameters
-        ----------
-        systems
-            Specifying the paths to systems
-        batch_size
-            The batch size
-        test_size
-            The size of test data
-        rcut
-            The cut-off radius. Not used.
-        set_prefix
-            Prefix for the directories of different sets
-        shuffle_test
-            If the test data are shuffled
-        type_map
-            Gives the name of different atom types
-        optional_type_map
-            If the type_map.raw in each system is optional
-        modifier
-            Data modifier that has the method `modify_data`
-        trn_all_set
-            Use all sets as training dataset. Otherwise, if the number of sets is more than 1, the last set is left for test.
-        sys_probs : list of float
-            The probabilitis of systems to get the batch.
-            Summation of positive elements of this list should be no greater than 1.
-            Element of this list can be negative, the probability of the corresponding system is determined
-                automatically by the number of batches in the system.
-        auto_prob_style : str
-            Determine the probability of systems automatically. The method is assigned by this key and can be
-            - "prob_uniform"  : the probability all the systems are equal, namely 1.0/self.get_nsystems()
-            - "prob_sys_size" : the probability of a system is proportional to the number of batches in the system
-            - "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." :
-                                the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`,
-                                where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system,
-                                the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional
-                to the number of batches in the system.
-        sort_atoms : bool
-            Sort atoms by atom types. Required to enable when the data is directly feeded to
-            descriptors except mixed types.
-        """
-        # init data
-        del rcut
-        self.system_dirs = systems
-        self.nsystems = len(self.system_dirs)
-        self.data_systems = []
-        for ii in self.system_dirs:
-            self.data_systems.append(
-                DeepmdData(
-                    ii,
-                    set_prefix=set_prefix,
-                    shuffle_test=shuffle_test,
-                    type_map=type_map,
-                    optional_type_map=optional_type_map,
-                    modifier=modifier,
-                    trn_all_set=trn_all_set,
-                    sort_atoms=sort_atoms,
-                )
-            )
-        # check mix_type format
-        error_format_msg = (
-            "if one of the system is of mixed_type format, "
-            "then all of the systems should be of mixed_type format!"
-        )
-        if self.data_systems[0].mixed_type:
-            for data_sys in self.data_systems[1:]:
-                assert data_sys.mixed_type, error_format_msg
-            self.mixed_type = True
-        else:
-            for data_sys in self.data_systems[1:]:
-                assert not data_sys.mixed_type, error_format_msg
-            self.mixed_type = False
-        # batch size
-        self.batch_size = batch_size
-        is_auto_bs = False
-        self.mixed_systems = False
-        if isinstance(self.batch_size, int):
-            self.batch_size = self.batch_size * np.ones(self.nsystems, dtype=int)
-        elif isinstance(self.batch_size, str):
-            words = self.batch_size.split(":")
-            if "auto" == words[0]:
-                is_auto_bs = True
-                rule = 32
-                if len(words) == 2:
-                    rule = int(words[1])
-                self.batch_size = self._make_auto_bs(rule)
-            elif "mixed" == words[0]:
-                self.mixed_type = True
-                self.mixed_systems = True
-                if len(words) == 2:
-                    rule = int(words[1])
-                else:
-                    raise RuntimeError("batch size must be specified for mixed systems")
-                self.batch_size = rule * np.ones(self.nsystems, dtype=int)
-            else:
-                raise RuntimeError("unknown batch_size rule " + words[0])
-        elif isinstance(self.batch_size, list):
-            pass
-        else:
-            raise RuntimeError("invalid batch_size")
-        assert isinstance(self.batch_size, (list, np.ndarray))
-        assert len(self.batch_size) == self.nsystems
-
-        # natoms, nbatches
-        ntypes = []
-        for ii in self.data_systems:
-            ntypes.append(ii.get_ntypes())
-        self.sys_ntypes = max(ntypes)
-        self.natoms = []
-        self.natoms_vec = []
-        self.nbatches = []
-        type_map_list = []
-        for ii in range(self.nsystems):
-            self.natoms.append(self.data_systems[ii].get_natoms())
-            self.natoms_vec.append(
-                self.data_systems[ii].get_natoms_vec(self.sys_ntypes).astype(int)
-            )
-            self.nbatches.append(
-                self.data_systems[ii].get_sys_numb_batch(self.batch_size[ii])
-            )
-            type_map_list.append(self.data_systems[ii].get_type_map())
-        self.type_map = self._check_type_map_consistency(type_map_list)
-
-        # ! altered by Marián Rynik
-        # test size
-        # now test size can be set as a percentage of systems data or test size
-        # can be set for each system individualy in the same manner as batch
-        # size. This enables one to use systems with diverse number of
-        # structures and different number of atoms.
-        self.test_size = test_size
-        if isinstance(self.test_size, int):
-            self.test_size = self.test_size * np.ones(self.nsystems, dtype=int)
-        elif isinstance(self.test_size, str):
-            words = self.test_size.split("%")
-            try:
-                percent = int(words[0])
-            except ValueError:
-                raise RuntimeError("unknown test_size rule " + words[0])
-            self.test_size = self._make_auto_ts(percent)
-        elif isinstance(self.test_size, list):
-            pass
-        else:
-            raise RuntimeError("invalid test_size")
-        assert isinstance(self.test_size, (list, np.ndarray))
-        assert len(self.test_size) == self.nsystems
-
-        # init pick idx
-        self.pick_idx = 0
-
-        # derive system probabilities
-        self.sys_probs = None
-        self.set_sys_probs(sys_probs, auto_prob_style)
-
-        # check batch and test size
-        for ii in range(self.nsystems):
-            chk_ret = self.data_systems[ii].check_batch_size(self.batch_size[ii])
-            if chk_ret is not None and not is_auto_bs and not self.mixed_systems:
-                warnings.warn(
-                    "system %s required batch size is larger than the size of the dataset %s (%d > %d)"
-                    % (
-                        self.system_dirs[ii],
-                        chk_ret[0],
-                        self.batch_size[ii],
-                        chk_ret[1],
-                    )
-                )
-            chk_ret = self.data_systems[ii].check_test_size(self.test_size[ii])
-            if chk_ret is not None and not is_auto_bs and not self.mixed_systems:
-                warnings.warn(
-                    "system %s required test size is larger than the size of the dataset %s (%d > %d)"
-                    % (self.system_dirs[ii], chk_ret[0], self.test_size[ii], chk_ret[1])
-                )
-
-    def _load_test(self, ntests=-1):
-        self.test_data = collections.defaultdict(list)
-        for ii in range(self.nsystems):
-            test_system_data = self.data_systems[ii].get_test(ntests=ntests)
-            for nn in test_system_data:
-                self.test_data[nn].append(test_system_data[nn])
-
-    @property
-    @lru_cache(maxsize=None)
-    def default_mesh(self) -> List[np.ndarray]:
-        """Mesh for each system."""
-        return [
-            make_default_mesh(
-                self.data_systems[ii].pbc, self.data_systems[ii].mixed_type
-            )
-            for ii in range(self.nsystems)
-        ]
-
-    def compute_energy_shift(self, rcond=None, key="energy"):
-        sys_ener = []
-        for ss in self.data_systems:
-            sys_ener.append(ss.avg(key))
-        sys_ener = np.concatenate(sys_ener)
-        sys_tynatom = np.array(self.natoms_vec, dtype=GLOBAL_NP_FLOAT_PRECISION)
-        sys_tynatom = np.reshape(sys_tynatom, [self.nsystems, -1])
-        sys_tynatom = sys_tynatom[:, 2:]
-        energy_shift, resd, rank, s_value = np.linalg.lstsq(
-            sys_tynatom, sys_ener, rcond=rcond
-        )
-        return energy_shift
-
-    def add_dict(self, adict: dict) -> None:
-        """Add items to the data system by a `dict`.
-        `adict` should have items like
-        .. code-block:: python.
-
-           adict[key] = {
-               "ndof": ndof,
-               "atomic": atomic,
-               "must": must,
-               "high_prec": high_prec,
-               "type_sel": type_sel,
-               "repeat": repeat,
-           }
-
-        For the explaination of the keys see `add`
-        """
-        for kk in adict:
-            self.add(
-                kk,
-                adict[kk]["ndof"],
-                atomic=adict[kk]["atomic"],
-                must=adict[kk]["must"],
-                high_prec=adict[kk]["high_prec"],
-                type_sel=adict[kk]["type_sel"],
-                repeat=adict[kk]["repeat"],
-                default=adict[kk]["default"],
-            )
-
-    def add(
-        self,
-        key: str,
-        ndof: int,
-        atomic: bool = False,
-        must: bool = False,
-        high_prec: bool = False,
-        type_sel: Optional[List[int]] = None,
-        repeat: int = 1,
-        default: float = 0.0,
-    ):
-        """Add a data item that to be loaded.
-
-        Parameters
-        ----------
-        key
-            The key of the item. The corresponding data is stored in `sys_path/set.*/key.npy`
-        ndof
-            The number of dof
-        atomic
-            The item is an atomic property.
-            If False, the size of the data should be nframes x ndof
-            If True, the size of data should be nframes x natoms x ndof
-        must
-            The data file `sys_path/set.*/key.npy` must exist.
-            If must is False and the data file does not exist, the `data_dict[find_key]` is set to 0.0
-        high_prec
-            Load the data and store in float64, otherwise in float32
-        type_sel
-            Select certain type of atoms
-        repeat
-            The data will be repeated `repeat` times.
-        default, default=0.
-            Default value of data
-        """
-        for ii in self.data_systems:
-            ii.add(
-                key,
-                ndof,
-                atomic=atomic,
-                must=must,
-                high_prec=high_prec,
-                repeat=repeat,
-                type_sel=type_sel,
-                default=default,
-            )
-
-    def reduce(self, key_out, key_in):
-        """Generate a new item from the reduction of another atom.
-
-        Parameters
-        ----------
-        key_out
-            The name of the reduced item
-        key_in
-            The name of the data item to be reduced
-        """
-        for ii in self.data_systems:
-            ii.reduce(key_out, key_in)
-
-    def get_data_dict(self, ii: int = 0) -> dict:
-        return self.data_systems[ii].get_data_dict()
-
-    def set_sys_probs(self, sys_probs=None, auto_prob_style: str = "prob_sys_size"):
-        if sys_probs is None:
-            if auto_prob_style == "prob_uniform":
-                prob_v = 1.0 / float(self.nsystems)
-                probs = [prob_v for ii in range(self.nsystems)]
-            elif auto_prob_style[:13] == "prob_sys_size":
-                if auto_prob_style == "prob_sys_size":
-                    prob_style = f"prob_sys_size;0:{self.get_nsystems()}:1.0"
-                else:
-                    prob_style = auto_prob_style
-                probs = prob_sys_size_ext(
-                    prob_style, self.get_nsystems(), self.nbatches
-                )
-            else:
-                raise RuntimeError("Unknown auto prob style: " + auto_prob_style)
-        else:
-            probs = process_sys_probs(sys_probs, self.nbatches)
-        self.sys_probs = probs
-
-    def get_batch(self, sys_idx: Optional[int] = None) -> dict:
-        # batch generation style altered by Ziyao Li:
-        # one should specify the "sys_prob" and "auto_prob_style" params
-        # via set_sys_prob() function. The sys_probs this function uses is
-        # defined as a private variable, self.sys_probs, initialized in __init__().
-        # This is to optimize the (vain) efforts in evaluating sys_probs every batch.
-        """Get a batch of data from the data systems.
-
-        Parameters
-        ----------
-        sys_idx : int
-            The index of system from which the batch is get.
-            If sys_idx is not None, `sys_probs` and `auto_prob_style` are ignored
-            If sys_idx is None, automatically determine the system according to `sys_probs` or `auto_prob_style`, see the following.
-            This option does not work for mixed systems.
-
-        Returns
-        -------
-        dict
-            The batch data
-        """
-        if not self.mixed_systems:
-            b_data = self.get_batch_standard(sys_idx)
-        else:
-            b_data = self.get_batch_mixed()
-        return b_data
-
-    def get_batch_standard(self, sys_idx: Optional[int] = None) -> dict:
-        """Get a batch of data from the data systems in the standard way.
-
-        Parameters
-        ----------
-        sys_idx : int
-            The index of system from which the batch is get.
-            If sys_idx is not None, `sys_probs` and `auto_prob_style` are ignored
-            If sys_idx is None, automatically determine the system according to `sys_probs` or `auto_prob_style`, see the following.
-
-        Returns
-        -------
-        dict
-            The batch data
-        """
-        if sys_idx is not None:
-            self.pick_idx = sys_idx
-        else:
-            # prob = self._get_sys_probs(sys_probs, auto_prob_style)
-            self.pick_idx = dp_random.choice(np.arange(self.nsystems), p=self.sys_probs)
-        b_data = self.data_systems[self.pick_idx].get_batch(
-            self.batch_size[self.pick_idx]
-        )
-        b_data["natoms_vec"] = self.natoms_vec[self.pick_idx]
-        b_data["default_mesh"] = self.default_mesh[self.pick_idx]
-        return b_data
-
-    def get_batch_mixed(self) -> dict:
-        """Get a batch of data from the data systems in the mixed way.
-
-        Returns
-        -------
-        dict
-            The batch data
-        """
-        # mixed systems have a global batch size
-        batch_size = self.batch_size[0]
-        batch_data = []
-        for _ in range(batch_size):
-            self.pick_idx = dp_random.choice(np.arange(self.nsystems), p=self.sys_probs)
-            bb_data = self.data_systems[self.pick_idx].get_batch(1)
-            bb_data["natoms_vec"] = self.natoms_vec[self.pick_idx]
-            bb_data["default_mesh"] = self.default_mesh[self.pick_idx]
-            batch_data.append(bb_data)
-        b_data = self._merge_batch_data(batch_data)
-        return b_data
-
-    def _merge_batch_data(self, batch_data: List[dict]) -> dict:
-        """Merge batch data from different systems.
-
-        Parameters
-        ----------
-        batch_data : list of dict
-            A list of batch data from different systems.
-
-        Returns
-        -------
-        dict
-            The merged batch data.
-        """
-        b_data = {}
-        max_natoms = max(bb["natoms_vec"][0] for bb in batch_data)
-        # natoms_vec
-        natoms_vec = np.zeros(2 + self.get_ntypes(), dtype=int)
-        natoms_vec[0:3] = max_natoms
-        b_data["natoms_vec"] = natoms_vec
-        # real_natoms_vec
-        real_natoms_vec = np.vstack([bb["natoms_vec"] for bb in batch_data])
-        b_data["real_natoms_vec"] = real_natoms_vec
-        # type
-        type_vec = np.full((len(batch_data), max_natoms), -1, dtype=int)
-        for ii, bb in enumerate(batch_data):
-            type_vec[ii, : bb["type"].shape[1]] = bb["type"][0]
-        b_data["type"] = type_vec
-        # default_mesh
-        default_mesh = np.mean([bb["default_mesh"] for bb in batch_data], axis=0)
-        b_data["default_mesh"] = default_mesh
-        # other data
-        data_dict = self.get_data_dict(0)
-        for kk, vv in data_dict.items():
-            if kk not in batch_data[0]:
-                continue
-            b_data["find_" + kk] = batch_data[0]["find_" + kk]
-            if not vv["atomic"]:
-                b_data[kk] = np.concatenate([bb[kk] for bb in batch_data], axis=0)
-            else:
-                b_data[kk] = np.zeros(
-                    (len(batch_data), max_natoms * vv["ndof"] * vv["repeat"]),
-                    dtype=batch_data[0][kk].dtype,
-                )
-                for ii, bb in enumerate(batch_data):
-                    b_data[kk][ii, : bb[kk].shape[1]] = bb[kk][0]
-        return b_data
-
-    # ! altered by Marián Rynik
-    def get_test(self, sys_idx: Optional[int] = None, n_test: int = -1):  # depreciated
-        """Get test data from the the data systems.
-
-        Parameters
-        ----------
-        sys_idx
-            The test dat of system with index `sys_idx` will be returned.
-            If is None, the currently selected system will be returned.
-        n_test
-            Number of test data. If set to -1 all test data will be get.
-        """
-        if not hasattr(self, "test_data"):
-            self._load_test(ntests=n_test)
-        if sys_idx is not None:
-            idx = sys_idx
-        else:
-            idx = self.pick_idx
-
-        test_system_data = {}
-        for nn in self.test_data:
-            test_system_data[nn] = self.test_data[nn][idx]
-        test_system_data["natoms_vec"] = self.natoms_vec[idx]
-        test_system_data["default_mesh"] = self.default_mesh[idx]
-        return test_system_data
-
-    def get_sys_ntest(self, sys_idx=None):
-        """Get number of tests for the currently selected system,
-        or one defined by sys_idx.
-        """
-        if sys_idx is not None:
-            return self.test_size[sys_idx]
-        else:
-            return self.test_size[self.pick_idx]
-
-    def get_type_map(self) -> List[str]:
-        """Get the type map."""
-        return self.type_map
-
-    def get_nbatches(self) -> int:
-        """Get the total number of batches."""
-        return self.nbatches
-
-    def get_ntypes(self) -> int:
-        """Get the number of types."""
-        return self.sys_ntypes
-
-    def get_nsystems(self) -> int:
-        """Get the number of data systems."""
-        return self.nsystems
-
-    def get_sys(self, idx: int) -> DeepmdData:
-        """Get a certain data system."""
-        return self.data_systems[idx]
-
-    def get_batch_size(self) -> int:
-        """Get the batch size."""
-        return self.batch_size
-
-    def _format_name_length(self, name, width):
-        if len(name) <= width:
-            return "{: >{}}".format(name, width)
-        else:
-            name = name[-(width - 3) :]
-            name = "-- " + name
-            return name
-
-    def print_summary(self, name):
-        # width 65
-        sys_width = 42
-        log.info(
-            f"---Summary of DataSystem: {name:13s}-----------------------------------------------"
-        )
-        log.info("found %d system(s):" % self.nsystems)
-        log.info(
-            ("%s  " % self._format_name_length("system", sys_width))
-            + ("%6s  %6s  %6s  %9s  %3s" % ("natoms", "bch_sz", "n_bch", "prob", "pbc"))
-        )
-        for ii in range(self.nsystems):
-            log.info(
-                "%s  %6d  %6d  %6d  %9.3e  %3s"
-                % (
-                    self._format_name_length(self.system_dirs[ii], sys_width),
-                    self.natoms[ii],
-                    # TODO batch size * nbatches = number of structures
-                    self.batch_size[ii],
-                    self.nbatches[ii],
-                    self.sys_probs[ii],
-                    "T" if self.data_systems[ii].pbc else "F",
-                )
-            )
-        log.info(
-            "--------------------------------------------------------------------------------------"
-        )
-
-    def _make_auto_bs(self, rule):
-        bs = []
-        for ii in self.data_systems:
-            ni = ii.get_natoms()
-            bsi = rule // ni
-            if bsi * ni < rule:
-                bsi += 1
-            bs.append(bsi)
-        return bs
-
-    # ! added by Marián Rynik
-    def _make_auto_ts(self, percent):
-        ts = []
-        for ii in range(self.nsystems):
-            ni = self.batch_size[ii] * self.nbatches[ii]
-            tsi = int(ni * percent / 100)
-            ts.append(tsi)
-
-        return ts
-
-    def _check_type_map_consistency(self, type_map_list):
-        ret = []
-        for ii in type_map_list:
-            if ii is not None:
-                min_len = min([len(ii), len(ret)])
-                for idx in range(min_len):
-                    if ii[idx] != ret[idx]:
-                        raise RuntimeError(f"inconsistent type map: {ret!s} {ii!s}")
-                if len(ii) > len(ret):
-                    ret = ii
-        return ret
-
-
-def process_sys_probs(sys_probs, nbatch):
-    sys_probs = np.array(sys_probs)
-    type_filter = sys_probs >= 0
-    assigned_sum_prob = np.sum(type_filter * sys_probs)
-    # 1e-8 is to handle floating point error; See #1917
-    assert (
-        assigned_sum_prob <= 1.0 + 1e-8
-    ), "the sum of assigned probability should be less than 1"
-    rest_sum_prob = 1.0 - assigned_sum_prob
-    if not np.isclose(rest_sum_prob, 0):
-        rest_nbatch = (1 - type_filter) * nbatch
-        rest_prob = rest_sum_prob * rest_nbatch / np.sum(rest_nbatch)
-        ret_prob = rest_prob + type_filter * sys_probs
-    else:
-        ret_prob = sys_probs
-    assert np.isclose(np.sum(ret_prob), 1), "sum of probs should be 1"
-    return ret_prob
-
-
-def prob_sys_size_ext(keywords, nsystems, nbatch):
-    block_str = keywords.split(";")[1:]
-    block_stt = []
-    block_end = []
-    block_weights = []
-    for ii in block_str:
-        stt = int(ii.split(":")[0])
-        end = int(ii.split(":")[1])
-        weight = float(ii.split(":")[2])
-        assert weight >= 0, "the weight of a block should be no less than 0"
-        block_stt.append(stt)
-        block_end.append(end)
-        block_weights.append(weight)
-    nblocks = len(block_str)
-    block_probs = np.array(block_weights) / np.sum(block_weights)
-    sys_probs = np.zeros([nsystems])
-    for ii in range(nblocks):
-        nbatch_block = nbatch[block_stt[ii] : block_end[ii]]
-        tmp_prob = [float(i) for i in nbatch_block] / np.sum(nbatch_block)
-        sys_probs[block_stt[ii] : block_end[ii]] = tmp_prob * block_probs[ii]
-    return sys_probs
diff --git a/deepmd_utils/utils/errors.py b/deepmd_utils/utils/errors.py
deleted file mode 100644
index 11f42ede96..0000000000
--- a/deepmd_utils/utils/errors.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-class OutOfMemoryError(Exception):
-    """This error is caused by out-of-memory (OOM)."""
diff --git a/deepmd_utils/utils/pair_tab.py b/deepmd_utils/utils/pair_tab.py
deleted file mode 100644
index 4451f53379..0000000000
--- a/deepmd_utils/utils/pair_tab.py
+++ /dev/null
@@ -1,91 +0,0 @@
-#!/usr/bin/env python3
-
-# SPDX-License-Identifier: LGPL-3.0-or-later
-from typing import (
-    Tuple,
-)
-
-import numpy as np
-from scipy.interpolate import (
-    CubicSpline,
-)
-
-
-class PairTab:
-    """Pairwise tabulated potential.
-
-    Parameters
-    ----------
-    filename
-            File name for the short-range tabulated potential.
-            The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes.
-            The first colume is the distance between atoms.
-            The second to the last columes are energies for pairs of certain types.
-            For example we have two atom types, 0 and 1.
-            The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly.
-    """
-
-    def __init__(self, filename: str) -> None:
-        """Constructor."""
-        self.reinit(filename)
-
-    def reinit(self, filename: str) -> None:
-        """Initialize the tabulated interaction.
-
-        Parameters
-        ----------
-        filename
-            File name for the short-range tabulated potential.
-            The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes.
-            The first colume is the distance between atoms.
-            The second to the last columes are energies for pairs of certain types.
-            For example we have two atom types, 0 and 1.
-            The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly.
-        """
-        self.vdata = np.loadtxt(filename)
-        self.rmin = self.vdata[0][0]
-        self.hh = self.vdata[1][0] - self.vdata[0][0]
-        self.nspline = self.vdata.shape[0] - 1
-        ncol = self.vdata.shape[1] - 1
-        n0 = (-1 + np.sqrt(1 + 8 * ncol)) * 0.5
-        self.ntypes = int(n0 + 0.1)
-        assert self.ntypes * (self.ntypes + 1) // 2 == ncol, (
-            "number of volumes provided in %s does not match guessed number of types %d"
-            % (filename, self.ntypes)
-        )
-        self.tab_info = np.array([self.rmin, self.hh, self.nspline, self.ntypes])
-        self.tab_data = self._make_data()
-
-    def get(self) -> Tuple[np.array, np.array]:
-        """Get the serialized table."""
-        return self.tab_info, self.tab_data
-
-    def _make_data(self):
-        data = np.zeros([self.ntypes * self.ntypes * 4 * self.nspline])
-        stride = 4 * self.nspline
-        idx_iter = 0
-        xx = self.vdata[:, 0]
-        for t0 in range(self.ntypes):
-            for t1 in range(t0, self.ntypes):
-                vv = self.vdata[:, 1 + idx_iter]
-                cs = CubicSpline(xx, vv)
-                dd = cs(xx, 1)
-                dd *= self.hh
-                dtmp = np.zeros(stride)
-                for ii in range(self.nspline):
-                    dtmp[ii * 4 + 0] = 2 * vv[ii] - 2 * vv[ii + 1] + dd[ii] + dd[ii + 1]
-                    dtmp[ii * 4 + 1] = (
-                        -3 * vv[ii] + 3 * vv[ii + 1] - 2 * dd[ii] - dd[ii + 1]
-                    )
-                    dtmp[ii * 4 + 2] = dd[ii]
-                    dtmp[ii * 4 + 3] = vv[ii]
-                data[
-                    (t0 * self.ntypes + t1) * stride : (t0 * self.ntypes + t1) * stride
-                    + stride
-                ] = dtmp
-                data[
-                    (t1 * self.ntypes + t0) * stride : (t1 * self.ntypes + t0) * stride
-                    + stride
-                ] = dtmp
-                idx_iter += 1
-        return data
diff --git a/deepmd_utils/utils/path.py b/deepmd_utils/utils/path.py
deleted file mode 100644
index a8e4bc329f..0000000000
--- a/deepmd_utils/utils/path.py
+++ /dev/null
@@ -1,358 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-import os
-from abc import (
-    ABC,
-    abstractmethod,
-)
-from functools import (
-    lru_cache,
-)
-from pathlib import (
-    Path,
-)
-from typing import (
-    List,
-    Optional,
-)
-
-import h5py
-import numpy as np
-from wcmatch.glob import (
-    globfilter,
-)
-
-
-class DPPath(ABC):
-    """The path class to data system (DeepmdData).
-
-    Parameters
-    ----------
-    path : str
-        path
-    """
-
-    def __new__(cls, path: str):
-        if cls is DPPath:
-            if os.path.isdir(path):
-                return super().__new__(DPOSPath)
-            elif os.path.isfile(path.split("#")[0]):
-                # assume h5 if it is not dir
-                # TODO: check if it is a real h5? or just check suffix?
-                return super().__new__(DPH5Path)
-            raise FileNotFoundError("%s not found" % path)
-        return super().__new__(cls)
-
-    @abstractmethod
-    def load_numpy(self) -> np.ndarray:
-        """Load NumPy array.
-
-        Returns
-        -------
-        np.ndarray
-            loaded NumPy array
-        """
-
-    @abstractmethod
-    def load_txt(self, **kwargs) -> np.ndarray:
-        """Load NumPy array from text.
-
-        Returns
-        -------
-        np.ndarray
-            loaded NumPy array
-        """
-
-    @abstractmethod
-    def glob(self, pattern: str) -> List["DPPath"]:
-        """Search path using the glob pattern.
-
-        Parameters
-        ----------
-        pattern : str
-            glob pattern
-
-        Returns
-        -------
-        List[DPPath]
-            list of paths
-        """
-
-    @abstractmethod
-    def rglob(self, pattern: str) -> List["DPPath"]:
-        """This is like calling :meth:`DPPath.glob()` with `**/` added in front
-        of the given relative pattern.
-
-        Parameters
-        ----------
-        pattern : str
-            glob pattern
-
-        Returns
-        -------
-        List[DPPath]
-            list of paths
-        """
-
-    @abstractmethod
-    def is_file(self) -> bool:
-        """Check if self is file."""
-
-    @abstractmethod
-    def is_dir(self) -> bool:
-        """Check if self is directory."""
-
-    @abstractmethod
-    def __truediv__(self, key: str) -> "DPPath":
-        """Used for / operator."""
-
-    @abstractmethod
-    def __lt__(self, other: "DPPath") -> bool:
-        """Whether this DPPath is less than other for sorting."""
-
-    @abstractmethod
-    def __str__(self) -> str:
-        """Represent string."""
-
-    def __repr__(self) -> str:
-        return f"{type(self)} ({self!s})"
-
-    def __eq__(self, other) -> bool:
-        return str(self) == str(other)
-
-    def __hash__(self):
-        return hash(str(self))
-
-
-class DPOSPath(DPPath):
-    """The OS path class to data system (DeepmdData) for real directories.
-
-    Parameters
-    ----------
-    path : str
-        path
-    """
-
-    def __init__(self, path: str) -> None:
-        super().__init__()
-        if isinstance(path, Path):
-            self.path = path
-        else:
-            self.path = Path(path)
-
-    def load_numpy(self) -> np.ndarray:
-        """Load NumPy array.
-
-        Returns
-        -------
-        np.ndarray
-            loaded NumPy array
-        """
-        return np.load(str(self.path))
-
-    def load_txt(self, **kwargs) -> np.ndarray:
-        """Load NumPy array from text.
-
-        Returns
-        -------
-        np.ndarray
-            loaded NumPy array
-        """
-        return np.loadtxt(str(self.path), **kwargs)
-
-    def glob(self, pattern: str) -> List["DPPath"]:
-        """Search path using the glob pattern.
-
-        Parameters
-        ----------
-        pattern : str
-            glob pattern
-
-        Returns
-        -------
-        List[DPPath]
-            list of paths
-        """
-        # currently DPOSPath will only derivative DPOSPath
-        # TODO: discuss if we want to mix DPOSPath and DPH5Path?
-        return [type(self)(p) for p in self.path.glob(pattern)]
-
-    def rglob(self, pattern: str) -> List["DPPath"]:
-        """This is like calling :meth:`DPPath.glob()` with `**/` added in front
-        of the given relative pattern.
-
-        Parameters
-        ----------
-        pattern : str
-            glob pattern
-
-        Returns
-        -------
-        List[DPPath]
-            list of paths
-        """
-        return [type(self)(p) for p in self.path.rglob(pattern)]
-
-    def is_file(self) -> bool:
-        """Check if self is file."""
-        return self.path.is_file()
-
-    def is_dir(self) -> bool:
-        """Check if self is directory."""
-        return self.path.is_dir()
-
-    def __truediv__(self, key: str) -> "DPPath":
-        """Used for / operator."""
-        return type(self)(self.path / key)
-
-    def __lt__(self, other: "DPOSPath") -> bool:
-        """Whether this DPPath is less than other for sorting."""
-        return self.path < other.path
-
-    def __str__(self) -> str:
-        """Represent string."""
-        return str(self.path)
-
-
-class DPH5Path(DPPath):
-    """The path class to data system (DeepmdData) for HDF5 files.
-
-    Notes
-    -----
-    OS - HDF5 relationship:
-        directory - Group
-        file - Dataset
-
-    Parameters
-    ----------
-    path : str
-        path
-    """
-
-    def __init__(self, path: str) -> None:
-        super().__init__()
-        # we use "#" to split path
-        # so we do not support file names containing #...
-        s = path.split("#")
-        self.root_path = s[0]
-        self.root = self._load_h5py(s[0])
-        # h5 path: default is the root path
-        self.name = s[1] if len(s) > 1 else "/"
-
-    @classmethod
-    @lru_cache(None)
-    def _load_h5py(cls, path: str) -> h5py.File:
-        """Load hdf5 file.
-
-        Parameters
-        ----------
-        path : str
-            path to hdf5 file
-        """
-        # this method has cache to avoid duplicated
-        # loading from different DPH5Path
-        # However the file will be never closed?
-        return h5py.File(path, "r")
-
-    def load_numpy(self) -> np.ndarray:
-        """Load NumPy array.
-
-        Returns
-        -------
-        np.ndarray
-            loaded NumPy array
-        """
-        return self.root[self.name][:]
-
-    def load_txt(self, dtype: Optional[np.dtype] = None, **kwargs) -> np.ndarray:
-        """Load NumPy array from text.
-
-        Returns
-        -------
-        np.ndarray
-            loaded NumPy array
-        """
-        arr = self.load_numpy()
-        if dtype:
-            arr = arr.astype(dtype)
-        return arr
-
-    def glob(self, pattern: str) -> List["DPPath"]:
-        """Search path using the glob pattern.
-
-        Parameters
-        ----------
-        pattern : str
-            glob pattern
-
-        Returns
-        -------
-        List[DPPath]
-            list of paths
-        """
-        # got paths starts with current path first, which is faster
-        subpaths = [ii for ii in self._keys if ii.startswith(self.name)]
-        return [
-            type(self)(f"{self.root_path}#{pp}")
-            for pp in globfilter(subpaths, self._connect_path(pattern))
-        ]
-
-    def rglob(self, pattern: str) -> List["DPPath"]:
-        """This is like calling :meth:`DPPath.glob()` with `**/` added in front
-        of the given relative pattern.
-
-        Parameters
-        ----------
-        pattern : str
-            glob pattern
-
-        Returns
-        -------
-        List[DPPath]
-            list of paths
-        """
-        return self.glob("**" + pattern)
-
-    @property
-    def _keys(self) -> List[str]:
-        """Walk all groups and dataset."""
-        return self._file_keys(self.root)
-
-    @classmethod
-    @lru_cache(None)
-    def _file_keys(cls, file: h5py.File) -> List[str]:
-        """Walk all groups and dataset."""
-        l = []
-        file.visit(lambda x: l.append("/" + x))
-        return l
-
-    def is_file(self) -> bool:
-        """Check if self is file."""
-        if self.name not in self._keys:
-            return False
-        return isinstance(self.root[self.name], h5py.Dataset)
-
-    def is_dir(self) -> bool:
-        """Check if self is directory."""
-        if self.name not in self._keys:
-            return False
-        return isinstance(self.root[self.name], h5py.Group)
-
-    def __truediv__(self, key: str) -> "DPPath":
-        """Used for / operator."""
-        return type(self)(f"{self.root_path}#{self._connect_path(key)}")
-
-    def _connect_path(self, path: str) -> str:
-        """Connect self with path."""
-        if self.name.endswith("/"):
-            return f"{self.name}{path}"
-        return f"{self.name}/{path}"
-
-    def __lt__(self, other: "DPH5Path") -> bool:
-        """Whether this DPPath is less than other for sorting."""
-        if self.root_path == other.root_path:
-            return self.name < other.name
-        return self.root_path < other.root_path
-
-    def __str__(self) -> str:
-        """Returns path of self."""
-        return f"{self.root_path}#{self.name}"
diff --git a/deepmd_utils/utils/plugin.py b/deepmd_utils/utils/plugin.py
deleted file mode 100644
index 2a77b744c5..0000000000
--- a/deepmd_utils/utils/plugin.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-"""Base of plugin systems."""
-# copied from https://github.com/deepmodeling/dpdata/blob/a3e76d75de53f6076254de82d18605a010dc3b00/dpdata/plugin.py
-
-from abc import (
-    ABCMeta,
-)
-from typing import (
-    Callable,
-)
-
-
-class Plugin:
-    """A class to register and restore plugins.
-
-    Attributes
-    ----------
-    plugins : Dict[str, object]
-        plugins
-
-    Examples
-    --------
-    >>> plugin = Plugin()
-    >>> @plugin.register("xx")
-        def xxx():
-            pass
-    >>> print(plugin.plugins['xx'])
-    """
-
-    def __init__(self):
-        self.plugins = {}
-
-    def __add__(self, other) -> "Plugin":
-        self.plugins.update(other.plugins)
-        return self
-
-    def register(self, key: str) -> Callable[[object], object]:
-        """Register a plugin.
-
-        Parameters
-        ----------
-        key : str
-            key of the plugin
-
-        Returns
-        -------
-        Callable[[object], object]
-            decorator
-        """
-
-        def decorator(object: object) -> object:
-            self.plugins[key] = object
-            return object
-
-        return decorator
-
-    def get_plugin(self, key) -> object:
-        """Visit a plugin by key.
-
-        Parameters
-        ----------
-        key : str
-            key of the plugin
-
-        Returns
-        -------
-        object
-            the plugin
-        """
-        return self.plugins[key]
-
-
-class VariantMeta:
-    def __call__(cls, *args, **kwargs):
-        """Remove `type` and keys that starts with underline."""
-        obj = cls.__new__(cls, *args, **kwargs)
-        kwargs.pop("type", None)
-        to_pop = []
-        for kk in kwargs:
-            if kk[0] == "_":
-                to_pop.append(kk)
-        for kk in to_pop:
-            kwargs.pop(kk, None)
-        obj.__init__(*args, **kwargs)
-        return obj
-
-
-class VariantABCMeta(VariantMeta, ABCMeta):
-    pass
-
-
-class PluginVariant(metaclass=VariantABCMeta):
-    """A class to remove `type` from input arguments."""
-
-    pass
diff --git a/deepmd_utils/utils/random.py b/deepmd_utils/utils/random.py
deleted file mode 100644
index 8944419412..0000000000
--- a/deepmd_utils/utils/random.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-from typing import (
-    Optional,
-)
-
-import numpy as np
-
-_RANDOM_GENERATOR = np.random.RandomState()
-
-
-def choice(a: np.ndarray, p: Optional[np.ndarray] = None):
-    """Generates a random sample from a given 1-D array.
-
-    Parameters
-    ----------
-    a : np.ndarray
-        A random sample is generated from its elements.
-    p : np.ndarray
-        The probabilities associated with each entry in a.
-
-    Returns
-    -------
-    np.ndarray
-        arrays with results and their shapes
-    """
-    return _RANDOM_GENERATOR.choice(a, p=p)
-
-
-def random(size=None):
-    """Return random floats in the half-open interval [0.0, 1.0).
-
-    Parameters
-    ----------
-    size
-        Output shape.
-
-    Returns
-    -------
-    np.ndarray
-        Arrays with results and their shapes.
-    """
-    return _RANDOM_GENERATOR.random_sample(size)
-
-
-def seed(val: Optional[int] = None):
-    """Seed the generator.
-
-    Parameters
-    ----------
-    val : int
-        Seed.
-    """
-    _RANDOM_GENERATOR.seed(val)
-
-
-def shuffle(x: np.ndarray):
-    """Modify a sequence in-place by shuffling its contents.
-
-    Parameters
-    ----------
-    x : np.ndarray
-        The array or list to be shuffled.
-    """
-    _RANDOM_GENERATOR.shuffle(x)
-
-
-__all__ = ["choice", "random", "seed", "shuffle"]
diff --git a/deepmd_utils/utils/weight_avg.py b/deepmd_utils/utils/weight_avg.py
deleted file mode 100644
index b344d3bb75..0000000000
--- a/deepmd_utils/utils/weight_avg.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-from collections import (
-    defaultdict,
-)
-from typing import (
-    Dict,
-    List,
-    Tuple,
-)
-
-import numpy as np
-
-
-def weighted_average(errors: List[Dict[str, Tuple[float, float]]]) -> Dict:
-    """Compute wighted average of prediction errors (MAE or RMSE) for model.
-
-    Parameters
-    ----------
-    errors : List[Dict[str, Tuple[float, float]]]
-        List: the error of systems
-        Dict: the error of quantities, name given by the key
-        str: the name of the quantity, must starts with 'mae' or 'rmse'
-        Tuple: (error, weight)
-
-    Returns
-    -------
-    Dict
-        weighted averages
-    """
-    sum_err = defaultdict(float)
-    sum_siz = defaultdict(int)
-    for err in errors:
-        for kk, (ee, ss) in err.items():
-            if kk.startswith("mae"):
-                sum_err[kk] += ee * ss
-            elif kk.startswith("rmse"):
-                sum_err[kk] += ee * ee * ss
-            else:
-                raise RuntimeError("unknown error type")
-            sum_siz[kk] += ss
-    for kk in sum_err.keys():
-        if kk.startswith("mae"):
-            sum_err[kk] = sum_err[kk] / sum_siz[kk]
-        elif kk.startswith("rmse"):
-            sum_err[kk] = np.sqrt(sum_err[kk] / sum_siz[kk])
-        else:
-            raise RuntimeError("unknown error type")
-    return sum_err
diff --git a/doc/_static/css/custom.css b/doc/_static/css/custom.css
index 1569dc4a38..d0b761e71d 100644
--- a/doc/_static/css/custom.css
+++ b/doc/_static/css/custom.css
@@ -1,14 +1,22 @@
 /*
  * SPDX-License-Identifier: LGPL-3.0-or-later
  */
-pre{
-	overflow: auto;
+pre {
+  overflow: auto;
 }
-.wy-side-nav-search .wy-dropdown > a img.logo, .wy-side-nav-search > a img.logo {
-    width: 275px;
+.wy-side-nav-search .wy-dropdown > a img.logo,
+.wy-side-nav-search > a img.logo {
+  width: 275px;
+}
+img.platform-icon {
+  height: 2ex;
 }
 @media (prefers-color-scheme: dark) {
-	.wy-side-nav-search .wy-dropdown > a img.logo, .wy-side-nav-search > a img.logo {
-		content: url("../logo-dark.svg");
-	}
+  .wy-side-nav-search .wy-dropdown > a img.logo,
+  .wy-side-nav-search > a img.logo {
+    content: url("../logo-dark.svg");
+  }
+  img.platform-icon {
+    filter: invert(1);
+  }
 }
diff --git a/doc/_static/logo_icon.svg b/doc/_static/logo_icon.svg
new file mode 100644
index 0000000000..d8f6893355
--- /dev/null
+++ b/doc/_static/logo_icon.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" id="图层_1" data-name="图层 1" viewBox="55 60 285 290"><path d="M104,275.34c0-18.87,2.92-37.94,5.78-56.38.69-4.44,1.38-9,2-13.5a53.12,53.12,0,0,0-2,103.76A101.22,101.22,0,0,1,104,275.34Z" class="cls-2"/><path d="M182.1,125.43c-3.12,0-5.65,4-5.65,8.92s2.53,8.91,5.65,8.91,5.64-4,5.64-8.91S185.22,125.43,182.1,125.43Z" class="cls-3"/><path d="M250.82,125.43c-3.12,0-5.65,4-5.65,8.92s2.53,8.91,5.65,8.91,5.65-4,5.65-8.91S253.94,125.43,250.82,125.43Z" class="cls-4"/><path d="M317.05,261.39c-1.19-18.66-5.72-37.25-7.55-55.81-1.85-18.86-4.22-37.65-6.57-56.46-.12-1-.25-1.91-.4-2.86a7.76,7.76,0,0,1,2.5-7.16,22.11,22.11,0,0,0-19.29-38.35,7.93,7.93,0,0,1-7.37-2.14c-15.88-15.76-37.87-23.55-61.94-23.55s-46.05,7.79-61.93,23.55a7.93,7.93,0,0,1-7.37,2.14,22.11,22.11,0,0,0-19.29,38.35,7.76,7.76,0,0,1,2.5,7.16c-.15,1-.28,1.9-.4,2.86q-3.6,28.89-7.21,57.8C118,245,103.05,299.86,137.05,329.86c21.43,18.91,60.16,10.63,63.84-19.92a32.53,32.53,0,0,0-11-28.19,40,40,0,1,1,53.19-.06A32.74,32.74,0,0,0,232,310l.17,1.44c2.78,23.05,26.77,32.23,47.21,26.37C310.77,328.81,318.86,289.59,317.05,261.39ZM227,186.33a4.56,4.56,0,0,1-4.47,3.66H210.37a4.56,4.56,0,0,1-4.48-3.66l-1.16-5.79a20.36,20.36,0,0,0,11.71-9.39,20.27,20.27,0,0,0,11.7,9.39Zm32.44-28.06a3.83,3.83,0,0,0-3.12,2.82c-1.92,9-10.81,15.81-21.49,15.81h-.19c-8.73-.06-15.72-6.47-15.72-14.23V151.51a4.93,4.93,0,0,1,2.47-4.3c5.44-3.08,11.39-9.84,10.13-14.38-2.21-8-26.92-7.63-30,0-1.76,4.35,4.42,11.27,10,14.4a4.92,4.92,0,0,1,2.48,4.3v11.13c0,7.76-7,14.17-15.72,14.23h-.19c-10.68,0-19.57-6.81-21.49-15.81a3.83,3.83,0,0,0-3.12-2.82c-12.86-2.56-20.94-8.13-20.94-20.23,0-22.78,28.62-41.24,63.92-41.24s63.93,18.46,63.93,41.24C280.36,150.14,272.28,155.71,259.42,158.27Z" class="cls-5"/></svg>
diff --git a/doc/_static/pytorch.svg b/doc/_static/pytorch.svg
new file mode 100644
index 0000000000..04aae0c2a3
--- /dev/null
+++ b/doc/_static/pytorch.svg
@@ -0,0 +1 @@
+<svg role="img" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><title>PyTorch icon</title><path d="M12.005 0L4.952 7.053a9.865 9.865 0 000 14.022 9.866 9.866 0 0014.022 0c3.984-3.9 3.986-10.205.085-14.023l-1.744 1.743c2.904 2.905 2.904 7.634 0 10.538s-7.634 2.904-10.538 0-2.904-7.634 0-10.538l4.647-4.646.582-.665zm3.568 3.899a1.327 1.327 0 00-1.327 1.327 1.327 1.327 0 001.327 1.328A1.327 1.327 0 0016.9 5.226 1.327 1.327 0 0015.573 3.9z"/></svg>
diff --git a/doc/_static/tensorflow.svg b/doc/_static/tensorflow.svg
new file mode 100644
index 0000000000..48746104ec
--- /dev/null
+++ b/doc/_static/tensorflow.svg
@@ -0,0 +1 @@
+<svg role="img" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><title>TensorFlow icon</title><path d="M1.292 5.856L11.54 0v24l-4.095-2.378V7.603l-6.168 3.564.015-5.31zm21.43 5.311l-.014-5.31L12.46 0v24l4.095-2.378V14.87l3.092 1.788-.018-4.618-3.074-1.756V7.603l6.168 3.564z"/></svg>
diff --git a/doc/api_op.rst b/doc/api_op.rst
index 9f4c650497..d620ec6ef5 100644
--- a/doc/api_op.rst
+++ b/doc/api_op.rst
@@ -4,7 +4,7 @@ OP API
 op_module
 ---------
 
-.. automodule:: deepmd.env.op_module
+.. automodule:: deepmd.tf.env.op_module
    :members:
    :imported-members:
    :show-inheritance:
@@ -13,7 +13,7 @@ op_module
 op_grads_module
 ---------------
 
-.. automodule:: deepmd.env.op_grads_module
+.. automodule:: deepmd.tf.env.op_grads_module
    :members:
    :imported-members:
    :show-inheritance:
diff --git a/doc/backend.md b/doc/backend.md
new file mode 100644
index 0000000000..2f0bc7ed20
--- /dev/null
+++ b/doc/backend.md
@@ -0,0 +1,57 @@
+# Backend
+
+## Supported backends
+
+DeePMD-kit supports multiple backends: TensorFlow and PyTorch.
+To use DeePMD-kit, you must install at least one backend.
+Each backend does not support all features.
+In the documentation, TensorFlow {{ tensorflow_icon }} and PyTorch {{ pytorch_icon }} icons are used to mark whether a backend supports a feature.
+
+### TensorFlow {{ tensorflow_icon }}
+
+- Model filename extension: `.pb`
+- Checkpoint filename extension: `.meta`, `.index`, `.data-00000-of-00001`
+
+[TensorFlow](https://tensorflow.org) 2.2 or above is required.
+DeePMD-kit does not use the TensorFlow v2 API but uses the TensorFlow v1 API (`tf.compat.v1`) in the graph mode.
+
+### PyTorch {{ pytorch_icon }}
+
+- Model filename extension: `.pth`
+- Checkpoint filename extension: `.pt`
+
+[PyTorch](https://pytorch.org/) 2.0 or above is required.
+While `.pth` and `.pt` are the same in the PyTorch package, they have different meanings in the DeePMD-kit to distinguish the model and the checkpoint.
+
+### DP {{ dpmodel_icon }}
+
+:::{note}
+This backend is only for development and should not take into production.
+:::
+
+- Model filename extension: `.dp`
+
+DP is a reference backend for development, which uses pure [NumPy](https://numpy.org/) to implement models without using any heavy deep-learning frameworks.
+Due to the limitation of NumPy, it doesn't support gradient calculation and thus cannot be used for training.
+As a reference backend, it is not aimed at the best performance, but only the correct results.
+The DP backend uses [HDF5](https://docs.h5py.org/) to store model serialization data, which is backend-independent.
+Only Python inference interface can load this format.
+
+## Switch the backend
+
+### Training
+
+When training and freezing a model, you can use `dp --tf` or `dp --pt` in the command line to switch the backend.
+
+### Inference
+
+When doing inference, DeePMD-kit detects the backend from the model filename.
+For example, when the model filename ends with `.pb` (the ProtoBuf file), DeePMD-kit will consider it using the TensorFlow backend.
+
+## Convert model files between backends
+
+If a model is supported by two backends, one can use [`dp convert-backend`](./cli.rst) to convert the model file between these two backends.
+
+:::{warning}
+Currently, only the `se_e2_a` model fully supports the backend conversion between TensorFlow {{ tensorflow_icon }} and PyTorch {{ pytorch_icon }}.
+:::
diff --git a/doc/cli.rst b/doc/cli.rst
index 668a2df2e3..15891369e3 100644
--- a/doc/cli.rst
+++ b/doc/cli.rst
@@ -4,6 +4,6 @@ Command line interface
 ======================
 
 .. argparse::
-   :module: deepmd.entrypoints.main
+   :module: deepmd.tf.entrypoints.main
    :func: main_parser
    :prog: dp
diff --git a/doc/conf.py b/doc/conf.py
index 63af974a86..58181f9e1c 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -17,107 +17,15 @@
     date,
 )
 
-from deepmd.common import (
+from deepmd.utils.argcheck import (
     ACTIVATION_FN_DICT,
     PRECISION_DICT,
-)
-from deepmd.utils.argcheck import (
     list_to_doc,
 )
 
 sys.path.append(os.path.dirname(__file__))
 import sphinx_contrib_exhale_multiproject  # noqa: F401
 
-
-def mkindex(dirname):
-    dirname = dirname + "/"
-    oldfindex = open(dirname + "index.md")
-    oldlist = oldfindex.readlines()
-    oldfindex.close()
-
-    oldnames = []
-    for entry in oldlist:
-        _name = entry[entry.find("(") + 1 : entry.find(")")]
-        oldnames.append(_name)
-
-    newfindex = open(dirname + "index.md", "a")
-    for root, dirs, files in os.walk(dirname, topdown=False):
-        newnames = [
-            name for name in files if "index.md" not in name and name not in oldnames
-        ]
-        for name in newnames:
-            f = open(dirname + name)
-            _lines = f.readlines()
-            for _headline in _lines:
-                _headline = _headline.strip("#")
-                headline = _headline.strip()
-                if len(headline) == 0 or headline[0] == "." or headline[0] == "=":
-                    continue
-                else:
-                    break
-            longname = "- [" + headline + "]" + "(" + name + ")\n"
-            newfindex.write(longname)
-
-    newfindex.close()
-
-
-def classify_index_TS():
-    dirname = "troubleshooting/"
-    oldfindex = open(dirname + "index.md")
-    oldlist = oldfindex.readlines()
-    oldfindex.close()
-
-    oldnames = []
-    sub_titles = []
-    heads = []
-    while len(oldlist) > 0:
-        entry = oldlist.pop(0)
-        if entry.find("(") >= 0:
-            _name = entry[entry.find("(") + 1 : entry.find(")")]
-            oldnames.append(_name)
-            continue
-        if entry.find("##") >= 0:
-            _name = entry[entry.find("##") + 3 : -1]
-            sub_titles.append(_name)
-            continue
-        entry.strip()
-        if entry != "\n":
-            heads.append(entry)
-
-    newfindex = open(dirname + "index.md", "w")
-    for entry in heads:
-        newfindex.write(entry)
-    newfindex.write("\n")
-    sub_lists = [[], []]
-    for root, dirs, files in os.walk(dirname, topdown=False):
-        newnames = [name for name in files if "index.md" not in name]
-        for name in newnames:
-            f = open(dirname + name)
-            _lines = f.readlines()
-            f.close()
-            for _headline in _lines:
-                _headline = _headline.strip("#")
-                headline = _headline.strip()
-                if len(headline) == 0 or headline[0] == "." or headline[0] == "=":
-                    continue
-                else:
-                    break
-            longname = "- [" + headline + "]" + "(" + name + ")\n"
-            if "howtoset_" in name:
-                sub_lists[1].append(longname)
-            else:
-                sub_lists[0].append(longname)
-
-    newfindex.write("## Trouble shooting\n")
-    for entry in sub_lists[0]:
-        newfindex.write(entry)
-    newfindex.write("\n")
-    newfindex.write("## Parameters setting\n")
-    for entry in sub_lists[1]:
-        newfindex.write(entry)
-    newfindex.close()
-
-
 # -- Project information -----------------------------------------------------
 
 project = "DeePMD-kit"
@@ -169,10 +77,6 @@ def setup(app):
 #     'sphinx.ext.autosummary'
 # ]
 
-# mkindex("troubleshooting")
-# mkindex("development")
-# classify_index_TS()
-
 extensions = [
     "deepmodeling_sphinx",
     "dargs.sphinx",
@@ -188,6 +92,7 @@ def setup(app):
     "breathe",
     "exhale",
     "sphinxcontrib.bibtex",
+    "sphinx_design",
 ]
 
 # breathe_domain_by_extension = {
@@ -213,7 +118,10 @@ def setup(app):
 exhale_projects_args = {
     "cc": {
         "containmentFolder": "./API_CC",
-        "exhaleDoxygenStdin": "INPUT = ../source/api_cc/include/",
+        "exhaleDoxygenStdin": """INPUT = ../source/api_cc/include/
+                                 PREDEFINED += BUILD_TENSORFLOW
+                                               BUILD_PYTORCH
+        """,
         "rootFileTitle": "C++ API",
         "rootFileName": "api_cc.rst",
     },
@@ -275,6 +183,12 @@ def setup(app):
 .. |PRECISION| replace:: {list_to_doc(PRECISION_DICT.keys())}
 """
 
+myst_substitutions = {
+    "tensorflow_icon": """![TensorFlow](/_static/tensorflow.svg){class=platform-icon}""",
+    "pytorch_icon": """![PyTorch](/_static/pytorch.svg){class=platform-icon}""",
+    "dpmodel_icon": """![DP](/_static/logo_icon.svg){class=platform-icon}""",
+}
+
 # -- Options for HTML output -------------------------------------------------
 
 # The theme to use for HTML and HTML Help pages.  See the documentation for
@@ -298,6 +212,8 @@ def setup(app):
 myst_enable_extensions = [
     "dollarmath",
     "colon_fence",
+    "substitution",
+    "attrs_inline",
 ]
 myst_fence_as_directive = ("math",)
 # fix emoji issue in pdf
diff --git a/doc/credits.rst b/doc/credits.rst
index 3fbe1d56d8..64880d9035 100644
--- a/doc/credits.rst
+++ b/doc/credits.rst
@@ -49,6 +49,13 @@ Cite DeePMD-kit and methods
 
    Zhang_2022_DPA1
 
+- If DPA-2 descriptor (`dpa2`) is used,
+
+.. bibliography::
+   :filter: False
+
+   Zhang_2023_DPA2
+
 - If frame-specific parameters (`fparam`, e.g. electronic temperature) is used,
 
 .. bibliography::
diff --git a/doc/data/data-conv.md b/doc/data/data-conv.md
index e8464b1ea9..7634daf5e6 100644
--- a/doc/data/data-conv.md
+++ b/doc/data/data-conv.md
@@ -5,6 +5,7 @@ Two binary formats, NumPy and HDF5, are supported for training. The raw format i
 ## NumPy format
 
 In a system with the Numpy format, the system properties are stored as text files ending with `.raw`, such as `type.raw` and `type_map.raw`, under the system directory. If one needs to train a non-periodic system, an empty `nopbc` file should be put under the system directory. Both input and labeled frame properties are saved as the [NumPy binary data (NPY) files](https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#npy-format) ending with `.npy` in each of the `set.*` directories. Take an example, a system may contain the following files:
+
 ```
 type.raw
 type_map.raw
@@ -18,16 +19,19 @@ set.001/force.npy
 ```
 
 We assume that the atom types do not change in all frames. It is provided by `type.raw`, which has one line with the types of atoms written one by one. The atom types should be integers. For example the `type.raw` of a system that has 2 atoms with 0 and 1:
+
 ```bash
 $ cat type.raw
 0 1
 ```
 
 Sometimes one needs to map the integer types to atom names. The mapping can be given by the file `type_map.raw`. For example
+
 ```bash
 $ cat type_map.raw
 O H
 ```
+
 The type `0` is named by `"O"` and the type `1` is named by `"H"`.
 
 For training models with descriptor `se_atten`, a [new system format](../model/train-se-atten.md#data-format) is supported to put together the frame-sparse systems with the same atom number.
@@ -35,9 +39,11 @@ For training models with descriptor `se_atten`, a [new system format](../model/t
 ## HDF5 format
 
 A system with the HDF5 format has the same structure as the Numpy format, but in an HDF5 file, a system is organized as an [HDF5 group](https://docs.h5py.org/en/stable/high/group.html). The file name of a Numpy file is the key in an HDF5 file, and the data is the value of the key. One needs to use `#` in a DP path to divide the path to the HDF5 file and the HDF5 path:
+
 ```
 /path/to/data.hdf5#/H2O
 ```
+
 Here, `/path/to/data.hdf5` is the file path and `/H2O` is the HDF5 path. All HDF5 paths should start with `/`. There should be some data in the `H2O` group, such as `/H2O/type.raw` and `/H2O/set.000/force.npy`.
 
 An HDF5 file with a large number of systems has better performance than multiple NumPy files in a large cluster.
@@ -47,15 +53,18 @@ An HDF5 file with a large number of systems has better performance than multiple
 A raw file is a plain text file with each information item written in one file and one frame written on one line. **It's not directly supported**, but we provide a tool to convert them.
 
 In the raw format, the property of one frame is provided per line, ending with `.raw`. Take an example, the default files that provide box, coordinate, force, energy and virial are `box.raw`, `coord.raw`, `force.raw`, `energy.raw` and `virial.raw`, respectively. Here is an example of `force.raw`:
+
 ```bash
 $ cat force.raw
 -0.724  2.039 -0.951  0.841 -0.464  0.363
  6.737  1.554 -5.587 -2.803  0.062  2.222
 -1.968 -0.163  1.020 -0.225 -0.789  0.343
 ```
+
 This `force.raw` contains 3 frames with each frame having the forces of 2 atoms, thus it has 3 lines and 6 columns. Each line provides all the 3 force components of 2 atoms in 1 frame. The first three numbers are the 3 force components of the first atom, while the second three numbers are the 3 force components of the second atom. Other files are organized similarly. The number of lines of all raw files should be identical.
 
 One can use the script `$deepmd_source_dir/data/raw/raw_to_set.sh` to convert the prepared raw files to the NumPy format. For example, if we have a raw file that contains 6000 frames,
+
 ```bash
 $ ls
 box.raw  coord.raw  energy.raw  force.raw  type.raw  virial.raw
@@ -69,4 +78,5 @@ making set 2 ...
 $ ls
 box.raw  coord.raw  energy.raw  force.raw  set.000  set.001  set.002  type.raw  virial.raw
 ```
+
 It generates three sets `set.000`, `set.001` and `set.002`, with each set containing 2000 frames in the Numpy format.
diff --git a/doc/data/dpdata.md b/doc/data/dpdata.md
index 9b1a27ce82..63fe4f39c3 100644
--- a/doc/data/dpdata.md
+++ b/doc/data/dpdata.md
@@ -3,16 +3,19 @@
 One can use a convenient tool [`dpdata`](https://github.com/deepmodeling/dpdata) to convert data directly from the output of first principle packages to the DeePMD-kit format.
 
 To install one can execute
+
 ```bash
 pip install dpdata
 ```
 
 An example of converting data [VASP](https://www.vasp.at/) data in `OUTCAR` format to DeePMD-kit data can be found at
+
 ```
 $deepmd_source_dir/examples/data_conv
 ```
 
 Switch to that directory, then one can convert data by using the following python script
+
 ```python
 import dpdata
 
diff --git a/doc/data/index.md b/doc/data/index.md
deleted file mode 100644
index 838265427b..0000000000
--- a/doc/data/index.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Data
-
-In this section, we will introduce how to convert the DFT-labeled data into the data format used by DeePMD-kit.
-
-The DeePMD-kit organizes data in `systems`. Each `system` is composed of a number of `frames`. One may roughly view a `frame` as a snapshot of an MD trajectory, but it does not necessarily come from an MD simulation. A `frame` records the coordinates and types of atoms, cell vectors if the periodic boundary condition is assumed, energy, atomic forces and virials. It is noted that the `frames` in one `system` share the same number of atoms with the same type.
-
-- [System](system.md)
-- [Formats of a system](data-conv.md)
-- [Prepare data with dpdata](dpdata.md)
diff --git a/doc/data/system.md b/doc/data/system.md
index 0ecd0e9119..6ca044f1c9 100644
--- a/doc/data/system.md
+++ b/doc/data/system.md
@@ -4,44 +4,44 @@ DeePMD-kit takes a **system** as the data structure. A snapshot of a system is c
 
 A system should contain system properties, input frame properties, and labeled frame properties. The system property contains the following property:
 
-ID       | Property                | Raw file     | Required/Optional    | Shape                    | Description
--------- | ----------------------  | ------------ | -------------------- | -----------------------  | -----------
-type     | Atom type indexes       | type.raw     | Required             | Natoms                   | Integers that start with 0. If both the training parameter {ref}`type_map <model/type_map>` is set and `type_map.raw` is provided, the system atom type should be mapped to `type_map.raw` in `type.raw` and will be mapped to the model atom type when training; otherwise, the system atom type will be always mapped to the model atom type (whether {ref}`type_map <model/type_map>` is set or not)
-type_map | Atom type names         | type_map.raw | Optional             | Ntypes                   | Atom names that map to atom type, which is unnecessary to be contained in the periodic table. Only works when the training parameter {ref}`type_map <model/type_map>` is set
-nopbc    | Non-periodic system     | nopbc        | Optional             | 1                        | If True, this system is non-periodic; otherwise it's periodic
+| ID       | Property            | Raw file     | Required/Optional | Shape  | Description                                                                                                                                                                                                                                                                                                                                                                                             |
+| -------- | ------------------- | ------------ | ----------------- | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| type     | Atom type indexes   | type.raw     | Required          | Natoms | Integers that start with 0. If both the training parameter {ref}`type_map <model/type_map>` is set and `type_map.raw` is provided, the system atom type should be mapped to `type_map.raw` in `type.raw` and will be mapped to the model atom type when training; otherwise, the system atom type will be always mapped to the model atom type (whether {ref}`type_map <model/type_map>` is set or not) |
+| type_map | Atom type names     | type_map.raw | Optional          | Ntypes | Atom names that map to atom type, which is unnecessary to be contained in the periodic table. Only works when the training parameter {ref}`type_map <model/type_map>` is set                                                                                                                                                                                                                            |
+| nopbc    | Non-periodic system | nopbc        | Optional          | 1      | If True, this system is non-periodic; otherwise it's periodic                                                                                                                                                                                                                                                                                                                                           |
 
 The input frame properties contain the following property, the first axis of which is the number of frames:
 
-ID       | Property                | Raw file       | Unit | Required/Optional    | Shape                    | Description
--------- | ----------------------  | -------------- | ---- | -------------------- | -----------------------  | -----------
-coord    | Atomic coordinates      | coord.raw      | Å    | Required             | Nframes \* Natoms \* 3   |
-box      | Boxes                   | box.raw        | Å    | Required if periodic | Nframes \* 3 \* 3        | in the order `XX XY XZ YX YY YZ ZX ZY ZZ`
-fparam   | Extra frame parameters  | fparam.raw     | Any  | Optional             | Nframes \* Any           |
-aparam   | Extra atomic parameters | aparam.raw     | Any  | Optional             | Nframes \* aparam \* Any |
-numb_copy     | Each frame is copied by the `numb_copy` (int) times | prob.raw     | 1    | Optional             | Nframes                  | Integer; Default is 1 for all frames
+| ID        | Property                                            | Raw file   | Unit | Required/Optional    | Shape                    | Description                               |
+| --------- | --------------------------------------------------- | ---------- | ---- | -------------------- | ------------------------ | ----------------------------------------- |
+| coord     | Atomic coordinates                                  | coord.raw  | Å    | Required             | Nframes \* Natoms \* 3   |
+| box       | Boxes                                               | box.raw    | Å    | Required if periodic | Nframes \* 3 \* 3        | in the order `XX XY XZ YX YY YZ ZX ZY ZZ` |
+| fparam    | Extra frame parameters                              | fparam.raw | Any  | Optional             | Nframes \* Any           |
+| aparam    | Extra atomic parameters                             | aparam.raw | Any  | Optional             | Nframes \* aparam \* Any |
+| numb_copy | Each frame is copied by the `numb_copy` (int) times | prob.raw   | 1    | Optional             | Nframes                  | Integer; Default is 1 for all frames      |
 
 The labeled frame properties are listed as follows, all of which will be used for training if and only if the loss function contains such property:
 
-ID                     | Property                 | Raw file                 | Unit   | Shape                    | Description
----------------------- | -----------------------  | ------------------------ | ----   | -----------------------  | -----------
-energy                 | Frame energies           | energy.raw               | eV     | Nframes                  |
-force                  | Atomic forces            | force.raw                | eV/Å   | Nframes \* Natoms \* 3   |
-virial                 | Frame virial             | virial.raw               | eV     | Nframes \* 9             | in the order `XX XY XZ YX YY YZ ZX ZY ZZ`
-atom_ener              | Atomic energies          | atom_ener.raw            | eV     | Nframes \* Natoms        |
-atom_pref              | Weights of atomic forces | atom_pref.raw            | 1      | Nframes \* Natoms        |
-dipole                 | Frame dipole             | dipole.raw               | Any    | Nframes \* 3             |
-atomic_dipole          | Atomic dipole            | atomic_dipole.raw        | Any    | Nframes \* Natoms \* 3   |
-polarizability         | Frame polarizability     | polarizability.raw       | Any    | Nframes \* 9             | in the order `XX XY XZ YX YY YZ ZX ZY ZZ`
-atomic_polarizability  | Atomic polarizability    | atomic_polarizability.raw| Any    | Nframes \* Natoms \* 9   | in the order `XX XY XZ YX YY YZ ZX ZY ZZ`
-drdq                   | Partial derivative of atomic coordinates with respect to generalized coordinates | drdq.raw | 1 | Nframes \* Natoms \* 3 \* Ngen_coords |
+| ID                    | Property                                                                         | Raw file                  | Unit | Shape                                 | Description                               |
+| --------------------- | -------------------------------------------------------------------------------- | ------------------------- | ---- | ------------------------------------- | ----------------------------------------- |
+| energy                | Frame energies                                                                   | energy.raw                | eV   | Nframes                               |
+| force                 | Atomic forces                                                                    | force.raw                 | eV/Å | Nframes \* Natoms \* 3                |
+| virial                | Frame virial                                                                     | virial.raw                | eV   | Nframes \* 9                          | in the order `XX XY XZ YX YY YZ ZX ZY ZZ` |
+| atom_ener             | Atomic energies                                                                  | atom_ener.raw             | eV   | Nframes \* Natoms                     |
+| atom_pref             | Weights of atomic forces                                                         | atom_pref.raw             | 1    | Nframes \* Natoms                     |
+| dipole                | Frame dipole                                                                     | dipole.raw                | Any  | Nframes \* 3                          |
+| atomic_dipole         | Atomic dipole                                                                    | atomic_dipole.raw         | Any  | Nframes \* Natoms \* 3                |
+| polarizability        | Frame polarizability                                                             | polarizability.raw        | Any  | Nframes \* 9                          | in the order `XX XY XZ YX YY YZ ZX ZY ZZ` |
+| atomic_polarizability | Atomic polarizability                                                            | atomic_polarizability.raw | Any  | Nframes \* Natoms \* 9                | in the order `XX XY XZ YX YY YZ ZX ZY ZZ` |
+| drdq                  | Partial derivative of atomic coordinates with respect to generalized coordinates | drdq.raw                  | 1    | Nframes \* Natoms \* 3 \* Ngen_coords |
 
 In general, we always use the following convention of units:
 
-Property | Unit
----------| ----
-Time     | ps
-Length   | Å
-Energy   | eV
-Force    | eV/Å
-Virial   | eV
-Pressure | Bar
+| Property | Unit |
+| -------- | ---- |
+| Time     | ps   |
+| Length   | Å    |
+| Energy   | eV   |
+| Force    | eV/Å |
+| Virial   | eV   |
+| Pressure | Bar  |
diff --git a/doc/development/cmake.md b/doc/development/cmake.md
index 3073327856..f8508d8992 100644
--- a/doc/development/cmake.md
+++ b/doc/development/cmake.md
@@ -9,11 +9,13 @@ find_package(DeePMD REQUIRED)
 Note that you may need to add ${deepmd_root} to the cached CMake variable `CMAKE_PREFIX_PATH`.
 
 To link against the C interface library, using
+
 ```cmake
 target_link_libraries(some_library PRIVATE DeePMD::deepmd_c)
 ```
 
 To link against the C++ interface library, using
+
 ```cmake
 target_link_libraries(some_library PRIVATE DeePMD::deepmd_cc)
 ```
diff --git a/doc/development/coding-conventions.rst b/doc/development/coding-conventions.rst
index ad4203ee4f..137b0d0d51 100644
--- a/doc/development/coding-conventions.rst
+++ b/doc/development/coding-conventions.rst
@@ -30,7 +30,7 @@ Rules
 -----
 
 The code must be compatible with the oldest supported version of python
-which is 3.7
+which is 3.8.
 
 The project follows the generic coding conventions as
 specified in the `Style Guide for Python Code`_, `Docstring
diff --git a/doc/development/create-a-model-pt.md b/doc/development/create-a-model-pt.md
new file mode 100644
index 0000000000..35d81b364a
--- /dev/null
+++ b/doc/development/create-a-model-pt.md
@@ -0,0 +1,163 @@
+# Create a model in PyTorch
+
+If you'd like to create a new model that isn't covered by the existing DeePMD-kit library, but reuse DeePMD-kit's other efficient modules such as data processing, trainner, etc, you may want to read this section.
+
+To incorporate your custom model you'll need to:
+
+1. Register and implement new components (e.g. descriptor) in a Python file.
+2. Register new arguments for user inputs.
+3. Package new codes into a Python package.
+4. Test new models.
+
+## Design a new component
+
+With DeePMD-kit v3, we have expanded support to include two additional backends alongside TensorFlow: the PyTorch backend and the framework-independent backend (dpmodel). The PyTorch backend adopts a highly modularized design to provide flexibility and extensibility. It ensures a consistent experience for both training and inference, aligning with the TensorFlow backend.
+
+The framework-independent backend is implemented in pure NumPy, serving as a reference backend to ensure consistency in tests. Its design pattern closely parallels that of the PyTorch backend.
+
+### New descriptors
+
+When creating a new descriptor, it is essential to inherit from both the {py:class}`deepmd.pt.model.descriptor.base_descriptor.BaseDescriptor` class and the {py:class}`torch.nn.Module` class. Abstract methods, including {py:class}`deepmd.pt.model.descriptor.base_descriptor.BaseDescriptor.forward`, must be implemented, while others remain optional. It is crucial to adhere to the original method arguments without any modifications. Once the implementation is complete, the next step involves registering the component with a designated key:
+
+```py
+from deepmd.pt.model.descriptor.base_descriptor import (
+    BaseDescriptor,
+)
+
+
+@BaseDescriptor.register("some_descrpt")
+class SomeDescript(BaseDescriptor, torch.nn.Module):
+    def __init__(self, arg1: bool, arg2: float) -> None:
+        pass
+
+    def get_rcut(self) -> float:
+        pass
+
+    def get_nnei(self) -> int:
+        pass
+
+    def get_ntypes(self) -> int:
+        pass
+
+    def get_dim_out(self) -> int:
+        pass
+
+    def get_dim_emb(self) -> int:
+        pass
+
+    def mixed_types(self) -> bool:
+        pass
+
+    def forward(
+        self,
+        coord_ext: torch.Tensor,
+        atype_ext: torch.Tensor,
+        nlist: torch.Tensor,
+        mapping: Optional[torch.Tensor] = None,
+    ):
+        pass
+
+    def serialize(self) -> dict:
+        pass
+
+    def deserialize(cls, data: dict) -> "SomeDescript":
+        pass
+
+    def update_sel(cls, global_jdata: dict, local_jdata: dict):
+        pass
+```
+
+The serialize and deserialize methods are important for cross-backend model conversion.
+
+### New fitting nets
+
+In many instances, there is no requirement to create a new fitting net. For fitting user-defined scalar properties, the {py:class}`deepmd.pt.model.task.ener.InvarFitting` class can be utilized. However, if there is a need for a new fitting net, one should inherit from both the {py:class}`deepmd.pt.model.task.base_fitting.BaseFitting` class and the {py:class}`torch.nn.Module` class. Alternatively, for a more straightforward approach, inheritance from the {py:class}`deepmd.pt.model.task.fitting.GeneralFitting` class is also an option.
+
+```py
+from deepmd.pt.model.task.fitting import (
+    GeneralFitting,
+)
+from deepmd.dpmodel import (
+    FittingOutputDef,
+    fitting_check_output,
+)
+
+
+@GeneralFitting.register("some_fitting")
+@fitting_check_output
+class SomeFittingNet(GeneralFitting):
+    def __init__(self, arg1: bool, arg2: float) -> None:
+        pass
+
+    def forward(
+        self,
+        descriptor: torch.Tensor,
+        atype: torch.Tensor,
+        gr: Optional[torch.Tensor] = None,
+        g2: Optional[torch.Tensor] = None,
+        h2: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+    ):
+        pass
+
+    def output_def(self) -> FittingOutputDef:
+        pass
+```
+
+### New models
+
+The PyTorch backend's model architecture is meticulously structured with multiple layers of abstraction, ensuring a high degree of flexibility. Typically, the process commences with an atomic model responsible for atom-wise property calculations. This atomic model inherits from both the {py:class}`deepmd.pt.model.atomic_model.base_atomic_model.BaseAtomicModel` class and the {py:class}`torch.nn.Module` class.
+
+Subsequently, the `AtomicModel` is encapsulated using the `make_model(AtomicModel)` function, which leverages the `deepmd.pt.model.model.make_model.make_model` function. The purpose of the `make_model` wrapper is to facilitate the translation between atomic property predictions and the extended property predictions and differentiation , e.g. the reduction of atomic energy contribution and the autodiff for calculating the forces and virial. The developers usually need to implement an `AtomicModel` not a `Model`.
+
+```py
+from deepmd.pt.model.atomic_model.base_atomic_model import (
+    BaseAtomicModel,
+)
+
+
+class SomeAtomicModel(BaseAtomicModel, torch.nn.Module):
+    def __init__(self, arg1: bool, arg2: float) -> None:
+        pass
+
+    def forward_atomic(self):
+        pass
+```
+
+## Register new arguments
+
+To let someone uses your new component in their input file, you need to create a new method that returns some `Argument` of your new component, and then register new arguments. For example, the code below
+
+```py
+from typing import List
+
+from dargs import Argument
+from deepmd.utils.argcheck import descrpt_args_plugin
+
+
+@descrpt_args_plugin.register("some_descrpt")
+def descrpt_some_args() -> List[Argument]:
+    return [
+        Argument("arg1", bool, optional=False, doc="balabala"),
+        Argument("arg2", float, optional=True, default=6.0, doc="haha"),
+    ]
+```
+
+allows one to use your new descriptor as below:
+
+```json
+"descriptor" :{
+    "type": "some_descrpt",
+    "arg1": true,
+    "arg2": 6.0
+}
+```
+
+The arguments here should be consistent with the class arguments of your new component.
+
+## Unit tests
+
+When transferring features from another backend to the PyTorch backend, it is essential to include a regression test in `/source/tests/consistent` to validate the consistency of the PyTorch backend with other backends. Presently, the regression tests cover self-consistency and cross-backend consistency between TensorFlow, PyTorch, and DP (Numpy) through the serialization/deserialization technique.
+
+During the development of new components within the PyTorch backend, it is necessary to provide a DP (Numpy) implementation and incorporate corresponding regression tests. For PyTorch components, developers are also required to include a unit test using `torch.jit`.
diff --git a/doc/development/create-a-model.md b/doc/development/create-a-model-tf.md
similarity index 83%
rename from doc/development/create-a-model.md
rename to doc/development/create-a-model-tf.md
index 6634403021..b39313a8d3 100644
--- a/doc/development/create-a-model.md
+++ b/doc/development/create-a-model-tf.md
@@ -1,8 +1,9 @@
-# Create a model
+# Create a model in TensorFlow
 
 If you'd like to create a new model that isn't covered by the existing DeePMD-kit library, but reuse DeePMD-kit's other efficient modules such as data processing, trainner, etc, you may want to read this section.
 
 To incorporate your custom model you'll need to:
+
 1. Register and implement new components (e.g. descriptor) in a Python file. You may also want to register new TensorFlow OPs if necessary.
 2. Register new arguments for user inputs.
 3. Package new codes into a Python package.
@@ -10,11 +11,12 @@ To incorporate your custom model you'll need to:
 
 ## Design a new component
 
-When creating a new component, take descriptor as the example, you should inherit {py:class}`deepmd.descriptor.descriptor.Descriptor` class and override several methods. Abstract methods such as {py:class}`deepmd.descriptor.descriptor.Descriptor.build` must be implemented and others are not. You should keep arguments of these methods unchanged.
+When creating a new component, take descriptor as the example, one should inherit from the {py:class}`deepmd.tf.descriptor.descriptor.Descriptor` class and override several methods. Abstract methods such as {py:class}`deepmd.tf.descriptor.descriptor.Descriptor.build` must be implemented and others are not. You should keep arguments of these methods unchanged.
 
 After implementation, you need to register the component with a key:
+
 ```py
-from deepmd.descriptor import Descriptor
+from deepmd.tf.descriptor import Descriptor
 
 
 @Descriptor.register("some_descrpt")
diff --git a/doc/development/type-embedding.md b/doc/development/type-embedding.md
index 5919d6c944..10eeed6ee9 100644
--- a/doc/development/type-embedding.md
+++ b/doc/development/type-embedding.md
@@ -1,11 +1,15 @@
 # Atom Type Embedding
+
 ## Overview
+
 Here is an overview of the DeePMD-kit algorithm. Given a specific centric atom, we can obtain the matrix describing its local environment, named $\mathcal R$. It consists of the distance between the centric atom and its neighbors, as well as a direction vector. We can embed each distance into a vector of $M_1$ dimension by an `embedding net`, so the environment matrix $\mathcal R$ can be embedded into matrix $\mathcal G$. We can thus extract a descriptor vector (of $M_1 \times M_2$ dim) of the centric atom from the $\mathcal G$ by some matrix multiplication, and put the descriptor into `fitting net` to get the predicted energy $E$. The vanilla version of DeePMD-kit builds `embedding net` and `fitting net` relying on the atom type, resulting in $O(N)$ memory usage. After applying atom type embedding, in DeePMD-kit v2.0, we can share one `embedding net` and one `fitting net` in total, which reduces training complexity largely.
 
 ## Preliminary
+
 In the following chart, you can find the meaning of symbols used to clarify the atom-type embedding algorithm.
 
 <!-- GitHub Markdown cannot render math in a table... -->
+
 $i$: Type of centric atom
 
 $j$: Type of neighbor atom
@@ -40,8 +44,10 @@ $$E = F( [ \text{Multi}( \mathcal G( [s_{ij}, A(j)] ) ), A(j)] )$$
 The difference between the two variants above is whether using the information of centric atom when generating the descriptor. Users can choose by modifying the `type_one_side` hyper-parameter in the input JSON file.
 
 ## How to use
+
 A detailed introduction can be found at [`se_e2_a_tebd`](../model/train-se-e2-a-tebd.md). Looking for a fast start-up, you can simply add a `type_embedding` section in the input JSON file as displayed in the following, and the algorithm will adopt the atom type embedding algorithm automatically.
 An example of `type_embedding` is like
+
 ```json
     "type_embedding":{
        "neuron":    [2, 4, 8],
@@ -50,19 +56,26 @@ An example of `type_embedding` is like
     }
 ```
 
-
 ## Code Modification
+
 Atom-type embedding can be applied to varied `embedding net` and `fitting net`, as a result, we build a class `TypeEmbedNet` to support this free combination. In the following, we will go through the execution process of the code to explain our code modification.
 
 ### trainer (train/trainer.py)
+
 In trainer.py, it will parse the parameter from the input JSON file. If a `type_embedding` section is detected, it will build a `TypeEmbedNet`, which will be later input in the `model`. `model` will be built in the function `_build_network`.
+
 ### model (model/ener.py)
+
 When building the operation graph of the `model` in `model.build`. If a `TypeEmbedNet` is detected, it will build the operation graph of `type embed net`, `embedding net` and `fitting net` by order. The building process of `type embed net` can be found in `TypeEmbedNet.build`, which output the type embedding vector of each atom type (of [$\text{ntypes} \times \text{nchanl}$] dimensions). We then save the type embedding vector into `input_dict`, so that they can be fetched later in `embedding net` and `fitting net`.
-### embedding net (descriptor/se*.py)
+
+### embedding net (descriptor/se\*.py)
+
 In `embedding net`, we shall take local environment $\mathcal R$ as input and output matrix $\mathcal G$. Functions called in this process by the order is
+
 ```
 build -> _pass_filter -> _filter -> _filter_lower
 ```
+
 `_pass_filter`: It will first detect whether an atom type embedding exists, if so, it will apply atom type embedding algorithm and doesn't divide the input by type.
 
 `_filter`: It will call `_filter_lower` function to obtain the result of matrix multiplication ($\mathcal G^T\cdot \mathcal R$), do further multiplication involved in $\text{Multi}(\cdot)$, and finally output the result of descriptor vector of $M_1 \times M_2$ dim.
@@ -70,8 +83,8 @@ build -> _pass_filter -> _filter -> _filter_lower
 `_filter_lower`: The main function handling input modification. If type embedding exists, it will call `_concat_type_embedding` function to concat the first column of input $\mathcal R$ (the column of $s_{ij}$) with the atom type embedding information. It will decide whether to use the atom type embedding vector of the centric atom according to the value of `type_one_side` (if set **True**, then we only use the vector of the neighbor atom). The modified input will be put into the `fitting net` to get $\mathcal G$ for further matrix multiplication stage.
 
 ### fitting net (fit/ener.py)
-In `fitting net`, it takes the descriptor vector as input, whose dimension is [natoms, $M_1\times M_2$]. Because we need to involve information on the centric atom in this step, we need to generate a matrix named `atype_embed` (of dim [natoms, nchanl]), in which each row is the type embedding vector of the specific centric atom. The input is sorted by type of centric atom, we also know the number of a particular atom type (stored in `natoms[2+i]`), thus we get the type vector of the centric atom. In the build phase of the fitting net, it will check whether type embedding exists in `input_dict` and fetch them. After that, call `embed_atom_type` function to look up the embedding vector for the type vector of the centric atom to obtain `atype_embed`, and concat input with it ([input, atype_embed]). The modified input goes through `fitting` net` to get predicted energy.
 
+In `fitting net`, it takes the descriptor vector as input, whose dimension is [natoms, $M_1\times M_2$]. Because we need to involve information on the centric atom in this step, we need to generate a matrix named `atype_embed` (of dim [natoms, nchanl]), in which each row is the type embedding vector of the specific centric atom. The input is sorted by type of centric atom, we also know the number of a particular atom type (stored in `natoms[2+i]`), thus we get the type vector of the centric atom. In the build phase of the fitting net, it will check whether type embedding exists in `input_dict` and fetch them. After that, call `embed_atom_type` function to look up the embedding vector for the type vector of the centric atom to obtain `atype_embed`, and concat input with it ([input, atype_embed]). The modified input goes through `fitting` net` to get predicted energy.
 
 :::{note}
 You can't apply the compression method while using atom-type embedding.
diff --git a/doc/environment.yml b/doc/environment.yml
index 97060c3004..85d5a97c5b 100644
--- a/doc/environment.yml
+++ b/doc/environment.yml
@@ -7,7 +7,7 @@ dependencies:
   - python=3.9
   - pip>=20.1
   - pip:
-    - ..[docs,cpu]
-    - "exhale @ https://github.com/svenevs/exhale/archive/2759a394268307b88f5440487ae0920ee4ebf81e.zip"
-    # https://github.com/mcmtroffaes/sphinxcontrib-bibtex/issues/309
-    - docutils!=0.18.*,!=0.19.*
+      - ..[docs,cpu,torch]
+      - "exhale @ https://github.com/svenevs/exhale/archive/2759a394268307b88f5440487ae0920ee4ebf81e.zip"
+      # https://github.com/mcmtroffaes/sphinxcontrib-bibtex/issues/309
+      - docutils!=0.18.*,!=0.19.*
diff --git a/doc/freeze/compress.md b/doc/freeze/compress.md
index 7394f77143..01cc9fa3a8 100644
--- a/doc/freeze/compress.md
+++ b/doc/freeze/compress.md
@@ -1,4 +1,8 @@
-# Compress a model
+# Compress a model {{ tensorflow_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}
+:::
 
 ## Theory
 
@@ -7,37 +11,46 @@ The compression of the DP model uses three techniques, tabulated inference, oper
 For better performance, the NN inference can be replaced by tabulated function evaluations if the input of the NN is of dimension one.
 The idea is to approximate the output of the NN by a piece-wise polynomial fitting.
 The input domain (a compact domain in $\mathbb R$) is divided into $L_c$ equally spaced intervals, in which we apply a fifth-order polynomial $g^l_m(x)$ approximation of the $m$-th output component of the NN function:
+
 ```math
     g^l_m(x) = a^l_m x^5 + b^l_m x^4 + c^l_m x^3 + d^l_m x^2 + e^l_m x + f^l_m,\quad
     x \in [x_l, x_{l+1}),
 ```
+
 where $l=1,2,\dots,L_c$ is the index of the intervals, $x_1, \dots, x_{L_c}, x_{L_c+1}$ are the endpoints of the intervals, and $a^l_m$, $b^l_m$, $c^l_m$, $d^l_m$, $e^l_m$, and $f^l_m$ are the fitting parameters.
 The fitting parameters can be computed by the equations below:
+
 ```math
     a^l_m = \frac{1}{2\Delta x_l^5}[12h_{m,l}-6(y'_{m,l+1}+y'_{m,l})\Delta x_l + (y''_{m,l+1}-y''_{m,l})\Delta x_l^2],
 ```
+
 ```math
     b^l_m = \frac{1}{2\Delta x_l^4}[-30h_{m,l} +(14y'_{m,l+1}+16y'_{m,l})\Delta x_l + (-2y''_{m,l+1}+3y''_{m,l})\Delta x_l^2],
 ```
+
 ```math
     c^l_m = \frac{1}{2\Delta x_l^3}[20h_{m,l}-(8y'_{m,l+1}+12y'_{m,l})\Delta x_l + (y''_{m,l+1}-3y''_{m,l})\Delta x_l^2],
 ```
+
 ```math
     d^l_m = \frac{1}{2}y''_{m,l},
 ```
+
 ```math
     e^l_m = y_{m,l}',
 ```
+
 ```math
     f^l_m = y_{m,l},
 ```
+
 where $\Delta x_l=x_{l+1}-x_l$ denotes the size of the interval. $h_{m,l}=y_{m,l+1}-y_{m,l}$. $y_{m,l} = y_m(x_l)$, $y'_{m,l} = y'_m(x_l)$ and $y''_{m,l} = y''_m(x_l)$ are the value, the first-order derivative, and the second-order derivative of the $m$-th component of the target NN function at the interval point $x_l$, respectively.
 The first and second-order derivatives are easily calculated by the back-propagation of the NN functions.
 
-In the standard DP model inference, taking the [two-body embedding descriptor](../model/train-se-e2-a.md) as an example, the matrix product $(\mathcal G^i)^T \mathcal R$ requires the transfer of the tensor  $\mathcal G^i$ between the register and the host/device memories, which usually becomes the bottle-neck of the computation due to the relatively small memory bandwidth of the GPUs.
+In the standard DP model inference, taking the [two-body embedding descriptor](../model/train-se-e2-a.md) as an example, the matrix product $(\mathcal G^i)^T \mathcal R$ requires the transfer of the tensor $\mathcal G^i$ between the register and the host/device memories, which usually becomes the bottle-neck of the computation due to the relatively small memory bandwidth of the GPUs.
 The compressed DP model merges the matrix multiplication $(\mathcal G^i)^T \mathcal R$ with the tabulated inference step.
 More specifically, once one column of the $(\mathcal G^i)^T$ is evaluated, it is immediately multiplied with one row of the environment matrix in the register, and the outer product is deposited to the result of $(\mathcal G^i)^T \mathcal R$.
-By the operator merging technique, the allocation of  $\mathcal G^i$ and the memory movement between register and host/device memories is avoided.
+By the operator merging technique, the allocation of $\mathcal G^i$ and the memory movement between register and host/device memories is avoided.
 The operator merging of the three-body embedding can be derived analogously.
 
 The first dimension, $N_c$, of the environment ($\mathcal R^i$) and embedding ($\mathcal G^i$) matrices is the expected maximum number of neighbors.
@@ -45,19 +58,24 @@ If the number of neighbors of an atom is smaller than $N_c$, the corresponding p
 In practice, if the real number of neighbors is significantly smaller than $N_c$, a notable operation is spent on the multiplication of padding zeros.
 In the compressed DP model, the number of neighbors is precisely indexed at the tabulated inference stage, further saving computational costs.[^1]
 
-[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
 
 ## Instructions
 
 Once the frozen model is obtained from DeePMD-kit, we can get the neural network structure and its parameters (weights, biases, etc.) from the trained model, and compress it in the following way:
+
 ```bash
 dp compress -i graph.pb -o graph-compress.pb
 ```
+
 where `-i` gives the original frozen model, `-o` gives the compressed model. Several other command line options can be passed to `dp compress`, which can be checked with
+
 ```bash
 $ dp compress --help
 ```
+
 An explanation will be provided
+
 ```
 usage: dp compress [-h] [-v {DEBUG,3,INFO,2,WARNING,1,ERROR,0}] [-l LOG_PATH]
                    [-m {master,collect,workers}] [-i INPUT] [-o OUTPUT]
@@ -114,11 +132,12 @@ optional arguments:
                         The training script of the input frozen model
                         (default: None)
 ```
+
 **Parameter explanation**
 
 Model compression, which includes tabulating the embedding net.
-The table is composed of fifth-order polynomial coefficients and is assembled from two sub-tables. For model descriptor with `se_e2_a` type, the first sub-table takes the stride(parameter) as its uniform stride, while the second sub-table takes 10 * stride as its uniform stride; For model descriptor with `se_e3` type, the first sub-table takes 10 * stride as it's uniform stride, while the second sub-table takes 100 * stride as it's uniform stride.
-The range of the first table is automatically detected by DeePMD-kit, while the second table ranges from the first table's upper boundary(upper) to the extrapolate(parameter) * upper.
+The table is composed of fifth-order polynomial coefficients and is assembled from two sub-tables. For model descriptor with `se_e2_a` type, the first sub-table takes the stride(parameter) as its uniform stride, while the second sub-table takes 10 _ stride as its uniform stride; For model descriptor with `se_e3` type, the first sub-table takes 10 _ stride as it's uniform stride, while the second sub-table takes 100 _ stride as it's uniform stride.
+The range of the first table is automatically detected by DeePMD-kit, while the second table ranges from the first table's upper boundary(upper) to the extrapolate(parameter) _ upper.
 Finally, we added a check frequency parameter. It indicates how often the program checks for overflow(if the input environment matrix overflows the first or second table range) during the MD inference.
 
 **Justification of model compression**
@@ -127,14 +146,14 @@ Model compression, with little loss of accuracy, can greatly speed up MD inferen
 
 **Acceptable original model version**
 
-The model compression interface requires the version of DeePMD-kit used in the original model generation should be `2.0.0-alpha.0` or above. If one has a frozen 1.2 or 1.3 model, one can upgrade it through the `dp convert-from` interface. (eg: ```dp convert-from 1.2/1.3 -i old_frozen_model.pb -o new_frozen_model.pb```)
+The model compression interface requires the version of DeePMD-kit used in the original model generation should be `2.0.0-alpha.0` or above. If one has a frozen 1.2 or 1.3 model, one can upgrade it through the `dp convert-from` interface. (eg: `dp convert-from 1.2/1.3 -i old_frozen_model.pb -o new_frozen_model.pb`)
 
 **Acceptable descriptor type**
 
 Descriptors with `se_e2_a`, `se_e3`, `se_e2_r` and `se_atten_v2` types are supported by the model compression feature. `Hybrid` mixed with the above descriptors is also supported.
 
-
 **Available activation functions for descriptor:**
+
 - tanh
 - gelu
 - relu
diff --git a/doc/freeze/freeze.md b/doc/freeze/freeze.md
index ba0cd44606..b80928a119 100644
--- a/doc/freeze/freeze.md
+++ b/doc/freeze/freeze.md
@@ -1,14 +1,35 @@
 # Freeze a model
 
 The trained neural network is extracted from a checkpoint and dumped into a protobuf(.pb) file. This process is called "freezing" a model. The idea and part of our code are from [Morgan](https://blog.metaflow.fr/tensorflow-how-to-freeze-a-model-and-serve-it-with-a-python-api-d4f3596b3adc). To freeze a model, typically one does
+
+::::{tab-set}
+
+:::{tab-item} TensorFlow {{ tensorflow_icon }}
+
 ```bash
-$ dp freeze -o graph.pb
+$ dp freeze -o model.pb
 ```
-in the folder where the model is trained. The output model is called `graph.pb`.
+
+in the folder where the model is trained. The output model is called `model.pb`.
+
+:::
+
+:::{tab-item} PyTorch {{ pytorch_icon }}
+
+```bash
+$ dp --pt freeze -o model.pth
+```
+
+in the folder where the model is trained. The output model is called `model.pth`.
+
+:::
+
+::::
 
 In [multi-task mode](../train/multi-task-training.md):
+
 - This process will in default output several models, each of which contains the common descriptor and
-one of the user-defined fitting nets in {ref}`fitting_net_dict <model/fitting_net_dict>`, let's name it `fitting_key`, together frozen in `graph_{fitting_key}.pb`.
-Those frozen models are exactly the same as single-task output with fitting net `fitting_key`.
+  one of the user-defined fitting nets in {ref}`fitting_net_dict <model/fitting_net_dict>`, let's name it `fitting_key`, together frozen in `graph_{fitting_key}.pb`.
+  Those frozen models are exactly the same as single-task output with fitting net `fitting_key`.
 - If you add `--united-model` option in this situation,
-the total multi-task model will be frozen into one unit `graph.pb`, which is mainly for multi-task initialization and can not be used directly for inference.
+  the total multi-task model will be frozen into one unit `graph.pb`, which is mainly for multi-task initialization and can not be used directly for inference.
diff --git a/doc/freeze/index.md b/doc/freeze/index.md
deleted file mode 100644
index 0bc3664144..0000000000
--- a/doc/freeze/index.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# Freeze and Compress
-
-- [Freeze a model](freeze.md)
-- [Compress a model](compress.md)
diff --git a/doc/getting-started/quick_start.ipynb b/doc/getting-started/quick_start.ipynb
index ec939265fd..1c53665b7d 100644
--- a/doc/getting-started/quick_start.ipynb
+++ b/doc/getting-started/quick_start.ipynb
@@ -239,7 +239,7 @@
    "id": "a999f41b-e343-4dc2-8499-84fee6e52221",
    "metadata": {},
    "source": [
-    "The DeePMD-kit adopts a compressed data format. All training data should first be converted into this format and can then be used by DeePMD-kit. The data format is explained in detail in the DeePMD-kit manual that can be found in [the DeePMD-kit Data Introduction](../data/index.md)."
+    "The DeePMD-kit adopts a compressed data format. All training data should first be converted into this format and can then be used by DeePMD-kit. The data format is explained in detail in the DeePMD-kit manual that can be found in [the DeePMD-kit Data Introduction](../data/system.md)."
    ]
   },
   {
@@ -1001,7 +1001,7 @@
       "WARNING:tensorflow:From /opt/mamba/lib/python3.10/site-packages/deepmd/utils/batch_size.py:61: is_gpu_available (from tensorflow.python.framework.test_util) is deprecated and will be removed in a future version.\n",
       "Instructions for updating:\n",
       "Use `tf.config.list_physical_devices('GPU')` instead.\n",
-      "WARNING:deepmd.utils.batch_size:You can use the environment variable DP_INFER_BATCH_SIZE tocontrol the inference batch size (nframes * natoms). The default value is 1024.\n"
+      "WARNING:deepmd.tf.utils.batch_size:You can use the environment variable DP_INFER_BATCH_SIZE tocontrol the inference batch size (nframes * natoms). The default value is 1024.\n"
      ]
     }
    ],
diff --git a/doc/index.rst b/doc/index.rst
index b60430b566..7bff8d3957 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -34,6 +34,7 @@ DeePMD-kit is a package written in Python/C++, designed to minimize the effort r
    :numbered:
    :caption: Advanced
 
+   backend
    install/index
    data/index
    model/index
@@ -63,7 +64,8 @@ DeePMD-kit is a package written in Python/C++, designed to minimize the effort r
    :caption: Developer Guide
 
    development/cmake
-   development/create-a-model
+   development/create-a-model-tf
+   development/create-a-model-pt
    development/type-embedding
    development/coding-conventions
    development/cicd
diff --git a/doc/inference/cxx.md b/doc/inference/cxx.md
index 6188daba4c..58c74df068 100644
--- a/doc/inference/cxx.md
+++ b/doc/inference/cxx.md
@@ -1,6 +1,9 @@
 # C/C++ interface
+
 ## C++ interface
+
 The C++ interface of DeePMD-kit is also available for the model interface, which is considered faster than the Python interface. An example `infer_water.cpp` is given below:
+
 ```cpp
 #include "deepmd/DeepPot.h"
 
@@ -14,14 +17,18 @@ int main(){
   dp.compute (e, f, v, coord, atype, cell);
 }
 ```
+
 where `e`, `f` and `v` are predicted energy, force and virial of the system, respectively.
 See {cpp:class}`deepmd::DeepPot` for details.
 
 You can compile `infer_water.cpp` using `gcc`:
+
 ```sh
 gcc infer_water.cpp -L $deepmd_root/lib -L $tensorflow_root/lib -I $deepmd_root/include -Wl,--no-as-needed -ldeepmd_cc -lstdc++ -ltensorflow_cc -Wl,-rpath=$deepmd_root/lib -Wl,-rpath=$tensorflow_root/lib -o infer_water
 ```
+
 and then run the program:
+
 ```sh
 ./infer_water
 ```
@@ -31,6 +38,7 @@ and then run the program:
 Although C is harder to write, the C library will not be affected by different versions of C++ compilers.
 
 An example `infer_water.c` is given below:
+
 ```cpp
 #include <stdio.h>
 #include <stdlib.h>
@@ -62,7 +70,7 @@ int main(){
   free(v);
   free(ae);
   free(av);
-  free(dp);
+  DP_DeleteDeepPot(dp);
 }
 ```
 
@@ -71,10 +79,13 @@ where `e`, `f` and `v` are predicted energy, force and virial of the system, res
 See {cpp:func}`DP_DeepPotCompute` for details.
 
 You can compile `infer_water.c` using `gcc`:
+
 ```sh
 gcc infer_water.c -L $deepmd_root/lib -L $tensorflow_root/lib -I $deepmd_root/include -Wl,--no-as-needed -ldeepmd_c -Wl,-rpath=$deepmd_root/lib -Wl,-rpath=$tensorflow_root/lib -o infer_water
 ```
+
 and then run the program:
+
 ```sh
 ./infer_water
 ```
@@ -103,10 +114,13 @@ Note that the feature of the header-only C++ library is still limited compared t
 See {cpp:class}`deepmd::hpp::DeepPot` for details.
 
 You can compile `infer_water_hpp.cpp` using `gcc`:
+
 ```sh
 gcc infer_water_hpp.cpp -L $deepmd_root/lib -L $tensorflow_root/lib -I $deepmd_root/include -Wl,--no-as-needed -ldeepmd_c -Wl,-rpath=$deepmd_root/lib -Wl,-rpath=$tensorflow_root/lib -o infer_water_hpp
 ```
+
 and then run the program:
+
 ```sh
 ./infer_water_hpp
 ```
diff --git a/doc/inference/index.md b/doc/inference/index.md
deleted file mode 100644
index fa0a747eb4..0000000000
--- a/doc/inference/index.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Inference
-
-Note that the model for inference is required to be compatible with the DeePMD-kit package. See [Model compatibility](../troubleshooting/model-compatability.html) for details.
-
-- [Python interface](python.md)
-- [C++ interface](cxx.md)
-- [Node.js interface](nodejs.md)
diff --git a/doc/inference/nodejs.md b/doc/inference/nodejs.md
index 72bfa6f9d9..8d58881898 100644
--- a/doc/inference/nodejs.md
+++ b/doc/inference/nodejs.md
@@ -9,9 +9,9 @@ const deepmd = require("deepmd-kit");
 
 const dp = new deepmd.DeepPot("graph.pb");
 
-const coord = [1., 0., 0., 0., 0., 1.5, 1., 0., 3.];
+const coord = [1, 0, 0, 0, 0, 1.5, 1, 0, 3];
 const atype = [1, 0, 1];
-const cell = [10., 0., 0., 0., 10., 0., 0., 0., 10.];
+const cell = [10, 0, 0, 0, 10, 0, 0, 0, 10];
 
 const v_coord = new deepmd.vectord(coord.length);
 const v_atype = new deepmd.vectori(atype.length);
@@ -20,15 +20,21 @@ for (var i = 0; i < coord.length; i++) v_coord.set(i, coord[i]);
 for (var i = 0; i < atype.length; i++) v_atype.set(i, atype[i]);
 for (var i = 0; i < cell.length; i++) v_cell.set(i, cell[i]);
 
-var energy = 0.0
+var energy = 0.0;
 var v_forces = new deepmd.vectord();
 var v_virials = new deepmd.vectord();
 
 energy = dp.compute(energy, v_forces, v_virials, v_coord, v_atype, v_cell);
 
 console.log("energy:", energy);
-console.log("forces:", [...Array(v_forces.size()).keys()].map(i => v_forces.get(i)));
-console.log("virials:", [...Array(v_virials.size()).keys()].map(i => v_virials.get(i)));
+console.log(
+  "forces:",
+  [...Array(v_forces.size()).keys()].map((i) => v_forces.get(i)),
+);
+console.log(
+  "virials:",
+  [...Array(v_virials.size()).keys()].map((i) => v_virials.get(i)),
+);
 ```
 
 Energy, forces, and virials will be printed to the screen.
diff --git a/doc/inference/python.md b/doc/inference/python.md
index b5d3ca1efc..73faa2b329 100644
--- a/doc/inference/python.md
+++ b/doc/inference/python.md
@@ -1,6 +1,7 @@
 # Python interface
 
 One may use the python interface of DeePMD-kit for model inference, an example is given as follows
+
 ```python
 from deepmd.infer import DeepPot
 import numpy as np
@@ -11,9 +12,11 @@ cell = np.diag(10 * np.ones(3)).reshape([1, -1])
 atype = [1, 0, 1]
 e, f, v = dp.eval(coord, cell, atype)
 ```
+
 where `e`, `f` and `v` are predicted energy, force and virial of the system, respectively.
 
 Furthermore, one can use the python interface to calculate model deviation.
+
 ```python
 from deepmd.infer import calc_model_devi
 from deepmd.infer import DeepPot as DP
@@ -26,9 +29,14 @@ graphs = [DP("graph.000.pb"), DP("graph.001.pb")]
 model_devi = calc_model_devi(coord, cell, atype, graphs)
 ```
 
-Note that if the model inference or model deviation is performed cyclically, one should avoid calling the same model multiple times. Otherwise, tensorFlow will never release the memory and this may lead to an out-of-memory (OOM) error.
+Note that if the model inference or model deviation is performed cyclically, one should avoid calling the same model multiple times.
+Otherwise, TensorFlow or PyTorch will never release the memory, and this may lead to an out-of-memory (OOM) error.
+
+## External neighbor list algorithm {{ tensorflow_icon }}
 
-## External neighbor list algorithm
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}
+:::
 
 The native neighbor list algorithm of the DeePMD-kit is in $O(N^2)$ complexity ($N$ is the number of atoms).
 While this is not a problem for small systems that quantum methods can afford, the large systems for molecular dynamics have slow performance.
diff --git a/doc/install/build-conda.md b/doc/install/build-conda.md
index 41c9f90a6e..14dee5c263 100644
--- a/doc/install/build-conda.md
+++ b/doc/install/build-conda.md
@@ -1,5 +1,12 @@
 # Building conda packages
 
+::::{danger}
+:::{deprecated} 3.0.0
+The official channel has been deprecated since 3.0.0.
+Refer to [conda-forge documentation](https://conda-forge.org/docs/maintainer/adding_pkgs/) for how to contribute and build packages locally.
+:::
+::::
+
 One may want to keep both convenience and personalization of the DeePMD-kit. To achieve this goal, one can consider building conda packages. We provide building scripts in [deepmd-kit-recipes organization](https://github.com/deepmd-kit-recipes/). These building tools are driven by [conda-build](https://github.com/conda/conda-build) and [conda-smithy](https://github.com/conda-forge/conda-smithy).
 
 For example, if one wants to turn on `MPIIO` package in LAMMPS, go to [`lammps-feedstock`](https://github.com/deepmd-kit-recipes/lammps-feedstock/) repository and modify `recipe/build.sh`. `-D PKG_MPIIO=OFF` should be changed to `-D PKG_MPIIO=ON`. Then go to the main directory and execute
@@ -9,6 +16,7 @@ For example, if one wants to turn on `MPIIO` package in LAMMPS, go to [`lammps-f
 ```
 
 This requires that Docker has been installed. After the building, the packages will be generated in `build_artifacts/linux-64` and `build_artifacts/noarch`, and then one can install then executing
+
 ```sh
 conda create -n deepmd lammps -c file:///path/to/build_artifacts -c https://conda.deepmodeling.com -c nvidia
 ```
diff --git a/doc/install/easy-install-dev.md b/doc/install/easy-install-dev.md
index f3cf52c1f5..bb68272ace 100644
--- a/doc/install/easy-install-dev.md
+++ b/doc/install/easy-install-dev.md
@@ -19,15 +19,24 @@ For CUDA 11.8 support, use the `devel_cu11` tag.
 Below is an one-line shell command to download the [artifact](https://nightly.link/deepmodeling/deepmd-kit/workflows/build_wheel/devel/artifact.zip) containing wheels and install it with `pip`:
 
 ```sh
-pip install -U --pre deepmd-kit[gpu,cu12,lmp] --extra-index-url https://deepmodeling.github.io/deepmd-kit/simple
+pip install -U --pre deepmd-kit[gpu,cu12,lmp,torch] --extra-index-url https://deepmodeling.github.io/deepmd-kit/simple
 ```
 
 `cu12` and `lmp` are optional, which is the same as the stable version.
 
-## Download pre-compiled C Library
+## Download pre-compiled C Library {{ tensorflow_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}
+:::
 
 The [pre-comiled C library](./install-from-c-library.md) can be downloaded from [here](https://nightly.link/deepmodeling/deepmd-kit/workflows/package_c/devel/libdeepmd_c-0-libdeepmd_c.tar.gz.zip), or via a shell command:
 
 ```sh
 wget https://nightly.link/deepmodeling/deepmd-kit/workflows/package_c/devel/libdeepmd_c-0-libdeepmd_c.tar.gz.zip && unzip libdeepmd_c-0-libdeepmd_c.tar.gz.zip
 ```
+
+## Pre-release conda-forge packages
+
+Pre-release conda-forge packages are in `conda-forge/label/deepmd-kit_dev` or `conda-forge/label/deepmd-kit_rc` channels, other than the `conda-forge` channel.
+See [conda-forge documentation](https://conda-forge.org/docs/maintainer/knowledge_base/#pre-release-builds) for more information.
diff --git a/doc/install/easy-install.md b/doc/install/easy-install.md
index 3bc1f4b944..0c56fdb0c5 100644
--- a/doc/install/easy-install.md
+++ b/doc/install/easy-install.md
@@ -6,6 +6,11 @@ After your easy installation, DeePMD-kit (`dp`) and LAMMPS (`lmp`) will be avail
 
 :::{note}
 Note: The off-line packages and conda packages require the [GNU C Library](https://www.gnu.org/software/libc/) 2.17 or above. The GPU version requires [compatible NVIDIA driver](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#minor-version-compatibility) to be installed in advance. It is possible to force conda to [override detection](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-virtual.html#overriding-detected-packages) when installation, but these requirements are still necessary during runtime.
+You can refer to [DeepModeling conda FAQ](https://docs.deepmodeling.com/faq/conda.html) for more information.
+:::
+
+:::{note}
+Python 3.8 or above is required for Python interface.
 :::
 
 - [Install off-line packages](#install-off-line-packages)
@@ -13,73 +18,88 @@ Note: The off-line packages and conda packages require the [GNU C Library](https
 - [Install with docker](#install-with-docker)
 - [Install Python interface with pip](#install-python-interface-with-pip)
 
-
 ## Install off-line packages
-Both CPU and GPU version offline packages are available in [the Releases page](https://github.com/deepmodeling/deepmd-kit/releases).
 
-Some packages are splited into two files due to size limit of GitHub. One may merge them into one after downloading:
+Both CPU and GPU version offline packages are available on [the Releases page](https://github.com/deepmodeling/deepmd-kit/releases).
+
+Some packages are split into two files due to the size limit of GitHub. One may merge them into one after downloading:
+
 ```bash
-cat deepmd-kit-2.1.1-cuda11.6_gpu-Linux-x86_64.sh.0 deepmd-kit-2.1.1-cuda11.6_gpu-Linux-x86_64.sh.1 > deepmd-kit-2.1.1-cuda11.6_gpu-Linux-x86_64.sh
+cat deepmd-kit-2.2.9-cuda118-Linux-x86_64.sh.0 deepmd-kit-2.2.9-cuda118-Linux-x86_64.sh.1 > deepmd-kit-2.2.9-cuda118-Linux-x86_64.sh
 ```
 
 One may enable the environment using
+
 ```bash
 conda activate /path/to/deepmd-kit
 ```
 
 ## Install with conda
-DeePMD-kit is available with [conda](https://github.com/conda/conda). Install [Anaconda](https://www.anaconda.com/distribution/#download-section) or [Miniconda](https://docs.conda.io/en/latest/miniconda.html) first.
 
-### Official channel
+DeePMD-kit is available with [conda](https://github.com/conda/conda). Install [Anaconda](https://www.anaconda.com/distribution/#download-section), [Miniconda](https://docs.conda.io/en/latest/miniconda.html), or [miniforge](https://conda-forge.org/download/) first.
+You can refer to [DeepModeling conda FAQ](https://docs.deepmodeling.com/faq/conda.html) for how to setup a conda environment.
+
+### conda-forge channel
+
+DeePMD-kit is available on the [conda-forge](https://conda-forge.org/) channel:
+
+```bash
+conda create -n deepmd deepmd-kit lammps horovod -c conda-forge
+```
+
+The supported platforms include Linux x86-64, macOS x86-64, and macOS arm64.
+Read [conda-forge FAQ](https://conda-forge.org/docs/user/tipsandtricks.html#installing-cuda-enabled-packages-like-tensorflow-and-pytorch) to learn how to install CUDA-enabled packages.
+
+### Official channel (deprecated)
+
+::::{danger}
+:::{deprecated} 3.0.0
+The official channel has been deprecated since 3.0.0, due to the challenging work of building dependencies for [multiple backends](../backend.md).
+Old packages will still be available at https://conda.deepmodeling.com.
+Maintainers will build packages in the conda-forge organization together with other conda-forge members.
+:::
+::::
 
 One may create an environment that contains the CPU version of DeePMD-kit and LAMMPS:
+
 ```bash
 conda create -n deepmd deepmd-kit=*=*cpu libdeepmd=*=*cpu lammps -c https://conda.deepmodeling.com -c defaults
 ```
 
 Or one may want to create a GPU environment containing [CUDA Toolkit](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#binary-compatibility__table-toolkit-driver):
+
 ```bash
 conda create -n deepmd deepmd-kit=*=*gpu libdeepmd=*=*gpu lammps cudatoolkit=11.6 horovod -c https://conda.deepmodeling.com -c defaults
 ```
+
 One could change the CUDA Toolkit version from `10.2` or `11.6`.
 
-One may specify the DeePMD-kit version such as `2.1.1` using
-```bash
-conda create -n deepmd deepmd-kit=2.1.1=*cpu libdeepmd=2.1.1=*cpu lammps horovod -c https://conda.deepmodeling.com -c defaults
-```
+One may specify the DeePMD-kit version such as `2.2.9` using
 
-One may enable the environment using
 ```bash
-conda activate deepmd
+conda create -n deepmd deepmd-kit=2.2.9=*cpu libdeepmd=2.2.9=*cpu lammps horovod -c https://conda.deepmodeling.com -c defaults
 ```
 
-### conda-forge channel
-
-DeePMD-kit is also available on the [conda-forge](https://conda-forge.org/) channel:
+One may enable the environment using
 
 ```bash
-conda create -n deepmd deepmd-kit lammps horovod -c conda-forge
+conda activate deepmd
 ```
 
-The supported platform includes Linux x86-64, macOS x86-64, and macOS arm64.
-Read [conda-forge FAQ](https://conda-forge.org/docs/user/tipsandtricks.html#installing-cuda-enabled-packages-like-tensorflow-and-pytorch) to learn how to install CUDA-enabled packages.
-
 ## Install with docker
-A docker for installing the DeePMD-kit is available [here](https://github.com/orgs/deepmodeling/packages/container/package/deepmd-kit).
+
+A docker for installing the DeePMD-kit is available [here](https://github.com/deepmodeling/deepmd-kit/pkgs/container/deepmd-kit).
 
 To pull the CPU version:
+
 ```bash
-docker pull ghcr.io/deepmodeling/deepmd-kit:2.1.1_cpu
+docker pull ghcr.io/deepmodeling/deepmd-kit:2.2.8_cpu
 ```
 
 To pull the GPU version:
-```bash
-docker pull ghcr.io/deepmodeling/deepmd-kit:2.1.1_cuda11.6_gpu
-```
 
-To pull the ROCm version:
 ```bash
-docker pull deepmodeling/dpmdkit-rocm:dp2.0.3-rocm4.5.2-tf2.6-lmp29Sep2021
+docker pull ghcr.io/deepmodeling/deepmd-kit:2.2.8_cuda12.0_gpu
 ```
 
 ## Install Python interface with pip
@@ -87,7 +107,7 @@ docker pull deepmodeling/dpmdkit-rocm:dp2.0.3-rocm4.5.2-tf2.6-lmp29Sep2021
 If you have no existing TensorFlow installed, you can use `pip` to install the pre-built package of the Python interface with CUDA 12 supported:
 
 ```bash
-pip install deepmd-kit[gpu,cu12]
+pip install deepmd-kit[gpu,cu12,torch]
 ```
 
 `cu12` is required only when CUDA Toolkit and cuDNN were not installed.
@@ -95,24 +115,29 @@ pip install deepmd-kit[gpu,cu12]
 To install the package built against CUDA 11.8, use
 
 ```bash
+pip install torch --index-url https://download.pytorch.org/whl/cu118
 pip install deepmd-kit-cu11[gpu,cu11]
 ```
 
 Or install the CPU version without CUDA supported:
+
 ```bash
+pip install torch --index-url https://download.pytorch.org/whl/cpu
 pip install deepmd-kit[cpu]
 ```
 
-[The LAMMPS module](../third-party/lammps-command.md) and [the i-Pi driver](../third-party/ipi.md) are only provided on Linux and macOS. To install LAMMPS and/or i-Pi, add `lmp` and/or `ipi` to extras:
+[The LAMMPS module](../third-party/lammps-command.md) and [the i-Pi driver](../third-party/ipi.md) are only provided on Linux and macOS for the TensorFlow backend. To install LAMMPS and/or i-Pi, add `lmp` and/or `ipi` to extras:
+
 ```bash
-pip install deepmd-kit[gpu,cu12,lmp,ipi]
+pip install deepmd-kit[gpu,cu12,torch,lmp,ipi]
 ```
+
 MPICH is required for parallel running. (The macOS arm64 package doesn't support MPI yet.)
 
 It is suggested to install the package into an isolated environment.
 The supported platform includes Linux x86-64 and aarch64 with GNU C Library 2.28 or above, macOS x86-64 and arm64, and Windows x86-64.
-A specific version of TensorFlow which is compatible with DeePMD-kit will be also installed.
+A specific version of TensorFlow and PyTorch which is compatible with DeePMD-kit will be also installed.
 
 :::{Warning}
-If your platform is not supported, or want to build against the installed TensorFlow, or want to enable ROCM support, please [build from source](install-from-source.md).
+If your platform is not supported, or you want to build against the installed TensorFlow, or you want to enable ROCM support, please [build from source](install-from-source.md).
 :::
diff --git a/doc/install/index.md b/doc/install/index.md
deleted file mode 100644
index 8428255f5a..0000000000
--- a/doc/install/index.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# Installation
-
-- [Easy install](easy-install.md)
-- [Install from source code](install-from-source.md)
-- [Install from pre-compiled C library](doc/install/install-from-c-library.md)
-- [Install LAMMPS](install-lammps.md)
-- [Install i-PI](install-ipi.md)
-- [Install GROMACS](install-gromacs.md)
-- [Building conda packages](build-conda.md)
-- [Install Node.js interface](install-nodejs.md)
-- [Easy install the latest development version](easy-install-dev.md)
diff --git a/doc/install/install-from-c-library.md b/doc/install/install-from-c-library.md
index 7613fdb772..f1a5496b59 100644
--- a/doc/install/install-from-c-library.md
+++ b/doc/install/install-from-c-library.md
@@ -1,4 +1,8 @@
-# Install from pre-compiled C library
+# Install from pre-compiled C library {{ tensorflow_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}
+:::
 
 DeePMD-kit provides pre-compiled C library package (`libdeepmd_c.tar.gz`) in each [release](https://github.com/deepmodeling/deepmd-kit/releases). It can be used to build the [LAMMPS plugin](./install-lammps.md) and [GROMACS patch](./install-gromacs.md), as well as many [third-party software packages](../third-party/out-of-deepmd-kit.md), without building TensorFlow and DeePMD-kit on one's own.
 It can be downloaded via the shell command:
@@ -10,7 +14,7 @@ tar xzf libdeepmd_c.tar.gz
 
 The library is built in Linux (GLIBC 2.17) with CUDA 12.2 (`libdeepmd_c.tar.gz`) or 11.8 (`libdeepmd_c_cu11.tar.gz`). It's noted that this package does not contain CUDA Toolkit and cuDNN, so one needs to download them from the NVIDIA website.
 
-## Use Pre-compiled C Library to build the LAMMPS plugin and GROMACS patch
+## Use Pre-compiled C Library to build the LAMMPS plugin, i-PI driver, and GROMACS patch
 
 When one [installs DeePMD-kit's C++ interface](./install-from-source.md#install-deepmd-kits-c-interface), one can use the CMake argument `DEEPMD_C_ROOT` to the path `libdeepmd_c`.
 
@@ -23,4 +27,5 @@ make -j8
 make install
 ```
 
-Then one can follow the manual [Install LAMMPS](./install-lammps.md) and/or [Install GROMACS](./install-gromacs.md).
+Then the i-PI driver `dp_ipi` will be built and installed.
+One can also follow the manual [Install LAMMPS](./install-lammps.md) and/or [Install GROMACS](./install-gromacs.md).
diff --git a/doc/install/install-from-source.md b/doc/install/install-from-source.md
index 4f94b9c793..5195992853 100644
--- a/doc/install/install-from-source.md
+++ b/doc/install/install-from-source.md
@@ -3,54 +3,102 @@
 Please follow our [GitHub](https://github.com/deepmodeling/deepmd-kit) webpage to download the [latest released version](https://github.com/deepmodeling/deepmd-kit/tree/master) and [development version](https://github.com/deepmodeling/deepmd-kit/tree/devel).
 
 Or get the DeePMD-kit source code by `git clone`
+
 ```bash
 cd /some/workspace
 git clone https://github.com/deepmodeling/deepmd-kit.git deepmd-kit
 ```
 
 For convenience, you may want to record the location of the source to a variable, saying `deepmd_source_dir` by
+
 ```bash
 cd deepmd-kit
 deepmd_source_dir=`pwd`
 ```
 
-## Install the python interface
-### Install Tensorflow's python interface
-First, check the python version on your machine
+## Install the Python interface
+
+### Install Backend's Python interface
+
+First, check the Python version on your machine.
+Python 3.8 or above is required.
+
 ```bash
 python --version
 ```
 
-We follow the virtual environment approach to install TensorFlow's Python interface. The full instruction can be found on the official [TensorFlow website](https://www.tensorflow.org/install/pip). TensorFlow 1.8 or later is supported. Now we assume that the Python interface will be installed to the virtual environment directory `$tensorflow_venv`
+We follow the virtual environment approach to install the backend's Python interface.
+Now we assume that the Python interface will be installed in the virtual environment directory `$deepmd_venv`:
+
 ```bash
-virtualenv -p python3 $tensorflow_venv
-source $tensorflow_venv/bin/activate
+virtualenv -p python3 $deepmd_venv
+source $deepmd_venv/bin/activate
 pip install --upgrade pip
+```
+
+::::{tab-set}
+
+:::{tab-item} TensorFlow {{ tensorflow_icon }}
+
+The full instruction to install TensorFlow can be found on the official [TensorFlow website](https://www.tensorflow.org/install/pip). TensorFlow 2.2 or later is supported.
+
+```bash
 pip install --upgrade tensorflow
 ```
-It is important that every time a new shell is started and one wants to use `DeePMD-kit`, the virtual environment should be activated by
+
+If one does not need the GPU support of DeePMD-kit and is concerned about package size, the CPU-only version of TensorFlow should be installed by
+
 ```bash
-source $tensorflow_venv/bin/activate
+pip install --upgrade tensorflow-cpu
 ```
-if one wants to skip out of the virtual environment, he/she can do
+
+One can also [use conda](https://docs.deepmodeling.org/faq/conda.html) to install TensorFlow from [conda-forge](https://conda-forge.org).
+
+To verify the installation, run
+
 ```bash
-deactivate
+python -c "import tensorflow as tf;print(tf.reduce_sum(tf.random.normal([1000, 1000])))"
 ```
-If one has multiple python interpreters named something like python3.x, it can be specified by, for example
+
+One can also [build the TensorFlow Python interface from source](https://www.tensorflow.org/install/source) for customized hardware optimization, such as CUDA, ROCM, or OneDNN support.
+
+:::
+
+:::{tab-item} PyTorch {{ pytorch_icon }}
+
+To install PyTorch, run
+
+```sh
+pip install torch
+```
+
+Follow [PyTorch documentation](https://pytorch.org/get-started/locally/) to install PyTorch built against different CUDA versions or without CUDA.
+
+One can also [use conda](https://docs.deepmodeling.org/faq/conda.html) to install PyTorch from [conda-forge](https://conda-forge.org).
+
+:::
+
+::::
+
+It is important that every time a new shell is started and one wants to use `DeePMD-kit`, the virtual environment should be activated by
+
 ```bash
-virtualenv -p python3.8 $tensorflow_venv
+source $deepmd_venv/bin/activate
 ```
-If one does not need the GPU support of DeePMD-kit and is concerned about package size, the CPU-only version of TensorFlow should be installed by
+
+if one wants to skip out of the virtual environment, he/she can do
+
 ```bash
-pip install --upgrade tensorflow-cpu
+deactivate
 ```
-To verify the installation, run
+
+If one has multiple python interpreters named something like python3.x, it can be specified by, for example
+
 ```bash
-python -c "import tensorflow as tf;print(tf.reduce_sum(tf.random.normal([1000, 1000])))"
+virtualenv -p python3.8 $deepmd_venv
 ```
-One should remember to activate the virtual environment every time he/she uses DeePMD-kit.
 
-One can also [build the TensorFlow Python interface from source](https://www.tensorflow.org/install/source) for custom hardware optimization, such as CUDA, ROCM, or OneDNN support.
+One should remember to activate the virtual environment every time he/she uses DeePMD-kit.
 
 ### Install the DeePMD-kit's python interface
 
@@ -60,9 +108,30 @@ Check the compiler version on your machine
 gcc --version
 ```
 
-The compiler GCC 4.8 or later is supported in the DeePMD-kit. Note that TensorFlow may have specific requirements for the compiler version to support the C++ standard version and [`_GLIBCXX_USE_CXX11_ABI`](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html) used by TensorFlow. It is recommended to use [the same compiler version as TensorFlow](https://www.tensorflow.org/install/source#tested_build_configurations), which can be printed by `python -c "import tensorflow;print(tensorflow.version.COMPILER_VERSION)"`.
+The compiler GCC 4.8 or later is supported in the DeePMD-kit.
+
+::::{tab-set}
+
+:::{tab-item} TensorFlow {{ tensorflow_icon }}
+
+Note that TensorFlow may have specific requirements for the compiler version to support the C++ standard version and [`_GLIBCXX_USE_CXX11_ABI`](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html) used by TensorFlow. It is recommended to use [the same compiler version as TensorFlow](https://www.tensorflow.org/install/source#tested_build_configurations), which can be printed by `python -c "import tensorflow;print(tensorflow.version.COMPILER_VERSION)"`.
+
+:::
+
+:::{tab-item} PyTorch {{ pytorch_icon }}
+
+You can set the environment variable `export DP_ENABLE_PYTORCH=1` to enable customized C++ OPs in the PyTorch backend.
+Note that PyTorch may have specific requirements for the compiler version to support the C++ standard version and [`_GLIBCXX_USE_CXX11_ABI`](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html) used by PyTorch.
+
+The customized C++ OPs are not enabled by default because TensorFlow and PyTorch packages from the PyPI use different `_GLIBCXX_USE_CXX11_ABI` flags.
+We recommend conda-forge packages in this case.
+
+:::
+
+::::
 
 Execute
+
 ```bash
 cd $deepmd_source_dir
 pip install .
@@ -70,25 +139,32 @@ pip install .
 
 One may set the following environment variables before executing `pip`:
 
-| Environment variables | Allowed value          | Default value | Usage                      |
-| --------------------- | ---------------------- | ------------- | -------------------------- |
-| DP_VARIANT            | `cpu`, `cuda`, `rocm`  | `cpu`         | Build CPU variant or GPU variant with CUDA or ROCM support. |
-| CUDAToolkit_ROOT | Path                   | Detected automatically | The path to the CUDA toolkit directory. CUDA 9.0 or later is supported. NVCC is required. |
-| ROCM_ROOT             | Path                   | Detected automatically | The path to the ROCM toolkit directory. |
-| TENSORFLOW_ROOT       | Path                   | Detected automatically | The path to TensorFlow Python library. By default the installer only finds TensorFlow under user site-package directory (`site.getusersitepackages()`) or system site-package directory (`sysconfig.get_path("purelib")`) due to limitation of [PEP-517](https://peps.python.org/pep-0517/). If not found, the latest TensorFlow (or the environment variable `TENSORFLOW_VERSION` if given) from PyPI will be built against.|
-| DP_ENABLE_NATIVE_OPTIMIZATION | 0, 1           | 0             | Enable compilation optimization for the native machine's CPU type. Do not enable it if generated code will run on different CPUs. |
-| CMAKE_ARGS             | str                   | -             | Additional CMake arguments |
-| &lt;LANG&gt;FLAGS (`<LANG>`=`CXX`, `CUDA` or `HIP`)   | str            | -             | Default compilation flags to be used when compiling `<LANG>` files. See [CMake documentation](https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_FLAGS.html). |
+| Environment variables                               | Allowed value         | Default value          | Usage                                                                                                                                                                                                                                                                                                                                                                                                                                               |
+| --------------------------------------------------- | --------------------- | ---------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| DP_VARIANT                                          | `cpu`, `cuda`, `rocm` | `cpu`                  | Build CPU variant or GPU variant with CUDA or ROCM support.                                                                                                                                                                                                                                                                                                                                                                                         |
+| CUDAToolkit_ROOT                                    | Path                  | Detected automatically | The path to the CUDA toolkit directory. CUDA 9.0 or later is supported. NVCC is required.                                                                                                                                                                                                                                                                                                                                                           |
+| ROCM_ROOT                                           | Path                  | Detected automatically | The path to the ROCM toolkit directory.                                                                                                                                                                                                                                                                                                                                                                                                             |
+| DP_ENABLE_TENSORFLOW                                | 0, 1                  | 1                      | {{ tensorflow_icon }} Enable the TensorFlow backend.                                                                                                                                                                                                                                                                                                                                                                                                |
+| DP_ENABLE_PYTORCH                                   | 0, 1                  | 0                      | {{ pytorch_icon }} Enable customized C++ OPs for the PyTorch backend. PyTorch can still run without customized C++ OPs, but features will be limited.                                                                                                                                                                                                                                                                                               |
+| TENSORFLOW_ROOT                                     | Path                  | Detected automatically | {{ tensorflow_icon }} The path to TensorFlow Python library. By default the installer only finds TensorFlow under user site-package directory (`site.getusersitepackages()`) or system site-package directory (`sysconfig.get_path("purelib")`) due to limitation of [PEP-517](https://peps.python.org/pep-0517/). If not found, the latest TensorFlow (or the environment variable `TENSORFLOW_VERSION` if given) from PyPI will be built against. |
+| DP_ENABLE_NATIVE_OPTIMIZATION                       | 0, 1                  | 0                      | Enable compilation optimization for the native machine's CPU type. Do not enable it if generated code will run on different CPUs.                                                                                                                                                                                                                                                                                                                   |
+| CMAKE_ARGS                                          | str                   | -                      | Additional CMake arguments                                                                                                                                                                                                                                                                                                                                                                                                                          |
+| &lt;LANG&gt;FLAGS (`<LANG>`=`CXX`, `CUDA` or `HIP`) | str                   | -                      | Default compilation flags to be used when compiling `<LANG>` files. See [CMake documentation](https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_FLAGS.html).                                                                                                                                                                                                                                                                                  |
 
 To test the installation, one should first jump out of the source directory
+
 ```
 cd /some/other/workspace
 ```
+
 then execute
+
 ```bash
 dp -h
 ```
+
 It will print the help information like
+
 ```text
 usage: dp [-h] {train,freeze,test} ...
 
@@ -105,15 +181,17 @@ Valid subcommands:
     test               test the model
 ```
 
-### Install horovod and mpi4py
+### Install horovod and mpi4py {{ tensorflow_icon }}
 
 [Horovod](https://github.com/horovod/horovod) and [mpi4py](https://github.com/mpi4py/mpi4py) are used for parallel training. For better performance on GPU, please follow the tuning steps in [Horovod on GPU](https://github.com/horovod/horovod/blob/master/docs/gpus.rst).
+
 ```bash
 # With GPU, prefer NCCL as a communicator.
 HOROVOD_WITHOUT_GLOO=1 HOROVOD_WITH_TENSORFLOW=1 HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_NCCL_HOME=/path/to/nccl pip install horovod mpi4py
 ```
 
 If your work in a CPU environment, please prepare runtime as below:
+
 ```bash
 # By default, MPI is used as communicator.
 HOROVOD_WITHOUT_GLOO=1 HOROVOD_WITH_TENSORFLOW=1 pip install horovod mpi4py
@@ -151,7 +229,11 @@ If you don't install Horovod, DeePMD-kit will fall back to serial mode.
 
 If one does not need to use DeePMD-kit with Lammps or I-Pi, then the python interface installed in the previous section does everything and he/she can safely skip this section.
 
-### Install Tensorflow's C++ interface (optional)
+### Install Backends' C++ interface (optional)
+
+::::{tab-set}
+
+:::{tab-item} TensorFlow {{ tensorflow_icon }}
 
 Since TensorFlow 2.12, TensorFlow C++ library (`libtensorflow_cc`) is packaged inside the Python library. Thus, you can skip building TensorFlow C++ library manually. If that does not work for you, you can still build it manually.
 
@@ -159,9 +241,21 @@ The C++ interface of DeePMD-kit was tested with compiler GCC >= 4.8. It is notic
 
 First, the C++ interface of Tensorflow should be installed. It is noted that the version of Tensorflow should be consistent with the python interface. You may follow [the instruction](install-tf.2.12.md) or run the script `$deepmd_source_dir/source/install/build_tf.py` to install the corresponding C++ interface.
 
+:::
+
+:::{tab-item} PyTorch {{ pytorch_icon }}
+
+If you have installed PyTorch using pip, you can use libtorch inside the PyTorch Python package.
+You can also download libtorch prebuilt library from the [PyTorch website](https://pytorch.org/get-started/locally/).
+
+:::
+
+::::
+
 ### Install DeePMD-kit's C++ interface
 
 Now go to the source code directory of DeePMD-kit and make a building place.
+
 ```bash
 cd $deepmd_source_dir/source
 mkdir build
@@ -174,36 +268,72 @@ The installation requires CMake 3.16 or later for the CPU version, CMake 3.23 or
 pip install -U cmake
 ```
 
+You must enable at least one backend.
+If you enable two or more backends, these backend libraries must be built in a compatible way, e.g. using the same `_GLIBCXX_USE_CXX11_ABI` flag.
+We recommend using [conda pacakges](https://docs.deepmodeling.org/faq/conda.html) from [conda-forge](https://conda-forge.org), which are usually compatible to each other.
+
+::::{tab-set}
+
+:::{tab-item} TensorFlow {{ tensorflow_icon }}
+
 I assume you have activated the TensorFlow Python environment and want to install DeePMD-kit into path `$deepmd_root`, then execute CMake
+
 ```bash
-cmake -DUSE_TF_PYTHON_LIBS=TRUE -DCMAKE_INSTALL_PREFIX=$deepmd_root ..
+cmake -DENABLE_TENSORFLOW=TRUE -DUSE_TF_PYTHON_LIBS=TRUE -DCMAKE_INSTALL_PREFIX=$deepmd_root ..
 ```
 
 If you specify `-DUSE_TF_PYTHON_LIBS=FALSE`, you need to give the location where TensorFlow's C++ interface is installed to `-DTENSORFLOW_ROOT=${tensorflow_root}`.
 
+:::
+
+:::{tab-item} PyTorch {{ pytorch_icon }}
+
+I assume you have installed the PyTorch (either Python or C++ interface) to `$torch_root`, then execute CMake
+
+```bash
+cmake -DENABLE_PYTORCH=TRUE -DCMAKE_PREFIX_PATH=$torch_root -DCMAKE_INSTALL_PREFIX=$deepmd_root ..
+```
+
+You can specify `-DUSE_PT_PYTHON_LIBS=TRUE` to use libtorch from the Python installation,
+but you need to be careful that [PyTorch PyPI packages are still built using `_GLIBCXX_USE_CXX11_ABI=0`](https://github.com/pytorch/pytorch/issues/51039), which may be not compatible with other libraries.
+
+```bash
+cmake -DENABLE_PYTORCH=TRUE -DUSE_PT_PYTHON_LIBS=TRUE -DCMAKE_INSTALL_PREFIX=$deepmd_root ..
+```
+
+:::
+
+::::
+
 One may add the following arguments to `cmake`:
 
-| CMake Aurgements         | Allowed value       | Default value | Usage                   |
-| ------------------------ | ------------------- | ------------- | ------------------------|
-| -DTENSORFLOW_ROOT=&lt;value&gt;  | Path              | -             | The Path to TensorFlow's C++ interface. |
-| -DCMAKE_INSTALL_PREFIX=&lt;value&gt; | Path          | -             | The Path where DeePMD-kit will be installed. |
-| -DUSE_CUDA_TOOLKIT=&lt;value&gt; | `TRUE` or `FALSE` | `FALSE`       | If `TRUE`, Build GPU support with CUDA toolkit. |
-| -DCUDAToolkit_ROOT=&lt;value&gt; | Path         | Detected automatically | The path to the CUDA toolkit directory. CUDA 9.0 or later is supported. NVCC is required. |
-| -DUSE_ROCM_TOOLKIT=&lt;value&gt; | `TRUE` or `FALSE` | `FALSE`       | If `TRUE`, Build GPU support with ROCM toolkit. |
-| -DCMAKE_HIP_COMPILER_ROCM_ROOT=&lt;value&gt; | Path         | Detected automatically | The path to the ROCM toolkit directory. |
-| -DLAMMPS_SOURCE_ROOT=&lt;value&gt; | Path         | - | Only neccessary for LAMMPS plugin mode. The path to the [LAMMPS source code](install-lammps.md). LAMMPS 8Apr2021 or later is supported. If not assigned, the plugin mode will not be enabled. |
-| -DUSE_TF_PYTHON_LIBS=&lt;value&gt; | `TRUE` or `FALSE` | `FALSE`       | If `TRUE`, Build C++ interface with TensorFlow's Python libraries(TensorFlow's Python Interface is required). And there's no need for building TensorFlow's C++ interface.|
-| -DENABLE_NATIVE_OPTIMIZATION=&lt;value&gt;       | `TRUE` or `FALSE` | `FALSE`       | Enable compilation optimization for the native machine's CPU type. Do not enable it if generated code will run on different CPUs. |
-| -DCMAKE_&lt;LANG&gt;_FLAGS=&lt;value&gt; (`<LANG>`=`CXX`, `CUDA` or `HIP`)   | str            | -             | Default compilation flags to be used when compiling `<LANG>` files. See [CMake documentation](https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_FLAGS.html). |
+| CMake Aurgements                                                             | Allowed value     | Default value          | Usage                                                                                                                                                                                             |
+| ---------------------------------------------------------------------------- | ----------------- | ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| -DENABLE_TENSORFLOW=&lt;value&gt;                                            | `TRUE` or `FALSE` | `FALSE`                | {{ tensorflow_icon }} Whether building the TensorFlow backend.                                                                                                                                    |
+| -DENABLE_PYTORCH=&lt;value&gt;                                               | `TRUE` or `FALSE` | `FALSE`                | {{ pytorch_icon }} Whether building the PyTorch backend.                                                                                                                                          |
+| -DTENSORFLOW_ROOT=&lt;value&gt;                                              | Path              | -                      | {{ tensorflow_icon }} The Path to TensorFlow's C++ interface.                                                                                                                                     |
+| -DCMAKE_INSTALL_PREFIX=&lt;value&gt;                                         | Path              | -                      | The Path where DeePMD-kit will be installed.                                                                                                                                                      |
+| -DUSE_CUDA_TOOLKIT=&lt;value&gt;                                             | `TRUE` or `FALSE` | `FALSE`                | If `TRUE`, Build GPU support with CUDA toolkit.                                                                                                                                                   |
+| -DCUDAToolkit_ROOT=&lt;value&gt;                                             | Path              | Detected automatically | The path to the CUDA toolkit directory. CUDA 9.0 or later is supported. NVCC is required.                                                                                                         |
+| -DUSE_ROCM_TOOLKIT=&lt;value&gt;                                             | `TRUE` or `FALSE` | `FALSE`                | If `TRUE`, Build GPU support with ROCM toolkit.                                                                                                                                                   |
+| -DCMAKE_HIP_COMPILER_ROCM_ROOT=&lt;value&gt;                                 | Path              | Detected automatically | The path to the ROCM toolkit directory.                                                                                                                                                           |
+| -DLAMMPS_SOURCE_ROOT=&lt;value&gt;                                           | Path              | -                      | Only neccessary for LAMMPS plugin mode. The path to the [LAMMPS source code](install-lammps.md). LAMMPS 8Apr2021 or later is supported. If not assigned, the plugin mode will not be enabled.     |
+| -DUSE_TF_PYTHON_LIBS=&lt;value&gt;                                           | `TRUE` or `FALSE` | `FALSE`                | {{ tensorflow_icon }} If `TRUE`, Build C++ interface with TensorFlow's Python libraries (TensorFlow's Python Interface is required). And there's no need for building TensorFlow's C++ interface. |
+| -DUSE_PT_PYTHON_LIBS=&lt;value&gt;                                           | `TRUE` or `FALSE` | `FALSE`                | {{ pytorch_icon }} If `TRUE`, Build C++ interface with PyTorch's Python libraries (PyTorch's Python Interface is required). And there's no need for downloading PyTorch's C++ libraries.          |
+| -DENABLE_NATIVE_OPTIMIZATION=&lt;value&gt;                                   | `TRUE` or `FALSE` | `FALSE`                | Enable compilation optimization for the native machine's CPU type. Do not enable it if generated code will run on different CPUs.                                                                 |
+| -DCMAKE\_&lt;LANG&gt;\_FLAGS=&lt;value&gt; (`<LANG>`=`CXX`, `CUDA` or `HIP`) | str               | -                      | Default compilation flags to be used when compiling `<LANG>` files. See [CMake documentation](https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_FLAGS.html).                                |
 
 If the CMake has been executed successfully, then run the following make commands to build the package:
+
 ```bash
 make -j4
 make install
 ```
+
 Option `-j4` means using 4 processes in parallel. You may want to use a different number according to your hardware.
 
 If everything works fine, you will have the executable and libraries installed in `$deepmd_root/bin` and `$deepmd_root/lib`
+
 ```bash
 $ ls $deepmd_root/bin
 $ ls $deepmd_root/lib
diff --git a/doc/install/install-gromacs.md b/doc/install/install-gromacs.md
index 758ad7784a..147822cf17 100644
--- a/doc/install/install-gromacs.md
+++ b/doc/install/install-gromacs.md
@@ -3,11 +3,14 @@
 Before following this section, [DeePMD-kit C++ interface](install-from-source.md) should have be installed.
 
 ## Patch source code of GROMACS
+
 Download the source code of a supported GROMACS version (2020.2) from https://manual.gromacs.org/2020.2/download.html. Run the following command:
+
 ```bash
 export PATH=$PATH:$deepmd_kit_root/bin
 dp_gmx_patch -d $gromacs_root -v $version -p
 ```
+
 where `deepmd_kit_root` is the directory where the latest version of DeePMD-kit is installed, and `gromacs_root` refers to the source code directory of GROMACS. And `version` represents the version of GROMACS, where **only 2020.2 is supported now**. If attempting to patch another version of GROMACS you will still need to set `version` to `2020.2` as this is the only supported version, we cannot guarantee that patching other versions of GROMACS will work.
 
 <!-- ## Install C++ api of deepmd-kit and tensorflow
@@ -15,7 +18,9 @@ The C++ interface of `deepmd-kit 2.x` and `tensorflow 2.x` are required. -->
 <!-- + Tips: C++ api of deepmd and TensorFlow could be easily installed from the deepmd-kit offline packages. But before using tensorflow, you need to manually change the protobuf package to [version 3.9.2](https://github.com/protocolbuffers/protobuf/releases/tag/v3.9.2) in `$deepmd_env_dir/include/google/protobuf` (the offline package will install a version of 3.14, which will cause incompatibility). Here `deepmd_env_dir` refers to the directory of conda environment created by the deepmd-kit offline packages.  -->
 
 ## Compile GROMACS with deepmd-kit
+
 The C++ interface of `Deepmd-kit 2.x` and `TensorFlow 2.x` are required. And be aware that only DeePMD-kit with **high precision** is supported now since we cannot ensure single precision is enough for a GROMACS simulation. Here is a sample compile script:
+
 ```bash
 #!/bin/bash
 export CC=/usr/bin/gcc
diff --git a/doc/install/install-ipi.md b/doc/install/install-ipi.md
index 1f4de7474c..3dd45d6749 100644
--- a/doc/install/install-ipi.md
+++ b/doc/install/install-ipi.md
@@ -1,11 +1,14 @@
 # Install i-PI
+
 The i-PI works in a client-server model. The i-PI provides the server for integrating the replica positions of atoms, while the DeePMD-kit provides a client named `dp_ipi` that computes the interactions (including energy, forces and virials). The server and client communicate via the Unix domain socket or the Internet socket. Full documentation for i-PI can be found [here](http://ipi-code.org/). The source code and a complete installation guide for i-PI can be found [here](https://github.com/i-pi/i-pi).
 To use i-PI with already existing drivers, install and update using Pip:
+
 ```bash
 pip install -U i-PI
 ```
 
 Test with Pytest:
+
 ```bash
 pip install pytest
 pytest --pyargs ipi.tests
diff --git a/doc/install/install-lammps.md b/doc/install/install-lammps.md
index 5dbf690c67..c24bfac06b 100644
--- a/doc/install/install-lammps.md
+++ b/doc/install/install-lammps.md
@@ -3,6 +3,7 @@
 There are two ways to install LAMMPS: the built-in mode and the plugin mode. The built-in mode builds LAMMPS along with the DeePMD-kit and DeePMD-kit will be loaded automatically when running LAMMPS. The plugin mode builds LAMMPS and a plugin separately, so one needs to use `plugin load` command to load the DeePMD-kit's LAMMPS plugin library.
 
 ## Install LAMMPS's DeePMD-kit module (built-in mode)
+
 Before following this section, [DeePMD-kit C++ interface](install-from-source.md) should have be installed.
 
 DeePMD-kit provides a module for running MD simulations with LAMMPS. Now make the DeePMD-kit module for LAMMPS.
@@ -11,37 +12,45 @@ DeePMD-kit provides a module for running MD simulations with LAMMPS. Now make th
 cd $deepmd_source_dir/source/build
 make lammps
 ```
+
 DeePMD-kit will generate a module called `USER-DEEPMD` in the `build` directory, which supports either double or single float precision interface. Now download the LAMMPS code, and uncompress it.
+
 ```bash
 cd /some/workspace
-wget https://github.com/lammps/lammps/archive/stable_2Aug2023_update2.tar.gz
-tar xf stable_2Aug2023_update2.tar.gz
+wget https://github.com/lammps/lammps/archive/stable_2Aug2023_update3.tar.gz
+tar xf stable_2Aug2023_update3.tar.gz
 ```
-The source code of LAMMPS is stored in the directory `lammps-stable_2Aug2023_update2`.
+
+The source code of LAMMPS is stored in the directory `lammps-stable_2Aug2023_update3`.
 
 Then, you can [build LAMMPS](https://docs.lammps.org/Build.html) with either make or CMake.
 
 ### With make
 
 Now go into the LAMMPS code and copy the DeePMD-kit module like this
+
 ```bash
-cd lammps-stable_2Aug2023_update2/src/
+cd lammps-stable_2Aug2023_update3/src/
 cp -r $deepmd_source_dir/source/build/USER-DEEPMD .
 make yes-kspace
 make yes-extra-fix
 make yes-user-deepmd
 ```
+
 You can enable any other package you want. Now build LAMMPS
+
 ```bash
 make mpi -j4
 ```
 
 If everything works fine, you will end up with an executable `lmp_mpi`.
+
 ```bash
 ./lmp_mpi -h
 ```
 
 The DeePMD-kit module can be removed from the LAMMPS source code by
+
 ```bash
 make no-user-deepmd
 ```
@@ -51,8 +60,8 @@ make no-user-deepmd
 Now go into the LAMMPS directory and create a directory called `build`:
 
 ```bash
-mkdir -p lammps-stable_2Aug2023_update2/build/
-cd lammps-stable_2Aug2023_update2/build/
+mkdir -p lammps-stable_2Aug2023_update3/build/
+cd lammps-stable_2Aug2023_update3/build/
 ```
 
 Patch the LAMMPS `CMakeLists.txt` file:
@@ -64,6 +73,7 @@ echo "include(${deepmd_source_dir}/source/lmp/builtin.cmake)" >> ../cmake/CMakeL
 It's expected to see one extra line in the end of `CMakeLists.txt`.
 
 Now build LAMMPS. You can install any other package you want.
+
 ```bash
 cmake -D LAMMPS_INSTALL_RPATH=ON -D BUILD_SHARED_LIBS=yes -D CMAKE_INSTALL_PREFIX=${deepmd_root} -DCMAKE_PREFIX_PATH=${deepmd_root} ../cmake
 make -j4
@@ -71,27 +81,32 @@ make install
 ```
 
 If everything works fine, you will end up with an executable `${deepmd_root}/bin/lmp`.
+
 ```bash
 ${deepmd_root}/bin/lmp -h
 ```
 
 ## Install LAMMPS (plugin mode)
+
 Starting from `8Apr2021`, LAMMPS also provides a plugin mode, allowing one to build LAMMPS and a plugin separately.
 
 Now download the LAMMPS code (`8Apr2021` or later), and uncompress it:
+
 ```bash
 cd /some/workspace
-wget https://github.com/lammps/lammps/archive/stable_2Aug2023_update2.tar.gz
-tar xf stable_2Aug2023_update2.tar.gz
+wget https://github.com/lammps/lammps/archive/stable_2Aug2023_update3.tar.gz
+tar xf stable_2Aug2023_update3.tar.gz
 ```
 
-The source code of LAMMPS is stored in the directory `lammps-stable_2Aug2023_update2`. The directory of the source code should be specified as the CMAKE argument `LAMMPS_SOURCE_ROOT` during installation of the DeePMD-kit C++ interface. Now go into the LAMMPS directory and create a directory called `build`
+The source code of LAMMPS is stored in the directory `lammps-stable_2Aug2023_update3`. The directory of the source code should be specified as the CMAKE argument `LAMMPS_SOURCE_ROOT` during installation of the DeePMD-kit C++ interface. Now go into the LAMMPS directory and create a directory called `build`
 
 ```bash
-mkdir -p lammps-stable_2Aug2023_update2/build/
-cd lammps-stable_2Aug2023_update2/build/
+mkdir -p lammps-stable_2Aug2023_update3/build/
+cd lammps-stable_2Aug2023_update3/build/
 ```
+
 Now build LAMMPS. Note that `PLUGIN` must be enabled, and `BUILD_SHARED_LIBS` must be set to `yes`. You can install any other package you want.
+
 ```bash
 cmake -D PKG_PLUGIN=ON -D LAMMPS_INSTALL_RPATH=ON -D BUILD_SHARED_LIBS=yes -D CMAKE_INSTALL_PREFIX=${deepmd_root} -D CMAKE_INSTALL_LIBDIR=lib -D CMAKE_INSTALL_FULL_LIBDIR=${deepmd_root}/lib ../cmake
 make -j4
@@ -99,6 +114,7 @@ make install
 ```
 
 If everything works fine, you will end up with an executable `${deepmd_root}/bin/lmp`.
+
 ```bash
 ${deepmd_root}/bin/lmp -h
 ```
@@ -109,4 +125,5 @@ If `${tensorflow_root}`, `${deepmd_root}`, or the path to TensorFlow Python pack
 ```sh
 patchelf --add-rpath "${tensorflow_root}/lib" liblammps.so
 ```
+
 :::
diff --git a/doc/install/install-tf.1.12.md b/doc/install/install-tf.1.12.md
index f4009405d7..13abd8f7a7 100644
--- a/doc/install/install-tf.1.12.md
+++ b/doc/install/install-tf.1.12.md
@@ -1,5 +1,7 @@
 # Install TensorFlow's C++ interface
+
 The TensorFlow's C++ interface will be compiled from the source code. Firstly one installs bazel. It is highly recommended that the bazel version 0.15.0 is used. A full instruction of bazel installation can be found [here](https://docs.bazel.build/versions/master/install.html).
+
 ```bash
 cd /some/workspace
 wget https://github.com/bazelbuild/bazel/releases/download/0.15.0/bazel-0.15.0-dist.zip
@@ -11,6 +13,7 @@ export PATH=`pwd`/output:$PATH
 ```
 
 Firstly get the source code of the TensorFlow
+
 ```bash
 cd /some/workspace
 git clone https://github.com/tensorflow/tensorflow tensorflow -b v1.12.0 --depth=1
@@ -18,26 +21,35 @@ cd tensorflow
 ```
 
 DeePMD-kit is compiled by CMake, so we need to compile and integrate TensorFlow with CMake projects. The rest of this section follows [the instruction provided by Tuatini](http://tuatini.me/building-tensorflow-as-a-standalone-project/). Now execute
+
 ```bash
 ./configure
 ```
+
 You will answer a list of questions that help configure the building of TensorFlow. It is recommended to build for Python3. You may want to answer the question like this (please replace `$tensorflow_venv` with the virtual environment directory):
+
 ```bash
 Please specify the location of python. [Default is $tensorflow_venv/bin/python]:
 ```
+
 The library path for Python should be set accordingly.
 
 Now build the shared library of TensorFlow:
+
 ```bash
 bazel build -c opt --verbose_failures //tensorflow:libtensorflow_cc.so
 ```
-You may want to add options `--copt=-msse4.2`,  `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`.
+
+You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`.
 
 Now I assume you want to install TensorFlow in directory `$tensorflow_root`. Create the directory if it does not exist
+
 ```bash
 mkdir -p $tensorflow_root
 ```
+
 Before moving on, we need to compile the dependencies of TensorFlow, including Protobuf, Eigen, nsync and absl. Firstly, protobuf
+
 ```bash
 mkdir /tmp/proto
 sed -i 's;PROTOBUF_URL=.*;PROTOBUF_URL=\"https://mirror.bazel.build/github.com/google/protobuf/archive/v3.6.0.tar.gz\";g' tensorflow/contrib/makefile/download_dependencies.sh
@@ -48,7 +60,9 @@ cd tensorflow/contrib/makefile/downloads/protobuf/
 make
 make install
 ```
+
 Then Eigen
+
 ```bash
 mkdir /tmp/eigen
 cd ../eigen
@@ -57,7 +71,9 @@ cd build_dir
 cmake -DCMAKE_INSTALL_PREFIX=/tmp/eigen/ ../
 make install
 ```
+
 nsync
+
 ```bash
 mkdir /tmp/nsync
 cd ../../nsync
@@ -67,7 +83,9 @@ cmake -DCMAKE_INSTALL_PREFIX=/tmp/nsync/ ../
 make
 make install
 ```
+
 And absl
+
 ```bash
 cd ../../absl
 bazel build
@@ -75,7 +93,9 @@ mkdir -p $tensorflow_root/include/
 rsync -avzh --include '*/' --include '*.h' --exclude '*' absl $tensorflow_root/include/
 cd ../../../../..
 ```
+
 Now, copy the libraries to the tensorflow's installation directory:
+
 ```bash
 mkdir $tensorflow_root/lib
 cp bazel-bin/tensorflow/libtensorflow_cc.so $tensorflow_root/lib/
@@ -83,7 +103,9 @@ cp bazel-bin/tensorflow/libtensorflow_framework.so $tensorflow_root/lib/
 cp /tmp/proto/lib/libprotobuf.a $tensorflow_root/lib/
 cp /tmp/nsync/lib64/libnsync.a $tensorflow_root/lib/
 ```
+
 Then copy the headers
+
 ```bash
 mkdir -p $tensorflow_root/include/tensorflow
 cp -r bazel-genfiles/* $tensorflow_root/include/
@@ -94,12 +116,16 @@ cp -r /tmp/proto/include/* $tensorflow_root/include
 cp -r /tmp/eigen/include/eigen3/* $tensorflow_root/include
 cp -r /tmp/nsync/include/*h $tensorflow_root/include
 ```
+
 Now clean up the source files in the header directories:
+
 ```bash
 cd $tensorflow_root/include
 find . -name "*.cc" -type f -delete
 ```
+
 The temporary installation directories for the dependencies can be removed:
+
 ```bash
 rm -fr /tmp/proto /tmp/eigen /tmp/nsync
 ```
diff --git a/doc/install/install-tf.1.14-gpu.md b/doc/install/install-tf.1.14-gpu.md
index 4e9fcaf7fc..5850af24ba 100644
--- a/doc/install/install-tf.1.14-gpu.md
+++ b/doc/install/install-tf.1.14-gpu.md
@@ -1,5 +1,7 @@
 # Install TensorFlow-GPU's C++ interface
+
 TensorFlow's C++ interface will be compiled from the source code. Firstly one installs Bazel. It is highly recommended that the Bazel version 0.24.1 is used. Full instructions on Bazel installation can be found [here](https://docs.bazel.build/versions/master/install.html).
+
 ```bash
 cd /some/workspace
 wget https://github.com/bazelbuild/bazel/releases/download/0.24.1/bazel-0.24.1-dist.zip
@@ -11,6 +13,7 @@ export PATH=`pwd`/output:$PATH
 ```
 
 Firstly get the source code of the TensorFlow
+
 ```bash
 cd /some/workspace
 git clone https://github.com/tensorflow/tensorflow tensorflow -b v1.14.0 --depth=1
@@ -20,6 +23,7 @@ cd tensorflow
 DeePMD-kit is compiled by CMake, so we need to compile and integrate TensorFlow with CMake projects. The rest of this section follows [the instruction provided by Tuatini](http://tuatini.me/building-tensorflow-as-a-standalone-project/). Now execute
 
 You will answer a list of questions that help configure the building of TensorFlow. It is recommended to build for Python3. You may want to answer the question like this (please replace `$tensorflow_venv` with the virtual environment directory):
+
 ```bash
 ./configure
 Please specify the location of python. [Default is xxx]:
@@ -93,23 +97,30 @@ Configuration finished
 The library path for Python should be set accordingly.
 
 Now build the shared library of TensorFlow:
+
 ```bash
 bazel build -c opt --verbose_failures //tensorflow:libtensorflow_cc.so
 ```
-You may want to add options `--copt=-msse4.2`,  `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`.
+
+You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`.
 
 Now I assume you want to install TensorFlow in directory `$tensorflow_root`. Create the directory if it does not exist
+
 ```bash
 mkdir -p $tensorflow_root
 ```
+
 Now, copy the libraries to the TensorFlow's installation directory:
+
 ```bash
 mkdir $tensorflow_root/lib
 cp -d bazel-bin/tensorflow/libtensorflow_cc.so* $tensorflow_root/lib/
 cp -d bazel-bin/tensorflow/libtensorflow_framework.so* $tensorflow_root/lib/
 cp -d $tensorflow_root/lib/libtensorflow_framework.so.1 $tensorflow_root/lib/libtensorflow_framework.so
 ```
+
 Then copy the headers
+
 ```bash
 mkdir -p $tensorflow_root/include/tensorflow
 cp -r bazel-genfiles/* $tensorflow_root/include/
@@ -121,16 +132,20 @@ cp -r bazel-tensorflow/external/eigen_archive/unsupported/ $tensorflow_root/incl
 rsync -avzh --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel-tensorflow/external/protobuf_archive/src/ $tensorflow_root/include/
 rsync -avzh --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel-tensorflow/external/com_google_absl/absl/ $tensorflow_root/include/absl
 ```
+
 Now clean up the source files in the header directories:
+
 ```bash
 cd $tensorflow_root/include
 find . -name "*.cc" -type f -delete
 ```
 
 # Troubleshooting
+
 ```bash
 git: unknown command -C ...
 ```
+
 This may be your git version issue because the low version of Git does not support this command. Upgrading your Git may be helpful.
 
 ```bash
@@ -139,9 +154,11 @@ Please set them or make sure they are set and tested correctly in the CMake file
 FFTW_LIB (ADVANCED)
     linked by target "FFTW" in directory xxx
 ```
+
 Currently, when building the Eigen package, you can delete the FFTW in the CMake file.
 
 ```bash
 fatal error: absl/numeric/int128_have_intrinsic.inc: No such file or directory
 ```
+
 Basically, you could build an empty file named "int128_have_intrinsic.inc" in the same directory of "int128.h".
diff --git a/doc/install/install-tf.1.14.md b/doc/install/install-tf.1.14.md
index 065df9cad9..6457d484ad 100644
--- a/doc/install/install-tf.1.14.md
+++ b/doc/install/install-tf.1.14.md
@@ -1,5 +1,7 @@
 # Install tensorflow's C++ interface
+
 The tensorflow's C++ interface will be compiled from the source code. Firstly one installs bazel. It is highly recommended that the bazel version 0.24.1 is used. A full instruction of bazel installation can be found [here](https://docs.bazel.build/versions/master/install.html).
+
 ```bash
 cd /some/workspace
 wget https://github.com/bazelbuild/bazel/releases/download/0.24.1/bazel-0.24.1-dist.zip
@@ -11,6 +13,7 @@ export PATH=`pwd`/output:$PATH
 ```
 
 Firstly get the source code of the tensorflow
+
 ```bash
 cd /some/workspace
 git clone https://github.com/tensorflow/tensorflow tensorflow -b v1.14.0 --depth=1
@@ -18,33 +21,44 @@ cd tensorflow
 ```
 
 DeePMD-kit is compiled by cmake, so we need to compile and integrate tensorflow with cmake projects. The rest of this section basically follows [the instruction provided by Tuatini](http://tuatini.me/building-tensorflow-as-a-standalone-project/). Now execute
+
 ```bash
 ./configure
 ```
+
 You will answer a list of questions that help configure the building of tensorflow. It is recommended to build for Python3. You may want to answer the question like this (please replace `$tensorflow_venv` by the virtual environment directory):
+
 ```bash
 Please specify the location of python. [Default is $tensorflow_venv/bin/python]:
 ```
+
 The library path for Python should be set accordingly.
 
 Now build the shared library of tensorflow:
+
 ```bash
 bazel build -c opt --verbose_failures //tensorflow:libtensorflow_cc.so
 ```
-You may want to add options `--copt=-msse4.2`,  `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue of your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`.
+
+You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue of your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`.
 
 Now I assume you want to install tensorflow in directory `$tensorflow_root`. Create the directory if it does not exists
+
 ```bash
 mkdir -p $tensorflow_root
 ```
+
 Now, copy the libraries to the tensorflow's installation directory:
+
 ```bash
 mkdir $tensorflow_root/lib
 cp -d bazel-bin/tensorflow/libtensorflow_cc.so* $tensorflow_root/lib/
 cp -d bazel-bin/tensorflow/libtensorflow_framework.so* $tensorflow_root/lib/
 cp -d $tensorflow_root/lib/libtensorflow_framework.so.1 $tensorflow_root/lib/libtensorflow_framework.so
 ```
+
 Then copy the headers
+
 ```bash
 mkdir -p $tensorflow_root/include/tensorflow
 cp -r bazel-genfiles/* $tensorflow_root/include/
@@ -56,7 +70,9 @@ cp -r bazel-tensorflow/external/eigen_archive/unsupported/ $tensorflow_root/incl
 rsync -avzh --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel-tensorflow/external/protobuf_archive/src/ $tensorflow_root/include/
 rsync -avzh --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel-tensorflow/external/com_google_absl/absl/ $tensorflow_root/include/absl
 ```
+
 Now clean up the source files in the header directories:
+
 ```bash
 cd $tensorflow_root/include
 find . -name "*.cc" -type f -delete
diff --git a/doc/install/install-tf.1.8.md b/doc/install/install-tf.1.8.md
index bfc1a616d4..f9554f9348 100644
--- a/doc/install/install-tf.1.8.md
+++ b/doc/install/install-tf.1.8.md
@@ -1,5 +1,7 @@
 # Install tensorflow's C++ interface
+
 The tensorflow's C++ interface will be compiled from the source code. Firstly one installs bazel. It is highly recommended that the bazel version 0.10.0 is used. A full instruction of bazel installation can be found [here](https://docs.bazel.build/versions/master/install.html).
+
 ```bash
 cd /some/workspace
 wget https://github.com/bazelbuild/bazel/releases/download/0.10.0/bazel-0.10.0-dist.zip
@@ -11,6 +13,7 @@ export PATH=`pwd`/output:$PATH
 ```
 
 Firstly get the source code of the TensorFlow
+
 ```bash
 cd /some/workspace
 git clone https://github.com/tensorflow/tensorflow tensorflow -b v1.8.0 --depth=1
@@ -18,26 +21,35 @@ cd tensorflow
 ```
 
 DeePMD-kit is compiled by CMake, so we need to compile and integrate TensorFlow with CMake projects. The rest of this section basically follows [the instruction provided by Tuatini](http://tuatini.me/building-tensorflow-as-a-standalone-project/). Now execute
+
 ```bash
 ./configure
 ```
+
 You will answer a list of questions that help configure the building of TensorFlow. It is recommended to build for Python3. You may want to answer the question like this (please replace `$tensorflow_venv` with the virtual environment directory):
+
 ```bash
 Please specify the location of python. [Default is $tensorflow_venv/bin/python]:
 ```
+
 The library path for Python should be set accordingly.
 
 Now build the shared library of TensorFlow:
+
 ```bash
 bazel build -c opt --verbose_failures //tensorflow:libtensorflow_cc.so
 ```
-You may want to add options `--copt=-msse4.2`,  `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue of your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`.
+
+You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue of your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`.
 
 Now I assume you want to install TensorFlow in directory `$tensorflow_root`. Create the directory if it does not exist
+
 ```bash
 mkdir -p $tensorflow_root
 ```
+
 Before moving on, we need to compile the dependencies of TensorFlow, including Protobuf, Eigen and nsync. Firstly, protobuf
+
 ```bash
 mkdir /tmp/proto
 tensorflow/contrib/makefile/download_dependencies.sh
@@ -47,7 +59,9 @@ cd tensorflow/contrib/makefile/downloads/protobuf/
 make
 make install
 ```
+
 Then Eigen
+
 ```bash
 mkdir /tmp/eigen
 cd ../eigen
@@ -56,7 +70,9 @@ cd build_dir
 cmake -DCMAKE_INSTALL_PREFIX=/tmp/eigen/ ../
 make install
 ```
+
 And nsync
+
 ```bash
 mkdir /tmp/nsync
 cd ../../nsync
@@ -67,7 +83,9 @@ make
 make install
 cd ../../../../../..
 ```
+
 Now, copy the libraries to the TensorFlow's installation directory:
+
 ```bash
 mkdir $tensorflow_root/lib
 cp bazel-bin/tensorflow/libtensorflow_cc.so $tensorflow_root/lib/
@@ -75,7 +93,9 @@ cp bazel-bin/tensorflow/libtensorflow_framework.so $tensorflow_root/lib/
 cp /tmp/proto/lib/libprotobuf.a $tensorflow_root/lib/
 cp /tmp/nsync/lib/libnsync.a $tensorflow_root/lib/
 ```
+
 Then copy the headers
+
 ```bash
 mkdir -p $tensorflow_root/include/tensorflow
 cp -r bazel-genfiles/* $tensorflow_root/include/
@@ -86,12 +106,16 @@ cp -r /tmp/proto/include/* $tensorflow_root/include
 cp -r /tmp/eigen/include/eigen3/* $tensorflow_root/include
 cp -r /tmp/nsync/include/*h $tensorflow_root/include
 ```
+
 Now clean up the source files in the header directories:
+
 ```bash
 cd $tensorflow_root/include
 find . -name "*.cc" -type f -delete
 ```
+
 The temporary installation directories for the dependencies can be removed:
+
 ```bash
 rm -fr /tmp/proto /tmp/eigen /tmp/nsync
 ```
diff --git a/doc/install/install-tf.2.12.md b/doc/install/install-tf.2.12.md
index dce0c224d5..8523345d3d 100644
--- a/doc/install/install-tf.2.12.md
+++ b/doc/install/install-tf.2.12.md
@@ -1,4 +1,5 @@
 # Install TensorFlow's C++ interface
+
 TensorFlow's C++ interface will be compiled from the source code. In this manual, we install TensorFlow 2.12.0. It is noted that the source code of TensorFlow 2.12.0 uses C++ 17, so one needs a C++ compiler that supports C++ 17.
 
 Firstly one installs Bazel. [bazelisk](https://github.com/bazelbuild/bazelisk) can be lanuched to use [bazel](https://github.com/bazelbuild/bazel).
@@ -10,6 +11,7 @@ export PATH=/some/workspace/bazel/bin:$PATH
 ```
 
 Firstly get the source code of the TensorFlow
+
 ```bash
 git clone https://github.com/tensorflow/tensorflow tensorflow -b v2.12.0 --depth=1
 cd tensorflow
@@ -76,23 +78,30 @@ Configuration finished
 The library path for Python should be set accordingly.
 
 Now build the shared library of TensorFlow:
+
 ```bash
 bazel build -c opt --verbose_failures //tensorflow:libtensorflow_cc.so
 ```
-You may want to add options `--copt=-msse4.2`,  `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. If you want to enable [oneDNN optimization](https://www.oneapi.io/blog/tensorflow-and-onednn-in-partnership/), add `--config=mkl`.
+
+You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. If you want to enable [oneDNN optimization](https://www.oneapi.io/blog/tensorflow-and-onednn-in-partnership/), add `--config=mkl`.
 
 Now I assume you want to install TensorFlow in directory `$tensorflow_root`. Create the directory if it does not exist
+
 ```bash
 mkdir -p $tensorflow_root
 ```
+
 Now, copy the libraries to the TensorFlow's installation directory:
+
 ```bash
 mkdir -p $tensorflow_root/lib
 cp -d bazel-bin/tensorflow/libtensorflow_cc.so* $tensorflow_root/lib/
 cp -d bazel-bin/tensorflow/libtensorflow_framework.so* $tensorflow_root/lib/
 cp -d $tensorflow_root/lib/libtensorflow_framework.so.2 $tensorflow_root/lib/libtensorflow_framework.so
 ```
+
 Then copy the headers
+
 ```bash
 mkdir -p $tensorflow_root/include/tensorflow
 rsync -avzh --exclude '_virtual_includes/' --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel-bin/ $tensorflow_root/include/
@@ -107,12 +116,15 @@ rsync -avzh --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel
 ```
 
 If you've enabled oneDNN, also copy `libiomp5.so`:
+
 ```bash
 cp -d bazel-out/k8-opt/bin/external/llvm_openmp/libiomp5.so $tensorflow_root/lib/
 ```
 
 # Troubleshooting
+
 ```bash
 git: unknown command -C ...
 ```
+
 This may be an issue with your Git version issue. Early versions of Git do not support this command, in this case upgrading your Git to a newer version may resolve any issues.
diff --git a/doc/install/install-tf.2.3.md b/doc/install/install-tf.2.3.md
index e538607db0..2fc7b35f2c 100644
--- a/doc/install/install-tf.2.3.md
+++ b/doc/install/install-tf.2.3.md
@@ -1,5 +1,7 @@
 # Install TensorFlow's C++ interface
+
 The tensorflow's C++ interface will be compiled from the source code. Firstly one installs bazel. The bazel version 3.1.0 should be used. A full instruction of bazel installation can be found [here](https://docs.bazel.build/versions/master/install.html).
+
 ```bash
 cd /some/workspace
 wget https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel-3.1.0-installer-linux-x86_64.sh
@@ -9,6 +11,7 @@ export PATH=/some/workspace/bazel/bin:$PATH
 ```
 
 Firstly get the source code of the TensorFlow
+
 ```bash
 git clone https://github.com/tensorflow/tensorflow tensorflow -b v2.3.0 --depth=1
 cd tensorflow
@@ -75,23 +78,30 @@ Configuration finished
 The library path for Python should be set accordingly.
 
 Now build the shared library of tensorflow:
+
 ```bash
 bazel build -c opt --verbose_failures //tensorflow:libtensorflow_cc.so
 ```
-You may want to add options `--copt=-msse4.2`,  `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue of your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`.
+
+You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue of your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`.
 
 Now I assume you want to install TensorFlow in directory `$tensorflow_root`. Create the directory if it does not exist
+
 ```bash
 mkdir -p $tensorflow_root
 ```
+
 Now, copy the libraries to the tensorflow's installation directory:
+
 ```bash
 mkdir -p $tensorflow_root/lib
 cp -d bazel-bin/tensorflow/libtensorflow_cc.so* $tensorflow_root/lib/
 cp -d bazel-bin/tensorflow/libtensorflow_framework.so* $tensorflow_root/lib/
 cp -d $tensorflow_root/lib/libtensorflow_framework.so.2 $tensorflow_root/lib/libtensorflow_framework.so
 ```
+
 Then copy the headers
+
 ```bash
 mkdir -p $tensorflow_root/include/tensorflow
 rsync -avzh --exclude '_virtual_includes/' --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel-bin/ $tensorflow_root/include/
@@ -105,7 +115,9 @@ rsync -avzh --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel
 ```
 
 # Troubleshooting
+
 ```bash
 git: unknown command -C ...
 ```
+
 This may be an issue with your git version issue. Early versions of git do not support this command, in this case upgrading your git to a newer version may resolve any issues.
diff --git a/doc/install/install-tf.2.8.md b/doc/install/install-tf.2.8.md
index da1f299131..4145ba01d1 100644
--- a/doc/install/install-tf.2.8.md
+++ b/doc/install/install-tf.2.8.md
@@ -1,4 +1,5 @@
 # Install TensorFlow's C++ interface
+
 TensorFlow's C++ interface will be compiled from the source code. Firstly one installs Bazel. [bazelisk](https://github.com/bazelbuild/bazelisk) can be lanuched to use [bazel](https://github.com/bazelbuild/bazel).
 
 ```bash
@@ -8,6 +9,7 @@ export PATH=/some/workspace/bazel/bin:$PATH
 ```
 
 Firstly get the source code of the TensorFlow
+
 ```bash
 git clone https://github.com/tensorflow/tensorflow tensorflow -b v2.8.0 --depth=1
 cd tensorflow
@@ -74,23 +76,30 @@ Configuration finished
 The library path for Python should be set accordingly.
 
 Now build the shared library of TensorFlow:
+
 ```bash
 bazel build -c opt --verbose_failures //tensorflow:libtensorflow_cc.so
 ```
-You may want to add options `--copt=-msse4.2`,  `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. If you want to enable [oneDNN optimization](https://www.oneapi.io/blog/tensorflow-and-onednn-in-partnership/), add `--config=mkl`.
+
+You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. If you want to enable [oneDNN optimization](https://www.oneapi.io/blog/tensorflow-and-onednn-in-partnership/), add `--config=mkl`.
 
 Now I assume you want to install TensorFlow in directory `$tensorflow_root`. Create the directory if it does not exist
+
 ```bash
 mkdir -p $tensorflow_root
 ```
+
 Now, copy the libraries to the TensorFlow's installation directory:
+
 ```bash
 mkdir -p $tensorflow_root/lib
 cp -d bazel-bin/tensorflow/libtensorflow_cc.so* $tensorflow_root/lib/
 cp -d bazel-bin/tensorflow/libtensorflow_framework.so* $tensorflow_root/lib/
 cp -d $tensorflow_root/lib/libtensorflow_framework.so.2 $tensorflow_root/lib/libtensorflow_framework.so
 ```
+
 Then copy the headers
+
 ```bash
 mkdir -p $tensorflow_root/include/tensorflow
 rsync -avzh --exclude '_virtual_includes/' --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel-bin/ $tensorflow_root/include/
@@ -104,12 +113,15 @@ rsync -avzh --include '*/' --include '*.h' --include '*.inc' --exclude '*' bazel
 ```
 
 If you've enabled oneDNN, also copy `libiomp5.so`:
+
 ```bash
 cp -d bazel-out/k8-opt/bin/external/llvm_openmp/libiomp5.so $tensorflow_root/lib/
 ```
 
 # Troubleshooting
+
 ```bash
 git: unknown command -C ...
 ```
+
 This may be an issue with your Git version issue. Early versions of Git do not support this command, in this case upgrading your Git to a newer version may resolve any issues.
diff --git a/doc/logo.md b/doc/logo.md
index 420f378336..67c303f651 100644
--- a/doc/logo.md
+++ b/doc/logo.md
@@ -1,5 +1,5 @@
-# Logo
-
-<picture><source media="(prefers-color-scheme: dark)" srcset="./_static/logo-dark.svg"><source media="(prefers-color-scheme: light)" srcset="./_static/logo.svg"><img alt="DeePMD-kit logo" src="./_static/logo.svg"></picture>
-
-The logo of DeePMD-kit is a beaver. Beavers were widely distributed in Europe and Asia but became nearly extinct due to hunting. Listed as a first-class state-protected animal in China, the population of beavers in China is less than the giant pandas. We hope that users of DeePMD-kit can enhance the awareness to protect beavers.
+# Logo
+
+<picture><source media="(prefers-color-scheme: dark)" srcset="./_static/logo-dark.svg"><source media="(prefers-color-scheme: light)" srcset="./_static/logo.svg"><img alt="DeePMD-kit logo" src="./_static/logo.svg"></picture>
+
+The logo of DeePMD-kit is a beaver. Beavers were widely distributed in Europe and Asia but became nearly extinct due to hunting. Listed as a first-class state-protected animal in China, the population of beavers in China is less than the giant pandas. We hope that users of DeePMD-kit can enhance the awareness to protect beavers.
diff --git a/doc/model/dpa2.md b/doc/model/dpa2.md
new file mode 100644
index 0000000000..e295f6b6bb
--- /dev/null
+++ b/doc/model/dpa2.md
@@ -0,0 +1,5 @@
+# Descriptor DPA-2 {{ pytorch_icon }}
+
+:::{note}
+**Supported backends**: PyTorch {{ pytorch_icon }}
+:::
diff --git a/doc/model/dplr.md b/doc/model/dplr.md
index feea84e562..ec95f9f424 100644
--- a/doc/model/dplr.md
+++ b/doc/model/dplr.md
@@ -1,4 +1,8 @@
-# Deep potential long-range (DPLR)
+# Deep potential long-range (DPLR) {{ tensorflow_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}
+:::
 
 Notice: **The interfaces of DPLR are not stable and subject to change**
 
@@ -9,33 +13,42 @@ In the following, we take the DPLR model for example to introduce the training a
 ## Theory
 
 The Deep Potential Long Range (DPLR) model adds the electrostatic energy to the total energy:
+
 ```math
     E=E_{\text{DP}} + E_{\text{ele}},
 ```
+
 where $E_{\text{DP}}$ is the short-range contribution constructed as the [standard energy model](./train-energy.md) that is fitted against $(E^\ast-E_{\text{ele}})$.
 $E_{\text{ele}}$ is the electrostatic energy
 introduced by a group of Gaussian distributions that is an approximation of the electronic structure of the system, and is calculated in Fourier space by
+
 ```math
     E_{\text{ele}} = \frac{1}{2\pi V}\sum_{m \neq 0, \|m\|\leq L} \frac{\exp({-\pi ^2 m^2/\beta ^2})}{m^2}S^2(m),
 ```
+
 where $\beta$ is a freely tunable parameter that controls the spread of the Gaussians.
 $L$ is the cutoff in Fourier space and $S(m)$, the structure factor, is given by
+
 ```math
     S(m)=\sum_i q_i e^{-2\pi \imath m \boldsymbol r_i} + \sum_n q_n e^{-2\pi \imath m \boldsymbol W_n},
 ```
+
 where $\imath = \sqrt{-1}$ denotes the imaginary unit, $\boldsymbol r_i$ indicates ion coordinates, $q_i$ is the charge of the ion $i$, and $W_n$ is the $n$-th Wannier centroid (WC) which can be obtained from a separated [dipole model](./train-fitting-tensor.md).
 It can be proved that the error in the electrostatic energy introduced by the Gaussian approximations is dominated by a summation of dipole-quadrupole interactions that decay as $r^{-4}$, where $r$ is the distance between the dipole and quadrupole.[^1]
 
-[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
 
 ## Train a deep Wannier model for Wannier centroids
 
 We use the deep Wannier model (DW) to represent the relative position of the Wannier centroid (WC) with the atom with which it is associated. One may consult the introduction of the [dipole model](train-fitting-tensor.md) for a detailed introduction. An example input `wc.json` and a small dataset `data` for tutorial purposes can be found in
+
 ```bash
 $deepmd_source_dir/examples/water/dplr/train/
 ```
+
 It is noted that **the tutorial dataset is not enough for training a productive model**.
 Two settings make the training input script different from an energy training input:
+
 ```json
 	"fitting_net": {
 	    "type":		"dipole",
@@ -44,8 +57,10 @@ Two settings make the training input script different from an energy training in
 	    "seed":		1
 	},
 ```
+
 The type of fitting is set to {ref}`dipole <model/fitting_net[dipole]>`. The dipole is associated with type 0 atoms (oxygens), by the setting `"dipole_type": [0]`. What we trained is the displacement of the WC from the corresponding oxygen atom. It shares the same training input as the atomic dipole because both are 3-dimensional vectors defined on atoms.
 The loss section is provided as follows
+
 ```json
     "loss": {
 	"type":		"tensor",
@@ -53,9 +68,11 @@ The loss section is provided as follows
 	"pref_atomic":	1.0
     },
 ```
+
 so that the atomic dipole is trained as labels. Note that the NumPy compressed file `atomic_dipole.npy` should be provided in each dataset.
 
 The training and freezing can be started from the example directory by
+
 ```bash
 dp train dw.json && dp freeze -o dw.pb
 ```
@@ -63,6 +80,7 @@ dp train dw.json && dp freeze -o dw.pb
 ## Train the DPLR model
 
 The training of the DPLR model is very similar to the standard short-range DP models. An example input script can be found in the example directory. The following section is introduced to compute the long-range energy contribution of the DPLR model, and modify the short-range DP model by this part.
+
 ```json
         "modifier": {
             "type":             "dipole_charge",
@@ -73,8 +91,10 @@ The training of the DPLR model is very similar to the standard short-range DP mo
             "ewald_beta":       0.40
         },
 ```
-The {ref}`model_name <model/modifier[dipole_charge]/model_name>` specifies which DW model is used to predict the position of WCs. {ref}`model_charge_map <model/modifier[dipole_charge]/model_charge_map>` gives the amount of charge assigned to WCs. {ref}`sys_charge_map <model/modifier[dipole_charge]/sys_charge_map>` provides the nuclear charge of oxygen (type 0) and hydrogen (type 1) atoms. {ref}`ewald_beta <model/modifier[dipole_charge]/ewald_beta>` (unit $\text{Å}^{-1}$) gives the spread parameter controls the spread of Gaussian charges, and {ref}`ewald_h <model/modifier[dipole_charge]/ewald_h>`  (unit Å) assigns the grid size of Fourier transformation.
+
+The {ref}`model_name <model/modifier[dipole_charge]/model_name>` specifies which DW model is used to predict the position of WCs. {ref}`model_charge_map <model/modifier[dipole_charge]/model_charge_map>` gives the amount of charge assigned to WCs. {ref}`sys_charge_map <model/modifier[dipole_charge]/sys_charge_map>` provides the nuclear charge of oxygen (type 0) and hydrogen (type 1) atoms. {ref}`ewald_beta <model/modifier[dipole_charge]/ewald_beta>` (unit $\text{Å}^{-1}$) gives the spread parameter controls the spread of Gaussian charges, and {ref}`ewald_h <model/modifier[dipole_charge]/ewald_h>` (unit Å) assigns the grid size of Fourier transformation.
 The DPLR model can be trained and frozen by (from the example directory)
+
 ```bash
 dp train ener.json && dp freeze -o ener.pb
 ```
@@ -84,11 +104,13 @@ dp train ener.json && dp freeze -o ener.pb
 In MD simulations, the long-range part of the DPLR is calculated by the LAMMPS `kspace` support. Then the long-range interaction is back-propagated to atoms by DeePMD-kit. This setup is commonly used in classical molecular dynamics simulations as the "virtual site". Unfortunately, LAMMPS does not natively support virtual sites, so we have to hack the LAMMPS code, which makes the input configuration and script a little wired.
 
 An example of an input configuration file and script can be found in
+
 ```bash
 $deepmd_source_dir/examples/water/dplr/lmp/
 ```
 
 We use `atom_style full` for DPLR simulations. the coordinates of the WCs are explicitly written in the configuration file. Moreover, a virtual bond is established between the oxygens and the WCs to indicate they are associated together. The configuration file containing 128 H2O molecules is thus written as
+
 ```
 
 512 atoms
@@ -123,13 +145,17 @@ Bonds
 2 1 2 386
 ...
 ```
+
 The oxygens and hydrogens are assigned with atom types 1 and 2 (corresponding to training atom types 0 and 1), respectively. The WCs are assigned with atom type 3. We want to simulate heavy water so the mass of hydrogens is set to 2.
 
 An example input script is provided in
+
 ```bash
 $deepmd_source_dir/examples/water/dplr/lmp/in.lammps
 ```
+
 Here are some explanations
+
 ```lammps
 # groups of real and virtual atoms
 group           real_atom type 1 2
@@ -144,6 +170,7 @@ bond_style      zero
 bond_coeff      *
 special_bonds   lj/coul 1 1 1 angle no
 ```
+
 Type 1 and 2 (O and H) are `real_atom`s, while type 3 (WCs) are `virtual_atom`s. The model file `ener.pb` stores both the DW and DPLR models, so the position of WCs and the energy can be inferred from it. A virtual bond type is specified by `bond_style zero`. The `special_bonds` command switches off the exclusion of intramolecular interactions.
 
 ```lammps
@@ -153,19 +180,22 @@ Type 1 and 2 (O and H) are `real_atom`s, while type 3 (WCs) are `virtual_atom`s.
 kspace_style	pppm/dplr 1e-5
 kspace_modify	gewald ${BETA} diff ik mesh ${KMESH} ${KMESH} ${KMESH}
 ```
+
 The long-range part is calculated by the `kspace` support of LAMMPS. The `kspace_style` `pppm/dplr` is required. The spread parameter set by variable `BETA` should be set the same as that used in training. The `KMESH` should be set dense enough so the long-range calculation is converged.
 
 ### fix dplr command
 
 **Syntax**
 
-
 ```
 fix ID group-ID style_name keyword value ...
 ```
-* ID, group-ID are documented in :doc:`fix <fix>` command
-* style_name = *dplr*
-* three or more keyword/value pairs may be appended
+
+<!-- See https://github.com/prettier/prettier/issues/16160 -->
+<!-- prettier-ignore -->
+- ID, group-ID are documented in :doc:`fix <fix>` command
+- style\_name = _dplr_
+- three or more keyword/value pairs may be appended
 
 ```
 keyword = *model* or *type_associate* or *bond_type* or *efield*
@@ -197,6 +227,7 @@ The atom names specified in [pair_style `deepmd`](../third-party/lammps-command.
 If it is not set, the training parameter {ref}`type_map <model/type_map>` will be mapped to LAMMPS atom types.
 
 To use a time-dependent electric field, LAMMPS's `variable` feature can be utilized:
+
 ```lammps
 variable EFIELD_Z equal 2*sin(2*PI*time/0.006)
 fix 0 all dplr model ener.pb type_associate 1 3 bond_type 1 efield 0 0 v_EFIELD_Z
@@ -212,21 +243,23 @@ compute		real_press all pressure real_temp
 fix		1 real_atom nvt temp ${TEMP} ${TEMP} ${TAU_T}
 fix_modify	1 temp real_temp
 ```
+
 The temperature of the system should be computed from the real atoms. The kinetic contribution in the pressure tensor is also computed from the real atoms. The thermostat is applied to only real atoms. The computed temperature and pressure of real atoms can be accessed by, e.g.
+
 ```lammps
 fix             thermo_print all print ${THERMO_FREQ} "$(step) $(pe) $(ke) $(etotal) $(enthalpy) $(c_real_temp) $(c_real_press) $(vol) $(c_real_press[1]) $(c_real_press[2]) $(c_real_press[3])" append thermo.out screen no title "# step pe ke etotal enthalpy temp press vol pxx pyy pzz"
 ```
 
 The LAMMPS simulation can be started from the example directory by
+
 ```bash
 lmp -i in.lammps
 ```
+
 If LAMMPS complains that no model file `ener.pb` exists, it can be copied from the training example directory.
 
 The MD simulation lasts for only 20 steps. If one runs a longer simulation, it will blow up, because the model is trained with a very limited dataset for very short training steps, thus is of poor quality.
 
 Another restriction that should be noted is that the energies printed at the zero steps are not correct. This is because at the zero steps the position of the WC has not been updated with the DW model. The energies printed in later steps are correct.
 
-
-
 [1]: https://arxiv.org/abs/2112.13327
diff --git a/doc/model/dprc.md b/doc/model/dprc.md
index c7547a769f..33dde237d7 100644
--- a/doc/model/dprc.md
+++ b/doc/model/dprc.md
@@ -1,4 +1,8 @@
-# Deep Potential - Range Correction (DPRc)
+# Deep Potential - Range Correction (DPRc) {{ tensorflow_icon }} {{ pytorch_icon }} {{ dpmodel_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, DP {{ dpmodel_icon }}
+:::
 
 Deep Potential - Range Correction (DPRc) is designed to combine with QM/MM method, and corrects energies from a low-level QM/MM method to a high-level QM/MM method:
 
@@ -11,6 +15,7 @@ E=E_\text{QM}(\mathbf R; \mathbf P)  + E_\text{QM/MM}(\mathbf R; \mathbf P) + E_
 Deep Potential - Range Correction (DPRc) was initially designed to correct the potential energy from a fast, linear-scaling low-level semiempirical QM/MM theory to a high-level ''ab initio'' QM/MM theory in a range-correction way to quantitatively correct short and mid-range non-bonded interactions leveraging the non-bonded lists routinely used in molecular dynamics simulations using molecular mechanical force fields such as AMBER.
 In this way, long-ranged electrostatic interactions can be modeled efficiently using the particle mesh Ewald method or its extensions for multipolar and QM/MM potentials.
 In a DPRc model, the switch function is modified to disable MM-MM interaction:
+
 ```math
   s_\text{DPRc}(r_{ij}) =
   \begin{cases}
@@ -18,12 +23,16 @@ In a DPRc model, the switch function is modified to disable MM-MM interaction:
   s(r_{ij}), &\text{otherwise},
   \end{cases}
 ```
+
 where $s_\text{DPRc}(r_{ij})$ is the new switch function and $s(r_{ij})$ is the old one.
 This ensures the forces between MM atoms are zero, i.e.
+
 ```math
 {\boldsymbol F}_{ij} = - \frac{\partial E}{\partial \boldsymbol r_{ij}} = 0, \quad i \in \text{MM} \land j \in \text{MM}.
 ```
+
 The fitting network is revised to remove energy bias from MM atoms:
+
 ```math
   E_i=
   \begin{cases}
@@ -31,10 +40,11 @@ The fitting network is revised to remove energy bias from MM atoms:
   \mathcal{F}_0(\mathcal{D}^i) - \mathcal{F}_0(\mathbf{0}), &\text{if $i \in \text{MM}$},
   \end{cases}
 ```
+
 where $\mathbf{0}$ is a zero matrix.
 It is worth mentioning that usage of DPRc is not limited to its initial design for QM/MM correction and can be expanded to any similar interaction.[^1]
 
-[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
 
 See the [JCTC paper](https://doi.org/10.1021/acs.jctc.1c00201) for details.
 
@@ -58,6 +68,10 @@ In a DPRc model, QM atoms and MM atoms have different atom types. Assuming we ha
 
 As described in the paper, the DPRc model only corrects $E_\text{QM}$ and $E_\text{QM/MM}$ within the cutoff, so we use a hybrid descriptor to describe them separatedly:
 
+::::{tab-set}
+
+:::{tab-item} TensorFlow {{ tensorflow_icon }}
+
 ```json
 "descriptor" :{
     "type":             "hybrid",
@@ -87,7 +101,47 @@ As described in the paper, the DPRc model only corrects $E_\text{QM}$ and $E_\te
 }
 ```
 
+:::
+
+:::{tab-item} PyTorch {{ pytorch_icon }}
+
+```json
+"descriptor" :{
+    "type":             "hybrid",
+    "list" : [
+        {
+            "type":     "se_e2_a",
+            "sel":              [6, 11, 0, 6, 0, 1],
+            "rcut_smth":        1.00,
+            "rcut":             9.00,
+            "neuron":           [12, 25, 50],
+            "exclude_types":    [[2, 2], [2, 4], [4, 4], [0, 2], [0, 4], [1, 2], [1, 4], [3, 2], [3, 4], [5, 2], [5, 4]],
+            "axis_neuron":      12,
+            "type_one_side":    true,
+            "_comment": " QM/QM interaction"
+        },
+        {
+            "type":     "se_e2_a",
+            "sel":              [6, 11, 100, 6, 50, 1],
+            "rcut_smth":        0.50,
+            "rcut":             6.00,
+            "neuron":           [12, 25, 50],
+            "exclude_types":    [[0, 0], [0, 1], [0, 3], [0, 5], [1, 1], [1, 3], [1, 5], [3, 3], [3, 5], [5, 5], [2, 2], [2, 4], [4, 4]],
+            "axis_neuron":      12,
+            "set_davg_zero":    true,
+            "type_one_side":    true,
+            "_comment": " QM/MM interaction"
+        }
+    ]
+}
+```
+
+:::
+
+::::
+
 {ref}`exclude_types <model/descriptor[se_a_ebd_v2]/exclude_types>` can be generated by the following Python script:
+
 ```py
 from itertools import combinations_with_replacement, product
 
@@ -127,10 +181,14 @@ The DPRc model has the best practices with the [AMBER](../third-party/out-of-dee
 
 ## Pairwise DPRc
 
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}
+:::
+
 If one wants to correct from a low-level method into a full DFT level, and the system is too large to do full DFT calculation, one may try the experimental pairwise DPRc model.
 In a pairwise DPRc model, the total energy is divided into QM internal energy and the sum of QM/MM energy for each MM residue $l$:
 
-$$ E = E_\text{QM} + \sum_{l} E_{\text{QM/MM},l} $$
+$$ E = E*\text{QM} + \sum*{l} E\_{\text{QM/MM},l} $$
 
 In this way, the interaction between the QM region and each MM fragmentation can be computed and trained separately.
 Thus, the pairwise DPRc model is divided into two sub-[DPRc models](./dprc.md).
@@ -142,32 +200,19 @@ It is noted that the [`se_atten` descriptor](./train-se-atten.md) should be used
 {
   "model": {
     "type": "pairwise_dprc",
-    "type_map": [
-      "C",
-      "P",
-      "O",
-      "H",
-      "OW",
-      "HW"
-    ],
+    "type_map": ["C", "P", "O", "H", "OW", "HW"],
     "type_embedding": {
-      "neuron": [
-        8
-      ],
+      "neuron": [8],
       "precision": "float32"
     },
     "qm_model": {
       "descriptor": {
         "type": "se_atten_v2",
         "sel": 24,
-        "rcut_smth": 0.50,
-        "rcut": 9.00,
+        "rcut_smth": 0.5,
+        "rcut": 9.0,
         "attn_layer": 0,
-        "neuron": [
-          25,
-          50,
-          100
-        ],
+        "neuron": [25, 50, 100],
         "resnet_dt": false,
         "axis_neuron": 12,
         "precision": "float32",
@@ -175,21 +220,10 @@ It is noted that the [`se_atten` descriptor](./train-se-atten.md) should be used
       },
       "fitting_net": {
         "type": "ener",
-        "neuron": [
-          240,
-          240,
-          240
-        ],
+        "neuron": [240, 240, 240],
         "resnet_dt": true,
         "precision": "float32",
-        "atom_ener": [
-          null,
-          null,
-          null,
-          null,
-          0.0,
-          0.0
-        ],
+        "atom_ener": [null, null, null, null, 0.0, 0.0],
         "seed": 1
       }
     },
@@ -197,92 +231,38 @@ It is noted that the [`se_atten` descriptor](./train-se-atten.md) should be used
       "descriptor": {
         "type": "se_atten_v2",
         "sel": 27,
-        "rcut_smth": 0.50,
-        "rcut": 6.00,
+        "rcut_smth": 0.5,
+        "rcut": 6.0,
         "attn_layer": 0,
-        "neuron": [
-          25,
-          50,
-          100
-        ],
+        "neuron": [25, 50, 100],
         "resnet_dt": false,
         "axis_neuron": 12,
         "set_davg_zero": true,
         "exclude_types": [
-          [
-            0,
-            0
-          ],
-          [
-            0,
-            1
-          ],
-          [
-            0,
-            2
-          ],
-          [
-            0,
-            3
-          ],
-          [
-            1,
-            1
-          ],
-          [
-            1,
-            2
-          ],
-          [
-            1,
-            3
-          ],
-          [
-            2,
-            2
-          ],
-          [
-            2,
-            3
-          ],
-          [
-            3,
-            3
-          ],
-          [
-            4,
-            4
-          ],
-          [
-            4,
-            5
-          ],
-          [
-            5,
-            5
-          ]
+          [0, 0],
+          [0, 1],
+          [0, 2],
+          [0, 3],
+          [1, 1],
+          [1, 2],
+          [1, 3],
+          [2, 2],
+          [2, 3],
+          [3, 3],
+          [4, 4],
+          [4, 5],
+          [5, 5]
         ],
         "precision": "float32",
         "seed": 1
       },
       "fitting_net": {
         "type": "ener",
-        "neuron": [
-          240,
-          240,
-          240
-        ],
+        "neuron": [240, 240, 240],
         "resnet_dt": true,
         "seed": 1,
         "precision": "float32",
-        "atom_ener": [
-          0.0,
-          0.0,
-          0.0,
-          0.0,
-          0.0,
-          0.0
-        ]
+        "atom_ener": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
       }
     }
   }
diff --git a/doc/model/index.md b/doc/model/index.md
deleted file mode 100644
index 589b39b2b5..0000000000
--- a/doc/model/index.md
+++ /dev/null
@@ -1,20 +0,0 @@
-# Model
-
-- [Overall](overall.md)
-- [Descriptor `"se_e2_a"`](train-se-e2-a.md)
-- [Descriptor `"se_e2_r"`](train-se-e2-r.md)
-- [Descriptor `"se_e3"`](train-se-e3.md)
-- [Descriptor `"se_atten"`](train-se-atten.md)
-- [Descriptor `"se_atten_v2"`](train-se-atten.md#descriptor-se_atten_v2)
-- [Descriptor `"se_a_mask"`](train-se-a-mask.md)
-- [Descriptor `"hybrid"`](train-hybrid.md)
-- [Descriptor `sel`](sel.md)
-- [Fit energy](train-energy.md)
-- [Fit spin energy](train-energy-spin.md)
-- [Fit `tensor` like `Dipole` and `Polarizability`](train-fitting-tensor.md)
-- [Fit electronic density of states (DOS)](train-fitting-dos.md)
-- [Train a Deep Potential model using `type embedding` approach](train-se-e2-a-tebd.md)
-- [Deep potential long-range](dplr.md)
-- [Deep Potential - Range Correction (DPRc)](dprc.md)
-- [Linear model](linear.md)
-- [Interpolation or combination with a pairwise potential](pairtab.md)
diff --git a/doc/model/index.rst b/doc/model/index.rst
index 1e850cac67..7b7fb082f1 100644
--- a/doc/model/index.rst
+++ b/doc/model/index.rst
@@ -9,6 +9,7 @@ Model
    train-se-e2-r
    train-se-e3
    train-se-atten
+   dpa2
    train-hybrid
    sel
    train-energy
diff --git a/doc/model/linear.md b/doc/model/linear.md
index b5e7c5c76a..3891559d90 100644
--- a/doc/model/linear.md
+++ b/doc/model/linear.md
@@ -1,4 +1,8 @@
-## Linear model
+## Linear model {{ tensorflow_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}
+:::
 
 One can linearly combine existing models with arbitrary coefficients:
 
diff --git a/doc/model/overall.md b/doc/model/overall.md
index f8fb2fa151..102a8fc671 100644
--- a/doc/model/overall.md
+++ b/doc/model/overall.md
@@ -16,17 +16,20 @@ The indices of the neighboring atoms (i.e. atoms within a certain cutoff radius)
 Note that the Cartesian coordinates can be either under the periodic boundary condition (PBC) or in vacuum (under the open boundary condition).
 The network parameters are denoted by $\boldsymbol \theta = \{\boldsymbol \theta_d, \boldsymbol \theta_f\}$, where $\boldsymbol \theta_d$ and $\boldsymbol\theta_f$ yield the network parameters of the descriptor (if any) and those of the fitting network, respectively.
 From the above equation, one may compute the global property of the system by
+
 ```math
     \boldsymbol y = \sum_{i=1}^N \boldsymbol y_i,
 ```
+
 where $N$ is the number of atoms in a frame.
 For example, if $y_i$ represents the potential energy contribution of atom $i$, then $y$ gives the total potential energy of the frame.[^1]
 
-[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
 
 ## Instructions
 
 A model has two parts, a descriptor that maps atomic configuration to a set of symmetry invariant features, and a fitting net that takes descriptor as input and predicts the atomic contribution to the target physical property. It's defined in the {ref}`model <model>` section of the `input.json`, for example,
+
 ```json
     "model": {
         "type_map":	["O", "H"],
@@ -38,11 +41,13 @@ A model has two parts, a descriptor that maps atomic configuration to a set of s
         }
     }
 ```
+
 The two subsections, {ref}`descriptor <model/descriptor>` and {ref}`fitting_net <model/fitting_net>`, define the descriptor and the fitting net, respectively.
 
 The {ref}`type_map <model/type_map>` is optional, which provides the element names (but not necessarily same as the actual name of the element) of the corresponding atom types. A water model, as in this example, has two kinds of atoms. The atom types are internally recorded as integers, e.g., `0` for oxygen and `1` for hydrogen here. A mapping from the atom type to their names is provided by {ref}`type_map <model/type_map>`.
 
 DeePMD-kit implements the following descriptors:
+
 1. [`se_e2_a`](train-se-e2-a.md): DeepPot-SE constructed from all information (both angular and radial) of atomic configurations. The embedding takes the distance between atoms as input.
 2. [`se_e2_r`](train-se-e2-r.md): DeepPot-SE constructed from radial information of atomic configurations. The embedding takes the distance between atoms as input.
 3. [`se_e3`](train-se-e3.md): DeepPot-SE constructed from all information (both angular and radial) of atomic configurations. The embedding takes angles between two neighboring atoms as input.
@@ -51,6 +56,7 @@ DeePMD-kit implements the following descriptors:
 6. [`hybrid`](train-hybrid.md): Concate a list of descriptors to form a new descriptor.
 
 The fitting of the following physical properties is supported
+
 1. [`ener`](train-energy.md): Fit the energy of the system. The force (derivative with atom positions) and the virial (derivative with the box tensor) can also be trained.
 2. [`dipole`](train-fitting-tensor.md): The dipole moment.
 3. [`polar`](train-fitting-tensor.md): The polarizability.
diff --git a/doc/model/pairtab.md b/doc/model/pairtab.md
index 115345796a..c8763705f7 100644
--- a/doc/model/pairtab.md
+++ b/doc/model/pairtab.md
@@ -1,17 +1,27 @@
-# Interpolation or combination with a pairwise potential
+# Interpolation or combination with a pairwise potential {{ tensorflow_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}
+:::
 
 ## Theory
+
 In applications like the radiation damage simulation, the interatomic distance may become too close, so that the DFT calculations fail.
 In such cases, the DP model that is an approximation of the DFT potential energy surface is usually replaced by an empirical potential, like the Ziegler-Biersack-Littmark (ZBL) screened nuclear repulsion potential in the radiation damage simulations.
 The DeePMD-kit package supports the interpolation between DP and an empirical pairwise potential
+
 ```math
   E_i = (1-w_i) E_i^{\mathrm{DP}} + w_i (E_i^0 + E_i^{\mathrm{pair}}),
 ```
+
 where the $w_i$ is the interpolation weight and the $E_i^{\mathrm{pair}}  $ is the atomic contribution due to the pairwise potential $u^{\mathrm{pair}}(r)$, i.e.
+
 ```math
   E_i^{\mathrm{pair}} = \sum_{j\in n(i)} u^{\mathrm{pair}}(r_{ij}).
 ```
+
 The interpolation weight $w_i$ is defined by
+
 ```math
     w_i =
     \begin{cases}
@@ -20,19 +30,22 @@ The interpolation weight $w_i$ is defined by
     0, & \sigma_i \geq r_b,
     \end{cases}
 ```
+
 where $u_i = (\sigma_i - r_a ) / (r_b - r_a)$.
 $E_i^0$ is the atom energy bias.
 In the range $[r_a, r_b]$, the DP model smoothly switched off and the pairwise potential smoothly switched on from $r_b$ to $r_a$. The $\sigma_i$ is the softmin of the distance between atom $i$ and its neighbors,
+
 ```math
   \sigma_i =
   \dfrac
   {\sum\limits_{j\in n(i)} r_{ij} e^{-r_{ij} / \alpha_s}}
   {\sum\limits_{j\in n(i)} e^{-r_{ij} / \alpha_s}},
 ```
+
 where the scale $\alpha_s$ is a tunable scale of the interatomic distance $r_{ij}$.
 The pairwise potential $u^{\textrm{pair}}(r)$ is defined by a user-defined table that provides the value of $u^{\textrm{pair}}$ on an evenly discretized grid from 0 to the cutoff distance.[^1]
 
-[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
 
 DeePMD-kit also supports combination with a pairwise potential:
 
@@ -49,6 +62,10 @@ in the order of Type_0-Type_0, Type_0-Type_1, ..., Type_0-Type_N, Type_1-Type_1,
 
 The interaction should be smooth at the cut-off distance.
 
+:::{note}
+In instances where the interaction at the cut-off distance is not delineated within the table file, extrapolation will be conducted utilizing the available interaction data. This extrapolative procedure guarantees a smooth transition from the table-provided value to `0` whenever feasible.
+:::
+
 ## Interpolation with a short-range pairwise potential
 
 ```json
diff --git a/doc/model/sel.md b/doc/model/sel.md
index f4a3cf6c09..8455c242a9 100644
--- a/doc/model/sel.md
+++ b/doc/model/sel.md
@@ -5,9 +5,11 @@ All descriptors require to set `sel`, which means the expected maximum number of
 `sel` should not be too large or too small. If `sel` is too large, the computing will become much slower and cost more memory. If `sel` is not enough, the energy will be not conserved, making the accuracy of the model worse.
 
 To determine a proper `sel`, one can calculate the neighbor stat of the training data before training:
+
 ```sh
 dp neighbor-stat -s data -r 6.0 -t O H
 ```
+
 where `data` is the directory of data, `6.0` is the cutoff radius, and `O` and `H` is the type map. The program will give the `max_nbor_size`. For example, `max_nbor_size` of the water example is `[38, 72]`, meaning an atom may have 38 O neighbors and 72 H neighbors in the training data.
 
 The `sel` should be set to a higher value than that of the training data, considering there may be some extreme geometries during MD simulations. As a result, we set `sel` to `[46, 92]` in the water example.
diff --git a/doc/model/train-energy-spin.md b/doc/model/train-energy-spin.md
index d155ec977d..3eb589590b 100644
--- a/doc/model/train-energy-spin.md
+++ b/doc/model/train-energy-spin.md
@@ -1,10 +1,15 @@
-# Fit spin energy
+# Fit spin energy {{ tensorflow_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}
+:::
 
 In this section, we will take `$deepmd_source_dir/examples/NiO/se_e2_a/input.json` as an example of the input file.
 
 ## Spin
 
 The construction of the fitting net is give by section {ref}`spin <model/spin>`
+
 ```json
     "spin" : {
         "use_spin":         [true, false],
@@ -12,9 +17,10 @@ The construction of the fitting net is give by section {ref}`spin <model/spin>`
         "spin_norm":        [1.2737],
     },
 ```
-* {ref}`use_spin <model/spin[ener_spin]/use_spin>` determines whether to turn on the magnetism of the atoms.The index of this option matches option `type_map <model/type_map>`.
-* {ref}`virtual_len <model/spin[ener_spin]/virtual_len>` specifies the distance between virtual atom and the belonging real atom.
-* {ref}`spin_norm <model/spin[ener_spin]/spin_norm>` gives the magnitude of the magnetic moment for each magnatic atom.
+
+- {ref}`use_spin <model/spin[ener_spin]/use_spin>` determines whether to turn on the magnetism of the atoms.The index of this option matches option `type_map <model/type_map>`.
+- {ref}`virtual_len <model/spin[ener_spin]/virtual_len>` specifies the distance between virtual atom and the belonging real atom.
+- {ref}`spin_norm <model/spin[ener_spin]/spin_norm>` gives the magnitude of the magnetic moment for each magnatic atom.
 
 ## Spin Loss
 
@@ -29,11 +35,13 @@ The prefectors may not be a constant, rather it changes linearly with the learni
 $$p_{fr}(t) = p_{fr}^0 \frac{ \alpha(t) }{ \alpha(0) } + p_{fr}^\infty ( 1 - \frac{ \alpha(t) }{ \alpha(0) })$$
 
 where $\alpha(t)$ denotes the learning rate at step $t$. $p_{fr}^0$ and $p_{fr}^\infty$ specifies the $p_f$ at the start of the training and at the limit of $t \to \infty$ (set by {ref}`start_pref_fr <loss[ener_spin]/start_pref_fr>` and {ref}`limit_pref_f <loss[ener_spin]/limit_pref_fr>`, respectively), i.e.
+
 ```math
 pref_fr(t) = start_pref_fr * ( lr(t) / start_lr ) + limit_pref_fr * ( 1 - lr(t) / start_lr )
 ```
 
 The {ref}`loss <loss>` section in the `input.json` is
+
 ```json
     "loss" :{
 	"type":		        "ener_spin",
@@ -47,6 +55,7 @@ The {ref}`loss <loss>` section in the `input.json` is
 	"limit_pref_v":	    0,
     },
 ```
+
 The options {ref}`start_pref_e <loss[ener_spin]/start_pref_e>`, {ref}`limit_pref_e <loss[ener_spin]/limit_pref_e>`, {ref}`start_pref_fr <loss[ener_spin]/start_pref_fr>`, {ref}`limit_pref_fm <loss[ener_spin]/limit_pref_fm>`, {ref}`start_pref_v <loss[ener_spin]/start_pref_v>` and {ref}`limit_pref_v <loss[ener_spin]/limit_pref_v>` determine the start and limit prefactors of energy, atomic force, magnatic force and virial, respectively.
 
 If one does not want to train with virial, then he/she may set the virial prefactors {ref}`start_pref_v <loss[ener_spin]/start_pref_v>` and {ref}`limit_pref_v <loss[ener_spin]/limit_pref_v>` to 0.
diff --git a/doc/model/train-energy.md b/doc/model/train-energy.md
index 90e027d7a0..c1da1f4c1f 100644
--- a/doc/model/train-energy.md
+++ b/doc/model/train-energy.md
@@ -1,66 +1,86 @@
-# Fit energy
+# Fit energy {{ tensorflow_icon }} {{ pytorch_icon }} {{ dpmodel_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, DP {{ dpmodel_icon }}
+:::
 
 In this section, we will take `$deepmd_source_dir/examples/water/se_e2_a/input.json` as an example of the input file.
 
 ## Theory
 
-In the DP model, we let the fitting network $\mathcal{F}_ 0$ maps the descriptor $\mathcal{D}^i$ to a scalar, where the subscript $0$ means that the output is a zero-order tensor (i.e. scalar).  The model can then be used to predict the total potential energy of the system by
+In the DP model, we let the fitting network $\mathcal{F}_ 0$ maps the descriptor $\mathcal{D}^i$ to a scalar, where the subscript $0$ means that the output is a zero-order tensor (i.e. scalar). The model can then be used to predict the total potential energy of the system by
+
 ```math
     E  =  \sum_i E_i = \sum_i \mathcal F_0 (\mathcal D^i),
 ```
+
 where the output of the fitting network is treated as the atomic potential energy contribution, i.e. $E_i$.
 The output scalar can also be treated as other scalar properties defined on an atom, for example, the partial charge of atom $i$.
 
-In some cases, atomic-specific or frame-specific  parameters, such as electron temperature, may be treated as extra input to the fitting network.
+In some cases, atomic-specific or frame-specific parameters, such as electron temperature, may be treated as extra input to the fitting network.
 We denote the atomic and frame-specific parameters by $\boldsymbol{P}^i\in \mathbb{R}^{N_p}$ (with $N_p$ being the dimension) and $\boldsymbol{Q}\in \mathbb{R}^{N_q}$ (with $N_q$ being the dimension), respectively.
+
 ```math
     E_i=\mathcal{F}_0(\{\mathcal{D}^i, \boldsymbol{P}^i, \boldsymbol Q\}).
 ```
 
 The atomic force $\boldsymbol{F}_ {i}$ and the virial tensor $\boldsymbol{\Xi} = (\Xi_{\alpha\beta})$ (if PBC is applied) can be derived from the potential energy $E$:
+
 ```math
     F_{i,\alpha}=-\frac{\partial E}{\partial r_{i,\alpha}},
 ```
+
 ```math
     \Xi_{\alpha\beta}=-\sum_{\gamma} \frac{\partial E}{\partial h_{\gamma\alpha}} h_{\gamma\beta},
 ```
+
 where $r_{i,\alpha}$ and $F_{i,\alpha}$ denotes the $\alpha$-th component of the coordinate and force of atom $i$. $h_{\alpha\beta}$ is the $\beta$-th component of the $\alpha$-th basis vector of the simulation region.
 
 The properties $\eta$ of the energy loss function could be energy $E$, force $\boldsymbol{F}$, virial $\boldsymbol{\Xi}$, relative energy $\Delta E$, or any combination among them, and the loss functions of them are
+
 ```math
     L_E(\boldsymbol{x};\boldsymbol{\theta})=\frac{1}{N}(E(\boldsymbol{x};\boldsymbol{\theta})-E^*)^2,
 ```
+
 ```math
     L_F(\boldsymbol{x};\boldsymbol{\theta})=\frac{1}{3N}\sum_{k=1}^{N}\sum_{\alpha=1}^3(F_{k,\alpha}(\boldsymbol{x};\boldsymbol{\theta})-F_{k,\alpha}^*)^2,
 ```
+
 ```math
     L_\Xi(\boldsymbol{x};\boldsymbol{\theta})=\frac{1}{9N}\sum_{\alpha,\beta=1}^{3}(\Xi_{\alpha\beta}(\boldsymbol{x};\boldsymbol{\theta})-\Xi_{\alpha\beta}^*)^2,
 ```
+
 ```math
     L_{\Delta E}(\boldsymbol{x};\boldsymbol{\theta})=\frac{1}{N}({\Delta E}(\boldsymbol{x};\boldsymbol{\theta})-{\Delta E}^*)^2,
 ```
+
 where $F_{k,\alpha}$ is the $\alpha$-th component of the force on atom $k$, and the superscript $\ast$ indicates the label of the property that should be provided in advance.
 Using $N$ ensures that each loss of fitting property is averaged over atomic contributions before they contribute to the total loss by weight.
 
 If part of atoms is more important than others, for example, certain atoms play an essential role when calculating free energy profiles or kinetic isotope effects, the MSE of atomic forces with prefactors $q_{k}$ can also be used as the loss function:
+
 ```math
     L_F^p(\mathbf{x};\boldsymbol{\theta})=\frac{1}{3N}\sum_{k=1}^{N} \sum_{\alpha} q_{k} (F_{k,\alpha}(\mathbf{x};\boldsymbol{\theta})-F_{k,\alpha}^*)^2.
 ```
+
 The atomic forces with larger prefactors will be fitted more accurately than those in other atoms.
 
 If some forces are quite large, for example, forces can be greater than 60 eV/Å in high-temperature reactive simulations, one may also prefer the force loss is relative to the magnitude:
+
 ```math
     L^r_F(\boldsymbol{x};\boldsymbol{\theta})=\frac{1}{3N}\sum_{k=1}^{N}\sum_\alpha \left(\frac{F_{k,\alpha}(\boldsymbol{x};\boldsymbol{\theta})-F_{k,\alpha}^*}{\lvert\boldsymbol{F}^\ast_k\lvert + \nu}\right)^2.
 ```
+
 where $\nu$ is a small constant used to protect
 an atom where the magnitude of $\boldsymbol{F}^\ast_k$ is small from having a large $L^r_F$.
 Benefiting from the relative force loss, small forces can be fitted more accurately.[^1]
 
-[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
 
 ## The fitting network
 
 The construction of the fitting net is given by section {ref}`fitting_net <model/fitting_net>`
+
 ```json
 	"fitting_net" : {
 	    "neuron":		[240, 240, 240],
@@ -68,9 +88,10 @@ The construction of the fitting net is given by section {ref}`fitting_net <model
 	    "seed":		1
 	},
 ```
-* {ref}`neuron <model/fitting_net[ener]/neuron>` specifies the size of the fitting net. If two neighboring layers are of the same size, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them.
-* If the option {ref}`resnet_dt <model/fitting_net[ener]/resnet_dt>` is set to `true`, then a timestep is used in the ResNet.
-* {ref}`seed <model/fitting_net[ener]/seed>` gives the random seed that is used to generate random numbers when initializing the model parameters.
+
+- {ref}`neuron <model/fitting_net[ener]/neuron>` specifies the size of the fitting net. If two neighboring layers are of the same size, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them.
+- If the option {ref}`resnet_dt <model/fitting_net[ener]/resnet_dt>` is set to `true`, then a timestep is used in the ResNet.
+- {ref}`seed <model/fitting_net[ener]/seed>` gives the random seed that is used to generate random numbers when initializing the model parameters.
 
 ## Loss
 
@@ -83,11 +104,13 @@ where $L_e$, $L_f$, and $L_v$ denote the loss in energy, forces and virials, res
 $$p_f(t) = p_f^0 \frac{ \alpha(t) }{ \alpha(0) } + p_f^\infty ( 1 - \frac{ \alpha(t) }{ \alpha(0) })$$
 
 where $\alpha(t)$ denotes the learning rate at step $t$. $p_f^0$ and $p_f^\infty$ specifies the $p_f$ at the start of the training and the limit of $t \to \infty$ (set by {ref}`start_pref_f <loss[ener]/start_pref_f>` and {ref}`limit_pref_f <loss[ener]/limit_pref_f>`, respectively), i.e.
+
 ```math
 pref_f(t) = start_pref_f * ( lr(t) / start_lr ) + limit_pref_f * ( 1 - lr(t) / start_lr )
 ```
 
 The {ref}`loss <loss>` section in the `input.json` is
+
 ```json
     "loss" : {
 	"start_pref_e":	0.02,
@@ -98,6 +121,7 @@ The {ref}`loss <loss>` section in the `input.json` is
 	"limit_pref_v":	0
     }
 ```
+
 The options {ref}`start_pref_e <loss[ener]/start_pref_e>`, {ref}`limit_pref_e <loss[ener]/limit_pref_e>`, {ref}`start_pref_f <loss[ener]/start_pref_f>`, {ref}`limit_pref_f <loss[ener]/limit_pref_f>`, {ref}`start_pref_v <loss[ener]/start_pref_v>` and {ref}`limit_pref_v <loss[ener]/limit_pref_v>` determine the start and limit prefactors of energy, force and virial, respectively.
 
 If one does not want to train with virial, then he/she may set the virial prefactors {ref}`start_pref_v <loss[ener]/start_pref_v>` and {ref}`limit_pref_v <loss[ener]/limit_pref_v>` to 0.
diff --git a/doc/model/train-fitting-dos.md b/doc/model/train-fitting-dos.md
index bbe5b50690..7b68525a45 100644
--- a/doc/model/train-fitting-dos.md
+++ b/doc/model/train-fitting-dos.md
@@ -1,4 +1,8 @@
-# Fit electronic density of states (DOS)
+# Fit electronic density of states (DOS) {{ tensorflow_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}
+:::
 
 Here we present an API to DeepDOS model, which can be used to fit electronic density of state (DOS) (which is a vector).
 
@@ -32,9 +36,9 @@ The JSON of `dos` type should be provided like
 	},
 ```
 
--   `type` specifies which type of fitting net should be used. It should be `dos`.
--   `numb_dos` specifies the length of output vector (density of states), which the same as the `NEDOS` set in VASP software, this argument defines the output length of the neural network. We note that the length of `dos` provided in training set should be the same.
--   The rest arguments have the same meaning as they do in `ener` mode.
+- `type` specifies which type of fitting net should be used. It should be `dos`.
+- `numb_dos` specifies the length of output vector (density of states), which the same as the `NEDOS` set in VASP software, this argument defines the output length of the neural network. We note that the length of `dos` provided in training set should be the same.
+- The rest arguments have the same meaning as they do in `ener` mode.
 
 ## Loss
 
@@ -62,13 +66,12 @@ The loss section should be provided like
 	},
 ```
 
--   {ref}`type <loss/type>` should be written as `dos` as a distinction from `ener` mode.
--   `pref_dos` and `pref_ados`, respectively specify the weight of global and atomic loss. If set to 0, the corresponding label will not be included in the training process.
--   We also provides a combination training of vector and its cumulative distribution function `cdf`, which can be defined as
+- {ref}`type <loss/type>` should be written as `dos` as a distinction from `ener` mode.
+- `pref_dos` and `pref_ados`, respectively specify the weight of global and atomic loss. If set to 0, the corresponding label will not be included in the training process.
+- We also provides a combination training of vector and its cumulative distribution function `cdf`, which can be defined as
 
 $$D(\epsilon) = \int_{e_{min}}^{\epsilon} g(\epsilon')d\epsilon'$$
 
-
 ## Training Data Preparation
 
 The global label should be named `dos.npy/raw`, while the atomic label should be named `atomic_dos.npy/raw`. If wrongly named, DP will report an error.
diff --git a/doc/model/train-fitting-tensor.md b/doc/model/train-fitting-tensor.md
index 90370adfcf..4d5cb22707 100644
--- a/doc/model/train-fitting-tensor.md
+++ b/doc/model/train-fitting-tensor.md
@@ -1,157 +1,243 @@
-# Fit `tensor` like `Dipole` and `Polarizability`
-
-Unlike `energy`, which is a scalar, one may want to fit some high dimensional physical quantity, like `dipole` (vector) and `polarizability` (matrix, shorted as `polar`). Deep Potential has provided different APIs to do this. In this example, we will show you how to train a model to fit a water system. A complete training input script of the examples can be found in
-
-```bash
-$deepmd_source_dir/examples/water_tensor/dipole/dipole_input.json
-$deepmd_source_dir/examples/water_tensor/polar/polar_input.json
-```
-
-The training and validation data are also provided our examples. But note that **the data provided along with the examples are of limited amount, and should not be used to train a production model.**
-
-Similar to the `input.json` used in `ener` mode, training JSON is also divided into {ref}`model <model>`, {ref}`learning_rate <learning_rate>`, {ref}`loss <loss>` and {ref}`training <training>`. Most keywords remain the same as `ener` mode, and their meaning can be found [here](train-se-e2-a.md). To fit a tensor, one needs to modify {ref}`model/fitting_net <model/fitting_net>` and {ref}`loss <loss>`.
-
-## Theory
-
-To represent the first-order tensorial properties (i.e. vector properties), we let the fitting network, denoted by $\mathcal F_{1}$, output an $M$-dimensional vector; then we have the representation,
-
-```math
-(T_i^{(1)})_\alpha =
-\frac{1}{N_c}
-\sum_{j=1}^{N_c}\sum_{m=1}^M (\mathcal G^i)_{jm} (\mathcal R^i)_{j,\alpha+1}
-(\mathcal F_{1}(\mathcal D^i))_m, \ \alpha=1,2,3.
-```
-We let the fitting network $\mathcal F_{2}$ output an $M$-dimensional vector, and the second-order tensorial properties (matrix properties) are formulated as
-```math
-(T_i^{(2)})_{\alpha\beta} =
-\frac{1}{N_c^2}
-\sum_{j=1}^{N_c}\sum_{k=1}^{N_c}\sum_{m=1}^M
-(\mathcal G^i)_{jm}
-(\mathcal R^i)_{j,\alpha+1}
-(\mathcal R^i)_{k,\beta+1}
-(\mathcal G^i)_{km}
-(\mathcal F_{2}(\mathcal D^i))_m,
-\ \alpha,\beta=1,2,3,
-```
-
-where $\mathcal{G}^i$ and $\mathcal{R}^i$ can be found in [`se_e2_a`](./train-se-e2-a.md).
-Thus, the tensor fitting network requires the descriptor to have the same or similar form as the DeepPot-SE descriptor.
-$\mathcal{F}_1$ and $\mathcal F_2$ are the neural network functions.
-The total tensor $\boldsymbol{T}$ (total dipole $\boldsymbol{T}^{(1)}$ or total polarizability $\boldsymbol{T}^{(2)}$) is the sum of the atomic tensor:
-```math
-    \boldsymbol{T} = \sum_i \boldsymbol{T}_i.
-```
-The tensorial models can be used to calculate IR spectrum and Raman spectrum.[^1]
-
-[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
-
-## The fitting Network
-
-The {ref}`fitting_net <model/fitting_net>` section tells DP which fitting net to use.
-
-The JSON of `dipole` type should be provided like
-
-```json
-	"fitting_net" : {
-		"type": "dipole",
-		"sel_type": [0],
-		"neuron": [100,100,100],
-		"resnet_dt": true,
-		"seed": 1,
-	},
-```
-
-The JSON of `polar` type should be provided like
-
-```json
-	"fitting_net" : {
-	   	"type": "polar",
-		"sel_type": [0],
-		"neuron": [100,100,100],
-		"resnet_dt": true,
-		"seed": 1,
-	},
-```
-
--   `type` specifies which type of fitting net should be used. It should be either `dipole` or `polar`. Note that `global_polar` mode in version 1.x is already **deprecated** and is merged into `polar`. To specify whether a system is global or atomic, please see [here](train-se-e2-a.md).
--   `sel_type` is a list specifying which type of atoms have the quantity you want to fit. For example, in the water system, `sel_type` is `[0]` since `0` represents atom `O`. If left unset, all types of atoms will be fitted.
--   The rest arguments have the same meaning as they do in `ener` mode.
-
-## Loss
-
-DP supports a combinational training of the global system (only a global `tensor` label, i.e. dipole or polar, is provided in a frame) and atomic system (labels for **each** atom included in `sel_type` are provided). In a global system, each frame has just **one** `tensor` label. For example, when fitting `polar`, each frame will just provide a `1 x 9` vector which gives the elements of the polarizability tensor of that frame in order XX, XY, XZ, YX, YY, YZ, XZ, ZY, ZZ. By contrast, in an atomic system, each atom in `sel_type` has a `tensor` label. For example, when fitting a dipole, each frame will provide a `#sel_atom x 3` matrices, where `#sel_atom` is the number of atoms whose type are in `sel_type`.
-
-The {ref}`loss <loss>` section tells DP the weight of these two kinds of loss, i.e.
-
-```python
-loss = pref * global_loss + pref_atomic * atomic_loss
-```
-
-The loss section should be provided like
-
-```json
-	"loss" : {
-		"type":		"tensor",
-		"pref":		1.0,
-		"pref_atomic":	1.0
-	},
-```
-
--   {ref}`type <loss/type>` should be written as `tensor` as a distinction from `ener` mode.
--   {ref}`pref <loss[tensor]/pref>` and {ref}`pref_atomic <loss[tensor]/pref_atomic>` respectively specify the weight of global loss and atomic loss. It can not be left unset. If set to 0, the corresponding label will NOT be included in the training process.
-
-## Training Data Preparation
-
-In tensor mode, the identification of the label's type (global or atomic) is derived from the file name. The global label should be named `dipole.npy/raw` or `polarizability.npy/raw`, while the atomic label should be named `atomic_dipole.npy/raw` or `atomic_polarizability.npy/raw`. If wrongly named, DP will report an error
-
-```bash
-ValueError: cannot reshape array of size xxx into shape (xx,xx). This error may occur when your label mismatch it's name, i.e. you might store global tensor in `atomic_tensor.npy` or atomic tensor in `tensor.npy`.
-```
-
-In this case, please check the file name of the label.
-
-## Train the Model
-
-The training command is the same as `ener` mode, i.e.
-
-```bash
-dp train input.json
-```
-
-The detailed loss can be found in `lcurve.out`:
-
-```
-#  step    rmse_val   rmse_trn  rmse_lc_val rmse_lc_trn rmse_gl_val rmse_gl_trn  lr
-     0     8.34e+00   8.26e+00   8.34e+00   8.26e+00    0.00e+00    0.00e+00   1.0e-02
-   100     3.51e-02   8.55e-02   0.00e+00   8.55e-02    4.38e-03    0.00e+00   5.0e-03
-   200     4.77e-02   5.61e-02   0.00e+00   5.61e-02    5.96e-03    0.00e+00   2.5e-03
-   300     5.68e-02   1.47e-02   0.00e+00   0.00e+00    7.10e-03    1.84e-03   1.3e-03
-   400     3.73e-02   3.48e-02   1.99e-02   0.00e+00    2.18e-03    4.35e-03   6.3e-04
-   500     2.77e-02   5.82e-02   1.08e-02   5.82e-02    2.11e-03    0.00e+00   3.2e-04
-   600     2.81e-02   5.43e-02   2.01e-02   0.00e+00    1.01e-03    6.79e-03   1.6e-04
-   700     2.97e-02   3.28e-02   2.03e-02   0.00e+00    1.17e-03    4.10e-03   7.9e-05
-   800     2.25e-02   6.19e-02   9.05e-03   0.00e+00    1.68e-03    7.74e-03   4.0e-05
-   900     3.18e-02   5.54e-02   9.93e-03   5.54e-02    2.74e-03    0.00e+00   2.0e-05
-  1000     2.63e-02   5.02e-02   1.02e-02   5.02e-02    2.01e-03    0.00e+00   1.0e-05
-  1100     3.27e-02   5.89e-02   2.13e-02   5.89e-02    1.43e-03    0.00e+00   5.0e-06
-  1200     2.85e-02   2.42e-02   2.85e-02   0.00e+00    0.00e+00    3.02e-03   2.5e-06
-  1300     3.47e-02   5.71e-02   1.07e-02   5.71e-02    3.00e-03    0.00e+00   1.3e-06
-  1400     3.13e-02   5.76e-02   3.13e-02   5.76e-02    0.00e+00    0.00e+00   6.3e-07
-  1500     3.34e-02   1.11e-02   2.09e-02   0.00e+00    1.57e-03    1.39e-03   3.2e-07
-  1600     3.11e-02   5.64e-02   3.11e-02   5.64e-02    0.00e+00    0.00e+00   1.6e-07
-  1700     2.97e-02   5.05e-02   2.97e-02   5.05e-02    0.00e+00    0.00e+00   7.9e-08
-  1800     2.64e-02   7.70e-02   1.09e-02   0.00e+00    1.94e-03    9.62e-03   4.0e-08
-  1900     3.28e-02   2.56e-02   3.28e-02   0.00e+00    0.00e+00    3.20e-03   2.0e-08
-  2000     2.59e-02   5.71e-02   1.03e-02   5.71e-02    1.94e-03    0.00e+00   1.0e-08
-```
-
-One may notice that in each step, some of the local loss and global loss will be `0.0`. This is because our training data and validation data consist of the global system and atomic system, i.e.
-```
-	--training_data
-		>atomic_system
-		>global_system
-	--validation_data
-		>atomic_system
-		>global_system
-```
-During training, at each step when the `lcurve.out` is printed, the system used for evaluating the training (validation) error may be either with only global or only atomic labels, thus the corresponding atomic or global errors are missing and are printed as zeros.
+# Fit `tensor` like `Dipole` and `Polarizability` {{ tensorflow_icon }} {{ pytorch_icon }} {{ dpmodel_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }} {{ pytorch_icon }} {{ dpmodel_icon }}
+:::
+
+Unlike `energy`, which is a scalar, one may want to fit some high dimensional physical quantity, like `dipole` (vector) and `polarizability` (matrix, shorted as `polar`). Deep Potential has provided different APIs to do this. In this example, we will show you how to train a model to fit a water system. A complete training input script of the examples can be found in
+
+::::{tab-set}
+
+:::{tab-item} TensorFlow {{ tensorflow_icon }}
+
+```bash
+$deepmd_source_dir/examples/water_tensor/dipole/dipole_input.json
+$deepmd_source_dir/examples/water_tensor/polar/polar_input.json
+```
+
+:::
+
+:::{tab-item} PyTorch {{ pytorch_icon }}
+
+```bash
+$deepmd_source_dir/examples/water_tensor/dipole/dipole_input_torch.json
+$deepmd_source_dir/examples/water_tensor/polar/polar_input_torch.json
+```
+
+:::
+
+::::
+
+The training and validation data are also provided our examples. But note that **the data provided along with the examples are of limited amount, and should not be used to train a production model.**
+
+Similar to the `input.json` used in `ener` mode, training JSON is also divided into {ref}`model <model>`, {ref}`learning_rate <learning_rate>`, {ref}`loss <loss>` and {ref}`training <training>`. Most keywords remain the same as `ener` mode, and their meaning can be found [here](train-se-e2-a.md). To fit a tensor, one needs to modify {ref}`model/fitting_net <model/fitting_net>` and {ref}`loss <loss>`.
+
+## Theory
+
+To represent the first-order tensorial properties (i.e. vector properties), we let the fitting network, denoted by $\mathcal F_{1}$, output an $M$-dimensional vector; then we have the representation,
+
+```math
+(T_i^{(1)})_\alpha =
+\frac{1}{N_c}
+\sum_{j=1}^{N_c}\sum_{m=1}^M (\mathcal G^i)_{jm} (\mathcal R^i)_{j,\alpha+1}
+(\mathcal F_{1}(\mathcal D^i))_m, \ \alpha=1,2,3.
+```
+
+We let the fitting network $\mathcal F_{2}$ output an $M$-dimensional vector, and the second-order tensorial properties (matrix properties) are formulated as
+
+```math
+(T_i^{(2)})_{\alpha\beta} =
+\frac{1}{N_c^2}
+\sum_{j=1}^{N_c}\sum_{k=1}^{N_c}\sum_{m=1}^M
+(\mathcal G^i)_{jm}
+(\mathcal R^i)_{j,\alpha+1}
+(\mathcal R^i)_{k,\beta+1}
+(\mathcal G^i)_{km}
+(\mathcal F_{2}(\mathcal D^i))_m,
+\ \alpha,\beta=1,2,3,
+```
+
+where $\mathcal{G}^i$ and $\mathcal{R}^i$ can be found in [`se_e2_a`](./train-se-e2-a.md).
+Thus, the tensor fitting network requires the descriptor to have the same or similar form as the DeepPot-SE descriptor.
+$\mathcal{F}_1$ and $\mathcal F_2$ are the neural network functions.
+The total tensor $\boldsymbol{T}$ (total dipole $\boldsymbol{T}^{(1)}$ or total polarizability $\boldsymbol{T}^{(2)}$) is the sum of the atomic tensor:
+
+```math
+    \boldsymbol{T} = \sum_i \boldsymbol{T}_i.
+```
+
+The tensorial models can be used to calculate IR spectrum and Raman spectrum.[^1]
+
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+
+## The fitting Network
+
+The {ref}`fitting_net <model/fitting_net>` section tells DP which fitting net to use.
+
+::::{tab-set}
+
+:::{tab-item} TensorFlow {{ tensorflow_icon }}
+
+The JSON of `dipole` type should be provided like
+
+```json
+	"fitting_net" : {
+		"type": "dipole",
+		"sel_type": [0],
+		"neuron": [100,100,100],
+		"resnet_dt": true,
+		"seed": 1,
+	},
+```
+
+The JSON of `polar` type should be provided like
+
+```json
+	"fitting_net" : {
+	   	"type": "polar",
+		"sel_type": [0],
+		"neuron": [100,100,100],
+		"resnet_dt": true,
+		"seed": 1,
+	},
+```
+
+- `type` specifies which type of fitting net should be used. It should be either `dipole` or `polar`. Note that `global_polar` mode in version 1.x is already **deprecated** and is merged into `polar`. To specify whether a system is global or atomic, please see [here](train-se-e2-a.md).
+- `sel_type` is a list specifying which type of atoms have the quantity you want to fit. For example, in the water system, `sel_type` is `[0]` since `0` represents atom `O`. If left unset, all types of atoms will be fitted.
+- The rest arguments have the same meaning as they do in `ener` mode.
+
+:::
+
+:::{tab-item} PyTorch {{ pytorch_icon }}
+
+The JSON of `dipole` type should be provided like
+
+```json
+	"atom_exclude_types": [
+      1
+    ],
+	"fitting_net" : {
+		"type": "dipole",
+		"neuron": [100,100,100],
+		"resnet_dt": true,
+		"seed": 1,
+	},
+```
+
+The JSON of `polar` type should be provided like
+
+```json
+	"atom_exclude_types": [
+      1
+    ],
+	"fitting_net" : {
+	   	"type": "polar",
+		"neuron": [100,100,100],
+		"resnet_dt": true,
+		"seed": 1,
+	},
+```
+
+- `type` specifies which type of fitting net should be used. It should be either `dipole` or `polar`. Note that `global_polar` mode in version 1.x is already **deprecated** and is merged into `polar`. To specify whether a system is global or atomic, please see [here](train-se-e2-a.md).
+- `atom_exclude_types` is a list specifying the which type of atoms have the quantity you want to set to zero. For example, in the water system, `atom_exclude_types` is `[1]` since `1` represents atom `H`.
+- The rest arguments have the same meaning as they do in `ener` mode.
+  :::
+
+::::
+
+## Loss
+
+DP supports a combinational training of the global system (only a global `tensor` label, i.e. dipole or polar, is provided in a frame) and atomic system (labels for **each** atom included in `sel_type`/ not included in `atom_exclude_types` are provided). In a global system, each frame has just **one** `tensor` label. For example, when fitting `polar`, each frame will just provide a `1 x 9` vector which gives the elements of the polarizability tensor of that frame in order XX, XY, XZ, YX, YY, YZ, XZ, ZY, ZZ. By contrast, in an atomic system, each atom in `sel_type` has a `tensor` label. For example, when fitting a dipole, each frame will provide a `#sel_atom x 3` matrices, where `#sel_atom` is the number of atoms whose type are in `sel_type`.
+
+The {ref}`loss <loss>` section tells DP the weight of these two kinds of loss, i.e.
+
+```python
+loss = pref * global_loss + pref_atomic * atomic_loss
+```
+
+The loss section should be provided like
+
+```json
+	"loss" : {
+		"type":		"tensor",
+		"pref":		1.0,
+		"pref_atomic":	1.0
+	},
+```
+
+- {ref}`type <loss/type>` should be written as `tensor` as a distinction from `ener` mode.
+- {ref}`pref <loss[tensor]/pref>` and {ref}`pref_atomic <loss[tensor]/pref_atomic>` respectively specify the weight of global loss and atomic loss. It can not be left unset. If set to 0, the corresponding label will NOT be included in the training process.
+
+## Training Data Preparation
+
+In tensor mode, the identification of the label's type (global or atomic) is derived from the file name. The global label should be named `dipole.npy/raw` or `polarizability.npy/raw`, while the atomic label should be named `atomic_dipole.npy/raw` or `atomic_polarizability.npy/raw`. If wrongly named, DP will report an error
+
+```bash
+ValueError: cannot reshape array of size xxx into shape (xx,xx). This error may occur when your label mismatch it's name, i.e. you might store global tensor in `atomic_tensor.npy` or atomic tensor in `tensor.npy`.
+```
+
+In this case, please check the file name of the label.
+
+## Train the Model
+
+The training command is the same as `ener` mode, i.e.
+
+::::{tab-set}
+
+:::{tab-item} TensorFlow {{ tensorflow_icon }}
+
+```bash
+dp train input.json
+```
+
+:::
+
+:::{tab-item} PyTorch {{ pytorch_icon }}
+
+```bash
+dp --pt train input.json
+```
+
+:::
+
+::::
+
+The detailed loss can be found in `lcurve.out`:
+
+```
+#  step    rmse_val   rmse_trn  rmse_lc_val rmse_lc_trn rmse_gl_val rmse_gl_trn  lr
+     0     8.34e+00   8.26e+00   8.34e+00   8.26e+00    0.00e+00    0.00e+00   1.0e-02
+   100     3.51e-02   8.55e-02   0.00e+00   8.55e-02    4.38e-03    0.00e+00   5.0e-03
+   200     4.77e-02   5.61e-02   0.00e+00   5.61e-02    5.96e-03    0.00e+00   2.5e-03
+   300     5.68e-02   1.47e-02   0.00e+00   0.00e+00    7.10e-03    1.84e-03   1.3e-03
+   400     3.73e-02   3.48e-02   1.99e-02   0.00e+00    2.18e-03    4.35e-03   6.3e-04
+   500     2.77e-02   5.82e-02   1.08e-02   5.82e-02    2.11e-03    0.00e+00   3.2e-04
+   600     2.81e-02   5.43e-02   2.01e-02   0.00e+00    1.01e-03    6.79e-03   1.6e-04
+   700     2.97e-02   3.28e-02   2.03e-02   0.00e+00    1.17e-03    4.10e-03   7.9e-05
+   800     2.25e-02   6.19e-02   9.05e-03   0.00e+00    1.68e-03    7.74e-03   4.0e-05
+   900     3.18e-02   5.54e-02   9.93e-03   5.54e-02    2.74e-03    0.00e+00   2.0e-05
+  1000     2.63e-02   5.02e-02   1.02e-02   5.02e-02    2.01e-03    0.00e+00   1.0e-05
+  1100     3.27e-02   5.89e-02   2.13e-02   5.89e-02    1.43e-03    0.00e+00   5.0e-06
+  1200     2.85e-02   2.42e-02   2.85e-02   0.00e+00    0.00e+00    3.02e-03   2.5e-06
+  1300     3.47e-02   5.71e-02   1.07e-02   5.71e-02    3.00e-03    0.00e+00   1.3e-06
+  1400     3.13e-02   5.76e-02   3.13e-02   5.76e-02    0.00e+00    0.00e+00   6.3e-07
+  1500     3.34e-02   1.11e-02   2.09e-02   0.00e+00    1.57e-03    1.39e-03   3.2e-07
+  1600     3.11e-02   5.64e-02   3.11e-02   5.64e-02    0.00e+00    0.00e+00   1.6e-07
+  1700     2.97e-02   5.05e-02   2.97e-02   5.05e-02    0.00e+00    0.00e+00   7.9e-08
+  1800     2.64e-02   7.70e-02   1.09e-02   0.00e+00    1.94e-03    9.62e-03   4.0e-08
+  1900     3.28e-02   2.56e-02   3.28e-02   0.00e+00    0.00e+00    3.20e-03   2.0e-08
+  2000     2.59e-02   5.71e-02   1.03e-02   5.71e-02    1.94e-03    0.00e+00   1.0e-08
+```
+
+One may notice that in each step, some of the local loss and global loss will be `0.0`. This is because our training data and validation data consist of the global system and atomic system, i.e.
+
+```
+	--training_data
+		>atomic_system
+		>global_system
+	--validation_data
+		>atomic_system
+		>global_system
+```
+
+During training, at each step when the `lcurve.out` is printed, the system used for evaluating the training (validation) error may be either with only global or only atomic labels, thus the corresponding atomic or global errors are missing and are printed as zeros.
diff --git a/doc/model/train-hybrid.md b/doc/model/train-hybrid.md
index 58b66f25e0..c0a55d9eb5 100644
--- a/doc/model/train-hybrid.md
+++ b/doc/model/train-hybrid.md
@@ -1,10 +1,15 @@
-# Descriptor `"hybrid"`
+# Descriptor `"hybrid"` {{ tensorflow_icon }} {{ pytorch_icon }} {{ dpmodel_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, DP {{ dpmodel_icon }}
+:::
 
 This descriptor hybridizes multiple descriptors to form a new descriptor. For example, we have a list of descriptors denoted by $\mathcal D_1$, $\mathcal D_2$, ..., $\mathcal D_N$, the hybrid descriptor this the concatenation of the list, i.e. $\mathcal D = (\mathcal D_1, \mathcal D_2, \cdots, \mathcal D_N)$.
 
 ## Theory
 
 A hybrid descriptor $\mathcal{D}^i_\text{hyb}$ concatenates multiple kinds of descriptors into one descriptor:
+
 ```math
     \mathcal{D}^{i}_\text{hyb} = \{
     \begin{array}{cccc}
@@ -12,14 +17,16 @@ A hybrid descriptor $\mathcal{D}^i_\text{hyb}$ concatenates multiple kinds of de
     \end{array}
     \}.
 ```
+
 The list of descriptors can be different types or the same descriptors with different parameters.
 This way, one can set the different cutoff radii for different descriptors.[^1]
 
-[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
 
 ## Instructions
 
 To use the descriptor in DeePMD-kit, one firstly set the {ref}`type <model/descriptor/type>` to {ref}`hybrid <model/descriptor[hybrid]>`, then provide the definitions of the descriptors by the items in the `list`,
+
 ```json
         "descriptor" :{
             "type": "hybrid",
@@ -37,6 +44,7 @@ To use the descriptor in DeePMD-kit, one firstly set the {ref}`type <model/descr
 ```
 
 A complete training input script of this example can be found in the directory
+
 ```bash
 $deepmd_source_dir/examples/water/hybrid/input.json
 ```
diff --git a/doc/model/train-se-a-mask.md b/doc/model/train-se-a-mask.md
index 17c211ec73..6757fbefbd 100644
--- a/doc/model/train-se-a-mask.md
+++ b/doc/model/train-se-a-mask.md
@@ -1,5 +1,8 @@
-# Descriptor `"se_a_mask"`
+# Descriptor `"se_a_mask"` {{ tensorflow_icon }}
 
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}
+:::
 
 Descriptor `se_a_mask` is a concise implementation of the descriptor `se_e2_a`,
 but functions slightly differently.
@@ -15,15 +18,19 @@ which is also the origin of the name `se_a_mask`.
 In this example, we will train a DP Mask model for zinc protein interactions.
 The input systems are the collection of zinc and its coordinates residues.
 A sample input system that contains 2 frames is included in the directory.
+
 ```bash
 $deepmd_source_dir/examples/zinc_protein/data_dp_mask
 ```
+
 A complete training input script of this example can be found in the directory.
+
 ```bash
 $deepmd_source_dir/examples/zinc_protein/zinc_se_a_mask.json
 ```
 
 The construction of the descriptor is given by section {ref}`descriptor <model/descriptor>`. An example of the descriptor is provided as follows
+
 ```json
 	"descriptor" :{
 	    "type":	"se_a_mask",
@@ -35,15 +42,17 @@ The construction of the descriptor is given by section {ref}`descriptor <model/d
 	    "seed":		1
 	}
 ```
-* The {ref}`type <model/descriptor/type>` of the descriptor is set to `"se_a_mask"`.
-* {ref}`sel <model/descriptor[se_a_mask]/sel>` gives the maximum number of atoms in input coordinates. It is a list, the length of which is the same as the number of atom types in the system, and `sel[i]` denotes the maximum number of atoms with type `i`.
-* The {ref}`neuron <model/descriptor[se_a_mask]/neuron>` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them.
-* The {ref}`axis_neuron <model/descriptor[se_a_mask]/axis_neuron>` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003)
-* If the option {ref}`type_one_side <model/descriptor[se_a_mask]/type_one_side>` is set to `true`, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters.
-* If the option {ref}`resnet_dt <model/descriptor[se_a_mask]/resnet_dt>` is set to `true`, then a timestep is used in the ResNet.
-* {ref}`seed <model/descriptor[se_a_mask]/seed>` gives the random seed that is used to generate random numbers when initializing the model parameters.
+
+- The {ref}`type <model/descriptor/type>` of the descriptor is set to `"se_a_mask"`.
+- {ref}`sel <model/descriptor[se_a_mask]/sel>` gives the maximum number of atoms in input coordinates. It is a list, the length of which is the same as the number of atom types in the system, and `sel[i]` denotes the maximum number of atoms with type `i`.
+- The {ref}`neuron <model/descriptor[se_a_mask]/neuron>` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them.
+- The {ref}`axis_neuron <model/descriptor[se_a_mask]/axis_neuron>` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003)
+- If the option {ref}`type_one_side <model/descriptor[se_a_mask]/type_one_side>` is set to `true`, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters.
+- If the option {ref}`resnet_dt <model/descriptor[se_a_mask]/resnet_dt>` is set to `true`, then a timestep is used in the ResNet.
+- {ref}`seed <model/descriptor[se_a_mask]/seed>` gives the random seed that is used to generate random numbers when initializing the model parameters.
 
 To make the `aparam.npy` used for descriptor `se_a_mask`, two variables in `fitting_net` section are needed.
+
 ```json
 	"fitting_net" :{
 	    "neuron": [240, 240, 240],
@@ -53,14 +62,16 @@ To make the `aparam.npy` used for descriptor `se_a_mask`, two variables in `fitt
       	"use_aparam_as_mask": true
 	}
 ```
-* `neuron`, `resnet_dt` and `seed` are the same as the {ref}`fitting_net <model/fitting_net[ener]>` section for fitting energy.
-* {ref}`numb_aparam <model/fitting_net[ener]/numb_aparam>` gives the dimesion of the `aparam.npy` file. In this example, it is set to 1 and stores the real/virtual sign of the atoms. For real/virtual atoms, the corresponding sign in `aparam.npy` is set to 1/0.
-* {ref}`use_aparam_as_mask <model/fitting_net[ener]/use_aparam_as_mask>` is set to `true` to use the `aparam.npy` as the mask of the atoms in the descriptor `se_a_mask`.
+
+- `neuron`, `resnet_dt` and `seed` are the same as the {ref}`fitting_net <model/fitting_net[ener]>` section for fitting energy.
+- {ref}`numb_aparam <model/fitting_net[ener]/numb_aparam>` gives the dimesion of the `aparam.npy` file. In this example, it is set to 1 and stores the real/virtual sign of the atoms. For real/virtual atoms, the corresponding sign in `aparam.npy` is set to 1/0.
+- {ref}`use_aparam_as_mask <model/fitting_net[ener]/use_aparam_as_mask>` is set to `true` to use the `aparam.npy` as the mask of the atoms in the descriptor `se_a_mask`.
 
 Finally, to make a reasonable fitting task with `se_a_mask` descriptor for DP/MM simulations, the loss function with `se_a_mask` is designed to include the atomic forces difference in specific atoms of the input particles only.
 More details about the selection of the specific atoms can be found in paper [DP/MM](left to be filled).
 Thus, `atom_pref.npy` ( [ nframes * natoms ] ) is required as the indicator of the specific atoms in the input particles.
 And the `loss` section in the training input script should be set as follows.
+
 ```json
 "loss": {
     "type": "ener",
diff --git a/doc/model/train-se-atten.md b/doc/model/train-se-atten.md
index 7480ddbc12..364d35805b 100644
--- a/doc/model/train-se-atten.md
+++ b/doc/model/train-se-atten.md
@@ -1,4 +1,8 @@
-# Descriptor `"se_atten"`
+# Descriptor `"se_atten"` {{ tensorflow_icon }} {{ pytorch_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}
+:::
 
 ## DPA-1: Pretraining of Attention-based Deep Potential Model for Molecular Simulation
 
@@ -15,43 +19,53 @@ Attention-based descriptor $\mathcal{D}^i \in \mathbb{R}^{M \times M_{<}}$, whic
 ```math
     \mathcal{D}^i = \frac{1}{N_c^2}(\hat{\mathcal{G}}^i)^T \mathcal{R}^i (\mathcal{R}^i)^T \hat{\mathcal{G}}^i_<,
 ```
+
 where $\hat{\mathcal{G}}^i$ represents the embedding matrix $\mathcal{G}^i$ after additional self-attention mechanism and $\mathcal{R}^i$ is defined by the full case in the [`se_e2_a`](./train-se-e2-a.md).
 Note that we obtain $\mathcal{G}^i$ using the type embedding method by default in this descriptor.
 
 To perform the self-attention mechanism, the queries $\mathcal{Q}^{i,l} \in \mathbb{R}^{N_c\times d_k}$, keys $\mathcal{K}^{i,l} \in \mathbb{R}^{N_c\times d_k}$, and values $\mathcal{V}^{i,l} \in \mathbb{R}^{N_c\times d_v}$ are first obtained:
+
 ```math
     \left(\mathcal{Q}^{i,l}\right)_{j}=Q_{l}\left(\left(\mathcal{G}^{i,l-1}\right)_{j}\right),
 ```
+
 ```math
     \left(\mathcal{K}^{i,l}\right)_{j}=K_{l}\left(\left(\mathcal{G}^{i,l-1}\right)_{j}\right),
 ```
+
 ```math
     \left(\mathcal{V}^{i,l}\right)_{j}=V_{l}\left(\left(\mathcal{G}^{i,l-1}\right)_{j}\right),
 ```
+
 where $Q_{l}$, $K_{l}$, $V_{l}$ represent three trainable linear transformations that output the queries and keys of dimension $d_k$ and values of dimension $d_v$, and $l$ is the index of the attention layer.
-The input embedding matrix to the attention layers,  denoted by $\mathcal{G}^{i,0}$, is chosen as the two-body embedding matrix.
+The input embedding matrix to the attention layers, denoted by $\mathcal{G}^{i,0}$, is chosen as the two-body embedding matrix.
 
 Then the scaled dot-product attention method is adopted:
+
 ```math
 A(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l}, \mathcal{V}^{i,l}, \mathcal{R}^{i,l})=\varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l},\mathcal{R}^{i,l}\right)\mathcal{V}^{i,l},
 ```
+
 where $\varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l},\mathcal{R}^{i,l}\right) \in \mathbb{R}^{N_c\times N_c}$ is attention weights.
 In the original attention method, one typically has $\varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l}\right)=\mathrm{softmax}\left(\frac{\mathcal{Q}^{i,l} (\mathcal{K}^{i,l})^{T}}{\sqrt{d_{k}}}\right)$, with $\sqrt{d_{k}}$ being the normalization temperature.
 This is slightly modified to incorporate the angular information:
+
 ```math
 \varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l},\mathcal{R}^{i,l}\right) = \mathrm{softmax}\left(\frac{\mathcal{Q}^{i,l} (\mathcal{K}^{i,l})^{T}}{\sqrt{d_{k}}}\right) \odot \hat{\mathcal{R}}^{i}(\hat{\mathcal{R}}^{i})^{T},
 ```
+
 where $\hat{\mathcal{R}}^{i} \in \mathbb{R}^{N_c\times 3}$ denotes normalized relative coordinates , $\hat{\mathcal{R}}^{i}_{j} = \frac{\boldsymbol{r}_{ij}}{\lVert \boldsymbol{r}_{ij} \lVert}$ and $\odot$ means element-wise multiplication.
 
 Then layer normalization is added in a residual way to finally obtain the self-attention local embedding matrix $\hat{\mathcal{G}}^{i} = \mathcal{G}^{i,L_a}$ after $L_a$ attention layers:[^1]
+
 ```math
 \mathcal{G}^{i,l} = \mathcal{G}^{i,l-1} + \mathrm{LayerNorm}(A(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l}, \mathcal{V}^{i,l}, \mathcal{R}^{i,l})).
 ```
 
-[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
-
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
 
 ## Introduction to new features of DPA-1
+
 Next, we will list the detailed settings in input.json and the data format, especially for large systems with dozens of elements. An example of DPA-1 input can be found [here](../../examples/water/se_atten/input.json).
 
 ### Descriptor `"se_atten"`
@@ -59,13 +73,20 @@ Next, we will list the detailed settings in input.json and the data format, espe
 The notation of `se_atten` is short for the smooth edition of Deep Potential with an attention mechanism.
 This descriptor was described in detail in [the DPA-1 paper](https://arxiv.org/abs/2208.08236) and the images above.
 
-In this example, we will train a DPA-1 model for a water system.  A complete training input script of this example can be found in the directory:
+In this example, we will train a DPA-1 model for a water system. A complete training input script of this example can be found in the directory:
+
 ```bash
 $deepmd_source_dir/examples/water/se_atten/input.json
 ```
+
 With the training input script, data are also provided in the example directory. One may train the model with the DeePMD-kit from the directory.
 
 An example of the DPA-1 descriptor is provided as follows
+
+::::{tab-set}
+
+:::{tab-item} TensorFlow {{ tensorflow_icon }}
+
 ```json
 	"descriptor" :{
           "type":		"se_atten",
@@ -82,33 +103,76 @@ An example of the DPA-1 descriptor is provided as follows
           "seed":	1
 	}
 ```
-* The {ref}`type <model/descriptor/type>` of the descriptor is set to `"se_atten"`, which will use DPA-1 structures.
-* {ref}`rcut <model/descriptor[se_atten]/rcut>` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth <model/descriptor[se_atten]/rcut_smth>` gives where the smoothing starts.
-* **{ref}`sel <model/descriptor[se_atten]/sel>`** gives the maximum possible number of neighbors in the cut-off radius. It is an int. Note that this number highly affects the efficiency of training, which we usually use less than 200. (We use 120 for training 56 elements in [OC2M dataset](https://github.com/Open-Catalyst-Project/ocp/blob/main/DATASET.md))
-* The {ref}`neuron <model/descriptor[se_atten]/neuron>` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them.
-* The {ref}`axis_neuron <model/descriptor[se_atten]/axis_neuron>` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003)
-* If the option {ref}`resnet_dt <model/descriptor[se_atten]/resnet_dt>` is set to `true`, then a timestep is used in the ResNet.
-* {ref}`seed <model/descriptor[se_atten]/seed>` gives the random seed that is used to generate random numbers when initializing the model parameters.
-* {ref}`attn <model/descriptor[se_atten]/attn>` sets the length of a hidden vector during scale-dot attention computation.
-* {ref}`attn_layer <model/descriptor[se_atten]/attn_layer>` sets the number of layers in attention mechanism.
-* {ref}`attn_mask <model/descriptor[se_atten]/attn_mask>` determines whether to mask the diagonal in the attention weights and False is recommended.
-* {ref}`attn_dotr <model/descriptor[se_atten]/attn_dotr>` determines whether to dot the relative coordinates on the attention weights as a gated scheme, True is recommended.
+
+- The {ref}`type <model/descriptor/type>` of the descriptor is set to `"se_atten"`, which will use DPA-1 structures.
+- {ref}`rcut <model/descriptor[se_atten]/rcut>` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth <model/descriptor[se_atten]/rcut_smth>` gives where the smoothing starts.
+- **{ref}`sel <model/descriptor[se_atten]/sel>`** gives the maximum possible number of neighbors in the cut-off radius. It is an int. Note that this number highly affects the efficiency of training, which we usually use less than 200. (We use 120 for training 56 elements in [OC2M dataset](https://github.com/Open-Catalyst-Project/ocp/blob/main/DATASET.md))
+- The {ref}`neuron <model/descriptor[se_atten]/neuron>` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them.
+- The {ref}`axis_neuron <model/descriptor[se_atten]/axis_neuron>` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003)
+- If the option {ref}`resnet_dt <model/descriptor[se_atten]/resnet_dt>` is set to `true`, then a timestep is used in the ResNet.
+- {ref}`seed <model/descriptor[se_atten]/seed>` gives the random seed that is used to generate random numbers when initializing the model parameters.
+- {ref}`attn <model/descriptor[se_atten]/attn>` sets the length of a hidden vector during scale-dot attention computation.
+- {ref}`attn_layer <model/descriptor[se_atten]/attn_layer>` sets the number of layers in attention mechanism.
+- {ref}`attn_mask <model/descriptor[se_atten]/attn_mask>` determines whether to mask the diagonal in the attention weights and False is recommended.
+- {ref}`attn_dotr <model/descriptor[se_atten]/attn_dotr>` determines whether to dot the relative coordinates on the attention weights as a gated scheme, True is recommended.
+
+:::
+
+:::{tab-item} PyTorch {{ pytorch_icon }}
+
+```json
+	"descriptor" :{
+          "type":		"dpa1",
+          "rcut_smth":	0.50,
+          "rcut":		6.00,
+          "sel":		120,
+          "neuron":		[25, 50, 100],
+          "tebd_dim": 8,
+          "axis_neuron":	16,
+          "attn":	128,
+          "attn_layer":	2,
+          "attn_mask": false,
+          "attn_dotr": true,
+          "post_ln": true
+	}
+```
+
+- The {ref}`type <model/descriptor/type>` of the descriptor is set to `"dpa1"`, which will use DPA-1 structures.
+- {ref}`rcut <model/descriptor[se_atten]/rcut>` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth <model/descriptor[se_atten]/rcut_smth>` gives where the smoothing starts.
+- **{ref}`sel <model/descriptor[se_atten]/sel>`** gives the maximum possible number of neighbors in the cut-off radius. It is an int. Note that this number highly affects the efficiency of training, which we usually use less than 200. (We use 120 for training 56 elements in [OC2M dataset](https://github.com/Open-Catalyst-Project/ocp/blob/main/DATASET.md))
+- The {ref}`neuron <model/descriptor[se_atten]/neuron>` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them.
+- The {ref}`tebd_dim <model/descriptor[se_atten]/tebd_dim>` specifies the dimension of the type embedding.
+- The {ref}`axis_neuron <model/descriptor[se_atten]/axis_neuron>` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003)
+- {ref}`attn <model/descriptor[se_atten]/attn>` sets the length of a hidden vector during scale-dot attention computation.
+- {ref}`attn_layer <model/descriptor[se_atten]/attn_layer>` sets the number of layers in attention mechanism.
+- {ref}`attn_mask <model/descriptor[se_atten]/attn_mask>` determines whether to mask the diagonal in the attention weights and False is recommended.
+- {ref}`attn_dotr <model/descriptor[se_atten]/attn_dotr>` determines whether to dot the relative coordinates on the attention weights as a gated scheme, True is recommended.
+- {ref}`post_ln <model/descriptor[se_atten]/post_ln>` determines whether to perform post layer norm.
+
+:::
+
+::::
 
 ### Descriptor `"se_atten_v2"`
+
 We highly recommend using the version 2.0 of the attention-based descriptor `"se_atten_v2"`, which is inherited from `"se_atten"` but with the following parameter modifications:
+
 ```json
       "stripped_type_embedding": true,
       "smooth_type_embdding": true,
       "set_davg_zero": false
 ```
-Practical evidence demonstrates that `"se_atten_v2"` offers better and more stable performance compared to `"se_atten"`.
 
+Practical evidence demonstrates that `"se_atten_v2"` offers better and more stable performance compared to `"se_atten"`.
 
 ### Fitting `"ener"`
+
 DPA-1 only supports `"ener"` fitting type, and you can refer [here](train-energy.md) for detailed information.
 
 ### Type embedding
+
 DPA-1 only supports models with type embeddings. And the default setting is as follows:
+
 ```json
 "type_embedding":{
             "neuron":           [8],
@@ -116,11 +180,13 @@ DPA-1 only supports models with type embeddings. And the default setting is as f
             "seed":             1
         }
 ```
-You can add these settings in input.json if you want to change the default ones, see [here](train-se-e2-a-tebd.md) for detailed information.
 
+You can add these settings in input.json if you want to change the default ones, see [here](train-se-e2-a-tebd.md) for detailed information.
 
 ### Type map
+
 For training large systems, especially those with dozens of elements, the {ref}`type <model/type_map>` determines the element index of training data:
+
 ```json
 "type_map": [
    "Mg",
@@ -128,8 +194,11 @@ For training large systems, especially those with dozens of elements, the {ref}`
    "Cu"
   ]
 ```
+
 which should include all the elements in the dataset you want to train on.
+
 ## Data format
+
 DPA-1 supports the standard data format, which is detailed in [data-conv.md](../data/data-conv.md) and [system.md](../data/system.md).
 Note that in this format, only those frames with the same fingerprint (i.e. the number of atoms of different elements) can be put together as a unified system.
 This may lead to sparse frame numbers in those rare systems.
@@ -137,6 +206,7 @@ This may lead to sparse frame numbers in those rare systems.
 An ideal way is to put systems with the same total number of atoms together, which is the way we trained DPA-1 on [OC2M](https://github.com/Open-Catalyst-Project/ocp/blob/main/DATASET.md).
 This system format, which is called `mixed_type`, is proper to put frame-sparse systems together and is slightly different from the standard one.
 Take an example, a `mixed_type` may contain the following files:
+
 ```
 type.raw
 type_map.raw
@@ -146,13 +216,14 @@ set.*/energy.npy
 set.*/force.npy
 set.*/real_atom_types.npy
 ```
+
 This system contains `Nframes` frames with the same atom number `Natoms`, the total number of element types contained in all frames is `Ntypes`. Most files are the same as those in [standard formats](../data/system.md), here we only list the distinct ones:
 
-ID             | Property                         | File                | Required/Optional    | Shape                    | Description
-----------     | -------------------------------- | ------------------- | -------------------- | -----------------------  | -----------
-/              | Atom type indexes (place holder) | type.raw            | Required             | Natoms                   | All zeros to fake the type input
-type_map       | Atom type names                  | type_map.raw        | Required             | Ntypes                   | Atom names that map to atom type contained in all the frames, which is unnecessart to be contained in the periodic table
-type           | Atom type indexes of each frame  | real_atom_types.npy | Required             | Nframes \* Natoms        | Integers that describe atom types in each frame, corresponding to indexes in type_map. `-1` means virtual atoms.
+| ID       | Property                         | File                | Required/Optional | Shape             | Description                                                                                                              |
+| -------- | -------------------------------- | ------------------- | ----------------- | ----------------- | ------------------------------------------------------------------------------------------------------------------------ |
+| /        | Atom type indexes (place holder) | type.raw            | Required          | Natoms            | All zeros to fake the type input                                                                                         |
+| type_map | Atom type names                  | type_map.raw        | Required          | Ntypes            | Atom names that map to atom type contained in all the frames, which is unnecessart to be contained in the periodic table |
+| type     | Atom type indexes of each frame  | real_atom_types.npy | Required          | Nframes \* Natoms | Integers that describe atom types in each frame, corresponding to indexes in type_map. `-1` means virtual atoms.         |
 
 With these edited files, one can put together frames with the same `Natoms`, instead of the same formula (like `H2O`). Note that this `mixed_type` format only supports `se_atten` descriptor.
 
@@ -161,6 +232,7 @@ To put frames with different `Natoms` into the same system, one can pad systems
 The API to generate or transfer to `mixed_type` format is available on [dpdata](https://github.com/deepmodeling/dpdata) for a more convenient experience.
 
 ## Training example
+
 Here we upload the AlMgCu example shown in the paper, you can download it here:
 [Baidu disk](https://pan.baidu.com/s/1Mk9CihPHCmf8quwaMhT-nA?pwd=d586);
 [Google disk](https://drive.google.com/file/d/11baEpRrvHoqxORFPSdJiGWusb3Y4AnRE/view?usp=sharing).
diff --git a/doc/model/train-se-e2-a-tebd.md b/doc/model/train-se-e2-a-tebd.md
index cb6ce6674f..a6291bb238 100644
--- a/doc/model/train-se-e2-a-tebd.md
+++ b/doc/model/train-se-e2-a-tebd.md
@@ -1,4 +1,8 @@
-# Type embedding approach
+# Type embedding approach {{ tensorflow_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}
+:::
 
 We generate specific a type embedding vector for each atom type so that we can share one descriptor embedding net and one fitting net in total, which decline training complexity largely.
 
@@ -12,6 +16,7 @@ Usually, when the type embedding approach is not enabled, for a system with mult
     (\mathcal{G}^i)_j = \mathcal{N}^{\alpha_i, \alpha_j}_{e,2}(s(r_{ij})) \quad \mathrm{or}\quad
     (\mathcal{G}^i)_j = \mathcal{N}^{ \alpha_j}_{e,2}(s(r_{ij})),
 ```
+
 ```math
     (\mathcal{G}^i)_{jk} =\mathcal{N}^{\alpha_j, \alpha_k}_{e,3}((\theta_i)_{jk}).
 ```
@@ -24,6 +29,7 @@ The limitation of this approach is that when there are large numbers of chemical
 Similar to the embedding networks, if the type embedding approach is not used, the fitting network parameters are chemical-species-wise, and there are $N_t$ sets of fitting network parameters.
 For performance, atoms are sorted by their chemical species $\alpha_i$ in advance.
 Take an example, the atomic energy $E_i$ is represented as follows:
+
 ```math
 E_i=\mathcal{F}_0^{\alpha_i}(\mathcal{D}^i).
 ```
@@ -42,21 +48,25 @@ The type embeddings of central and neighboring atoms $\mathcal{A}^i$ and $\mathc
     (\mathcal{G}^i)_j = \mathcal{N}_{e,2}(\{s(r_{ij}), \mathcal{A}^i, \mathcal{A}^j\})  \quad \mathrm{or}\quad
     (\mathcal{G}^i)_j = \mathcal{N}_{e,2}(\{s(r_{ij}), \mathcal{A}^j\}) ,
 ```
+
 ```math
     (\mathcal{G}^i)_{jk} =\mathcal{N}_{e,3}(\{(\theta_i)_{jk}, \mathcal{A}^j, \mathcal{A}^k\}).
 ```
 
 In fitting networks, the type embedding is inserted into the input of the fitting networks:
+
 ```math
 E_i=\mathcal{F}_0(\{\mathcal{D}^i, \mathcal{A}^i\}).
 ```
 
 In this way, all chemical species share the same network parameters through the type embedding.[^1]
 
-[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
 
 ## Instructions
+
 The {ref}`model <model>` defines how the model is constructed, adding a section of type embedding net:
+
 ```json
     "model": {
 	"type_map":	["O", "H"],
@@ -71,9 +81,11 @@ The {ref}`model <model>` defines how the model is constructed, adding a section
 	}
     }
 ```
+
 The model will automatically apply the type embedding approach and generate type embedding vectors. If the type embedding vector is detected, the descriptor and fitting net would take it as a part of the input.
 
 The construction of type embedding net is given by {ref}`type_embedding <model/type_embedding>`. An example of {ref}`type_embedding <model/type_embedding>` is provided as follows
+
 ```json
 	"type_embedding":{
 	    "neuron":		[2, 4, 8],
@@ -81,15 +93,17 @@ The construction of type embedding net is given by {ref}`type_embedding <model/t
 	    "seed":		1
 	}
 ```
-* The {ref}`neuron <model/type_embedding/neuron>` specifies the size of the type embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. It takes a one-hot vector as input and output dimension equals to the last dimension of the {ref}`neuron <model/type_embedding/neuron>` list. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them.
-* If the option {ref}`resnet_dt <model/type_embedding/resnet_dt>` is set to `true`, then a timestep is used in the ResNet.
-* {ref}`seed <model/type_embedding/seed>` gives the random seed that is used to generate random numbers when initializing the model parameters.
 
+- The {ref}`neuron <model/type_embedding/neuron>` specifies the size of the type embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. It takes a one-hot vector as input and output dimension equals to the last dimension of the {ref}`neuron <model/type_embedding/neuron>` list. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them.
+- If the option {ref}`resnet_dt <model/type_embedding/resnet_dt>` is set to `true`, then a timestep is used in the ResNet.
+- {ref}`seed <model/type_embedding/seed>` gives the random seed that is used to generate random numbers when initializing the model parameters.
 
 A complete training input script of this example can be found in the directory.
+
 ```bash
 $deepmd_source_dir/examples/water/se_e2_a_tebd/input.json
 ```
+
 See [here](../development/type-embedding.md) for further explanation of `type embedding`.
 
 :::{note}
diff --git a/doc/model/train-se-e2-a.md b/doc/model/train-se-e2-a.md
index 537253a6d9..2412bbc64e 100644
--- a/doc/model/train-se-e2-a.md
+++ b/doc/model/train-se-e2-a.md
@@ -1,4 +1,8 @@
-# Descriptor `"se_e2_a"`
+# Descriptor `"se_e2_a"` {{ tensorflow_icon }} {{ pytorch_icon }} {{ dpmodel_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, DP {{ dpmodel_icon }}
+:::
 
 The notation of `se_e2_a` is short for the Deep Potential Smooth Edition (DeepPot-SE) constructed from all information (both angular and radial) of atomic configurations. The `e2` stands for the embedding with two-atoms information. This descriptor was described in detail in [the DeepPot-SE paper](https://arxiv.org/abs/1805.09003).
 
@@ -39,10 +43,10 @@ where $\boldsymbol{r}_{ij}=\boldsymbol{r}_j-\boldsymbol{r}_i = (x_{ij}, y_{ij},
     \end{cases}
 ```
 
-where $x=\frac{r - r_s}{ r_c - r_s}$  switches from 1 at $r_s$ to 0 at the cutoff radius $r_c$.
+where $x=\frac{r - r_s}{ r_c - r_s}$ switches from 1 at $r_s$ to 0 at the cutoff radius $r_c$.
 The switching function $s(r)$ is smooth in the sense that the second-order derivative is continuous.
 
-Each row of the embedding matrix  $\mathcal{G}^i \in \mathbb{R}^{N_c \times M}$ consists of $M$ nodes from the output layer of an NN function $\mathcal{N}_ {g}$ of $s(r_{ij})$:
+Each row of the embedding matrix $\mathcal{G}^i \in \mathbb{R}^{N_c \times M}$ consists of $M$ nodes from the output layer of an NN function $\mathcal{N}_ {g}$ of $s(r_{ij})$:
 
 ```math
     (\mathcal{G}^i)_j = \mathcal{N}_{e,2}(s(r_{ij})),
@@ -54,17 +58,20 @@ $\mathcal{G}^i_< \in \mathbb{R}^{N_c \times M_<}$ only takes first $M_<$ columns
 $r_s$, $r_c$, $M$ and $M_<$ are hyperparameters provided by the user.
 The DeepPot-SE is continuous up to the second-order derivative in its domain.[^1]
 
-[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
 
 ## Instructions
 
-In this example, we will train a DeepPot-SE model for a water system.  A complete training input script of this example can be found in the directory.
+In this example, we will train a DeepPot-SE model for a water system. A complete training input script of this example can be found in the directory.
+
 ```bash
 $deepmd_source_dir/examples/water/se_e2_a/input.json
 ```
+
 With the training input script, data are also provided in the example directory. One may train the model with the DeePMD-kit from the directory.
 
 The construction of the descriptor is given by section {ref}`descriptor <model/descriptor>`. An example of the descriptor is provided as follows
+
 ```json
 	"descriptor" :{
 	    "type":		"se_e2_a",
@@ -78,11 +85,12 @@ The construction of the descriptor is given by section {ref}`descriptor <model/d
 	    "seed":		1
 	}
 ```
-* The {ref}`type <model/descriptor/type>` of the descriptor is set to `"se_e2_a"`.
-* {ref}`rcut <model/descriptor[se_e2_a]/rcut>` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth <model/descriptor[se_e2_a]/rcut_smth>` gives where the smoothing starts.
-* {ref}`sel <model/descriptor[se_e2_a]/sel>` gives the maximum possible number of neighbors in the cut-off radius. It is a list, the length of which is the same as the number of atom types in the system, and `sel[i]` denotes the maximum possible number of neighbors with type `i`.
-* The {ref}`neuron <model/descriptor[se_e2_a]/neuron>` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them.
-* If the option {ref}`type_one_side <model/descriptor[se_e2_a]/type_one_side>` is set to `true`, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters.
-* The {ref}`axis_neuron <model/descriptor[se_e2_a]/axis_neuron>` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003)
-* If the option {ref}`resnet_dt <model/descriptor[se_e2_a]/resnet_dt>` is set to `true`, then a timestep is used in the ResNet.
-* {ref}`seed <model/descriptor[se_e2_a]/seed>` gives the random seed that is used to generate random numbers when initializing the model parameters.
+
+- The {ref}`type <model/descriptor/type>` of the descriptor is set to `"se_e2_a"`.
+- {ref}`rcut <model/descriptor[se_e2_a]/rcut>` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth <model/descriptor[se_e2_a]/rcut_smth>` gives where the smoothing starts.
+- {ref}`sel <model/descriptor[se_e2_a]/sel>` gives the maximum possible number of neighbors in the cut-off radius. It is a list, the length of which is the same as the number of atom types in the system, and `sel[i]` denotes the maximum possible number of neighbors with type `i`.
+- The {ref}`neuron <model/descriptor[se_e2_a]/neuron>` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them.
+- If the option {ref}`type_one_side <model/descriptor[se_e2_a]/type_one_side>` is set to `true`, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters.
+- The {ref}`axis_neuron <model/descriptor[se_e2_a]/axis_neuron>` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003)
+- If the option {ref}`resnet_dt <model/descriptor[se_e2_a]/resnet_dt>` is set to `true`, then a timestep is used in the ResNet.
+- {ref}`seed <model/descriptor[se_e2_a]/seed>` gives the random seed that is used to generate random numbers when initializing the model parameters.
diff --git a/doc/model/train-se-e2-r.md b/doc/model/train-se-e2-r.md
index f2f990b16a..f427310196 100644
--- a/doc/model/train-se-e2-r.md
+++ b/doc/model/train-se-e2-r.md
@@ -1,4 +1,8 @@
-# Descriptor `"se_e2_r"`
+# Descriptor `"se_e2_r"` {{ tensorflow_icon }} {{ pytorch_icon }} {{ dpmodel_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}, DP {{ dpmodel_icon }}
+:::
 
 The notation of `se_e2_r` is short for the Deep Potential Smooth Edition (DeepPot-SE) constructed from the radial information of atomic configurations. The `e2` stands for the embedding with two-atom information.
 
@@ -14,7 +18,7 @@ where
 $N_c$ is the expected maximum number of neighboring atoms, which is the same constant for all atoms over all frames.
 A matrix with a dimension of $N_c$ will be padded if the number of neighboring atoms is less than $N_c$.
 
-Each row of the embedding matrix  $\mathcal{G}^i \in \mathbb{R}^{N_c \times M}$ consists of $M$ nodes from the output layer of an NN function $\mathcal{N}_ {g}$ of $s(r_{ij})$:
+Each row of the embedding matrix $\mathcal{G}^i \in \mathbb{R}^{N_c \times M}$ consists of $M$ nodes from the output layer of an NN function $\mathcal{N}_ {g}$ of $s(r_{ij})$:
 
 ```math
     (\mathcal{G}^i)_j = \mathcal{N}_{e,2}(s(r_{ij})),
@@ -31,23 +35,25 @@ where $\boldsymbol{r}_ {ij}=\boldsymbol{r}_ j-\boldsymbol{r}_ i = (x_{ij}, y_{ij
     \end{cases}
 ```
 
-where $x=\frac{r - r_s}{ r_c - r_s}$  switches from 1 at $r_s$ to 0 at the cutoff radius $r_c$.
+where $x=\frac{r - r_s}{ r_c - r_s}$ switches from 1 at $r_s$ to 0 at the cutoff radius $r_c$.
 The switching function $s(r)$ is smooth in the sense that the second-order derivative is continuous.
 
 In the above equations, the network parameters are not explicitly written.
 $r_s$, $r_c$ and $M$ are hyperparameters provided by the user.
 The DeepPot-SE is continuous up to the second-order derivative in its domain.[^1]
 
-[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
 
 ## Instructions
 
 A complete training input script of this example can be found in the directory
+
 ```bash
 $deepmd_source_dir/examples/water/se_e2_r/input.json
 ```
 
 The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.md). The only difference lies in the {ref}`descriptor <model/descriptor>` section
+
 ```json
 	"descriptor": {
 	    "type":		"se_e2_r",
@@ -55,9 +61,11 @@ The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.m
 	    "rcut_smth":	0.50,
 	    "rcut":		6.00,
 	    "neuron":		[5, 10, 20],
+        "type_one_side": true,
 	    "resnet_dt":	false,
 	    "seed":		1,
 	    "_comment": " that's all"
 	},
 ```
+
 The type of the descriptor is set by the key {ref}`type <model/descriptor/type>`.
diff --git a/doc/model/train-se-e3.md b/doc/model/train-se-e3.md
index 5b0710a389..3a0c1a9547 100644
--- a/doc/model/train-se-e3.md
+++ b/doc/model/train-se-e3.md
@@ -1,13 +1,19 @@
-# Descriptor `"se_e3"`
+# Descriptor `"se_e3"` {{ tensorflow_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}
+:::
 
 The notation of `se_e3` is short for the Deep Potential Smooth Edition (DeepPot-SE) constructed from all information (both angular and radial) of atomic configurations. The embedding takes bond angles between a central atom and its two neighboring atoms as input (denoted by `e3`).
 
 ## Theory
 
 The three-body embedding DeepPot-SE descriptor incorporates bond-angle information, making the model more accurate. The descriptor $\mathcal{D}^i$ can be represented as
+
 ```math
     \mathcal{D}^i = \frac{1}{N_c^2}(\mathcal{R}^i(\mathcal{R}^i)^T):\mathcal{G}^i,
 ```
+
 where
 $N_c$ is the expected maximum number of neighboring atoms, which is the same constant for all atoms over all frames.
 $\mathcal{R}^i$ is constructed as
@@ -20,6 +26,7 @@ $\mathcal{R}^i$ is constructed as
     \end{array}
     \},
 ```
+
 Currently, only the full information case of $\mathcal{R}^i$ is supported by the three-body embedding.
 Each element of $\mathcal{G}^i \in \mathbb{R}^{N_c \times N_c \times M}$ comes from $M$ nodes from the output layer of an NN $\mathcal{N}_{e,3}$ function:
 
@@ -30,16 +37,18 @@ Each element of $\mathcal{G}^i \in \mathbb{R}^{N_c \times N_c \times M}$ comes f
 where $(\theta_i)_ {jk} = (\mathcal{R}^i)_ {j,\\{2,3,4\\}}\cdot (\mathcal{R}^i)_ {k,\\{2,3,4\\}}$ considers the angle form of two neighbours ($j$ and $k$).
 The notation $:$ in the equation indicates the contraction between matrix $\mathcal{R}^i(\mathcal{R}^i)^T$ and the first two dimensions of tensor $\mathcal{G}^i$.[^1]
 
-[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
 
 ## Instructions
 
 A complete training input script of this example can be found in the directory
+
 ```bash
 $deepmd_source_dir/examples/water/se_e3/input.json
 ```
 
 The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.md). The only difference lies in the `descriptor <model/descriptor>` section
+
 ```json
 	"descriptor": {
 	    "type":		"se_e3",
@@ -52,4 +61,5 @@ The training input script is very similar to that of [`se_e2_a`](train-se-e2-a.m
 	    "_comment":		" that's all"
 	},
 ```
+
 The type of the descriptor is set by the key {ref}`type <model/descriptor/type>`.
diff --git a/doc/nvnmd/nvnmd.md b/doc/nvnmd/nvnmd.md
index c11fee0bc9..67cfb5e22d 100644
--- a/doc/nvnmd/nvnmd.md
+++ b/doc/nvnmd/nvnmd.md
@@ -1,4 +1,8 @@
-# Introduction
+# Introduction {{ tensorflow_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}
+:::
 
 NVNMD stands for non-von Neumann molecular dynamics.
 
@@ -29,7 +33,6 @@ where `$dataset` is the path to the data set and `$workspace` is the path to the
 
 Create and go to the training directory.
 
-
 ```bash
 mkdir train
 cd train
@@ -46,10 +49,10 @@ The structure of the input script is as follows
 
 ```json
 {
-    "nvnmd" : {},
-    "learning_rate" : {},
-    "loss" : {},
-    "training": {}
+  "nvnmd": {},
+  "learning_rate": {},
+  "loss": {},
+  "training": {}
 }
 ```
 
@@ -59,29 +62,30 @@ The "nvnmd" section is defined as
 
 ```json
 {
-    "version": 0,
-    "max_nnei":128,
-    "net_size":128,
-    "sel":[60, 60],
-    "rcut":6.0,
-    "rcut_smth":0.5,
-    "type_map": ["Ge", "Te"]
+  "version": 0,
+  "max_nnei": 128,
+  "net_size": 128,
+  "sel": [60, 60],
+  "rcut": 6.0,
+  "rcut_smth": 0.5,
+  "type_map": ["Ge", "Te"]
 }
 ```
 
 where items are defined as:
 
-| Item      | Mean                        | Optional Value                                |
-| --------- | --------------------------- | --------------------------------------------- |
-| version | the version of network structure | 0 or 1 |
-| max_nnei  | the maximum number of neighbors that do not distinguish element types | 128  or 256 |
-| net_size  | the size of nueral network  | 128                                     |
-| sel       | the number of neighbors     | version 0: integer list of lengths 1 to 4 are acceptable; version 1: integer |
-| rcut      | the cutoff radial           | (0, 8.0]                                      |
-| rcut_smth | the smooth cutoff parameter | (0, 8.0]                                      |
-| type_map | mapping atom type to the name (str) of the type | string list, optional |
+| Item      | Mean                                                                  | Optional Value                                                               |
+| --------- | --------------------------------------------------------------------- | ---------------------------------------------------------------------------- |
+| version   | the version of network structure                                      | 0 or 1                                                                       |
+| max_nnei  | the maximum number of neighbors that do not distinguish element types | 128 or 256                                                                   |
+| net_size  | the size of nueral network                                            | 128                                                                          |
+| sel       | the number of neighbors                                               | version 0: integer list of lengths 1 to 4 are acceptable; version 1: integer |
+| rcut      | the cutoff radial                                                     | (0, 8.0]                                                                     |
+| rcut_smth | the smooth cutoff parameter                                           | (0, 8.0]                                                                     |
+| type_map  | mapping atom type to the name (str) of the type                       | string list, optional                                                        |
 
 Multiple versions of the nvnmd model correspond to different network structures. `nvnmd-v0` and `nvnmd-v1` differ in the following ways:
+
 1. `nvnmd-v0` and `nvnmd-v1` use the `se_a` descriptor and `se_atten` descriptor, respectively
 2. `nvnmd-v0` has 1 set of parameters for each element and supports up to 4 element types. `nvnmd-v1` shares 1 set of parameters for each element and supports up to 31 types.
 3. `nvnmd-v0` distinguishes between neighboring atoms, so `sel` is a list of integers. `nvnmd-v1` does not distinguish between neighboring atoms, so `sel` is an integer.
@@ -92,20 +96,20 @@ The "learning_rate" section is defined as
 
 ```json
 {
-    "type":"exp",
-    "start_lr": 1e-3,
-    "stop_lr": 3e-8,
-    "decay_steps": 5000
+  "type": "exp",
+  "start_lr": 1e-3,
+  "stop_lr": 3e-8,
+  "decay_steps": 5000
 }
 ```
 
 where items are defined as:
 
-| Item        | Mean                                                         | Optional Value         |
-| ----------- | ------------------------------------------------------------ | ---------------------- |
-| type        | learning rate variant type                                   | exp                    |
-| start_lr    | the learning rate at the beginning of the training           | a positive real number |
-| stop_lr     | the desired learning rate at the end of the training         | a positive real number |
+| Item        | Mean                                                             | Optional Value         |
+| ----------- | ---------------------------------------------------------------- | ---------------------- |
+| type        | learning rate variant type                                       | exp                    |
+| start_lr    | the learning rate at the beginning of the training               | a positive real number |
+| stop_lr     | the desired learning rate at the end of the training             | a positive real number |
 | decay_stops | the learning rate is decaying every {decay_stops} training steps | a positive integer     |
 
 ### loss
@@ -114,12 +118,12 @@ The "loss" section is defined as
 
 ```json
 {
-    "start_pref_e": 0.02,
-    "limit_pref_e": 2,
-    "start_pref_f": 1000,
-    "limit_pref_f": 1,
-    "start_pref_v": 0,
-    "limit_pref_v": 0
+  "start_pref_e": 0.02,
+  "limit_pref_e": 2,
+  "start_pref_f": 1000,
+  "limit_pref_f": 1,
+  "start_pref_v": 0,
+  "limit_pref_v": 0
 }
 ```
 
@@ -141,17 +145,17 @@ The "training" section is defined as
 ```json
 {
   "seed": 1,
-    "stop_batch": 1000000,
-    "numb_test": 1,
-    "disp_file": "lcurve.out",
-    "disp_freq": 1000,
-    "save_ckpt": "model.ckpt",
-    "save_freq": 10000,
-    "training_data":{
-      "systems":["system1_path", "system2_path", "..."],
-      "set_prefix": "set",
-      "batch_size": ["batch_size_of_system1", "batch_size_of_system2", "..."]
-    }
+  "stop_batch": 1000000,
+  "numb_test": 1,
+  "disp_file": "lcurve.out",
+  "disp_freq": 1000,
+  "save_ckpt": "model.ckpt",
+  "save_freq": 10000,
+  "training_data": {
+    "systems": ["system1_path", "system2_path", "..."],
+    "set_prefix": "set",
+    "batch_size": ["batch_size_of_system1", "batch_size_of_system2", "..."]
+  }
 }
 ```
 
@@ -185,20 +189,19 @@ After the training process, you will get two folders: `nvnmd_cnn` and `nvnmd_qnn
 
 You can also restart the CNN training from the path prefix of checkpoint files (`nvnmd_cnn/model.ckpt`) by
 
-``` bash
+```bash
 dp train-nvnmd train_cnn.json -r nvnmd_cnn/model.ckpt -s s1
 ```
 
 You can also initialize the CNN model and train it by
 
-``` bash
+```bash
 mv nvnmd_cnn nvnmd_cnn_bck
 cp train_cnn.json train_cnn2.json
 # please edit train_cnn2.json
 dp train-nvnmd train_cnn2.json -s s1 -i nvnmd_cnn_bck/model.ckpt
 ```
 
-
 # Testing
 
 The frozen model can be used in many ways. The most straightforward testing can be invoked by
@@ -211,6 +214,7 @@ dp test -m ./nvnmd_qnn/frozen_model.pb -s path/to/system -d ./test/detail -n 999
 where the frozen model file to import is given via the `-m` command line flag, the path to the testing data set is given via the `-s` command line flag, and the file containing details of energy, forces and virials accuracy is given via the `-d` command line flag, the amount of data for testing is given via the `-n` command line flag.
 
 # Running MD in Bohrium
+
 After CNN and QNN training, you can upload the ML model to our online NVNMD system and run MD there through Bohrium (https://bohrium.dp.tech). Bohrium is a research platfrom designed for AI for Science Era. For more information, please refer to [Bohrium Introduction](https://bohrium-doc.dp.tech/en/docs/WhatIsBohrium/).
 
 ## Registration
@@ -247,30 +251,30 @@ Then you need prepare the configuration file `job.json`, the configuration file
 
 ```json
 {
-    "job_name": "test",
-    "command": "/usr/bin/lmp_mpi < in.lmp;",
-    "log_file": "OUTCAR",
-    "machine_type": "c4_m16_cpu",
-    "job_type": "container",
-    "image_name": "lammps_dp:29Sep2021",
-    "platform": "hnugba",
-    "region": "default",
-    "project_id": 0000
+  "job_name": "test",
+  "command": "/usr/bin/lmp_mpi < in.lmp;",
+  "log_file": "OUTCAR",
+  "machine_type": "c4_m16_cpu",
+  "job_type": "container",
+  "image_name": "lammps_dp:29Sep2021",
+  "platform": "hnugba",
+  "region": "default",
+  "project_id": 0000
 }
 ```
 
 where items are defined as:
 
-| Item         | Mean                                                                                                                       | Optional Value |
-| ------------ | -------------------------------------------------------------------------------------------------------------------------- | -------------- |
-| job_name     | the name of computing job, which can be named freely                                                                       | a string       |
-| command      | the command to be executed on the computing node                                                                           | a string       |
-| log_file     | the log file that can be viewed at any time during the calculation process, which can be viewed on the Bohrium "Jobs" page | a string       |
-| machine_type | the machine type used for the job                                                                                          | "c1_m4_cpu", "c4_m16_cpu", "c8_m32_cpu"   |
-| job_type     | the job type                                                                                                               | "container"    |
-| image_name   | the image name used for the job                                                                                            | "lammps_dp:29Sep2021"|
-| platform     | resource provider                                                                                                          | "hnugba"          |
-| project_id   | the project ID to which the job belongs, which can be viewed on the "Projects" page                                        | a integer      |
+| Item         | Mean                                                                                                                       | Optional Value                          |
+| ------------ | -------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- |
+| job_name     | the name of computing job, which can be named freely                                                                       | a string                                |
+| command      | the command to be executed on the computing node                                                                           | a string                                |
+| log_file     | the log file that can be viewed at any time during the calculation process, which can be viewed on the Bohrium "Jobs" page | a string                                |
+| machine_type | the machine type used for the job                                                                                          | "c1_m4_cpu", "c4_m16_cpu", "c8_m32_cpu" |
+| job_type     | the job type                                                                                                               | "container"                             |
+| image_name   | the image name used for the job                                                                                            | "lammps_dp:29Sep2021"                   |
+| platform     | resource provider                                                                                                          | "hnugba"                                |
+| project_id   | the project ID to which the job belongs, which can be viewed on the "Projects" page                                        | a integer                               |
 
 Notice：The task will use 4 CPU cores for computation, so do not repeatedly use the `mpirun` command, otherwise an error will be reported. All 0000 after "project_id" need to be replaced with your own project ID, which can be viewed on the "Projects" page. Also, the JSON file format requires that no commas be added after the last field within the {}, otherwise, there will be a syntax error. Please check the [documentation](https://github.com/LiuGroupHNU/md-data/blob/master/code/doc/mdpu/hardware.md) for the latest hardware configuration information.
 
diff --git a/doc/sphinx_contrib_exhale_multiproject.py b/doc/sphinx_contrib_exhale_multiproject.py
index e05cf88ba2..e26cc158a4 100644
--- a/doc/sphinx_contrib_exhale_multiproject.py
+++ b/doc/sphinx_contrib_exhale_multiproject.py
@@ -103,11 +103,11 @@ def exhale_environment_ready(app):
         app.config.exhale_args["containmentFolder"] = os.path.realpath(
             app.config.exhale_args["containmentFolder"]
         )
-        print("=" * 75)
-        print(project)
-        print("-" * 50)
-        pprint(app.config.exhale_args)
-        print("=" * 75)
+        print("=" * 75)  # noqa: T201
+        print(project)  # noqa: T201
+        print("-" * 50)  # noqa: T201
+        pprint(app.config.exhale_args)  # noqa: T203
+        print("=" * 75)  # noqa: T201
 
         # First, setup the extension and verify all of the configurations.
         exhale.configs.apply_sphinx_configurations(app)
diff --git a/doc/test/index.md b/doc/test/index.md
deleted file mode 100644
index 4a502123d9..0000000000
--- a/doc/test/index.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# Test
-
-- [Test a model](test.md)
-- [Calculate Model Deviation](model-deviation.md)
diff --git a/doc/test/model-deviation.md b/doc/test/model-deviation.md
index a59696c5ee..441d1aabc6 100644
--- a/doc/test/model-deviation.md
+++ b/doc/test/model-deviation.md
@@ -6,50 +6,61 @@ Model deviation $\epsilon_y$ is the standard deviation of properties $\boldsymbo
 The DeePMD-kit supports $\boldsymbol y$ to be the atomic force $\boldsymbol F_i$ and the virial tensor $\boldsymbol \Xi$.
 The model deviation is used to estimate the error of a model at a certain data frame, denoted by $\boldsymbol x$, containing the coordinates and chemical species of all atoms.
 We present the model deviation of the atomic force and the virial tensor
+
 ```math
     \epsilon_{\boldsymbol{F},i} (\boldsymbol x)=
     \sqrt{\langle \lVert \boldsymbol F_i(\boldsymbol x; \boldsymbol \theta_k)-\langle \boldsymbol F_i(\boldsymbol x; \boldsymbol \theta_k) \rangle \rVert^2 \rangle},
 ```
+
 ```math
     \epsilon_{\boldsymbol{\Xi},{\alpha \beta}} (\boldsymbol x)=
     \frac{1}{N} \sqrt{\langle ( {\Xi}_{\alpha \beta}(\boldsymbol x; \boldsymbol \theta_k)-\langle {\Xi}_{\alpha \beta}(\boldsymbol x; \boldsymbol \theta_k) \rangle )^2 \rangle},
 ```
+
 where $\boldsymbol \theta_k$ is the parameters of the model $\mathcal M_k$, and the ensemble average $\langle\cdot\rangle$ is estimated by
+
 ```math
     \langle \boldsymbol y(\boldsymbol x; \boldsymbol \theta_k) \rangle
     =
     \frac{1}{n_m} \sum_{k=1}^{n_m} \boldsymbol y(\boldsymbol x; \boldsymbol \theta_k).
 ```
+
 Small $\epsilon_{\boldsymbol{F},i}$ means the model has learned the given data; otherwise, it is not covered, and the training data needs to be expanded.
 If the magnitude of $\boldsymbol F_i$ or $\boldsymbol \Xi$ is quite large,
 a relative model deviation $\epsilon_{\boldsymbol{F},i,\text{rel}}$ or $\epsilon_{\boldsymbol{\Xi},\alpha\beta,\text{rel}}$ can be used instead of the absolute model deviation:
+
 ```math
     \epsilon_{\boldsymbol{F},i,\text{rel}}  (\boldsymbol x)
     =
     \frac{\lvert \epsilon_{\boldsymbol{F},i} (\boldsymbol x) \lvert}
     {\lvert \langle \boldsymbol F_i (\boldsymbol x; \boldsymbol \theta_k) \rangle \lvert + \nu},
 ```
+
 ```math
     \epsilon_{\boldsymbol{\Xi},\alpha\beta,\text{rel}}  (\boldsymbol x)
     =
     \frac{ \epsilon_{\boldsymbol{\Xi},\alpha\beta} (\boldsymbol x) }
     {\lvert \langle \boldsymbol \Xi (\boldsymbol x; \boldsymbol \theta_k) \rangle \lvert + \nu},
 ```
+
 where $\nu$ is a small constant used to protect
 an atom where the magnitude of $\boldsymbol{F}_i$ or $\boldsymbol{\Xi}$ is small from having a large model deviation.
 
 Statistics of $\epsilon_{\boldsymbol{F},i}$ and $\epsilon_{\boldsymbol{\Xi},{\alpha \beta}}$ can be provided, including the maximum, average, and minimal model deviation over the atom index $i$ and over the component index $\alpha,\beta$, respectively.
 The maximum model deviation of forces $\epsilon_{\boldsymbol F,\text{max}}$ in a frame was found to be the best error indicator in a concurrent or active learning algorithm.[^1]
 
-[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
 
 ## Instructions
 
 One can also use a subcommand to calculate the deviation of predicted forces or virials for a bunch of models in the following way:
+
 ```bash
 dp model-devi -m graph.000.pb graph.001.pb graph.002.pb graph.003.pb -s ./data -o model_devi.out
 ```
+
 where `-m` specifies graph files to be calculated, `-s` gives the data to be evaluated, `-o` the file to which model deviation results is dumped. Here is more information on this sub-command:
+
 ```bash
 usage: dp model-devi [-h] [-v {DEBUG,3,INFO,2,WARNING,1,ERROR,0}]
                      [-l LOG_PATH] [-m MODELS [MODELS ...]] [-s SYSTEM]
diff --git a/doc/test/test.md b/doc/test/test.md
index c206e8d777..251a12c7e2 100644
--- a/doc/test/test.md
+++ b/doc/test/test.md
@@ -1,14 +1,19 @@
 # Test a model
 
 The frozen model can be used in many ways. The most straightforward test can be performed using `dp test`. A typical usage of `dp test` is
+
 ```bash
 dp test -m graph.pb -s /path/to/system -n 30
 ```
+
 where `-m` gives the tested model, `-s` the path to the tested system and `-n` the number of tested frames. Several other command line options can be passed to `dp test`, which can be checked with
+
 ```bash
 $ dp test --help
 ```
+
 An explanation will be provided
+
 ```
 usage: dp test [-h] [-m MODEL] [-s SYSTEM] [-S SET_PREFIX] [-n NUMB_TEST]
                [-r RAND_SEED] [--shuffle-test] [-d DETAIL_FILE]
diff --git a/doc/third-party/ase.md b/doc/third-party/ase.md
index ac65fc926e..76371a3197 100644
--- a/doc/third-party/ase.md
+++ b/doc/third-party/ase.md
@@ -1,6 +1,7 @@
 # Use deep potential with ASE
 
 Deep potential can be set up as a calculator with ASE to obtain potential energies and forces.
+
 ```python
 from ase import Atoms
 from deepmd.calculator import DP
@@ -16,6 +17,7 @@ print(water.get_forces())
 ```
 
 Optimization is also available:
+
 ```python
 from ase.optimize import BFGS
 
diff --git a/doc/third-party/dpdata.md b/doc/third-party/dpdata.md
new file mode 100644
index 0000000000..05e0f6fb40
--- /dev/null
+++ b/doc/third-party/dpdata.md
@@ -0,0 +1,12 @@
+# Use deep potential with dpdata
+
+DeePMD-kit provides a driver for [dpdata](https://github.com/deepmodeling/dpdata) >=0.2.7 via the plugin mechanism, making it possible to call the `predict` method for `System` class:
+
+```py
+import dpdata
+
+dsys = dpdata.LabeledSystem("OUTCAR")
+dp_sys = dsys.predict("frozen_model_compressed.pb", driver="dp")
+```
+
+By inferring with the DP model `frozen_model_compressed.pb`, dpdata will generate a new labeled system `dp_sys` with inferred energies, forces, and virials.
diff --git a/doc/third-party/gromacs.md b/doc/third-party/gromacs.md
index 672fb693b9..c9779611e7 100644
--- a/doc/third-party/gromacs.md
+++ b/doc/third-party/gromacs.md
@@ -1,10 +1,15 @@
 # Running MD with GROMACS
+
 ## DP/MM Simulation
+
 This part gives a simple tutorial on how to run a DP/MM simulation for methane in water, which means using DP for methane and TIP3P for water. All relevant files can be found in `examples/methane`.
+
 ### Topology Preparation
+
 Similar to QM/MM simulation, the internal interactions (including bond, angle, dihedrals, LJ, Columb) of the region described by a neural network potential (NNP) have to be **turned off**. In GROMACS, bonded interactions can be turned off by modifying `[ bonds ]`, `[ angles ]`, `[ dihedrals ]` and `[ pairs ]` sections. And LJ and Columb interactions must be turned off by `[ exclusions ]` section.
 
 For example, if one wants to simulate ethane in water, using DeepPotential for methane and TIP3P for water, the topology of methane should be like the following (as presented in `examples/methane/methane.itp`):
+
 ```
 [ atomtypes ]
 ;name btype  mass  charge ptype    sigma  epsilon
@@ -38,7 +43,9 @@ For example, if one wants to simulate ethane in water, using DeepPotential for m
   4    1    2    3    5
   5    1    2    3    4
 ```
+
 For comparison, the original topology file generated by `acpype` will be:
+
 ```
 ; methane_GMX.itp created by acpype (v: 2021-02-05T22:15:50CET) on Wed Sep  8 01:21:53 2021
 
@@ -75,45 +82,60 @@ For comparison, the original topology file generated by `acpype` will be:
      3      1      5      1    1.0758e+02    3.2635e+02 ;     H2 - C1     - H4
      4      1      5      1    1.0758e+02    3.2635e+02 ;     H3 - C1     - H4
 ```
+
 ### DeepMD Settings
+
 Before running simulations, we need to tell GROMACS to use DeepPotential by setting the environment variable `GMX_DEEPMD_INPUT_JSON`:
+
 ```bash
 export GMX_DEEPMD_INPUT_JSON=input.json
 ```
+
 Then, in your working directories, we have to write `input.json` file:
+
 ```json
 {
-    "graph_file": "/path/to/graph.pb",
-    "type_file": "type.raw",
-    "index_file": "index.raw",
-    "lambda": 1.0,
-    "pbc": false
+  "graph_file": "/path/to/graph.pb",
+  "type_file": "type.raw",
+  "index_file": "index.raw",
+  "lambda": 1.0,
+  "pbc": false
 }
 ```
+
 Here is an explanation for these settings:
-+ `graph_file` : The graph file (with suffix .pb) generated by `dp freeze` command
-+ `type_file` : File to specify DP atom types (in space-separated format). Here, `type.raw` looks like
+
+- `graph_file` : The graph file (with suffix .pb) generated by `dp freeze` command
+- `type_file` : File to specify DP atom types (in space-separated format). Here, `type.raw` looks like
+
 ```
 1 0 0 0 0
 ```
-+ `index_file` : File containing indices of DP atoms (in space-separated format), which should be consistent with the indices' order in .gro file but **starting from zero**. Here, `index.raw` looks like
+
+- `index_file` : File containing indices of DP atoms (in space-separated format), which should be consistent with the indices' order in .gro file but **starting from zero**. Here, `index.raw` looks like
+
 ```
 0 1 2 3 4
 ```
-+ `lambda`: Optional, default 1.0. Used in alchemical calculations.
-+ `pbc`: Optional, default true. If true, the GROMACS periodic condition is passed to DeepMD.
+
+- `lambda`: Optional, default 1.0. Used in alchemical calculations.
+- `pbc`: Optional, default true. If true, the GROMACS periodic condition is passed to DeepMD.
 
 ### Run Simulation
+
 Finally, you can run GROMACS using `gmx mdrun` as usual.
 
 ## All-atom DP Simulation
+
 This part gives an example of how to simulate all atoms described by a DeepPotential with Gromacs, taking water as an example. Instead of using `[ exclusions ]` to turn off the non-bonded energies, we can simply do this by setting LJ parameters (i.e. epsilon and sigma) and partial charges to 0, as shown in `examples/water/gmx/water.top`:
+
 ```
 [ atomtypes ]
 ; name      at.num  mass     charge ptype  sigma      epsilon
 HW           1       1.008   0.0000  A   0.00000e+00  0.00000e+00
 OW           8      16.00    0.0000  A   0.00000e+00  0.00000e+00
 ```
+
 As mentioned in the above section, `input.json` and relevant files (`index.raw`, `type.raw`) should also be created. Then, we can start the simulation under the NVT ensemble and plot the radial distribution function (RDF) by `gmx rdf` command. We can see that the RDF given by Gromacs+DP matches perfectly with Lammps+DP, which further provides an evidence on the validity of our simulation.
 ![rdf](../../examples/water/gmx/rdf.png)
 
diff --git a/doc/third-party/index.md b/doc/third-party/index.md
deleted file mode 100644
index 235337974c..0000000000
--- a/doc/third-party/index.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Integrate with third-party packages
-
-Note that the model for inference is required to be compatible with the DeePMD-kit package. See [Model compatibility](../troubleshooting/model-compatability.html) for details.
-
-- [Use deep potential with ASE](ase.md)
-- [Run MD with LAMMPS](lammps-command.md)
-- [Run path-integral MD with i-PI](ipi.md)
-- [Run MD with GROMACS](gromacs.md)
-- [Interfaces out of DeePMD-kit](out-of-deepmd-kit.md)
diff --git a/doc/third-party/index.rst b/doc/third-party/index.rst
index f88a477fc7..cd0726a4bb 100644
--- a/doc/third-party/index.rst
+++ b/doc/third-party/index.rst
@@ -6,6 +6,7 @@ Note that the model for inference is required to be compatible with the DeePMD-k
 .. toctree::
    :maxdepth: 1
 
+   dpdata
    ase
    lammps-command
    ipi
diff --git a/doc/third-party/ipi.md b/doc/third-party/ipi.md
index 59decdf3bb..84a972d885 100644
--- a/doc/third-party/ipi.md
+++ b/doc/third-party/ipi.md
@@ -1,30 +1,36 @@
 # Run path-integral MD with i-PI
+
 The i-PI works in a client-server model. The i-PI provides the server for integrating the replica positions of atoms, while the DeePMD-kit provides a client named `dp_ipi` that computes the interactions (including energy, forces and virials). The server and client communicate via the Unix domain socket or the Internet socket. Installation instructions for i-PI can be found [here](../install/install-ipi.md). The client can be started by
+
 ```bash
 i-pi input.xml &
 dp_ipi water.json
 ```
+
 It is noted that multiple instances of the client allow for computing, in parallel, the interactions of multiple replicas of the path-integral MD.
 
 `water.json` is the parameter file for the client `dp_ipi`, and an example is provided:
+
 ```json
 {
-    "verbose":		false,
-    "use_unix":		true,
-    "port":		31415,
-    "host":		"localhost",
-    "graph_file":	"graph.pb",
-    "coord_file":	"conf.xyz",
-    "atom_type" : {
-	"OW":		0,
-	"HW1":		1,
-	"HW2":		1
-    }
+  "verbose": false,
+  "use_unix": true,
+  "port": 31415,
+  "host": "localhost",
+  "graph_file": "graph.pb",
+  "coord_file": "conf.xyz",
+  "atom_type": {
+    "OW": 0,
+    "HW1": 1,
+    "HW2": 1
+  }
 }
 ```
+
 The option **`use_unix`** is set to `true` to activate the Unix domain socket, otherwise, the Internet socket is used.
 
 The option **`port`** should be the same as that in input.xml:
+
 ```xml
 <port>31415</port>
 ```
diff --git a/doc/third-party/lammps-command.md b/doc/third-party/lammps-command.md
index 150d755795..63f9d8e3bd 100644
--- a/doc/third-party/lammps-command.md
+++ b/doc/third-party/lammps-command.md
@@ -1,6 +1,7 @@
 # Run MD with LAMMPS
 
 ## units
+
 All units in LAMMPS except `lj` are supported. `lj` is not supported.
 
 The most commonly used units are `metal`, since the internal units of distance, energy, force, and charge in DeePMD-kit are `\AA`, `eV`, `eV / \AA`, and `proton charge`, respectively. These units are consistent with the `metal` units in LAMMPS.
@@ -34,11 +35,12 @@ The DeePMD-kit package provides the pair_style `deepmd`
 ```lammps
 pair_style deepmd models ... keyword value ...
 ```
+
 - deepmd = style of this pair_style
 - models = frozen model(s) to compute the interaction.
-If multiple models are provided, then only the first model serves to provide energy and force prediction for each timestep of molecular dynamics,
-and the model deviation will be computed among all models every `out_freq` timesteps.
-- keyword = *out_file* or *out_freq* or *fparam* or *fparam_from_compute* or *aparam_from_compute* or *atomic* or *relative* or *relative_v* or *aparam* or *ttm*
+  If multiple models are provided, then only the first model serves to provide energy and force prediction for each timestep of molecular dynamics,
+  and the model deviation will be computed among all models every `out_freq` timesteps.
+- keyword = _out_file_ or _out_freq_ or _fparam_ or _fparam_from_compute_ or _aparam_from_compute_ or _atomic_ or _relative_ or _relative_v_ or _aparam_ or _ttm_
 <pre>
     <i>out_file</i> value = filename
         filename = The file name for the model deviation output. Default is model_devi.out
@@ -63,6 +65,7 @@ and the model deviation will be computed among all models every `out_freq` times
 </pre>
 
 ### Examples
+
 ```lammps
 pair_style deepmd graph.pb
 pair_style deepmd graph.pb fparam 1.2
@@ -77,6 +80,7 @@ compute    1 all ke/atom
 ```
 
 ### Description
+
 Evaluate the interaction of the system by using [Deep Potential][DP] or [Deep Potential Smooth Edition][DP-SE]. It is noticed that deep potential is not a "pairwise" interaction, but a multi-body interaction.
 
 This pair style takes the deep potential defined in a model file that usually has the .pb extension. The model can be trained and frozen by package [DeePMD-kit](https://github.com/deepmodeling/deepmd-kit), which can have either double or single float precision interface.
@@ -107,8 +111,8 @@ If the training parameter {ref}`type_map <model/type_map>` is not set, atom name
 Spin is specified by keywords `virtual_len` and `spin_norm`. If the keyword `virtual_len` is set, the distance between virtual atom and its corresponding real atom for each type of magnetic atoms will be fed to the model as the spin parameters. If the keyword `spin_norm` is set, the magnitude of the magnetic moment for each type of magnetic atoms will be fed to the model as the spin parameters.
 
 ### Restrictions
-- The `deepmd` pair style is provided in the USER-DEEPMD package, which is compiled from the DeePMD-kit, visit the [DeePMD-kit website](https://github.com/deepmodeling/deepmd-kit) for more information.
 
+- The `deepmd` pair style is provided in the USER-DEEPMD package, which is compiled from the DeePMD-kit, visit the [DeePMD-kit website](https://github.com/deepmodeling/deepmd-kit) for more information.
 
 ## Compute tensorial properties
 
@@ -117,6 +121,7 @@ The DeePMD-kit package provides the compute `deeptensor/atom` for computing atom
 ```lammps
 compute ID group-ID deeptensor/atom model_file
 ```
+
 - ID: user-assigned name of the computation
 - group-ID: ID of the group of atoms to compute
 - deeptensor/atom: the style of this compute
@@ -125,27 +130,33 @@ compute ID group-ID deeptensor/atom model_file
 At this time, the training parameter {ref}`type_map <model/type_map>` will be mapped to LAMMPS atom types.
 
 ### Examples
+
 ```lammps
 compute         dipole all deeptensor/atom dipole.pb
 ```
+
 The result of the compute can be dumped to trajectory file by
+
 ```lammps
 dump            1 all custom 100 water.dump id type c_dipole[1] c_dipole[2] c_dipole[3]
 ```
 
 ### Restrictions
+
 - The `deeptensor/atom` compute is provided in the USER-DEEPMD package, which is compiled from the DeePMD-kit, visit the [DeePMD-kit website](https://github.com/deepmodeling/deepmd-kit) for more information.
 - For the issue of using a unit style for `compute deeptensor/atom`, refer to the discussions in [units](#units) of this page.
 
-
 ## Long-range interaction
+
 The reciprocal space part of the long-range interaction can be calculated by LAMMPS command `kspace_style`. To use it with DeePMD-kit, one writes
+
 ```lammps
 pair_style	deepmd graph.pb
 pair_coeff  * *
 kspace_style	pppm 1.0e-5
 kspace_modify	gewald 0.45
 ```
+
 Please notice that the DeePMD does nothing to the direct space part of the electrostatic interaction, because this part is assumed to be fitted in the DeePMD model (the direct space cut-off is thus the cut-off of the DeePMD model). The splitting parameter `gewald` is modified by the `kspace_modify` command.
 
 ## Use of the centroid/stress/atom to get the full 3x3 "atomic-virial"
@@ -157,9 +168,11 @@ $$dvatom=-\sum_{m}( \mathbf{r}_n- \mathbf{r}_m) \frac{de_m}{d\mathbf{r}_n}$$
 Where $\mathbf{r}_n$ is the atomic position of nth atom, $\mathbf{v}_n$ velocity of the atom and $\frac{de_m}{d\mathbf{r}_n}$ the derivative of the atomic energy.
 
 In LAMMPS one can get the per-atom stress using the command `centroid/stress/atom`:
+
 ```lammps
 compute ID group-ID centroid/stress/atom NULL virial
 ```
+
 see [LAMMPS doc page](https://docs.lammps.org/compute_stress_atom.html#thompson2) for more details on the meaning of the keywords.
 
 :::{versionchanged} v2.2.3
@@ -167,20 +180,25 @@ v2.2.2 or previous versions passed per-atom stress (`cvatom`) with the per-atom
 :::
 
 ### Examples
+
 In order of computing the 9-component per-atom stress
+
 ```lammps
 compute stress all centroid/stress/atom NULL virial
 ```
+
 Thus `c_stress` is an array with 9 components in the order `xx,yy,zz,xy,xz,yz,yx,zx,zy`.
 
 If you use this feature please cite [D. Tisi, L. Zhang, R. Bertossa, H. Wang, R. Car, S. Baroni - arXiv preprint arXiv:2108.10850, 2021](https://arxiv.org/abs/2108.10850)
 
 ## Computation of heat flux
+
 Using a per-atom stress tensor one can, for example, compute the heat flux defined as:
 
 $$\mathbf J = \sum_n e_n \mathbf v_n + \sum_{n,m} ( \mathbf r_m- \mathbf r_n) \frac{de_m}{d\mathbf r_n} \mathbf v_n$$
 
 to compute the heat flux with LAMMPS:
+
 ```lammps
 compute ke_ID all ke/atom
 compute pe_ID all pe/atom
@@ -196,10 +214,10 @@ compute pe all pe/atom
 compute stress all centroid/stress/atom NULL virial
 compute flux all heat/flux ke pe stress
 ```
+
 `c_flux` is a global vector of length 6. The first three components are the $x$, $y$ and $z$ components of the full heat flux vector. The others are the components of the so-called convective portion, see [LAMMPS doc page](https://docs.lammps.org/compute_heat_flux.html) for more detailes.
 
 If you use these features please cite [D. Tisi, L. Zhang, R. Bertossa, H. Wang, R. Car, S. Baroni - arXiv preprint arXiv:2108.10850, 2021](https://arxiv.org/abs/2108.10850)
 
-
-[DP]:https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.120.143001
-[DP-SE]:https://dl.acm.org/doi/10.5555/3327345.3327356
+[DP]: https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.120.143001
+[DP-SE]: https://dl.acm.org/doi/10.5555/3327345.3327356
diff --git a/doc/third-party/out-of-deepmd-kit.md b/doc/third-party/out-of-deepmd-kit.md
index 71dc9adb23..3eb722c040 100644
--- a/doc/third-party/out-of-deepmd-kit.md
+++ b/doc/third-party/out-of-deepmd-kit.md
@@ -2,19 +2,6 @@
 
 The codes of the following interfaces are not a part of the DeePMD-kit package and maintained by other repositories. We list these interfaces here for user convenience.
 
-## dpdata
-
-[dpdata](https://github.com/deepmodeling/dpdata) provides the `predict` method for `System` class:
-
-```py
-import dpdata
-
-dsys = dpdata.LabeledSystem("OUTCAR")
-dp_sys = dsys.predict("frozen_model_compressed.pb")
-```
-
-By inferring with the DP model `frozen_model_compressed.pb`, dpdata will generate a new labeled system `dp_sys` with inferred energies, forces, and virials.
-
 ## OpenMM plugin for DeePMD-kit
 
 An [OpenMM](https://github.com/openmm/openmm) plugin is provided from [JingHuangLab/openmm_deepmd_plugin](https://github.com/JingHuangLab/openmm_deepmd_plugin), written by the [Huang Lab](http://www.compbiophysics.org/) at Westlake University.
@@ -24,6 +11,7 @@ An [OpenMM](https://github.com/openmm/openmm) plugin is provided from [JingHuang
 Starting from [AmberTools24](https://ambermd.org/), `sander` includes an interface to the DeePMD-kit, which implements the [Deep Potential Range Corrected (DPRc) correction](../model/dprc.md).
 The DPRc model and the interface were developed by the [York Lab](https://theory.rutgers.edu/) from Rutgers University.
 More details are available in
+
 - [Amber Reference Manuals](https://ambermd.org/Manuals.php), providing documentation for how to enable the interface and the `&dprc` namelist;
 - [GitLab RutgersLBSR/AmberDPRc](https://gitlab.com/RutgersLBSR/AmberDPRc/), providing examples mdin files;
 - [DP-Amber](https://github.com/njzjz/dpamber/), a tiny tool to convert Amber trajectory to DPRc training data;
diff --git a/doc/train-input-auto.rst b/doc/train-input-auto.rst
deleted file mode 100644
index a3b69eade9..0000000000
--- a/doc/train-input-auto.rst
+++ /dev/null
@@ -1,1502 +0,0 @@
-.. _`model`:
-
-model:
-    | type: ``dict``
-    | argument path: ``model``
-
-    .. _`model/type_map`:
-
-    type_map:
-        | type: ``list``, optional
-        | argument path: ``model/type_map``
-
-        A list of strings. Give the name to each type of atoms. It is noted that the number of atom type of training system must be less than 128 in a GPU environment.
-
-    .. _`model/data_stat_nbatch`:
-
-    data_stat_nbatch:
-        | type: ``int``, optional, default: ``10``
-        | argument path: ``model/data_stat_nbatch``
-
-        The model determines the normalization from the statistics of the data. This key specifies the number of `frames` in each `system` used for statistics.
-
-    .. _`model/data_stat_protect`:
-
-    data_stat_protect:
-        | type: ``float``, optional, default: ``0.01``
-        | argument path: ``model/data_stat_protect``
-
-        Protect parameter for atomic energy regression.
-
-    .. _`model/use_srtab`:
-
-    use_srtab:
-        | type: ``str``, optional
-        | argument path: ``model/use_srtab``
-
-        The table for the short-range pairwise interaction added on top of DP. The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. The first colume is the distance between atoms. The second to the last columes are energies for pairs of certain types. For example we have two atom types, 0 and 1. The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly.
-
-    .. _`model/smin_alpha`:
-
-    smin_alpha:
-        | type: ``float``, optional
-        | argument path: ``model/smin_alpha``
-
-        The short-range tabulated interaction will be swithed according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided.
-
-    .. _`model/sw_rmin`:
-
-    sw_rmin:
-        | type: ``float``, optional
-        | argument path: ``model/sw_rmin``
-
-        The lower boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided.
-
-    .. _`model/sw_rmax`:
-
-    sw_rmax:
-        | type: ``float``, optional
-        | argument path: ``model/sw_rmax``
-
-        The upper boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided.
-
-    .. _`model/type_embedding`:
-
-    type_embedding:
-        | type: ``dict``, optional
-        | argument path: ``model/type_embedding``
-
-        The type embedding.
-
-        .. _`model/type_embedding/neuron`:
-
-        neuron:
-            | type: ``list``, optional, default: ``[2, 4, 8]``
-            | argument path: ``model/type_embedding/neuron``
-
-            Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.
-
-        .. _`model/type_embedding/activation_function`:
-
-        activation_function:
-            | type: ``str``, optional, default: ``tanh``
-            | argument path: ``model/type_embedding/activation_function``
-
-            The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu".
-
-        .. _`model/type_embedding/resnet_dt`:
-
-        resnet_dt:
-            | type: ``bool``, optional, default: ``False``
-            | argument path: ``model/type_embedding/resnet_dt``
-
-            Whether to use a "Timestep" in the skip connection
-
-        .. _`model/type_embedding/precision`:
-
-        precision:
-            | type: ``str``, optional, default: ``float64``
-            | argument path: ``model/type_embedding/precision``
-
-            The precision of the embedding net parameters, supported options are "default", "float16", "float32", "float64".
-
-        .. _`model/type_embedding/trainable`:
-
-        trainable:
-            | type: ``bool``, optional, default: ``True``
-            | argument path: ``model/type_embedding/trainable``
-
-            If the parameters in the embedding net are trainable
-
-        .. _`model/type_embedding/seed`:
-
-        seed:
-            | type: ``int`` | ``NoneType``, optional
-            | argument path: ``model/type_embedding/seed``
-
-            Random seed for parameter initialization
-
-    .. _`model/descriptor`:
-
-    descriptor:
-        | type: ``dict``
-        | argument path: ``model/descriptor``
-
-        The descriptor of atomic environment.
-
-
-        Depending on the value of *type*, different sub args are accepted.
-
-        .. _`model/descriptor/type`:
-
-        type:
-            | type: ``str`` (flag key)
-            | argument path: ``model/descriptor/type``
-            | possible choices: |code:model/descriptor[loc_frame]|_, |code:model/descriptor[se_e2_a]|_, |code:model/descriptor[se_e2_r]|_, |code:model/descriptor[se_e3]|_, |code:model/descriptor[se_a_tpe]|_, |code:model/descriptor[hybrid]|_
-
-            The type of the descritpor. See explanation below.
-
-            - `loc_frame`: Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame.
-
-            - `se_e2_a`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.
-
-            - `se_e2_r`: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor.
-
-            - `se_e3`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor.
-
-            - `se_a_tpe`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Type embedding will be used by this descriptor.
-
-            - `hybrid`: Concatenate of a list of descriptors as a new descriptor.
-
-            .. |code:model/descriptor[loc_frame]| replace:: ``loc_frame``
-            .. _`code:model/descriptor[loc_frame]`: `model/descriptor[loc_frame]`_
-            .. |code:model/descriptor[se_e2_a]| replace:: ``se_e2_a``
-            .. _`code:model/descriptor[se_e2_a]`: `model/descriptor[se_e2_a]`_
-            .. |code:model/descriptor[se_e2_r]| replace:: ``se_e2_r``
-            .. _`code:model/descriptor[se_e2_r]`: `model/descriptor[se_e2_r]`_
-            .. |code:model/descriptor[se_e3]| replace:: ``se_e3``
-            .. _`code:model/descriptor[se_e3]`: `model/descriptor[se_e3]`_
-            .. |code:model/descriptor[se_a_tpe]| replace:: ``se_a_tpe``
-            .. _`code:model/descriptor[se_a_tpe]`: `model/descriptor[se_a_tpe]`_
-            .. |code:model/descriptor[hybrid]| replace:: ``hybrid``
-            .. _`code:model/descriptor[hybrid]`: `model/descriptor[hybrid]`_
-
-        .. |flag:model/descriptor/type| replace:: *type*
-        .. _`flag:model/descriptor/type`: `model/descriptor/type`_
-
-
-        .. _`model/descriptor[loc_frame]`:
-
-        When |flag:model/descriptor/type|_ is set to ``loc_frame``:
-
-        .. _`model/descriptor[loc_frame]/sel_a`:
-
-        sel_a:
-            | type: ``list``
-            | argument path: ``model/descriptor[loc_frame]/sel_a``
-
-            A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_a[i]` gives the selected number of type-i neighbors. The full relative coordinates of the neighbors are used by the descriptor.
-
-        .. _`model/descriptor[loc_frame]/sel_r`:
-
-        sel_r:
-            | type: ``list``
-            | argument path: ``model/descriptor[loc_frame]/sel_r``
-
-            A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_r[i]` gives the selected number of type-i neighbors. Only relative distance of the neighbors are used by the descriptor. sel_a[i] + sel_r[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius.
-
-        .. _`model/descriptor[loc_frame]/rcut`:
-
-        rcut:
-            | type: ``float``, optional, default: ``6.0``
-            | argument path: ``model/descriptor[loc_frame]/rcut``
-
-            The cut-off radius. The default value is 6.0
-
-        .. _`model/descriptor[loc_frame]/axis_rule`:
-
-        axis_rule:
-            | type: ``list``
-            | argument path: ``model/descriptor[loc_frame]/axis_rule``
-
-            A list of integers. The length should be 6 times of the number of types.
-
-            - axis_rule[i*6+0]: class of the atom defining the first axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.
-
-            - axis_rule[i*6+1]: type of the atom defining the first axis of type-i atom.
-
-            - axis_rule[i*6+2]: index of the axis atom defining the first axis. Note that the neighbors with the same class and type are sorted according to their relative distance.
-
-            - axis_rule[i*6+3]: class of the atom defining the first axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.
-
-            - axis_rule[i*6+4]: type of the atom defining the second axis of type-i atom.
-
-            - axis_rule[i*6+5]: class of the atom defining the second axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.
-
-
-        .. _`model/descriptor[se_e2_a]`:
-
-        When |flag:model/descriptor/type|_ is set to ``se_e2_a`` (or its alias ``se_a``):
-
-        .. _`model/descriptor[se_e2_a]/sel`:
-
-        sel:
-            | type: ``list`` | ``str``, optional, default: ``auto``
-            | argument path: ``model/descriptor[se_e2_a]/sel``
-
-            This parameter set the number of selected neighbors for each type of atom. It can be:
-
-                - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.
-
-                - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".
-
-        .. _`model/descriptor[se_e2_a]/rcut`:
-
-        rcut:
-            | type: ``float``, optional, default: ``6.0``
-            | argument path: ``model/descriptor[se_e2_a]/rcut``
-
-            The cut-off radius.
-
-        .. _`model/descriptor[se_e2_a]/rcut_smth`:
-
-        rcut_smth:
-            | type: ``float``, optional, default: ``0.5``
-            | argument path: ``model/descriptor[se_e2_a]/rcut_smth``
-
-            Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`
-
-        .. _`model/descriptor[se_e2_a]/neuron`:
-
-        neuron:
-            | type: ``list``, optional, default: ``[10, 20, 40]``
-            | argument path: ``model/descriptor[se_e2_a]/neuron``
-
-            Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.
-
-        .. _`model/descriptor[se_e2_a]/axis_neuron`:
-
-        axis_neuron:
-            | type: ``int``, optional, default: ``4``, alias: *n_axis_neuron*
-            | argument path: ``model/descriptor[se_e2_a]/axis_neuron``
-
-            Size of the submatrix of G (embedding matrix).
-
-        .. _`model/descriptor[se_e2_a]/activation_function`:
-
-        activation_function:
-            | type: ``str``, optional, default: ``tanh``
-            | argument path: ``model/descriptor[se_e2_a]/activation_function``
-
-            The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu".
-
-        .. _`model/descriptor[se_e2_a]/resnet_dt`:
-
-        resnet_dt:
-            | type: ``bool``, optional, default: ``False``
-            | argument path: ``model/descriptor[se_e2_a]/resnet_dt``
-
-            Whether to use a "Timestep" in the skip connection
-
-        .. _`model/descriptor[se_e2_a]/type_one_side`:
-
-        type_one_side:
-            | type: ``bool``, optional, default: ``False``
-            | argument path: ``model/descriptor[se_e2_a]/type_one_side``
-
-            Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets
-
-        .. _`model/descriptor[se_e2_a]/precision`:
-
-        precision:
-            | type: ``str``, optional, default: ``float64``
-            | argument path: ``model/descriptor[se_e2_a]/precision``
-
-            The precision of the embedding net parameters, supported options are "default", "float16", "float32", "float64".
-
-        .. _`model/descriptor[se_e2_a]/trainable`:
-
-        trainable:
-            | type: ``bool``, optional, default: ``True``
-            | argument path: ``model/descriptor[se_e2_a]/trainable``
-
-            If the parameters in the embedding net is trainable
-
-        .. _`model/descriptor[se_e2_a]/seed`:
-
-        seed:
-            | type: ``int`` | ``NoneType``, optional
-            | argument path: ``model/descriptor[se_e2_a]/seed``
-
-            Random seed for parameter initialization
-
-        .. _`model/descriptor[se_e2_a]/exclude_types`:
-
-        exclude_types:
-            | type: ``list``, optional, default: ``[]``
-            | argument path: ``model/descriptor[se_e2_a]/exclude_types``
-
-            The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1.
-
-        .. _`model/descriptor[se_e2_a]/set_davg_zero`:
-
-        set_davg_zero:
-            | type: ``bool``, optional, default: ``False``
-            | argument path: ``model/descriptor[se_e2_a]/set_davg_zero``
-
-            Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used
-
-
-        .. _`model/descriptor[se_e2_r]`:
-
-        When |flag:model/descriptor/type|_ is set to ``se_e2_r`` (or its alias ``se_r``):
-
-        .. _`model/descriptor[se_e2_r]/sel`:
-
-        sel:
-            | type: ``list`` | ``str``, optional, default: ``auto``
-            | argument path: ``model/descriptor[se_e2_r]/sel``
-
-            This parameter set the number of selected neighbors for each type of atom. It can be:
-
-                - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.
-
-                - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".
-
-        .. _`model/descriptor[se_e2_r]/rcut`:
-
-        rcut:
-            | type: ``float``, optional, default: ``6.0``
-            | argument path: ``model/descriptor[se_e2_r]/rcut``
-
-            The cut-off radius.
-
-        .. _`model/descriptor[se_e2_r]/rcut_smth`:
-
-        rcut_smth:
-            | type: ``float``, optional, default: ``0.5``
-            | argument path: ``model/descriptor[se_e2_r]/rcut_smth``
-
-            Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`
-
-        .. _`model/descriptor[se_e2_r]/neuron`:
-
-        neuron:
-            | type: ``list``, optional, default: ``[10, 20, 40]``
-            | argument path: ``model/descriptor[se_e2_r]/neuron``
-
-            Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.
-
-        .. _`model/descriptor[se_e2_r]/activation_function`:
-
-        activation_function:
-            | type: ``str``, optional, default: ``tanh``
-            | argument path: ``model/descriptor[se_e2_r]/activation_function``
-
-            The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu".
-
-        .. _`model/descriptor[se_e2_r]/resnet_dt`:
-
-        resnet_dt:
-            | type: ``bool``, optional, default: ``False``
-            | argument path: ``model/descriptor[se_e2_r]/resnet_dt``
-
-            Whether to use a "Timestep" in the skip connection
-
-        .. _`model/descriptor[se_e2_r]/type_one_side`:
-
-        type_one_side:
-            | type: ``bool``, optional, default: ``False``
-            | argument path: ``model/descriptor[se_e2_r]/type_one_side``
-
-            Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets
-
-        .. _`model/descriptor[se_e2_r]/precision`:
-
-        precision:
-            | type: ``str``, optional, default: ``float64``
-            | argument path: ``model/descriptor[se_e2_r]/precision``
-
-            The precision of the embedding net parameters, supported options are "default", "float16", "float32", "float64".
-
-        .. _`model/descriptor[se_e2_r]/trainable`:
-
-        trainable:
-            | type: ``bool``, optional, default: ``True``
-            | argument path: ``model/descriptor[se_e2_r]/trainable``
-
-            If the parameters in the embedding net are trainable
-
-        .. _`model/descriptor[se_e2_r]/seed`:
-
-        seed:
-            | type: ``int`` | ``NoneType``, optional
-            | argument path: ``model/descriptor[se_e2_r]/seed``
-
-            Random seed for parameter initialization
-
-        .. _`model/descriptor[se_e2_r]/exclude_types`:
-
-        exclude_types:
-            | type: ``list``, optional, default: ``[]``
-            | argument path: ``model/descriptor[se_e2_r]/exclude_types``
-
-            The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1.
-
-        .. _`model/descriptor[se_e2_r]/set_davg_zero`:
-
-        set_davg_zero:
-            | type: ``bool``, optional, default: ``False``
-            | argument path: ``model/descriptor[se_e2_r]/set_davg_zero``
-
-            Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used
-
-
-        .. _`model/descriptor[se_e3]`:
-
-        When |flag:model/descriptor/type|_ is set to ``se_e3`` (or its aliases ``se_at``, ``se_a_3be``, ``se_t``):
-
-        .. _`model/descriptor[se_e3]/sel`:
-
-        sel:
-            | type: ``list`` | ``str``, optional, default: ``auto``
-            | argument path: ``model/descriptor[se_e3]/sel``
-
-            This parameter set the number of selected neighbors for each type of atom. It can be:
-
-                - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.
-
-                - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".
-
-        .. _`model/descriptor[se_e3]/rcut`:
-
-        rcut:
-            | type: ``float``, optional, default: ``6.0``
-            | argument path: ``model/descriptor[se_e3]/rcut``
-
-            The cut-off radius.
-
-        .. _`model/descriptor[se_e3]/rcut_smth`:
-
-        rcut_smth:
-            | type: ``float``, optional, default: ``0.5``
-            | argument path: ``model/descriptor[se_e3]/rcut_smth``
-
-            Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`
-
-        .. _`model/descriptor[se_e3]/neuron`:
-
-        neuron:
-            | type: ``list``, optional, default: ``[10, 20, 40]``
-            | argument path: ``model/descriptor[se_e3]/neuron``
-
-            Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.
-
-        .. _`model/descriptor[se_e3]/activation_function`:
-
-        activation_function:
-            | type: ``str``, optional, default: ``tanh``
-            | argument path: ``model/descriptor[se_e3]/activation_function``
-
-            The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu".
-
-        .. _`model/descriptor[se_e3]/resnet_dt`:
-
-        resnet_dt:
-            | type: ``bool``, optional, default: ``False``
-            | argument path: ``model/descriptor[se_e3]/resnet_dt``
-
-            Whether to use a "Timestep" in the skip connection
-
-        .. _`model/descriptor[se_e3]/precision`:
-
-        precision:
-            | type: ``str``, optional, default: ``float64``
-            | argument path: ``model/descriptor[se_e3]/precision``
-
-            The precision of the embedding net parameters, supported options are "default", "float16", "float32", "float64".
-
-        .. _`model/descriptor[se_e3]/trainable`:
-
-        trainable:
-            | type: ``bool``, optional, default: ``True``
-            | argument path: ``model/descriptor[se_e3]/trainable``
-
-            If the parameters in the embedding net are trainable
-
-        .. _`model/descriptor[se_e3]/seed`:
-
-        seed:
-            | type: ``int`` | ``NoneType``, optional
-            | argument path: ``model/descriptor[se_e3]/seed``
-
-            Random seed for parameter initialization
-
-        .. _`model/descriptor[se_e3]/set_davg_zero`:
-
-        set_davg_zero:
-            | type: ``bool``, optional, default: ``False``
-            | argument path: ``model/descriptor[se_e3]/set_davg_zero``
-
-            Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used
-
-
-        .. _`model/descriptor[se_a_tpe]`:
-
-        When |flag:model/descriptor/type|_ is set to ``se_a_tpe`` (or its alias ``se_a_ebd``):
-
-        .. _`model/descriptor[se_a_tpe]/sel`:
-
-        sel:
-            | type: ``list`` | ``str``, optional, default: ``auto``
-            | argument path: ``model/descriptor[se_a_tpe]/sel``
-
-            This parameter set the number of selected neighbors for each type of atom. It can be:
-
-                - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.
-
-                - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".
-
-        .. _`model/descriptor[se_a_tpe]/rcut`:
-
-        rcut:
-            | type: ``float``, optional, default: ``6.0``
-            | argument path: ``model/descriptor[se_a_tpe]/rcut``
-
-            The cut-off radius.
-
-        .. _`model/descriptor[se_a_tpe]/rcut_smth`:
-
-        rcut_smth:
-            | type: ``float``, optional, default: ``0.5``
-            | argument path: ``model/descriptor[se_a_tpe]/rcut_smth``
-
-            Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`
-
-        .. _`model/descriptor[se_a_tpe]/neuron`:
-
-        neuron:
-            | type: ``list``, optional, default: ``[10, 20, 40]``
-            | argument path: ``model/descriptor[se_a_tpe]/neuron``
-
-            Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.
-
-        .. _`model/descriptor[se_a_tpe]/axis_neuron`:
-
-        axis_neuron:
-            | type: ``int``, optional, default: ``4``, alias: *n_axis_neuron*
-            | argument path: ``model/descriptor[se_a_tpe]/axis_neuron``
-
-            Size of the submatrix of G (embedding matrix).
-
-        .. _`model/descriptor[se_a_tpe]/activation_function`:
-
-        activation_function:
-            | type: ``str``, optional, default: ``tanh``
-            | argument path: ``model/descriptor[se_a_tpe]/activation_function``
-
-            The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu".
-
-        .. _`model/descriptor[se_a_tpe]/resnet_dt`:
-
-        resnet_dt:
-            | type: ``bool``, optional, default: ``False``
-            | argument path: ``model/descriptor[se_a_tpe]/resnet_dt``
-
-            Whether to use a "Timestep" in the skip connection
-
-        .. _`model/descriptor[se_a_tpe]/type_one_side`:
-
-        type_one_side:
-            | type: ``bool``, optional, default: ``False``
-            | argument path: ``model/descriptor[se_a_tpe]/type_one_side``
-
-            Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets
-
-        .. _`model/descriptor[se_a_tpe]/precision`:
-
-        precision:
-            | type: ``str``, optional, default: ``float64``
-            | argument path: ``model/descriptor[se_a_tpe]/precision``
-
-            The precision of the embedding net parameters, supported options are "default", "float16", "float32", "float64".
-
-        .. _`model/descriptor[se_a_tpe]/trainable`:
-
-        trainable:
-            | type: ``bool``, optional, default: ``True``
-            | argument path: ``model/descriptor[se_a_tpe]/trainable``
-
-            If the parameters in the embedding net is trainable
-
-        .. _`model/descriptor[se_a_tpe]/seed`:
-
-        seed:
-            | type: ``int`` | ``NoneType``, optional
-            | argument path: ``model/descriptor[se_a_tpe]/seed``
-
-            Random seed for parameter initialization
-
-        .. _`model/descriptor[se_a_tpe]/exclude_types`:
-
-        exclude_types:
-            | type: ``list``, optional, default: ``[]``
-            | argument path: ``model/descriptor[se_a_tpe]/exclude_types``
-
-            The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1.
-
-        .. _`model/descriptor[se_a_tpe]/set_davg_zero`:
-
-        set_davg_zero:
-            | type: ``bool``, optional, default: ``False``
-            | argument path: ``model/descriptor[se_a_tpe]/set_davg_zero``
-
-            Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used
-
-        .. _`model/descriptor[se_a_tpe]/type_nchanl`:
-
-        type_nchanl:
-            | type: ``int``, optional, default: ``4``
-            | argument path: ``model/descriptor[se_a_tpe]/type_nchanl``
-
-            number of channels for type embedding
-
-        .. _`model/descriptor[se_a_tpe]/type_nlayer`:
-
-        type_nlayer:
-            | type: ``int``, optional, default: ``2``
-            | argument path: ``model/descriptor[se_a_tpe]/type_nlayer``
-
-            number of hidden layers of type embedding net
-
-        .. _`model/descriptor[se_a_tpe]/numb_aparam`:
-
-        numb_aparam:
-            | type: ``int``, optional, default: ``0``
-            | argument path: ``model/descriptor[se_a_tpe]/numb_aparam``
-
-            dimension of atomic parameter. if set to a value > 0, the atomic parameters are embedded.
-
-
-        .. _`model/descriptor[hybrid]`:
-
-        When |flag:model/descriptor/type|_ is set to ``hybrid``:
-
-        .. _`model/descriptor[hybrid]/list`:
-
-        list:
-            | type: ``list``
-            | argument path: ``model/descriptor[hybrid]/list``
-
-            A list of descriptor definitions
-
-    .. _`model/fitting_net`:
-
-    fitting_net:
-        | type: ``dict``
-        | argument path: ``model/fitting_net``
-
-        The fitting of physical properties.
-
-
-        Depending on the value of *type*, different sub args are accepted.
-
-        .. _`model/fitting_net/type`:
-
-        type:
-            | type: ``str`` (flag key), default: ``ener``
-            | argument path: ``model/fitting_net/type``
-            | possible choices: |code:model/fitting_net[ener]|_, |code:model/fitting_net[dipole]|_, |code:model/fitting_net[polar]|_
-
-            The type of the fitting. See explanation below.
-
-            - `ener`: Fit an energy model (potential energy surface).
-
-            - `dipole`: Fit an atomic dipole model. Global dipole labels or atomic dipole labels for all the selected atoms (see `sel_type`) should be provided by `dipole.npy` in each data system. The file either has number of frames lines and 3 times of number of selected atoms columns, or has number of frames lines and 3 columns. See `loss` parameter.
-
-            - `polar`: Fit an atomic polarizability model. Global polarizazbility labels or atomic polarizability labels for all the selected atoms (see `sel_type`) should be provided by `polarizability.npy` in each data system. The file eith has number of frames lines and 9 times of number of selected atoms columns, or has number of frames lines and 9 columns. See `loss` parameter.
-
-
-
-            .. |code:model/fitting_net[ener]| replace:: ``ener``
-            .. _`code:model/fitting_net[ener]`: `model/fitting_net[ener]`_
-            .. |code:model/fitting_net[dipole]| replace:: ``dipole``
-            .. _`code:model/fitting_net[dipole]`: `model/fitting_net[dipole]`_
-            .. |code:model/fitting_net[polar]| replace:: ``polar``
-            .. _`code:model/fitting_net[polar]`: `model/fitting_net[polar]`_
-
-        .. |flag:model/fitting_net/type| replace:: *type*
-        .. _`flag:model/fitting_net/type`: `model/fitting_net/type`_
-
-
-        .. _`model/fitting_net[ener]`:
-
-        When |flag:model/fitting_net/type|_ is set to ``ener``:
-
-        .. _`model/fitting_net[ener]/numb_fparam`:
-
-        numb_fparam:
-            | type: ``int``, optional, default: ``0``
-            | argument path: ``model/fitting_net[ener]/numb_fparam``
-
-            The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams.
-
-        .. _`model/fitting_net[ener]/numb_aparam`:
-
-        numb_aparam:
-            | type: ``int``, optional, default: ``0``
-            | argument path: ``model/fitting_net[ener]/numb_aparam``
-
-            The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams.
-
-        .. _`model/fitting_net[ener]/neuron`:
-
-        neuron:
-            | type: ``list``, optional, default: ``[120, 120, 120]``, alias: *n_neuron*
-            | argument path: ``model/fitting_net[ener]/neuron``
-
-            The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built.
-
-        .. _`model/fitting_net[ener]/activation_function`:
-
-        activation_function:
-            | type: ``str``, optional, default: ``tanh``
-            | argument path: ``model/fitting_net[ener]/activation_function``
-
-            The activation function in the fitting net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu".
-
-        .. _`model/fitting_net[ener]/precision`:
-
-        precision:
-            | type: ``str``, optional, default: ``float64``
-            | argument path: ``model/fitting_net[ener]/precision``
-
-            The precision of the fitting net parameters, supported options are "default", "float16", "float32", "float64".
-
-        .. _`model/fitting_net[ener]/resnet_dt`:
-
-        resnet_dt:
-            | type: ``bool``, optional, default: ``True``
-            | argument path: ``model/fitting_net[ener]/resnet_dt``
-
-            Whether to use a "Timestep" in the skip connection
-
-        .. _`model/fitting_net[ener]/trainable`:
-
-        trainable:
-            | type: ``list`` | ``bool``, optional, default: ``True``
-            | argument path: ``model/fitting_net[ener]/trainable``
-
-            Whether the parameters in the fitting net are trainable. This option can be
-
-            - bool: True if all parameters of the fitting net are trainable, False otherwise.
-
-            - list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of tihs list should be equal to len(`neuron`)+1.
-
-        .. _`model/fitting_net[ener]/rcond`:
-
-        rcond:
-            | type: ``float``, optional, default: ``0.001``
-            | argument path: ``model/fitting_net[ener]/rcond``
-
-            The condition number used to determine the inital energy shift for each type of atoms.
-
-        .. _`model/fitting_net[ener]/seed`:
-
-        seed:
-            | type: ``int`` | ``NoneType``, optional
-            | argument path: ``model/fitting_net[ener]/seed``
-
-            Random seed for parameter initialization of the fitting net
-
-        .. _`model/fitting_net[ener]/atom_ener`:
-
-        atom_ener:
-            | type: ``list``, optional, default: ``[]``
-            | argument path: ``model/fitting_net[ener]/atom_ener``
-
-            Specify the atomic energy in vacuum for each type
-
-
-        .. _`model/fitting_net[dipole]`:
-
-        When |flag:model/fitting_net/type|_ is set to ``dipole``:
-
-        .. _`model/fitting_net[dipole]/neuron`:
-
-        neuron:
-            | type: ``list``, optional, default: ``[120, 120, 120]``, alias: *n_neuron*
-            | argument path: ``model/fitting_net[dipole]/neuron``
-
-            The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built.
-
-        .. _`model/fitting_net[dipole]/activation_function`:
-
-        activation_function:
-            | type: ``str``, optional, default: ``tanh``
-            | argument path: ``model/fitting_net[dipole]/activation_function``
-
-            The activation function in the fitting net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu".
-
-        .. _`model/fitting_net[dipole]/resnet_dt`:
-
-        resnet_dt:
-            | type: ``bool``, optional, default: ``True``
-            | argument path: ``model/fitting_net[dipole]/resnet_dt``
-
-            Whether to use a "Timestep" in the skip connection
-
-        .. _`model/fitting_net[dipole]/precision`:
-
-        precision:
-            | type: ``str``, optional, default: ``float64``
-            | argument path: ``model/fitting_net[dipole]/precision``
-
-            The precision of the fitting net parameters, supported options are "default", "float16", "float32", "float64".
-
-        .. _`model/fitting_net[dipole]/sel_type`:
-
-        sel_type:
-            | type: ``list`` | ``int`` | ``NoneType``, optional, alias: *dipole_type*
-            | argument path: ``model/fitting_net[dipole]/sel_type``
-
-            The atom types for which the atomic dipole will be provided. If not set, all types will be selected.
-
-        .. _`model/fitting_net[dipole]/seed`:
-
-        seed:
-            | type: ``int`` | ``NoneType``, optional
-            | argument path: ``model/fitting_net[dipole]/seed``
-
-            Random seed for parameter initialization of the fitting net
-
-
-        .. _`model/fitting_net[polar]`:
-
-        When |flag:model/fitting_net/type|_ is set to ``polar``:
-
-        .. _`model/fitting_net[polar]/neuron`:
-
-        neuron:
-            | type: ``list``, optional, default: ``[120, 120, 120]``, alias: *n_neuron*
-            | argument path: ``model/fitting_net[polar]/neuron``
-
-            The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built.
-
-        .. _`model/fitting_net[polar]/activation_function`:
-
-        activation_function:
-            | type: ``str``, optional, default: ``tanh``
-            | argument path: ``model/fitting_net[polar]/activation_function``
-
-            The activation function in the fitting net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu".
-
-        .. _`model/fitting_net[polar]/resnet_dt`:
-
-        resnet_dt:
-            | type: ``bool``, optional, default: ``True``
-            | argument path: ``model/fitting_net[polar]/resnet_dt``
-
-            Whether to use a "Timestep" in the skip connection
-
-        .. _`model/fitting_net[polar]/precision`:
-
-        precision:
-            | type: ``str``, optional, default: ``float64``
-            | argument path: ``model/fitting_net[polar]/precision``
-
-            The precision of the fitting net parameters, supported options are "default", "float16", "float32", "float64".
-
-        .. _`model/fitting_net[polar]/fit_diag`:
-
-        fit_diag:
-            | type: ``bool``, optional, default: ``True``
-            | argument path: ``model/fitting_net[polar]/fit_diag``
-
-            Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to normal polarizability matrix by contracting with the rotation matrix.
-
-        .. _`model/fitting_net[polar]/scale`:
-
-        scale:
-            | type: ``float`` | ``list``, optional, default: ``1.0``
-            | argument path: ``model/fitting_net[polar]/scale``
-
-            The output of the fitting net (polarizability matrix) will be scaled by ``scale``
-
-        .. _`model/fitting_net[polar]/shift_diag`:
-
-        shift_diag:
-            | type: ``bool``, optional, default: ``True``
-            | argument path: ``model/fitting_net[polar]/shift_diag``
-
-            Whether to shift the diagonal of polar, which is beneficial to training. Default is true.
-
-        .. _`model/fitting_net[polar]/sel_type`:
-
-        sel_type:
-            | type: ``list`` | ``int`` | ``NoneType``, optional, alias: *pol_type*
-            | argument path: ``model/fitting_net[polar]/sel_type``
-
-            The atom types for which the atomic polarizability will be provided. If not set, all types will be selected.
-
-        .. _`model/fitting_net[polar]/seed`:
-
-        seed:
-            | type: ``int`` | ``NoneType``, optional
-            | argument path: ``model/fitting_net[polar]/seed``
-
-            Random seed for parameter initialization of the fitting net
-
-    .. _`model/modifier`:
-
-    modifier:
-        | type: ``dict``, optional
-        | argument path: ``model/modifier``
-
-        The modifier of model output.
-
-
-        Depending on the value of *type*, different sub args are accepted.
-
-        .. _`model/modifier/type`:
-
-        type:
-            | type: ``str`` (flag key)
-            | argument path: ``model/modifier/type``
-            | possible choices: |code:model/modifier[dipole_charge]|_
-
-            The type of modifier. See explanation below.
-
-            -`dipole_charge`: Use WFCC to model the electronic structure of the system. Correct the long-range interaction
-
-            .. |code:model/modifier[dipole_charge]| replace:: ``dipole_charge``
-            .. _`code:model/modifier[dipole_charge]`: `model/modifier[dipole_charge]`_
-
-        .. |flag:model/modifier/type| replace:: *type*
-        .. _`flag:model/modifier/type`: `model/modifier/type`_
-
-
-        .. _`model/modifier[dipole_charge]`:
-
-        When |flag:model/modifier/type|_ is set to ``dipole_charge``:
-
-        .. _`model/modifier[dipole_charge]/model_name`:
-
-        model_name:
-            | type: ``str``
-            | argument path: ``model/modifier[dipole_charge]/model_name``
-
-            The name of the frozen dipole model file.
-
-        .. _`model/modifier[dipole_charge]/model_charge_map`:
-
-        model_charge_map:
-            | type: ``list``
-            | argument path: ``model/modifier[dipole_charge]/model_charge_map``
-
-            The charge of the WFCC. The list length should be the same as the `sel_type <model/fitting_net[dipole]/sel_type_>`_.
-
-        .. _`model/modifier[dipole_charge]/sys_charge_map`:
-
-        sys_charge_map:
-            | type: ``list``
-            | argument path: ``model/modifier[dipole_charge]/sys_charge_map``
-
-            The charge of real atoms. The list length should be the same as the `type_map <model/type_map_>`_
-
-        .. _`model/modifier[dipole_charge]/ewald_beta`:
-
-        ewald_beta:
-            | type: ``float``, optional, default: ``0.4``
-            | argument path: ``model/modifier[dipole_charge]/ewald_beta``
-
-            The splitting parameter of Ewald sum. Unit is A^-1
-
-        .. _`model/modifier[dipole_charge]/ewald_h`:
-
-        ewald_h:
-            | type: ``float``, optional, default: ``1.0``
-            | argument path: ``model/modifier[dipole_charge]/ewald_h``
-
-            The grid spacing of the FFT grid. Unit is A
-
-    .. _`model/compress`:
-
-    compress:
-        | type: ``dict``, optional
-        | argument path: ``model/compress``
-
-        Model compression configurations
-
-
-        Depending on the value of *type*, different sub args are accepted.
-
-        .. _`model/compress/type`:
-
-        type:
-            | type: ``str`` (flag key), default: ``se_e2_a``
-            | argument path: ``model/compress/type``
-            | possible choices: |code:model/compress[se_e2_a]|_
-
-            The type of model compression, which should be consistent with the descriptor type.
-
-            .. |code:model/compress[se_e2_a]| replace:: ``se_e2_a``
-            .. _`code:model/compress[se_e2_a]`: `model/compress[se_e2_a]`_
-
-        .. |flag:model/compress/type| replace:: *type*
-        .. _`flag:model/compress/type`: `model/compress/type`_
-
-
-        .. _`model/compress[se_e2_a]`:
-
-        When |flag:model/compress/type|_ is set to ``se_e2_a`` (or its alias ``se_a``):
-
-        .. _`model/compress[se_e2_a]/compress`:
-
-        compress:
-            | type: ``bool``
-            | argument path: ``model/compress[se_e2_a]/compress``
-
-            The name of the frozen model file.
-
-        .. _`model/compress[se_e2_a]/model_file`:
-
-        model_file:
-            | type: ``str``
-            | argument path: ``model/compress[se_e2_a]/model_file``
-
-            The input model file, which will be compressed by the DeePMD-kit.
-
-        .. _`model/compress[se_e2_a]/table_config`:
-
-        table_config:
-            | type: ``list``
-            | argument path: ``model/compress[se_e2_a]/table_config``
-
-            The arguments of model compression, including extrapolate(scale of model extrapolation), stride(uniform stride of tabulation's first and second table), and frequency(frequency of tabulation overflow check).
-
-        .. _`model/compress[se_e2_a]/min_nbor_dist`:
-
-        min_nbor_dist:
-            | type: ``float``
-            | argument path: ``model/compress[se_e2_a]/min_nbor_dist``
-
-            The nearest distance between neighbor atoms saved in the frozen model.
-
-
-.. _`loss`:
-
-loss:
-    | type: ``dict``, optional
-    | argument path: ``loss``
-
-    The definition of loss function. The loss type should be set to `tensor`, `ener` or left unset.
-    \.
-
-
-    Depending on the value of *type*, different sub args are accepted.
-
-    .. _`loss/type`:
-
-    type:
-        | type: ``str`` (flag key), default: ``ener``
-        | argument path: ``loss/type``
-        | possible choices: |code:loss[ener]|_, |code:loss[tensor]|_
-
-        The type of the loss. When the fitting type is `ener`, the loss type should be set to `ener` or left unset. When the fitting type is `dipole` or `polar`, the loss type should be set to `tensor`.
-        \.
-
-        .. |code:loss[ener]| replace:: ``ener``
-        .. _`code:loss[ener]`: `loss[ener]`_
-        .. |code:loss[tensor]| replace:: ``tensor``
-        .. _`code:loss[tensor]`: `loss[tensor]`_
-
-    .. |flag:loss/type| replace:: *type*
-    .. _`flag:loss/type`: `loss/type`_
-
-
-    .. _`loss[ener]`:
-
-    When |flag:loss/type|_ is set to ``ener``:
-
-    .. _`loss[ener]/start_pref_e`:
-
-    start_pref_e:
-        | type: ``float`` | ``int``, optional, default: ``0.02``
-        | argument path: ``loss[ener]/start_pref_e``
-
-        The prefactor of energy loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the energy label should be provided by file energy.npy in each data system. If both start_pref_energy and limit_pref_energy are set to 0, then the energy will be ignored.
-
-    .. _`loss[ener]/limit_pref_e`:
-
-    limit_pref_e:
-        | type: ``float`` | ``int``, optional, default: ``1.0``
-        | argument path: ``loss[ener]/limit_pref_e``
-
-        The prefactor of energy loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity.
-
-    .. _`loss[ener]/start_pref_f`:
-
-    start_pref_f:
-        | type: ``float`` | ``int``, optional, default: ``1000``
-        | argument path: ``loss[ener]/start_pref_f``
-
-        The prefactor of force loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the force label should be provided by file force.npy in each data system. If both start_pref_force and limit_pref_force are set to 0, then the force will be ignored.
-
-    .. _`loss[ener]/limit_pref_f`:
-
-    limit_pref_f:
-        | type: ``float`` | ``int``, optional, default: ``1.0``
-        | argument path: ``loss[ener]/limit_pref_f``
-
-        The prefactor of force loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity.
-
-    .. _`loss[ener]/start_pref_v`:
-
-    start_pref_v:
-        | type: ``float`` | ``int``, optional, default: ``0.0``
-        | argument path: ``loss[ener]/start_pref_v``
-
-        The prefactor of virial loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the virial label should be provided by file virial.npy in each data system. If both start_pref_virial and limit_pref_virial are set to 0, then the virial will be ignored.
-
-    .. _`loss[ener]/limit_pref_v`:
-
-    limit_pref_v:
-        | type: ``float`` | ``int``, optional, default: ``0.0``
-        | argument path: ``loss[ener]/limit_pref_v``
-
-        The prefactor of virial loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity.
-
-    .. _`loss[ener]/start_pref_ae`:
-
-    start_pref_ae:
-        | type: ``float`` | ``int``, optional, default: ``0.0``
-        | argument path: ``loss[ener]/start_pref_ae``
-
-        The prefactor of atom_ener loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the atom_ener label should be provided by file atom_ener.npy in each data system. If both start_pref_atom_ener and limit_pref_atom_ener are set to 0, then the atom_ener will be ignored.
-
-    .. _`loss[ener]/limit_pref_ae`:
-
-    limit_pref_ae:
-        | type: ``float`` | ``int``, optional, default: ``0.0``
-        | argument path: ``loss[ener]/limit_pref_ae``
-
-        The prefactor of atom_ener loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity.
-
-    .. _`loss[ener]/relative_f`:
-
-    relative_f:
-        | type: ``float`` | ``NoneType``, optional
-        | argument path: ``loss[ener]/relative_f``
-
-        If provided, relative force error will be used in the loss. The difference of force will be normalized by the magnitude of the force in the label with a shift given by `relative_f`, i.e. DF_i / ( || F || + relative_f ) with DF denoting the difference between prediction and label and || F || denoting the L2 norm of the label.
-
-
-    .. _`loss[tensor]`:
-
-    When |flag:loss/type|_ is set to ``tensor``:
-
-    .. _`loss[tensor]/pref`:
-
-    pref:
-        | type: ``float`` | ``int``
-        | argument path: ``loss[tensor]/pref``
-
-        The prefactor of the weight of global loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to global label, i.e. 'polarizability.npy` or `dipole.npy`, whose shape should be #frames x [9 or 3]. If it's larger than 0.0, this npy should be included.
-
-    .. _`loss[tensor]/pref_atomic`:
-
-    pref_atomic:
-        | type: ``float`` | ``int``
-        | argument path: ``loss[tensor]/pref_atomic``
-
-        The prefactor of the weight of atomic loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to atomic label, i.e. `atomic_polarizability.npy` or `atomic_dipole.npy`, whose shape should be #frames x ([9 or 3] x #selected atoms). If it's larger than 0.0, this npy should be included. Both `pref` and `pref_atomic` should be provided, and either can be set to 0.0.
-
-
-.. _`learning_rate`:
-
-learning_rate:
-    | type: ``dict``
-    | argument path: ``learning_rate``
-
-    The definitio of learning rate
-
-
-    Depending on the value of *type*, different sub args are accepted.
-
-    .. _`learning_rate/type`:
-
-    type:
-        | type: ``str`` (flag key), default: ``exp``
-        | argument path: ``learning_rate/type``
-        | possible choices: |code:learning_rate[exp]|_
-
-        The type of the learning rate.
-
-        .. |code:learning_rate[exp]| replace:: ``exp``
-        .. _`code:learning_rate[exp]`: `learning_rate[exp]`_
-
-    .. |flag:learning_rate/type| replace:: *type*
-    .. _`flag:learning_rate/type`: `learning_rate/type`_
-
-
-    .. _`learning_rate[exp]`:
-
-    When |flag:learning_rate/type|_ is set to ``exp``:
-
-    .. _`learning_rate[exp]/start_lr`:
-
-    start_lr:
-        | type: ``float``, optional, default: ``0.001``
-        | argument path: ``learning_rate[exp]/start_lr``
-
-        The learning rate the start of the training.
-
-    .. _`learning_rate[exp]/stop_lr`:
-
-    stop_lr:
-        | type: ``float``, optional, default: ``1e-08``
-        | argument path: ``learning_rate[exp]/stop_lr``
-
-        The desired learning rate at the end of the training.
-
-    .. _`learning_rate[exp]/decay_steps`:
-
-    decay_steps:
-        | type: ``int``, optional, default: ``5000``
-        | argument path: ``learning_rate[exp]/decay_steps``
-
-        The learning rate is decaying every this number of training steps.
-
-
-.. _`training`:
-
-training:
-    | type: ``dict``
-    | argument path: ``training``
-
-    The training options.
-
-    .. _`training/training_data`:
-
-    training_data:
-        | type: ``dict``
-        | argument path: ``training/training_data``
-
-        Configurations of training data.
-
-        .. _`training/training_data/systems`:
-
-        systems:
-            | type: ``list`` | ``str``
-            | argument path: ``training/training_data/systems``
-
-            The data systems for training. This key can be provided with a list that specifies the systems, or be provided with a string by which the prefix of all systems are given and the list of the systems is automatically generated.
-
-        .. _`training/training_data/set_prefix`:
-
-        set_prefix:
-            | type: ``str``, optional, default: ``set``
-            | argument path: ``training/training_data/set_prefix``
-
-            The prefix of the sets in the `systems <training/training_data/systems_>`_.
-
-        .. _`training/training_data/batch_size`:
-
-        batch_size:
-            | type: ``list`` | ``int`` | ``str``, optional, default: ``auto``
-            | argument path: ``training/training_data/batch_size``
-
-            This key can be
-
-            - list: the length of which is the same as the `systems <training/training_data/systems_>`_. The batch size of each system is given by the elements of the list.
-
-            - int: all `systems <training/training_data/systems_>`_ use the same batch size.
-
-            - string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.
-
-            - string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.
-
-        .. _`training/training_data/auto_prob`:
-
-        auto_prob:
-            | type: ``str``, optional, default: ``prob_sys_size``, alias: *auto_prob_style*
-            | argument path: ``training/training_data/auto_prob``
-
-            Determine the probability of systems automatically. The method is assigned by this key and can be
-
-            - "prob_uniform"  : the probability all the systems are equal, namely 1.0/self.get_nsystems()
-
-            - "prob_sys_size" : the probability of a system is proportional to the number of batches in the system
-
-            - "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.
-
-        .. _`training/training_data/sys_probs`:
-
-        sys_probs:
-            | type: ``list`` | ``NoneType``, optional, default: ``None``, alias: *sys_weights*
-            | argument path: ``training/training_data/sys_probs``
-
-            A list of float if specified. Should be of the same length as `systems`, specifying the probability of each system.
-
-    .. _`training/validation_data`:
-
-    validation_data:
-        | type: ``dict`` | ``NoneType``, optional, default: ``None``
-        | argument path: ``training/validation_data``
-
-        Configurations of validation data. Similar to that of training data, except that a `numb_btch` argument may be configured
-
-        .. _`training/validation_data/systems`:
-
-        systems:
-            | type: ``list`` | ``str``
-            | argument path: ``training/validation_data/systems``
-
-            The data systems for validation. This key can be provided with a list that specifies the systems, or be provided with a string by which the prefix of all systems are given and the list of the systems is automatically generated.
-
-        .. _`training/validation_data/set_prefix`:
-
-        set_prefix:
-            | type: ``str``, optional, default: ``set``
-            | argument path: ``training/validation_data/set_prefix``
-
-            The prefix of the sets in the `systems <training/validation_data/systems_>`_.
-
-        .. _`training/validation_data/batch_size`:
-
-        batch_size:
-            | type: ``list`` | ``int`` | ``str``, optional, default: ``auto``
-            | argument path: ``training/validation_data/batch_size``
-
-            This key can be
-
-            - list: the length of which is the same as the `systems <training/validation_data/systems_>`_. The batch size of each system is given by the elements of the list.
-
-            - int: all `systems <training/validation_data/systems_>`_ use the same batch size.
-
-            - string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.
-
-            - string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.
-
-        .. _`training/validation_data/auto_prob`:
-
-        auto_prob:
-            | type: ``str``, optional, default: ``prob_sys_size``, alias: *auto_prob_style*
-            | argument path: ``training/validation_data/auto_prob``
-
-            Determine the probability of systems automatically. The method is assigned by this key and can be
-
-            - "prob_uniform"  : the probability all the systems are equal, namely 1.0/self.get_nsystems()
-
-            - "prob_sys_size" : the probability of a system is proportional to the number of batches in the system
-
-            - "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.
-
-        .. _`training/validation_data/sys_probs`:
-
-        sys_probs:
-            | type: ``list`` | ``NoneType``, optional, default: ``None``, alias: *sys_weights*
-            | argument path: ``training/validation_data/sys_probs``
-
-            A list of float if specified. Should be of the same length as `systems`, specifying the probability of each system.
-
-        .. _`training/validation_data/numb_btch`:
-
-        numb_btch:
-            | type: ``int``, optional, default: ``1``, alias: *numb_batch*
-            | argument path: ``training/validation_data/numb_btch``
-
-            An integer that specifies the number of systems to be sampled for each validation period.
-
-    .. _`training/numb_steps`:
-
-    numb_steps:
-        | type: ``int``, alias: *stop_batch*
-        | argument path: ``training/numb_steps``
-
-        Number of training batch. Each training uses one batch of data.
-
-    .. _`training/seed`:
-
-    seed:
-        | type: ``int`` | ``NoneType``, optional
-        | argument path: ``training/seed``
-
-        The random seed for getting frames from the training data set.
-
-    .. _`training/disp_file`:
-
-    disp_file:
-        | type: ``str``, optional, default: ``lcurve.out``
-        | argument path: ``training/disp_file``
-
-        The file for printing learning curve.
-
-    .. _`training/disp_freq`:
-
-    disp_freq:
-        | type: ``int``, optional, default: ``1000``
-        | argument path: ``training/disp_freq``
-
-        The frequency of printing learning curve.
-
-    .. _`training/numb_test`:
-
-    numb_test:
-        | type: ``list`` | ``int`` | ``str``, optional, default: ``1``
-        | argument path: ``training/numb_test``
-
-        Number of frames used for the test during training.
-
-    .. _`training/save_freq`:
-
-    save_freq:
-        | type: ``int``, optional, default: ``1000``
-        | argument path: ``training/save_freq``
-
-        The frequency of saving check point.
-
-    .. _`training/save_ckpt`:
-
-    save_ckpt:
-        | type: ``str``, optional, default: ``model.ckpt``
-        | argument path: ``training/save_ckpt``
-
-        The file name of saving check point.
-
-    .. _`training/disp_training`:
-
-    disp_training:
-        | type: ``bool``, optional, default: ``True``
-        | argument path: ``training/disp_training``
-
-        Displaying verbose information during training.
-
-    .. _`training/time_training`:
-
-    time_training:
-        | type: ``bool``, optional, default: ``True``
-        | argument path: ``training/time_training``
-
-        Timing durining training.
-
-    .. _`training/profiling`:
-
-    profiling:
-        | type: ``bool``, optional, default: ``False``
-        | argument path: ``training/profiling``
-
-        Profiling during training.
-
-    .. _`training/profiling_file`:
-
-    profiling_file:
-        | type: ``str``, optional, default: ``timeline.json``
-        | argument path: ``training/profiling_file``
-
-        Output file for profiling.
-
-    .. _`training/tensorboard`:
-
-    tensorboard:
-        | type: ``bool``, optional, default: ``False``
-        | argument path: ``training/tensorboard``
-
-        Enable tensorboard
-
-    .. _`training/tensorboard_log_dir`:
-
-    tensorboard_log_dir:
-        | type: ``str``, optional, default: ``log``
-        | argument path: ``training/tensorboard_log_dir``
-
-        The log directory of tensorboard outputs
-
-    .. _`training/tensorboard_freq`:
-
-    tensorboard_freq:
-        | type: ``int``, optional, default: ``1``
-        | argument path: ``training/tensorboard_freq``
-
-        The frequency of writing tensorboard events.
diff --git a/doc/train/finetuning.md b/doc/train/finetuning.md
index ebc7cda2c9..011db0bf9f 100644
--- a/doc/train/finetuning.md
+++ b/doc/train/finetuning.md
@@ -1,4 +1,8 @@
-# Finetune the pretrained model
+# Finetune the pretrained model {{ tensorflow_icon }} {{ pytorch_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}
+:::
 
 Pretraining-and-finetuning is a widely used approach in other fields such as Computer Vision (CV) or Natural Language Processing (NLP)
 to vastly reduce the training cost, while it's not trivial in potential models.
@@ -32,6 +36,7 @@ such as {ref}`descriptor <model/descriptor>`, {ref}`fitting_net <model/fitting_n
 However, you can still set the `trainable` parameters in each part of `input.json` to control the training procedure.
 
 To obtain a more simplified script, for example, you can change the {ref}`model <model>` part in `input.json` to perform finetuning:
+
 ```json
     "model": {
         "type_map":     ["O", "H"],
diff --git a/doc/train/gpu-limitations.md b/doc/train/gpu-limitations.md
index 5df76d28c9..92577fd65c 100644
--- a/doc/train/gpu-limitations.md
+++ b/doc/train/gpu-limitations.md
@@ -1,5 +1,7 @@
-# Known limitations of using GPUs
+# Known limitations of using GPUs {{ tensorflow_icon }}
+
 If you use DeePMD-kit in a GPU environment, the acceptable value range of some variables is additionally restricted compared to the CPU environment due to the software's GPU implementations:
+
 1. The number of atom types of a given system must be less than 128.
 2. The maximum distance between an atom and its neighbors must be less than 128. It can be controlled by setting the rcut value of training parameters.
 3. Theoretically, the maximum number of atoms that a single GPU can accept is about 10,000,000. However, this value is limited by the GPU memory size currently, usually within 1000,000 atoms even in the model compression mode.
diff --git a/doc/train/index.md b/doc/train/index.md
deleted file mode 100644
index f37c1a55ce..0000000000
--- a/doc/train/index.md
+++ /dev/null
@@ -1,10 +0,0 @@
-# Training
-
-- [Training a model](training.md)
-- [Advanced options](training-advanced.md)
-- [Parallel training](parallel-training.md)
-- [multi-task training](multi-task-training.md)
-- [TensorBoard Usage](tensorboard.md)
-- [Known limitations of using GPUs](gpu-limitations.md)
-- [Training Parameters](../train-input-auto.rst)
-- [Finetuning the Pretrained Model](finetuning.md)
diff --git a/doc/train/index.rst b/doc/train/index.rst
index 92e84b3000..78ee31e5cb 100644
--- a/doc/train/index.rst
+++ b/doc/train/index.rst
@@ -8,7 +8,8 @@ Training
    training-advanced
    train-input
    parallel-training
-   multi-task-training
+   multi-task-training-tf
+   multi-task-training-pt
    tensorboard
    gpu-limitations
    finetuning
diff --git a/doc/train/multi-task-training-pt.md b/doc/train/multi-task-training-pt.md
new file mode 100644
index 0000000000..284ecf9a27
--- /dev/null
+++ b/doc/train/multi-task-training-pt.md
@@ -0,0 +1,86 @@
+# Multi-task training {{ pytorch_icon }}
+
+:::{note}
+**Supported backends**: PyTorch {{ pytorch_icon }}
+:::
+
+<!-- we plan to drop TensorFlow backend multi-task training. Replace with the PyTorch one -->
+
+## Theory
+
+The multi-task training process can simultaneously handle different datasets with properties that cannot be fitted in one network (e.g. properties from DFT calculations under different exchange-correlation functionals or different basis sets).
+These datasets are denoted by $\boldsymbol x^{(1)}, \dots, \boldsymbol x^{(n_t)}$.
+For each dataset, a training task is defined as
+
+```math
+    \min_{\boldsymbol \theta}   L^{(t)} (\boldsymbol x^{(t)}; \boldsymbol  \theta^{(t)}, \tau), \quad t=1, \dots, n_t.
+```
+
+In the Pytorch implementation, during the multi-task training process, all tasks can share any portion of the model parameters.
+A typical scenario is that each task shares the same descriptor with trainable parameters $\boldsymbol{\theta}_ {d}$, while each has its own fitting network with trainable parameters $\boldsymbol{\theta}_ f^{(t)}$, thus
+$\boldsymbol{\theta}^{(t)} = \{ \boldsymbol{\theta}_ {d} , \boldsymbol{\theta}_ {f}^{(t)} \}$.
+At each training step, a task will be randomly selected from ${1, \dots, n_t}$ according to the user-specified probability,
+and the Adam optimizer is executed to minimize $L^{(t)}$ for one step to update the parameter $\boldsymbol \theta^{(t)}$.
+In the case of multi-GPU parallel training, different GPUs will independently select their tasks.
+In the DPA-2 model, this multi-task training framework is adopted.[^1]
+
+[^1] Duo Zhang, Xinzijian Liu, Xiangyu Zhang, Chengqian Zhang, Chun Cai, Hangrui Bi, Yiming Du, Xuejian Qin, Jiameng Huang, Bowen Li, Yifan Shan, Jinzhe Zeng, Yuzhi Zhang, Siyuan Liu, Yifan Li, Junhan Chang, Xinyan Wang, Shuo Zhou, Jianchuan Liu, Xiaoshan Luo, Zhenyu Wang, Wanrun Jiang, Jing Wu, Yudi Yang, Jiyuan Yang, Manyi Yang, Fu-Qiang Gong, Linshuang Zhang, Mengchao Shi, Fu-Zhi Dai, Darrin M. York, Shi Liu, Tong Zhu, Zhicheng Zhong, Jian Lv, Jun Cheng, Weile Jia, Mohan Chen, Guolin Ke, Weinan E, Linfeng Zhang, Han Wang，[arXiv preprint arXiv:2312.15492 (2023)](https://arxiv.org/abs/2312.15492) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+
+Compared with the previous TensorFlow implementation, the new support in PyTorch is more flexible and efficient.
+In particular, it makes multi-GPU parallel training and even tasks beyond DFT possible,
+enabling larger-scale and more general multi-task training to obtain more general pre-trained models.
+
+## Perform the multi-task training using PyTorch
+
+Training on multiple data sets (each data set contains several data systems) can be performed in multi-task mode,
+typically with one common descriptor and multiple specific fitting nets for each data set.
+To proceed, one need to change the representation of the model definition in the input script.
+The core idea is to replace the previous single model definition {ref}`model <model>` with multiple model definitions {ref}`model/model_dict/model_key <model/model_dict/model_key>`,
+define the shared parameters of the model part {ref}`shared_dict <model/shared_dict>`, and then expand other parts for multi-model settings.
+Specifically, there are several parts that need to be modified:
+
+- {ref}`model/shared_dict <model/shared_dict>`: The parameter definition of the shared part, including various descriptors,
+  type maps (or even fitting nets can be shared). Each module can be defined with a user-defined `part_key`, such as `my_descriptor`.
+  The content needs to align with the corresponding definition in the single-task training model component, such as the definition of the descriptor.
+
+- {ref}`model/model_dict <model/model_dict>`: The core definition of the model part and the explanation of sharing rules,
+  starting with user-defined model name keys `model_key`, such as `my_model_1`.
+  Each model part needs to align with the components of the single-task training {ref}`model <model>`, but with the following sharing rules:
+- - If you want to share the current model component with other tasks, which should be part of the {ref}`model/shared_dict <model/shared_dict>`,
+    you can directly fill in the corresponding `part_key`, such as
+    `"descriptor": "my_descriptor", `
+    to replace the previous detailed parameters. Here, you can also specify the shared_level, such as
+    `"descriptor": "my_descriptor:shared_level", `
+    and use the user-defined integer `shared_level` in the code to share the corresponding module to varying degrees
+    (default is to share all parameters, i.e., `shared_level`=0).
+    The parts that are exclusive to each model can be written following the previous definition.
+
+- {ref}`loss_dict <loss_dict>`: The loss settings corresponding to each task model, specified by the `model_key`.
+  Each {ref}`loss_dict/model_key <loss_dict/model_key>` contains the corresponding loss settings,
+  which are the same as the definition in single-task training {ref}`<loss>`.
+
+- {ref}`training/data_dict <training/data_dict>`: The data settings corresponding to each task model, specified by the `model_key`.
+  Each `training/data_dict/model_key` contains the corresponding `training_data` and `validation_data` settings,
+  which are the same as the definition in single-task training {ref}`training_data <training/training_data>` and {ref}`validation_data <training/validation_data>`.
+
+- (Optional) {ref}`training/model_prob <training/model_prob>`: The sampling weight settings corresponding to each `model_key`, i.e., the probability weight in the training step.
+  You can specify any positive real number weight for each task. The higher the weight, the higher the probability of being sampled in each training.
+  This setting is optional, and if not set, tasks will be sampled with equal weights.
+
+An example input for multi-task training two models in water system is shown as following:
+
+```{literalinclude} ../../examples/water_multi_task/pytorch_example/input_torch.json
+:language: json
+:linenos:
+```
+
+## Finetune from the pretrained multi-task model
+
+To finetune based on the checkpoint `model.pt` after the multi-task pre-training is completed,
+users only need to prepare the normal input for single-task training `input_single.json`,
+and then select one of the trained model's task names `model_key`.
+Run the following command:
+
+```bash
+$ dp --pt train input_single.json --finetune model.pt --model-branch model_key
+```
diff --git a/doc/train/multi-task-training.md b/doc/train/multi-task-training-tf.md
similarity index 76%
rename from doc/train/multi-task-training.md
rename to doc/train/multi-task-training-tf.md
index c647e6905e..0f745958eb 100644
--- a/doc/train/multi-task-training.md
+++ b/doc/train/multi-task-training-tf.md
@@ -1,10 +1,17 @@
-# Multi-task training
+# Multi-task training {{ tensorflow_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}
+:::
+
+<!-- we plan to drop TensorFlow backend multi-task training. Replace with the PyTorch one -->
 
 ## Theory
 
 The multi-task training process can simultaneously handle different datasets with properties that cannot be fitted in one network (e.g. properties from DFT calculations under different exchange-correlation functionals or different basis sets).
 These datasets are denoted by $\boldsymbol x^{(1)}, \dots, \boldsymbol x^{(n_t)}$.
 For each dataset, a training task is defined as
+
 ```math
     \min_{\boldsymbol \theta}   L^{(t)} (\boldsymbol x^{(t)}; \boldsymbol  \theta^{(t)}, \tau), \quad t=1, \dots, n_t.
 ```
@@ -15,24 +22,26 @@ At each training step, a task is randomly picked from ${1, \dots, n_t}$, and the
 If different fitting networks have the same architecture, they can share the parameters of some layers
 to improve training efficiency.[^1]
 
-[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
 
 ## Perform the multi-task training
+
 Training on multiple data sets (each data set contains several data systems) can be performed in multi-task mode,
 with one common descriptor and multiple specific fitting nets for each data set.
 One can simply switch the following parameters in training input script to perform multi-task mode:
+
 - {ref}`fitting_net <model/fitting_net>` --> {ref}`fitting_net_dict <model/fitting_net_dict>`,
-each key of which can be one individual fitting net.
-- {ref}`training_data <training/training_data>`,  {ref}`validation_data <training/validation_data>`
---> {ref}`data_dict <training/data_dict>`, each key of which can be one individual data set contains
-several data systems for corresponding fitting net, the keys must be consistent with those in
-{ref}`fitting_net_dict <model/fitting_net_dict>`.
+  each key of which can be one individual fitting net.
+- {ref}`training_data <training/training_data>`, {ref}`validation_data <training/validation_data>`
+  --> {ref}`data_dict <training/data_dict>`, each key of which can be one individual data set contains
+  several data systems for corresponding fitting net, the keys must be consistent with those in
+  {ref}`fitting_net_dict <model/fitting_net_dict>`.
 - {ref}`loss <loss>` --> {ref}`loss_dict <loss_dict>`, each key of which can be one individual loss setting
-for corresponding fitting net, the keys must be consistent with those in
-{ref}`fitting_net_dict <model/fitting_net_dict>`, if not set, the corresponding fitting net will use the default loss.
+  for corresponding fitting net, the keys must be consistent with those in
+  {ref}`fitting_net_dict <model/fitting_net_dict>`, if not set, the corresponding fitting net will use the default loss.
 - (Optional) {ref}`fitting_weight <training/fitting_weight>`, each key of which can be a non-negative integer or float,
-deciding the chosen probability for corresponding fitting net in training, if not set or invalid,
-the corresponding fitting net will not be used.
+  deciding the chosen probability for corresponding fitting net in training, if not set or invalid,
+  the corresponding fitting net will not be used.
 
 The training procedure will automatically choose single-task or multi-task mode, based on the above parameters.
 Note that parameters of single-task mode and multi-task mode can not be mixed.
@@ -40,6 +49,7 @@ Note that parameters of single-task mode and multi-task mode can not be mixed.
 An example input for training energy and dipole in water system can be found here: [multi-task input on water](../../examples/water_multi_task/ener_dipole/input.json).
 
 The supported descriptors for multi-task mode are listed:
+
 - {ref}`se_a (se_e2_a) <model/descriptor[se_e2_a]>`
 - {ref}`se_r (se_e2_r) <model/descriptor[se_e2_r]>`
 - {ref}`se_at (se_e3) <model/descriptor[se_e3]>`
@@ -48,6 +58,7 @@ The supported descriptors for multi-task mode are listed:
 - {ref}`hybrid <model/descriptor[hybrid]>`
 
 The supported fitting nets for multi-task mode are listed:
+
 - {ref}`ener <model/fitting_net[ener]>`
 - {ref}`dipole <model/fitting_net[dipole]>`
 - {ref}`polar <model/fitting_net[polar]>`
@@ -55,12 +66,14 @@ The supported fitting nets for multi-task mode are listed:
 The output of `dp freeze` command in multi-task mode can be seen in [freeze command](../freeze/freeze.md).
 
 ## Initialization from pretrained multi-task model
+
 For advance training in multi-task mode, one can first train the descriptor on several upstream datasets and then transfer it on new downstream ones with newly added fitting nets.
 At the second step, you can also inherit some fitting nets trained on upstream datasets, by merely adding fitting net keys in {ref}`fitting_net_dict <model/fitting_net_dict>` and
 optional fitting net weights in {ref}`fitting_weight <training/fitting_weight>`.
 
 Take [multi-task input on water](../../examples/water_multi_task/ener_dipole/input.json) again for example.
 You can first train a multi-task model using input script with the following {ref}`model <model>` part:
+
 ```json
     "model": {
         "type_map": ["O", "H"],
@@ -70,25 +83,30 @@ You can first train a multi-task model using input script with the following {re
             "rcut_smth":    0.5,
             "rcut":     6.0,
             "neuron":       [25, 50, 100],
+            "type_one_side": true
         },
         "fitting_net_dict": {
             "water_dipole": {
                 "type":         "dipole",
-                "neuron":       [100, 100, 100],
+                "neuron":       [100, 100, 100]
             },
             "water_ener": {
                 "neuron":       [240, 240, 240],
-                "resnet_dt":    true,
+                "resnet_dt":    true
             }
         },
     }
 ```
+
 After training, you can freeze this multi-task model into one unit graph:
+
 ```bash
 $ dp freeze -o graph.pb --united-model
 ```
+
 Then if you want to transfer the trained descriptor and some fitting nets (take `water_ener` for example) to newly added datasets with new fitting net `water_ener_2`,
 you can modify the {ref}`model <model>` part of the new input script in a more simplified way:
+
 ```json
     "model": {
         "type_map": ["O", "H"],
@@ -102,12 +120,14 @@ you can modify the {ref}`model <model>` part of the new input script in a more s
         },
     }
 ```
+
 It will autocomplete the configurations according to the frozen graph.
 
 Note that for newly added fitting net keys, other parts in the input script, including {ref}`data_dict <training/data_dict>` and {ref}`loss_dict <loss_dict>` (optionally {ref}`fitting_weight <training/fitting_weight>`),
 should be set explicitly. While for old fitting net keys, it will inherit the old configurations if not set.
 
 Finally, you can perform the modified multi-task training from the frozen model with command:
+
 ```bash
 $ dp train input.json --init_frz_model graph.pb
 ```
@@ -119,6 +139,7 @@ In this situation, one can set {ref}`model/fitting_net[ener]/layer_name>` to sha
 The architecture of the layers with the same name should be the same.
 
 For example, if one want to share the first and the third layers for two three-hidden-layer fitting networks, the following parameters should be set.
+
 ```json
 "fitting_net_dict": {
     "ccsd": {
diff --git a/doc/train/parallel-training.md b/doc/train/parallel-training.md
index 98d12f2b9b..9ea92b4751 100644
--- a/doc/train/parallel-training.md
+++ b/doc/train/parallel-training.md
@@ -1,15 +1,22 @@
-# Parallel training
+# Parallel training {{ tensorflow_icon }} {{ pytorch_icon }}
 
-Currently, parallel training is enabled in a synchronized way with help of [Horovod](https://github.com/horovod/horovod).
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}
+:::
+
+## TensorFlow Implementation {{ tensorflow_icon }}
+
+Currently, parallel training in tensorflow version is enabled in a synchronized way with help of [Horovod](https://github.com/horovod/horovod).
 Depending on the number of training processes (according to MPI context) and the number of GPU cards available, DeePMD-kit will decide whether to launch the training in parallel (distributed) mode or in serial mode. Therefore, no additional options are specified in your JSON/YAML input file.
 
-## Tuning learning rate
+### Tuning learning rate
 
 Horovod works in the data-parallel mode, resulting in a larger global batch size. For example, the real batch size is 8 when {ref}`batch_size <training/training_data/batch_size>` is set to 2 in the input file and you launch 4 workers. Thus, {ref}`learning_rate <learning_rate>` is automatically scaled by the number of workers for better convergence. Technical details of such heuristic rule are discussed at [Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour](https://arxiv.org/abs/1706.02677).
 
 The number of decay steps required to achieve the same accuracy can decrease by the number of cards (e.g., 1/2 of steps in the above case), but needs to be scaled manually in the input file.
 
 In some cases, it won't work well when scaling the learning rate by worker count in a `linear` way. Then you can try `sqrt` or `none` by setting argument {ref}`scale_by_worker <learning_rate/scale_by_worker>` like below.
+
 ```json
     "learning_rate" :{
         "scale_by_worker": "none",
@@ -17,18 +24,18 @@ In some cases, it won't work well when scaling the learning rate by worker count
     }
 ```
 
-## Scaling test
+### Scaling test
 
 Testing `examples/water/se_e2_a` on an 8-GPU host, linear acceleration can be observed with the increasing number of cards.
 
 | Num of GPU cards | Seconds every 100 samples | Samples per second | Speed up |
-|  --  | -- | -- | -- |
-| 1  | 1.4515 | 68.89 | 1.00 |
-| 2  | 1.5962 | 62.65*2 | 1.82 |
-| 4  | 1.7635 | 56.71*4 | 3.29 |
-| 8  | 1.7267 | 57.91*8 | 6.72 |
+| ---------------- | ------------------------- | ------------------ | -------- |
+| 1                | 1.4515                    | 68.89              | 1.00     |
+| 2                | 1.5962                    | 62.65\*2           | 1.82     |
+| 4                | 1.7635                    | 56.71\*4           | 3.29     |
+| 8                | 1.7267                    | 57.91\*8           | 6.72     |
 
-## How to use
+### How to use
 
 Training workers can be launched with `horovodrun`. The following command launches 4 processes on the same host:
 
@@ -42,13 +49,16 @@ Need to mention, the environment variable `CUDA_VISIBLE_DEVICES` must be set to
 To maximize the performance, one should follow [FAQ: How to control the parallelism of a job](../troubleshooting/howtoset_num_nodes.md) to control the number of threads.
 
 When using MPI with Horovod, `horovodrun` is a simple wrapper around `mpirun`. In the case where fine-grained control over options is passed to `mpirun`, [`mpirun` can be invoked directly](https://horovod.readthedocs.io/en/stable/mpi_include.html), and it will be detected automatically by Horovod, e.g.,
+
 ```bash
 CUDA_VISIBLE_DEVICES=4,5,6,7 mpirun -l -launcher=fork -hosts=localhost -np 4 \
     dp train --mpi-log=workers input.json
 ```
+
 this is sometimes necessary for an HPC environment.
 
 Whether distributed workers are initiated can be observed in the "Summary of the training" section in the log (`world size` > 1, and `distributed`).
+
 ```
 [0] DEEPMD INFO    ---Summary of the training---------------------------------------
 [0] DEEPMD INFO    distributed
@@ -64,9 +74,10 @@ Whether distributed workers are initiated can be observed in the "Summary of the
 [0] DEEPMD INFO    -----------------------------------------------------------------
 ```
 
-## Logging
+### Logging
 
 What's more, 2 command-line arguments are defined to control the logging behavior when performing parallel training with MPI.
+
 ```
 optional arguments:
   -l LOG_PATH, --log-path LOG_PATH
@@ -80,3 +91,99 @@ optional arguments:
                         means each process will output its own log (default:
                         master)
 ```
+
+## PyTorch Implementation {{ pytorch_icon }}
+
+Currently, parallel training in pytorch version is implemented in the form of PyTorch Distributed Data Parallelism [DDP](https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html).
+DeePMD-kit will decide whether to launch the training in parallel (distributed) mode or in serial mode depending on your execution command.
+
+### Dataloader and Dataset
+
+One of the major differences between two backends during training is that the PyTorch version employs a multi-threaded data loading utility [DataLoader](https://pytorch.org/docs/stable/data.html).
+We utilize the PyTorch framework and have designed and implemented a multiprocessing data processing and loading system called DpLoaderSet based on torch DataLoader and Dataset.
+
+First, we establish a DeepmdData class for each system, which is consistent with the TensorFlow version in this level. Then, we create a dataloader for each system, resulting in the same number of dataloaders as the number of systems. Next, we create a dataset for the dataloaders obtained in the previous step. This allows us to query the data for each system through this dataset, while the iteration pointers for each system are maintained by their respective dataloaders. Finally, a dataloader is created for the outermost dataset.
+
+We achieve custom sampling methods using a weighted sampler. The length of the sampler is set to total_batch_num \* num_workers.The parameter "num_workers" defines the number of threads involved in multi-threaded loading, which can be modified by setting the environment variable NUM_WORKERS (default: min(8, ncpus)).
+
+> **Note** The underlying dataloader will use a distributed sampler to ensure that each GPU receives batches with different content in parallel mode, which will use sequential sampler in serial mode. In the TensorFlow version, Horovod shuffles the dataset using different random seeds for the same purpose..
+
+```mermaid
+flowchart LR
+
+    subgraph systems
+        subgraph system1
+            direction LR
+            frame1[frame 1]
+            frame2[frame 2]
+        end
+
+        subgraph system2
+            direction LR
+            frame3[frame 3]
+            frame4[frame 4]
+            frame5[frame 5]
+        end
+    end
+
+    subgraph dataset
+        dataset1[dataset 1]
+        dataset2[dataset 2]
+    end
+    system1 -- frames --> dataset1
+    system2 --> dataset2
+
+    subgraph distribted sampler
+        ds1[distributed sampler 1]
+        ds2[distributed sampler 2]
+    end
+    dataset1 --> ds1
+    dataset2 --> ds2
+
+    subgraph dataloader
+        dataloader1[dataloader 1]
+        dataloader2[dataloader 2]
+    end
+    ds1 -- mini batch --> dataloader1
+    ds2 --> dataloader2
+
+    subgraph index[index on Rank 0]
+        dl11[dataloader 1, entry 1]
+        dl21[dataloader 2, entry 1]
+        dl22[dataloader 2, entry 2]
+    end
+    dataloader1 --> dl11
+    dataloader2 --> dl21
+    dataloader2 --> dl22
+
+    index -- for each step, choose 1 system --> WeightedSampler
+    --> dploaderset --> bufferedq[buffered queue] --> model
+```
+
+### How to use
+
+We use [`torchrun`](https://pytorch.org/docs/stable/elastic/run.html#usage) to launch a DDP training session.
+
+To start training with multiple GPUs in one node, set parameter `nproc_per_node` as the number of it:
+
+```bash
+torchrun --nproc_per_node=4 --no-python dp --pt train input.json
+# Not setting `nproc_per_node` uses only 1 GPU
+torchrun --no-python dp --pt train input.json
+```
+
+To train a model with a cluster, one can manually launch the task using the commands below (usually this should be done by your job management system). Set `nnodes` as the number of available nodes, `node_rank` as the rank of the current node among all nodes (not the rank of processes!), and `nproc_per_node` as the number of available GPUs in one node. Please make sure that every node can access the rendezvous address and port (`rdzv_endpoint` in the command), and has a same amount of GPUs.
+
+```bash
+# Running DDP on 2 nodes with 4 GPUs each
+# On node 0:
+torchrun --rdzv_endpoint=node0:12321 --nnodes=2 --nproc_per_node=4 --node_rank=0 --no_python dp --pt train tests/water/se_e2_a.json
+# On node 1:
+torchrun --rdzv_endpoint=node0:12321 --nnodes=2 --nproc_per_node=4 --node_rank=1 --no_python dp --pt train tests/water/se_e2_a.json
+```
+
+> **Note** Set environment variables to tune [CPU specific optimizations](https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html#cpu-specific-optimizations) in advance.
+
+> **Note** for developers: `torchrun` by default passes settings as environment variables [(list here)](https://pytorch.org/docs/stable/elastic/run.html#environment-variables).
+
+> To check forward, backward, and communication time, please set env var `TORCH_CPP_LOG_LEVEL=INFO TORCH_DISTRIBUTED_DEBUG=DETAIL`. More details can be found [here](https://pytorch.org/docs/stable/distributed.html#logging).
diff --git a/doc/train/tensorboard.md b/doc/train/tensorboard.md
index 4846005216..7b41c004ce 100644
--- a/doc/train/tensorboard.md
+++ b/doc/train/tensorboard.md
@@ -1,4 +1,8 @@
-# TensorBoard Usage
+# TensorBoard Usage {{ tensorflow_icon }} {{ pytorch_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}, PyTorch {{ pytorch_icon }}
+:::
 
 TensorBoard provides the visualization and tooling needed for machine learning
 experimentation. Full instructions for TensorBoard can be found
@@ -8,10 +12,10 @@ experimentation. Full instructions for TensorBoard can be found
 
 DeePMD-kit can now use most of the interesting features enabled by TensorBoard!
 
-* **Tracking and visualizing metrics,** such as l2_loss, l2_energy_loss and l2_force_loss
-* **Visualizing the model graph** (ops and layers)
-* **Viewing histograms of weights, biases, or other tensors as they change over time.**
-* **Viewing summaries of trainable variables**
+- **Tracking and visualizing metrics,** such as l2_loss, l2_energy_loss and l2_force_loss
+- **Visualizing the model graph** (ops and layers)
+- **Viewing histograms of weights, biases, or other tensors as they change over time.**
+- **Viewing summaries of trainable variables**
 
 <!-- * **Projecting embeddings to a lower dimensional space.**
 * **Precision curves.** -->
@@ -80,6 +84,7 @@ tensorboard --logdir path/to/logs
 ![DeePMD-kit distribution](../images/tensorboard-distribution.png)
 
 ### Viewing summaries of trainable variables
+
 ![DeePMD-kit scalar](../images/tensorboard-scalar.png)
 
 ## Attention
diff --git a/doc/train/train-input.rst b/doc/train/train-input.rst
index 2a32aeb930..04e82451e4 100644
--- a/doc/train/train-input.rst
+++ b/doc/train/train-input.rst
@@ -4,5 +4,5 @@ Training Parameters
    One can load, modify, and export the input file by using our effective web-based tool `DP-GUI <https://deepmodeling.com/dpgui/input/deepmd-kit-2.0>`_ online or hosted using the :ref:`command line interface <cli>` :code:`dp gui`. All training parameters below can be set in DP-GUI. By clicking "SAVE JSON", one can download the input file for furthur training.
 
 .. dargs::
-   :module: deepmd.utils.argcheck
+   :module: deepmd.tf.utils.argcheck
    :func: gen_args
diff --git a/doc/train/training-advanced.md b/doc/train/training-advanced.md
index 4940b77fa7..a0f6759256 100644
--- a/doc/train/training-advanced.md
+++ b/doc/train/training-advanced.md
@@ -7,21 +7,26 @@ In this section, we will take `$deepmd_source_dir/examples/water/se_e2_a/input.j
 ### Theory
 
 The learning rate $\gamma$ decays exponentially:
+
 ```math
     \gamma(\tau) = \gamma^0 r ^ {\lfloor  \tau/s \rfloor},
 ```
+
 where $\tau \in \mathbb{N}$ is the index of the training step, $\gamma^0  \in \mathbb{R}$ is the learning rate at the first step, and the decay rate $r$ is given by
+
 ```math
     r = {\left(\frac{\gamma^{\text{stop}}}{\gamma^0}\right )} ^{\frac{s}{\tau^{\text{stop}}}},
 ```
+
 where $\tau^{\text{stop}} \in \mathbb{N}$, $\gamma^{\text{stop}} \in \mathbb{R}$, and $s \in \mathbb{N}$ are the stopping step, the stopping learning rate, and the decay steps, respectively, all of which are hyperparameters provided in advance.
 [^1]
 
-[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen,  Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
+[^1]: This section is built upon Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang, [J. Chem. Phys. 159, 054801 (2023)](https://doi.org/10.1063/5.0155600) licensed under a [Creative Commons Attribution (CC BY) license](http://creativecommons.org/licenses/by/4.0/).
 
 ### Instructions
 
 The {ref}`learning_rate <learning_rate>` section in `input.json` is given as follows
+
 ```json
     "learning_rate" :{
 	"type":		"exp",
@@ -31,17 +36,19 @@ The {ref}`learning_rate <learning_rate>` section in `input.json` is given as fol
 	"_comment":	"that's all"
     }
 ```
-* {ref}`start_lr <learning_rate[exp]/start_lr>` gives the learning rate at the beginning of the training.
-* {ref}`stop_lr <learning_rate[exp]/stop_lr>` gives the learning rate at the end of the training. It should be small enough to ensure that the network parameters satisfactorily converge.
-* During the training, the learning rate decays exponentially from {ref}`start_lr <learning_rate[exp]/start_lr>` to {ref}`stop_lr <learning_rate[exp]/stop_lr>` following the formula:
 
-    ```
-    lr(t) = start_lr * decay_rate ^ ( t / decay_steps )
-    ```
+- {ref}`start_lr <learning_rate[exp]/start_lr>` gives the learning rate at the beginning of the training.
+- {ref}`stop_lr <learning_rate[exp]/stop_lr>` gives the learning rate at the end of the training. It should be small enough to ensure that the network parameters satisfactorily converge.
+- During the training, the learning rate decays exponentially from {ref}`start_lr <learning_rate[exp]/start_lr>` to {ref}`stop_lr <learning_rate[exp]/stop_lr>` following the formula:
+
+  ```
+  lr(t) = start_lr * decay_rate ^ ( t / decay_steps )
+  ```
 
 ## Training parameters
 
 Other training parameters are given in the {ref}`training <training>` section.
+
 ```json
     "training": {
  	"training_data": {
@@ -65,15 +72,18 @@ Other training parameters are given in the {ref}`training <training>` section.
 	"save_freq":	1000
     }
 ```
+
 The sections {ref}`training_data <training/training_data>` and {ref}`validation_data <training/validation_data>` give the training dataset and validation dataset, respectively. Taking the training dataset for example, the keys are explained below:
-* {ref}`systems <training/training_data/systems>` provide paths of the training data systems. DeePMD-kit allows you to provide multiple systems with different numbers of atoms. This key can be a `list` or a `str`.
-    * `list`: {ref}`systems <training/training_data/systems>` gives the training data systems.
-    * `str`: {ref}`systems <training/training_data/systems>` should be a valid path. DeePMD-kit will recursively search all data systems in this path.
-* At each training step, DeePMD-kit randomly picks {ref}`batch_size <training/training_data/batch_size>` frame(s) from one of the systems. The probability of using a system is by default in proportion to the number of batches in the system. More options are available for automatically determining the probability of using systems. One can set the key {ref}`auto_prob <training/training_data/auto_prob>` to
-    * `"prob_uniform"` all systems are used with the same probability.
-    * `"prob_sys_size"` the probability of using a system is proportional to its size (number of frames).
-    * `"prob_sys_size; sidx_0:eidx_0:w_0; sidx_1:eidx_1:w_1;..."` the `list` of systems is divided into blocks. Block `i` has systems ranging from `sidx_i` to `eidx_i`. The probability of using a system from block `i` is proportional to `w_i`. Within one block, the probability of using a system is proportional to its size.
-* An example of using `"auto_prob"` is given below. The probability of using `systems[2]` is 0.4, and the sum of the probabilities of using `systems[0]` and `systems[1]` is 0.6. If the number of frames in `systems[1]` is twice of `system[0]`, then the probability of using `system[1]` is 0.4 and that of `system[0]` is 0.2.
+
+- {ref}`systems <training/training_data/systems>` provide paths of the training data systems. DeePMD-kit allows you to provide multiple systems with different numbers of atoms. This key can be a `list` or a `str`.
+  - `list`: {ref}`systems <training/training_data/systems>` gives the training data systems.
+  - `str`: {ref}`systems <training/training_data/systems>` should be a valid path. DeePMD-kit will recursively search all data systems in this path.
+- At each training step, DeePMD-kit randomly picks {ref}`batch_size <training/training_data/batch_size>` frame(s) from one of the systems. The probability of using a system is by default in proportion to the number of batches in the system. More options are available for automatically determining the probability of using systems. One can set the key {ref}`auto_prob <training/training_data/auto_prob>` to
+  - `"prob_uniform"` all systems are used with the same probability.
+  - `"prob_sys_size"` the probability of using a system is proportional to its size (number of frames).
+  - `"prob_sys_size; sidx_0:eidx_0:w_0; sidx_1:eidx_1:w_1;..."` the `list` of systems is divided into blocks. Block `i` has systems ranging from `sidx_i` to `eidx_i`. The probability of using a system from block `i` is proportional to `w_i`. Within one block, the probability of using a system is proportional to its size.
+- An example of using `"auto_prob"` is given below. The probability of using `systems[2]` is 0.4, and the sum of the probabilities of using `systems[0]` and `systems[1]` is 0.6. If the number of frames in `systems[1]` is twice of `system[0]`, then the probability of using `system[1]` is 0.4 and that of `system[0]` is 0.2.
+
 ```json
  	"training_data": {
 	    "systems":		["../data_water/data_0/", "../data_water/data_1/", "../data_water/data_2/"],
@@ -81,7 +91,9 @@ The sections {ref}`training_data <training/training_data>` and {ref}`validation_
 	    "batch_size":	"auto"
 	}
 ```
-* The probability of using systems can also be specified explicitly with key {ref}`sys_probs <training/training_data/sys_probs>` which is a list having the length of the number of systems. For example
+
+- The probability of using systems can also be specified explicitly with key {ref}`sys_probs <training/training_data/sys_probs>` which is a list having the length of the number of systems. For example
+
 ```json
  	"training_data": {
 	    "systems":		["../data_water/data_0/", "../data_water/data_1/", "../data_water/data_2/"],
@@ -89,34 +101,40 @@ The sections {ref}`training_data <training/training_data>` and {ref}`validation_
 	    "batch_size":	"auto:32"
 	}
 ```
-* The key {ref}`batch_size <training/training_data/batch_size>` specifies the number of frames used to train or validate the model in a training step. It can be set to
-    * `list`: the length of which is the same as the {ref}`systems`. The batch size of each system is given by the elements of the list.
-    * `int`: all systems use the same batch size.
-    * `"auto"`: the same as `"auto:32"`, see `"auto:N"`
-    * `"auto:N"`: automatically determines the batch size so that the {ref}`batch_size <training/training_data/batch_size>` times the number of atoms in the system is no less than `N`.
-* The key {ref}`numb_batch <training/validation_data/numb_btch>` in {ref}`validate_data <training/validation_data>` gives the number of batches of model validation. Note that the batches may not be from the same system
+
+- The key {ref}`batch_size <training/training_data/batch_size>` specifies the number of frames used to train or validate the model in a training step. It can be set to
+  - `list`: the length of which is the same as the {ref}`systems`. The batch size of each system is given by the elements of the list.
+  - `int`: all systems use the same batch size.
+  - `"auto"`: the same as `"auto:32"`, see `"auto:N"`
+  - `"auto:N"`: automatically determines the batch size so that the {ref}`batch_size <training/training_data/batch_size>` times the number of atoms in the system is no less than `N`.
+- The key {ref}`numb_batch <training/validation_data/numb_btch>` in {ref}`validate_data <training/validation_data>` gives the number of batches of model validation. Note that the batches may not be from the same system
 
 The section {ref}`mixed_precision <training/mixed_precision>` specifies the mixed precision settings, which will enable the mixed precision training workflow for DeePMD-kit. The keys are explained below:
-* {ref}`output_prec <training/mixed_precision/output_prec>`  precision used in the output tensors, only `float32` is supported currently.
-* {ref}`compute_prec <training/mixed_precision/compute_prec>` precision used in the computing tensors, only `float16` is supported currently.
-Note there are several limitations about mixed precision training:
-* Only {ref}`se_e2_a <model/descriptor[se_e2_a]>` type descriptor is supported by the mixed precision training workflow.
-* The precision of the embedding net and the fitting net are forced to be set to `float32`.
+
+- {ref}`output_prec <training/mixed_precision/output_prec>` precision used in the output tensors, only `float32` is supported currently.
+- {ref}`compute_prec <training/mixed_precision/compute_prec>` precision used in the computing tensors, only `float16` is supported currently.
+  Note there are several limitations about mixed precision training:
+- Only {ref}`se_e2_a <model/descriptor[se_e2_a]>` type descriptor is supported by the mixed precision training workflow.
+- The precision of the embedding net and the fitting net are forced to be set to `float32`.
 
 Other keys in the {ref}`training <training>` section are explained below:
-* {ref}`numb_steps <training/numb_steps>` The number of training steps.
-* {ref}`seed <training/seed>` The random seed for getting frames from the training data set.
-* {ref}`disp_file <training/disp_file>` The file for printing learning curve.
-* {ref}`disp_freq <training/disp_freq>` The frequency of printing learning curve. Set in the unit of training steps
-* {ref}`save_freq <training/save_freq>` The frequency of saving checkpoint.
+
+- {ref}`numb_steps <training/numb_steps>` The number of training steps.
+- {ref}`seed <training/seed>` The random seed for getting frames from the training data set.
+- {ref}`disp_file <training/disp_file>` The file for printing learning curve.
+- {ref}`disp_freq <training/disp_freq>` The frequency of printing learning curve. Set in the unit of training steps
+- {ref}`save_freq <training/save_freq>` The frequency of saving checkpoint.
 
 ## Options and environment variables
 
 Several command line options can be passed to `dp train`, which can be checked with
+
 ```bash
 $ dp train --help
 ```
+
 An explanation will be provided
+
 ```
 positional arguments:
   INPUT                 the input json database
@@ -146,16 +164,16 @@ To maximize the performance, one should follow [FAQ: How to control the parallel
 
 One can set other environmental variables:
 
-| Environment variables | Allowed value          | Default value | Usage                      |
-| --------------------- | ---------------------- | ------------- | -------------------------- |
-| DP_INTERFACE_PREC     | `high`, `low`          | `high`        | Control high (double) or low (float) precision of training. |
-| DP_AUTO_PARALLELIZATION | 0, 1                 | 0             | Enable auto parallelization for CPU operators. |
-| DP_JIT                | 0, 1                   | 0             | Enable JIT. Note that this option may either improve or decrease the performance. Requires TensorFlow supports JIT.  |
-
+| Environment variables   | Allowed value | Default value | Usage                                                                                                               |
+| ----------------------- | ------------- | ------------- | ------------------------------------------------------------------------------------------------------------------- |
+| DP_INTERFACE_PREC       | `high`, `low` | `high`        | Control high (double) or low (float) precision of training.                                                         |
+| DP_AUTO_PARALLELIZATION | 0, 1          | 0             | Enable auto parallelization for CPU operators.                                                                      |
+| DP_JIT                  | 0, 1          | 0             | Enable JIT. Note that this option may either improve or decrease the performance. Requires TensorFlow supports JIT. |
 
 ## Adjust `sel` of a frozen model
 
 One can use `--init-frz-model` features to adjust (increase or decrease) [`sel`](../model/sel.md) of a existing model. Firstly, one needs to adjust [`sel`](./train-input.rst) in `input.json`. For example, adjust from `[46, 92]` to `[23, 46]`.
+
 ```json
 "model": {
 	"descriptor": {
@@ -163,7 +181,9 @@ One can use `--init-frz-model` features to adjust (increase or decrease) [`sel`]
 	}
 }
 ```
+
 To obtain the new model at once, [`numb_steps`](./train-input.rst) should be set to zero:
+
 ```json
 "training": {
 	"numb_steps": 0
@@ -171,6 +191,7 @@ To obtain the new model at once, [`numb_steps`](./train-input.rst) should be set
 ```
 
 Then, one can initialize the training from the frozen model and freeze the new model at once:
+
 ```sh
 dp train input.json --init-frz-model frozen_model.pb
 dp freeze -o frozen_model_adjusted_sel.pb
diff --git a/doc/train/training.md b/doc/train/training.md
index c1e5b89a84..5b7bbd32a8 100644
--- a/doc/train/training.md
+++ b/doc/train/training.md
@@ -1,17 +1,21 @@
 # Train a model
 
 Several examples of training can be found in the `examples` directory:
+
 ```bash
 $ cd $deepmd_source_dir/examples/water/se_e2_a/
 ```
 
 After switching to that directory, the training can be invoked by
+
 ```bash
 $ dp train input.json
 ```
+
 where `input.json` is the name of the input script.
 
 By default, the verbosity level of the DeePMD-kit is `INFO`, one may see a lot of important information on the code and environment showing on the screen. Among them two pieces of information regarding data systems are worth special notice.
+
 ```bash
 DEEPMD INFO    ---Summary of DataSystem: training     -----------------------------------------------
 DEEPMD INFO    found 3 system(s):
@@ -26,9 +30,11 @@ DEEPMD INFO                                        system  natoms  bch_sz   n_bc
 DEEPMD INFO                          ../data_water/data_3     192       1      80  1.000    T
 DEEPMD INFO    --------------------------------------------------------------------------------------
 ```
+
 The DeePMD-kit prints detailed information on the training and validation data sets. The data sets are defined by {ref}`training_data <training/training_data>` and {ref}`validation_data <training/validation_data>` defined in the {ref}`training <training>` section of the input script. The training data set is composed of three data systems, while the validation data set is composed by one data system. The number of atoms, batch size, the number of batches in the system and the probability of using the system are all shown on the screen. The last column presents if the periodic boundary condition is assumed for the system.
 
 During the training, the error of the model is tested every {ref}`disp_freq <training/disp_freq>` training steps with the batch used to train the model and with {ref}`numb_btch <training/validation_data/numb_btch>` batches from the validating data. The training error and validation error are printed correspondingly in the file {ref}`disp_file <training/disp_file>` (default is `lcurve.out`). The batch size can be set in the input script by the key {ref}`batch_size <training/training_data/batch_size>` in the corresponding sections for the training and validation data set. An example of the output
+
 ```bash
 #  step      rmse_val    rmse_trn    rmse_e_val  rmse_e_trn    rmse_f_val  rmse_f_trn         lr
       0      3.33e+01    3.41e+01      1.03e+01    1.03e+01      8.39e-01    8.72e-01    1.0e-03
@@ -38,6 +44,7 @@ During the training, the error of the model is tested every {ref}`disp_freq <tra
     400      1.36e+01    1.32e+01      1.07e-02    2.07e-03      4.29e-01    4.19e-01    1.0e-03
     500      1.07e+01    1.05e+01      2.45e-03    4.11e-03      3.38e-01    3.31e-01    1.0e-03
 ```
+
 The file contains 8 columns, from left to right, which are the training step, the validation loss, training loss, root mean square (RMS) validation error of energy, RMS training error of energy, RMS validation error of force, RMS training error of force and the learning rate. The RMS error (RMSE) of the energy is normalized by the number of atoms in the system. One can visualize this file with a simple Python script:
 
 ```py
diff --git a/doc/troubleshooting/howtoset_netsize.md b/doc/troubleshooting/howtoset_netsize.md
index 22d215eec6..b09fa05fa8 100644
--- a/doc/troubleshooting/howtoset_netsize.md
+++ b/doc/troubleshooting/howtoset_netsize.md
@@ -2,140 +2,136 @@
 
 Here are some test forms on fitting-net size tuning or embedding-net size tuning performed on several different systems.
 
-
 ## Al2O3
 
 ### Fitting net size tuning form on Al2O3: (embedding-net size: [25,50,100])
 
-Fitting-net size | Energy L2err(eV) | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom)
----|---|---|---
-[240,240,240] |   1.742252e-02 |  7.259383e-05 |  4.014115e-02
-[80,80,80]    |   1.799349e-02 |  7.497287e-05 |  4.042977e-02
-[40,40,40]    |   1.799036e-02 |  7.495984e-05 |  4.068806e-02
-[20,20,20]    |   1.834032e-02 |  7.641801e-05 |  4.094784e-02
-[10,10,10]    |   1.913058e-02 |  7.971073e-05 |  4.154775e-02
-[5,5,5]       |   1.932914e-02 |  8.053808e-05 |  4.188052e-02
-[4,4,4]       |   1.944832e-02 |  8.103467e-05 |  4.217826e-02
-[3,3,3]       |   2.068631e-02 |  8.619296e-05 |  4.300497e-02
-[2,2,2]       |   2.267962e-02 |  9.449840e-05 |  4.413609e-02
-[1,1,1]       |   2.813596e-02 |  1.172332e-04 |  4.781115e-02
-[]            |   3.135002e-02 |  1.306251e-04 |  5.373120e-02
+| Fitting-net size | Energy L2err(eV) | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom) |
+| ---------------- | ---------------- | ----------------------- | ------------------------ |
+| [240,240,240]    | 1.742252e-02     | 7.259383e-05            | 4.014115e-02             |
+| [80,80,80]       | 1.799349e-02     | 7.497287e-05            | 4.042977e-02             |
+| [40,40,40]       | 1.799036e-02     | 7.495984e-05            | 4.068806e-02             |
+| [20,20,20]       | 1.834032e-02     | 7.641801e-05            | 4.094784e-02             |
+| [10,10,10]       | 1.913058e-02     | 7.971073e-05            | 4.154775e-02             |
+| [5,5,5]          | 1.932914e-02     | 8.053808e-05            | 4.188052e-02             |
+| [4,4,4]          | 1.944832e-02     | 8.103467e-05            | 4.217826e-02             |
+| [3,3,3]          | 2.068631e-02     | 8.619296e-05            | 4.300497e-02             |
+| [2,2,2]          | 2.267962e-02     | 9.449840e-05            | 4.413609e-02             |
+| [1,1,1]          | 2.813596e-02     | 1.172332e-04            | 4.781115e-02             |
+| []               | 3.135002e-02     | 1.306251e-04            | 5.373120e-02             |
 
 _[] means no hidden layer, but there is still a linear output layer. This situation is equal to the linear regression._
 
 ### Embedding net size tuning form on Al2O3: (Fitting-net size: [240,240,240])
 
-Embedding-net size | Energy L2err(eV) | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom)
----|---|---|---
-[25,50,100]  | 1.742252e-02  | 7.259383e-05  | 4.014115e-02
-[10,20,40]   | 2.909990e-02  | 1.212496e-04  | 4.734667e-02
-[5,10,20]    | 3.357767e-02  | 1.399070e-04  | 5.706385e-02
-[4,8,16]     | 6.060367e-02  | 2.525153e-04  | 7.333304e-02
-[3,6,12]     | 5.656043e-02  | 2.356685e-04  | 7.793539e-02
-[2,4,8]      | 5.277023e-02  | 2.198759e-04  | 7.459995e-02
-[1,2,4]      | 1.302282e-01  | 5.426174e-04  | 9.672238e-02
-
+| Embedding-net size | Energy L2err(eV) | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom) |
+| ------------------ | ---------------- | ----------------------- | ------------------------ |
+| [25,50,100]        | 1.742252e-02     | 7.259383e-05            | 4.014115e-02             |
+| [10,20,40]         | 2.909990e-02     | 1.212496e-04            | 4.734667e-02             |
+| [5,10,20]          | 3.357767e-02     | 1.399070e-04            | 5.706385e-02             |
+| [4,8,16]           | 6.060367e-02     | 2.525153e-04            | 7.333304e-02             |
+| [3,6,12]           | 5.656043e-02     | 2.356685e-04            | 7.793539e-02             |
+| [2,4,8]            | 5.277023e-02     | 2.198759e-04            | 7.459995e-02             |
+| [1,2,4]            | 1.302282e-01     | 5.426174e-04            | 9.672238e-02             |
 
 ## Cu
 
 ### Fitting net size tuning form on Cu: (embedding-net size: [25,50,100])
 
-Fitting-net size | Energy L2err(eV) | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom)
----|---|---|---
-[240,240,240]  | 4.135548e-02   |  1.615449e-04   |  8.940946e-02
-[20,20,20]     | 4.323858e-02   |  1.689007e-04   |  8.955762e-02
-[10,10,10]     | 4.399364e-02   |  1.718502e-04   |  8.962891e-02
-[5,5,5]        | 4.468404e-02   |  1.745470e-04   |  8.970111e-02
-[4,4,4]        | 4.463580e-02   |  1.743586e-04   |  8.972011e-02
-[3,3,3]        | 4.493758e-02   |  1.755374e-04   |  8.971303e-02
-[2,2,2]        | 4.500736e-02   |  1.758100e-04   |  8.973878e-02
-[1,1,1]        | 4.542073e-02   |  1.774247e-04   |  8.964761e-02
-[]             | 4.545168e-02   |  1.775456e-04   |  8.983201e-02
+| Fitting-net size | Energy L2err(eV) | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom) |
+| ---------------- | ---------------- | ----------------------- | ------------------------ |
+| [240,240,240]    | 4.135548e-02     | 1.615449e-04            | 8.940946e-02             |
+| [20,20,20]       | 4.323858e-02     | 1.689007e-04            | 8.955762e-02             |
+| [10,10,10]       | 4.399364e-02     | 1.718502e-04            | 8.962891e-02             |
+| [5,5,5]          | 4.468404e-02     | 1.745470e-04            | 8.970111e-02             |
+| [4,4,4]          | 4.463580e-02     | 1.743586e-04            | 8.972011e-02             |
+| [3,3,3]          | 4.493758e-02     | 1.755374e-04            | 8.971303e-02             |
+| [2,2,2]          | 4.500736e-02     | 1.758100e-04            | 8.973878e-02             |
+| [1,1,1]          | 4.542073e-02     | 1.774247e-04            | 8.964761e-02             |
+| []               | 4.545168e-02     | 1.775456e-04            | 8.983201e-02             |
 
 ### Embedding net size tuning form on Cu: (Fitting-net size: [240,240,240])
 
-Embedding-net size | Energy L2err(eV) | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom)
----|---|---|---
-[25,50,100]          | 4.135548e-02  |  1.615449e-04  |  8.940946e-02
-[20,40,80]           | 4.203562e-02  |  1.642016e-04  |  8.925881e-02
-[15,30,60]           | 4.146672e-02  |  1.619794e-04  |  8.936911e-02
-[10,20,40]           | 4.263060e-02  |  1.665258e-04  |  8.955818e-02
-[5,10,20]            | 4.994913e-02  |  1.951138e-04  |  9.007786e-02
-[4,8,16]             | 1.022157e-01  |  3.992802e-04  |  9.532119e-02
-[3,6,12]             | 1.362098e-01  |  5.320695e-04  |  1.073860e-01
-[2,4,8]              | 7.061800e-02  |  2.758515e-04  |  9.126418e-02
-[1,2,4] && seed = 1  | 9.843161e-02  |  3.844985e-04  |  9.348505e-02
-[1,2,4] && seed = 2  | 9.404335e-02  |  3.673568e-04  |  9.304089e-02
-[1,2,4] && seed = 3  | 1.508016e-01  |  5.890688e-04  |  1.382356e-01
-[1,2,4] && seed = 4  | 9.686949e-02  |  3.783965e-04  |  9.294820e-02
-
+| Embedding-net size  | Energy L2err(eV) | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom) |
+| ------------------- | ---------------- | ----------------------- | ------------------------ |
+| [25,50,100]         | 4.135548e-02     | 1.615449e-04            | 8.940946e-02             |
+| [20,40,80]          | 4.203562e-02     | 1.642016e-04            | 8.925881e-02             |
+| [15,30,60]          | 4.146672e-02     | 1.619794e-04            | 8.936911e-02             |
+| [10,20,40]          | 4.263060e-02     | 1.665258e-04            | 8.955818e-02             |
+| [5,10,20]           | 4.994913e-02     | 1.951138e-04            | 9.007786e-02             |
+| [4,8,16]            | 1.022157e-01     | 3.992802e-04            | 9.532119e-02             |
+| [3,6,12]            | 1.362098e-01     | 5.320695e-04            | 1.073860e-01             |
+| [2,4,8]             | 7.061800e-02     | 2.758515e-04            | 9.126418e-02             |
+| [1,2,4] && seed = 1 | 9.843161e-02     | 3.844985e-04            | 9.348505e-02             |
+| [1,2,4] && seed = 2 | 9.404335e-02     | 3.673568e-04            | 9.304089e-02             |
+| [1,2,4] && seed = 3 | 1.508016e-01     | 5.890688e-04            | 1.382356e-01             |
+| [1,2,4] && seed = 4 | 9.686949e-02     | 3.783965e-04            | 9.294820e-02             |
 
 ## Water
 
 ### Fitting net size tuning form on water: (embedding-net size: [25,50,100])
 
-Fitting-net size | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom)
----|---|---
-[240,240,240]  | 9.1589E-04  | 5.1540E-02
-[200,200,200]  | 9.3221E-04  | 5.2366E-02
-[160,160,160]  | 9.4274E-04  | 5.3403E-02
-[120,120,120]  | 9.5407E-04  | 5.3093E-02
-[80,80,80]     | 9.4605E-04  | 5.3402E-02
-[40,40,40]     | 9.8533E-04  | 5.5790E-02
-[20,20,20]     | 1.0057E-03  | 5.8232E-02
-[10,10,10]     | 1.0466E-03  | 6.2279E-02
-[5,5,5]        | 1.1154E-03  | 6.7994E-02
-[4,4,4]        | 1.1289E-03  | 6.9613E-02
-[3,3,3]        | 1.2368E-03  | 7.9786E-02
-[2,2,2]        | 1.3558E-03  | 9.7042E-02
-[1,1,1]        | 1.4633E-03  | 1.1265E-01
-[]             | 1.5193E-03  | 1.2136E-01
+| Fitting-net size | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom) |
+| ---------------- | ----------------------- | ------------------------ |
+| [240,240,240]    | 9.1589E-04              | 5.1540E-02               |
+| [200,200,200]    | 9.3221E-04              | 5.2366E-02               |
+| [160,160,160]    | 9.4274E-04              | 5.3403E-02               |
+| [120,120,120]    | 9.5407E-04              | 5.3093E-02               |
+| [80,80,80]       | 9.4605E-04              | 5.3402E-02               |
+| [40,40,40]       | 9.8533E-04              | 5.5790E-02               |
+| [20,20,20]       | 1.0057E-03              | 5.8232E-02               |
+| [10,10,10]       | 1.0466E-03              | 6.2279E-02               |
+| [5,5,5]          | 1.1154E-03              | 6.7994E-02               |
+| [4,4,4]          | 1.1289E-03              | 6.9613E-02               |
+| [3,3,3]          | 1.2368E-03              | 7.9786E-02               |
+| [2,2,2]          | 1.3558E-03              | 9.7042E-02               |
+| [1,1,1]          | 1.4633E-03              | 1.1265E-01               |
+| []               | 1.5193E-03              | 1.2136E-01               |
 
 ### Embedding net size tuning form on water: (Fitting-net size: [240,240,240])
 
-Embedding-net size | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom)
----|---|---
-[25,50,100]  | 9.1589E-04  | 5.1540E-02
-[20,40,80]   | 9.5080E-04  | 5.3593E-02
-[15,30,60]   | 9.7996E-04  | 5.6338E-02
-[10,20,40]   | 1.0353E-03  | 6.2776E-02
-[5,10,20]    | 1.1254E-03  | 7.3195E-02
-[4,8,16]     | 1.2495E-03  | 8.0371E-02
-[3,6,12]     | 1.3604E-03  | 9.9883E-02
-[2,4,8]      | 1.4358E-03  | 9.7389E-02
-[1,2,4]      | 2.1765E-03  | 1.7276E-01
-
+| Embedding-net size | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom) |
+| ------------------ | ----------------------- | ------------------------ |
+| [25,50,100]        | 9.1589E-04              | 5.1540E-02               |
+| [20,40,80]         | 9.5080E-04              | 5.3593E-02               |
+| [15,30,60]         | 9.7996E-04              | 5.6338E-02               |
+| [10,20,40]         | 1.0353E-03              | 6.2776E-02               |
+| [5,10,20]          | 1.1254E-03              | 7.3195E-02               |
+| [4,8,16]           | 1.2495E-03              | 8.0371E-02               |
+| [3,6,12]           | 1.3604E-03              | 9.9883E-02               |
+| [2,4,8]            | 1.4358E-03              | 9.7389E-02               |
+| [1,2,4]            | 2.1765E-03              | 1.7276E-01               |
 
 ## Mg-Al
 
 ### Fitting net size tuning form on Mg-Al: (embedding-net size: [25,50,100])
 
-Fitting-net size | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom)
----|---|---
-[240,240,240]  | 3.9606e-03  | 1.6289e-02
-[200,200,200]  | 3.9449e-03  | 1.6471e-02
-[160,160,160]  | 4.0947e-03  | 1.6413e-02
-[120,120,120]  | 3.9234e-03  | 1.6283e-02
-[80,80,80]     | 3.9758e-03  | 1.6506e-02
-[40,40,40]     | 3.9142e-03  | 1.6348e-02
-[20,20,20]     | 4.1302e-03  | 1.7006e-02
-[10,10,10]     | 4.3433e-03  | 1.7524e-02
-[5,5,5]        | 5.3154e-03  | 1.9716e-02
-[4,4,4]        | 5.4210e-03  | 1.9710e-02
-[2,2,2]        | 6.2667e-03  | 2.2568e-02
-[1,1,1]        | 7.3676e-03  | 2.6375e-02
-[]             | 7.3999e-03  | 2.6097e-02
+| Fitting-net size | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom) |
+| ---------------- | ----------------------- | ------------------------ |
+| [240,240,240]    | 3.9606e-03              | 1.6289e-02               |
+| [200,200,200]    | 3.9449e-03              | 1.6471e-02               |
+| [160,160,160]    | 4.0947e-03              | 1.6413e-02               |
+| [120,120,120]    | 3.9234e-03              | 1.6283e-02               |
+| [80,80,80]       | 3.9758e-03              | 1.6506e-02               |
+| [40,40,40]       | 3.9142e-03              | 1.6348e-02               |
+| [20,20,20]       | 4.1302e-03              | 1.7006e-02               |
+| [10,10,10]       | 4.3433e-03              | 1.7524e-02               |
+| [5,5,5]          | 5.3154e-03              | 1.9716e-02               |
+| [4,4,4]          | 5.4210e-03              | 1.9710e-02               |
+| [2,2,2]          | 6.2667e-03              | 2.2568e-02               |
+| [1,1,1]          | 7.3676e-03              | 2.6375e-02               |
+| []               | 7.3999e-03              | 2.6097e-02               |
 
 ### Embedding net size tuning form on Mg-Al: (Fitting-net size: [240,240,240])
 
-Embedding-net size | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom)
----|---|---
-[25,50,100]  | 3.9606e-03  | 1.6289e-02
-[20,40,80]   | 4.0292e-03  | 1.6555e-02
-[15,30,60]   | 4.1743e-03  | 1.7026e-02
-[10,20,40]   | 4.8138e-03  | 1.8516e-02
-[5,10,20]    | 5.6052e-03  | 2.0709e-02
-[4,8,16]     | 6.1335e-03  | 2.1450e-02
-[3,6,12]     | 6.6469e-03  | 2.3003e-02
-[2,4,8]      | 6.8222e-03  | 2.6318e-02
-[1,2,4]      | 1.0678e-02  | 3.9559e-02
+| Embedding-net size | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom) |
+| ------------------ | ----------------------- | ------------------------ |
+| [25,50,100]        | 3.9606e-03              | 1.6289e-02               |
+| [20,40,80]         | 4.0292e-03              | 1.6555e-02               |
+| [15,30,60]         | 4.1743e-03              | 1.7026e-02               |
+| [10,20,40]         | 4.8138e-03              | 1.8516e-02               |
+| [5,10,20]          | 5.6052e-03              | 2.0709e-02               |
+| [4,8,16]           | 6.1335e-03              | 2.1450e-02               |
+| [3,6,12]           | 6.6469e-03              | 2.3003e-02               |
+| [2,4,8]            | 6.8222e-03              | 2.6318e-02               |
+| [1,2,4]            | 1.0678e-02              | 3.9559e-02               |
diff --git a/doc/troubleshooting/howtoset_num_nodes.md b/doc/troubleshooting/howtoset_num_nodes.md
index 8a9beab857..532fa39e66 100644
--- a/doc/troubleshooting/howtoset_num_nodes.md
+++ b/doc/troubleshooting/howtoset_num_nodes.md
@@ -13,19 +13,21 @@ To enable MPI support for training, one should [install horovod](../install/inst
 MPI support for inference is not directly supported by DeePMD-kit, but indirectly supported by the third-party software. For example, [LAMMPS enables running simulations in parallel](https://docs.lammps.org/Developer_parallel.html) using the MPI parallel communication standard with distributed data. That software has to build against MPI.
 
 Set the number of processes with:
+
 ```bash
 mpirun -np $num_nodes dp
 ```
+
 Note that `mpirun` here should be the same as the MPI used to build software. For example, one can use `mpirun --version` and `lmp -h` to see if `mpirun` and LAMMPS has the same MPI version.
 
 Sometimes, `$num_nodes` and the nodes information can be directly given by the HPC scheduler system, if the MPI used here is the same as the MPI used to build the scheduler system. Otherwise, one have to manually assign these information.
 
 ## Parallelism between independent operators
 
-For CPU devices, TensorFlow use multiple streams to run independent operators (OP).
+For CPU devices, TensorFlow and PyTorch use multiple streams to run independent operators (OP).
 
 ```bash
-export TF_INTER_OP_PARALLELISM_THREADS=3
+export DP_INTER_OP_PARALLELISM_THREADS=3
 ```
 
 However, for GPU devices, TensorFlow uses only one compute stream and multiple copy streams.
@@ -33,20 +35,35 @@ Note that some of DeePMD-kit OPs do not have GPU support, so it is still encoura
 
 ## Parallelism within an individual operators
 
-For CPU devices, `TF_INTRA_OP_PARALLELISM_THREADS` controls parallelism within TensorFlow native OPs when TensorFlow is built against Eigen.
+For CPU devices, `DP_INTRA_OP_PARALLELISM_THREADS` controls parallelism within TensorFlow (when TensorFlow is built against Eigen) and PyTorch native OPs.
 
 ```bash
-export TF_INTRA_OP_PARALLELISM_THREADS=2
+export DP_INTRA_OP_PARALLELISM_THREADS=2
 ```
 
-`OMP_NUM_THREADS` is threads for OpenMP parallelism. It controls parallelism within TensorFlow native OPs when TensorFlow is built by Intel OneDNN and DeePMD-kit custom CPU OPs.
-It may also control parallelsim for NumPy when NumPy is built against OpenMP, so one who uses GPUs for training should also care this environmental variable.
+`OMP_NUM_THREADS` is the number of threads for OpenMP parallelism.
+It controls parallelism within TensorFlow (when TensorFlow is built upon Intel OneDNN) and PyTorch (when PyTorch is built upon OpenMP) native OPs and DeePMD-kit custom CPU OPs.
+It may also control parallelism for NumPy when NumPy is built against OpenMP, so one who uses GPUs for training should also care this environmental variable.
 
 ```bash
 export OMP_NUM_THREADS=2
 ```
 
-There are several other environmental variables for OpenMP, such as `KMP_BLOCKTIME`. See [Intel documentation](https://www.intel.com/content/www/us/en/developer/articles/technical/maximize-tensorflow-performance-on-cpu-considerations-and-recommendations-for-inference.html) for detailed information.
+There are several other environmental variables for OpenMP, such as `KMP_BLOCKTIME`.
+
+::::{tab-set}
+
+:::{tab-item} TensorFlow {{ tensorflow_icon }}
+
+See [Intel documentation](https://www.intel.com/content/www/us/en/developer/articles/technical/maximize-tensorflow-performance-on-cpu-considerations-and-recommendations-for-inference.html) for detailed information.
+
+:::
+:::{tab-item} PyTorch {{ pytorch_icon }}
+
+See [PyTorch documentation](https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html) for detailed information.
+
+:::
+::::
 
 ## Tune the performance
 
@@ -54,10 +71,11 @@ There is no one general parallel configuration that works for all situations, so
 
 Here are some empirical examples.
 If you wish to use 3 cores of 2 CPUs on one node, you may set the environmental variables and run DeePMD-kit as follows:
+
 ```bash
 export OMP_NUM_THREADS=3
-export TF_INTRA_OP_PARALLELISM_THREADS=3
-export TF_INTER_OP_PARALLELISM_THREADS=2
+export DP_INTRA_OP_PARALLELISM_THREADS=3
+export DP_INTER_OP_PARALLELISM_THREADS=2
 dp train input.json
 ```
 
@@ -65,8 +83,8 @@ For a node with 128 cores, it is recommended to start with the following variabl
 
 ```bash
 export OMP_NUM_THREADS=16
-export TF_INTRA_OP_PARALLELISM_THREADS=16
-export TF_INTER_OP_PARALLELISM_THREADS=8
+export DP_INTRA_OP_PARALLELISM_THREADS=16
+export DP_INTER_OP_PARALLELISM_THREADS=8
 ```
 
 Again, in general, one should make sure the product of the parallel numbers is less than or equal to the number of cores available.
diff --git a/doc/troubleshooting/index.md b/doc/troubleshooting/index.md
deleted file mode 100644
index a77d058811..0000000000
--- a/doc/troubleshooting/index.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# FAQs
-
-As a consequence of differences in computers or systems, problems may occur. Some common circumstances are listed as follows.
-In addition, some frequently asked questions are listed as follows.
-If other unexpected problems occur, you’re welcome to contact us for help.
-
-- [Model compatibility](model-compatability.md)
-- [Installation](installation.md)
-- [The temperature undulates violently during the early stages of MD](md-energy-undulation.md)
-- [MD: cannot run LAMMPS after installing a new version of DeePMD-kit](md-version-compatibility.md)
-- [Do we need to set rcut < half boxsize?](howtoset-rcut.md)
-- [How to set sel?](howtoset-sel.md)
-- [How to control the parallelism of a job?](howtoset_num_nodes.md)
-- [How to tune Fitting/embedding-net size?](howtoset_netsize.md)
-- [Why does a model have low precision?](precision.md)
diff --git a/doc/troubleshooting/installation.md b/doc/troubleshooting/installation.md
index bd52f88d80..1d18cc648b 100644
--- a/doc/troubleshooting/installation.md
+++ b/doc/troubleshooting/installation.md
@@ -1,15 +1,21 @@
 # Installation
+
 ## Inadequate versions of gcc/g++
+
 Sometimes you may use a gcc/g++ of version < 4.8. In this way, you can still compile all the parts of TensorFlow and most of the parts of DeePMD-kit, but i-Pi and GROMACS plugins will be disabled automatically. Or if you have a gcc/g++ of version > 4.8, say, 7.2.0, you may choose to use it by doing
+
 ```bash
 export CC=/path/to/gcc-7.2.0/bin/gcc
 export CXX=/path/to/gcc-7.2.0/bin/g++
 ```
 
 ## Build files left in DeePMD-kit
+
 When you try to build a second time when installing DeePMD-kit, files produced before may contribute to failure. Thus, you may clear them by
+
 ```bash
 cd build
 rm -r *
 ```
+
 and redo the `cmake` process.
diff --git a/doc/troubleshooting/md-version-compatibility.md b/doc/troubleshooting/md-version-compatibility.md
index 631cab92ea..25c1860ae2 100644
--- a/doc/troubleshooting/md-version-compatibility.md
+++ b/doc/troubleshooting/md-version-compatibility.md
@@ -1,10 +1,13 @@
 # MD: cannot run LAMMPS after installing a new version of DeePMD-kit
+
 This typically happens when you install a new version of DeePMD-kit and copy directly the generated `USER-DEEPMD` to a LAMMPS source code folder and re-install LAMMPS.
 
 To solve this problem, it suffices to first remove `USER-DEEPMD` from the LAMMPS source code by
+
 ```bash
 make no-user-deepmd
 ```
+
 and then install the new `USER-DEEPMD`.
 
 If this does not solve your problem, try to decompress the LAMMPS source tarball and install LAMMPS from scratch again, which typically should be very fast.
diff --git a/doc/troubleshooting/model-compatability.md b/doc/troubleshooting/model-compatability.md
index faab447da9..867a9aaf88 100644
--- a/doc/troubleshooting/model-compatability.md
+++ b/doc/troubleshooting/model-compatability.md
@@ -7,10 +7,11 @@ DeePMD-kit guarantees that the codes with the same major and minor revisions are
 One can execute `dp convert-from` to convert an old model to a new one.
 
 | Model version | v0.12 | v1.0 | v1.1 | v1.2 | v1.3 | v2.0 | v2.1 | v2.2 |
-|:-:|:-----------:|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|
-| Compatibility  | 😊 | 😊 | 😊 | 😊 | 😊 | 😄 | 😄 | 😄 |
+| :-----------: | :---: | :--: | :--: | :--: | :--: | :--: | :--: | :--: |
+| Compatibility |  😊   |  😊  |  😊  |  😊  |  😊  |  😄  |  😄  |  😄  |
 
 **Legend**:
+
 - 😄: The model is compatible with the DeePMD-kit package.
 - 😊: The model is incompatible with the DeePMD-kit package, but one can execute `dp convert-from` to convert an old model to v2.2.
 - 😢: The model is incompatible with the DeePMD-kit package, and there is no way to convert models.
diff --git a/doc/troubleshooting/precision.md b/doc/troubleshooting/precision.md
index 1b162d141c..56dbd51958 100644
--- a/doc/troubleshooting/precision.md
+++ b/doc/troubleshooting/precision.md
@@ -20,6 +20,7 @@ It is neccessary to check them carefully to avoid inconsistent data.
 
 The accuracy of models will not exceed the accuracy of training data, so the training data should reach enough accuracy.
 Here is a checklist for the accuracy of data:
+
 - SCF should converge to a suitable threshold for all points in the training data.
 - The convergence of the energy, force and virial with respect to the energy cutoff and k-spacing sample is checked.
 - Sometimes, QM software may generate unstable outliers, which should be removed.
diff --git a/examples/dos/data/heat-221-reformat/atomic_system/set.000/atom_dos.npy b/examples/dos/data/heat-221-reformat/atomic_system/set.000/atom_dos.npy
new file mode 100644
index 0000000000..22809c1068
Binary files /dev/null and b/examples/dos/data/heat-221-reformat/atomic_system/set.000/atom_dos.npy differ
diff --git a/examples/dos/data/heat-221-reformat/atomic_system/set.000/box.npy b/examples/dos/data/heat-221-reformat/atomic_system/set.000/box.npy
new file mode 100644
index 0000000000..6265bf150e
Binary files /dev/null and b/examples/dos/data/heat-221-reformat/atomic_system/set.000/box.npy differ
diff --git a/examples/dos/data/heat-221-reformat/atomic_system/set.000/coord.npy b/examples/dos/data/heat-221-reformat/atomic_system/set.000/coord.npy
new file mode 100644
index 0000000000..f33ce430bf
Binary files /dev/null and b/examples/dos/data/heat-221-reformat/atomic_system/set.000/coord.npy differ
diff --git a/examples/dos/data/heat-221-reformat/atomic_system/type.raw b/examples/dos/data/heat-221-reformat/atomic_system/type.raw
new file mode 100644
index 0000000000..de3c26ec4e
--- /dev/null
+++ b/examples/dos/data/heat-221-reformat/atomic_system/type.raw
@@ -0,0 +1,32 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
diff --git a/examples/dos/data/heat-221-reformat/atomic_system/type_map.raw b/examples/dos/data/heat-221-reformat/atomic_system/type_map.raw
new file mode 100644
index 0000000000..e267321d2c
--- /dev/null
+++ b/examples/dos/data/heat-221-reformat/atomic_system/type_map.raw
@@ -0,0 +1 @@
+Si
diff --git a/examples/dos/data/heat-221-reformat/global_system/set.000/box.npy b/examples/dos/data/heat-221-reformat/global_system/set.000/box.npy
new file mode 100644
index 0000000000..6265bf150e
Binary files /dev/null and b/examples/dos/data/heat-221-reformat/global_system/set.000/box.npy differ
diff --git a/examples/dos/data/heat-221-reformat/global_system/set.000/coord.npy b/examples/dos/data/heat-221-reformat/global_system/set.000/coord.npy
new file mode 100644
index 0000000000..f33ce430bf
Binary files /dev/null and b/examples/dos/data/heat-221-reformat/global_system/set.000/coord.npy differ
diff --git a/examples/dos/data/heat-221-reformat/global_system/set.000/dos.npy b/examples/dos/data/heat-221-reformat/global_system/set.000/dos.npy
new file mode 100644
index 0000000000..904b23e709
Binary files /dev/null and b/examples/dos/data/heat-221-reformat/global_system/set.000/dos.npy differ
diff --git a/examples/dos/data/heat-221-reformat/global_system/type.raw b/examples/dos/data/heat-221-reformat/global_system/type.raw
new file mode 100644
index 0000000000..de3c26ec4e
--- /dev/null
+++ b/examples/dos/data/heat-221-reformat/global_system/type.raw
@@ -0,0 +1,32 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
diff --git a/examples/dos/data/heat-221-reformat/global_system/type_map.raw b/examples/dos/data/heat-221-reformat/global_system/type_map.raw
new file mode 100644
index 0000000000..e267321d2c
--- /dev/null
+++ b/examples/dos/data/heat-221-reformat/global_system/type_map.raw
@@ -0,0 +1 @@
+Si
diff --git a/examples/dos/train/input.json b/examples/dos/train/input.json
index f2094c18a6..327a9c3aff 100644
--- a/examples/dos/train/input.json
+++ b/examples/dos/train/input.json
@@ -17,6 +17,7 @@
       ],
       "resnet_dt": false,
       "axis_neuron": 8,
+      "type_one_side": true,
       "precision": "float64",
       "seed": 1
     },
diff --git a/examples/dos/train/input_torch.json b/examples/dos/train/input_torch.json
new file mode 100644
index 0000000000..99bc106e7d
--- /dev/null
+++ b/examples/dos/train/input_torch.json
@@ -0,0 +1,75 @@
+{
+  "model": {
+    "type_map": [
+      "Si"
+    ],
+    "descriptor": {
+      "type": "se_a",
+      "sel": [
+        90
+      ],
+      "rcut_smth": 1.8,
+      "rcut": 6.0,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 8,
+      "type_one_side": true,
+      "precision": "float64",
+      "seed": 1
+    },
+    "fitting_net": {
+      "type": "dos",
+      "numb_dos": 250,
+      "neuron": [
+        120,
+        120,
+        120
+      ],
+      "resnet_dt": true,
+      "numb_fparam": 0,
+      "precision": "float64",
+      "seed": 1
+    }
+  },
+  "loss": {
+    "type": "dos",
+    "start_pref_dos": 0.0,
+    "limit_pref_dos": 0.0,
+    "start_pref_cdf": 0.0,
+    "limit_pref_cdf": 0.0,
+    "start_pref_ados": 1.0,
+    "limit_pref_ados": 1.0,
+    "start_pref_acdf": 0.0,
+    "limit_pref_acdf": 0.0
+  },
+  "learning_rate": {
+    "type": "exp",
+    "start_lr": 0.001,
+    "stop_lr": 1e-08
+  },
+  "training": {
+    "stop_batch": 100000,
+    "seed": 1,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "save_ckpt": "model.ckpt",
+    "disp_training": true,
+    "time_training": true,
+    "profiling": false,
+    "profiling_file": "timeline.json",
+    "training_data": {
+      "systems": [
+        "../data/heat-221-reformat/atomic_system/",
+        "../data/heat-221-reformat/global_system/"
+      ],
+      "set_prefix": "set",
+      "batch_size": 1
+    }
+  },
+  "_comment1": "that's all"
+}
diff --git a/examples/fparam/train/input.json b/examples/fparam/train/input.json
index a81051f459..1bb9b55f95 100644
--- a/examples/fparam/train/input.json
+++ b/examples/fparam/train/input.json
@@ -1,6 +1,9 @@
 {
   "_comment1": " model parameters",
   "model": {
+    "type_map": [
+      "Be"
+    ],
     "data_stat_nbatch": 1,
     "descriptor": {
       "type": "se_a",
@@ -16,6 +19,7 @@
       ],
       "resnet_dt": false,
       "axis_neuron": 8,
+      "type_one_side": true,
       "precision": "float64",
       "seed": 1
     },
diff --git a/examples/fparam/train/input_aparam.json b/examples/fparam/train/input_aparam.json
index fdc53706b9..93a34f7305 100644
--- a/examples/fparam/train/input_aparam.json
+++ b/examples/fparam/train/input_aparam.json
@@ -16,6 +16,7 @@
       ],
       "resnet_dt": false,
       "axis_neuron": 8,
+      "type_one_side": true,
       "precision": "float64",
       "seed": 1
     },
diff --git a/examples/infer_water/infer_water.c b/examples/infer_water/infer_water.c
index f4eeae147f..cf13f45e3a 100644
--- a/examples/infer_water/infer_water.c
+++ b/examples/infer_water/infer_water.c
@@ -32,5 +32,5 @@ int main() {
   free(v);
   free(ae);
   free(av);
-  free(dp);
+  DP_DeleteDeepPot(dp);
 }
diff --git a/examples/nopbc/train/input.json b/examples/nopbc/train/input.json
index 2c33791d45..491a7e1476 100644
--- a/examples/nopbc/train/input.json
+++ b/examples/nopbc/train/input.json
@@ -22,6 +22,7 @@
       ],
       "resnet_dt": false,
       "axis_neuron": 12,
+      "type_one_side": true,
       "seed": 1,
       "_comment2": " that's all"
     },
diff --git a/examples/spin/data_reformat/data_0/set.000/box.npy b/examples/spin/data_reformat/data_0/set.000/box.npy
new file mode 100644
index 0000000000..1f72eb7185
Binary files /dev/null and b/examples/spin/data_reformat/data_0/set.000/box.npy differ
diff --git a/examples/spin/data_reformat/data_0/set.000/coord.npy b/examples/spin/data_reformat/data_0/set.000/coord.npy
new file mode 100644
index 0000000000..4b60ae0e0b
Binary files /dev/null and b/examples/spin/data_reformat/data_0/set.000/coord.npy differ
diff --git a/examples/spin/data_reformat/data_0/set.000/energy.npy b/examples/spin/data_reformat/data_0/set.000/energy.npy
new file mode 100644
index 0000000000..8754b6dad2
Binary files /dev/null and b/examples/spin/data_reformat/data_0/set.000/energy.npy differ
diff --git a/examples/spin/data_reformat/data_0/set.000/force.npy b/examples/spin/data_reformat/data_0/set.000/force.npy
new file mode 100644
index 0000000000..e95173d561
Binary files /dev/null and b/examples/spin/data_reformat/data_0/set.000/force.npy differ
diff --git a/examples/spin/data_reformat/data_0/set.000/force_mag.npy b/examples/spin/data_reformat/data_0/set.000/force_mag.npy
new file mode 100644
index 0000000000..65bc1ef837
Binary files /dev/null and b/examples/spin/data_reformat/data_0/set.000/force_mag.npy differ
diff --git a/examples/spin/data_reformat/data_0/set.000/spin.npy b/examples/spin/data_reformat/data_0/set.000/spin.npy
new file mode 100644
index 0000000000..c426f1c7f6
Binary files /dev/null and b/examples/spin/data_reformat/data_0/set.000/spin.npy differ
diff --git a/examples/spin/data_reformat/data_0/type.raw b/examples/spin/data_reformat/data_0/type.raw
new file mode 100644
index 0000000000..d9664c7a22
--- /dev/null
+++ b/examples/spin/data_reformat/data_0/type.raw
@@ -0,0 +1,32 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
diff --git a/examples/spin/data_reformat/data_0/type_map.raw b/examples/spin/data_reformat/data_0/type_map.raw
new file mode 100644
index 0000000000..7eca995c31
--- /dev/null
+++ b/examples/spin/data_reformat/data_0/type_map.raw
@@ -0,0 +1,2 @@
+Ni
+O
diff --git a/examples/spin/data_reformat/data_1/set.000/box.npy b/examples/spin/data_reformat/data_1/set.000/box.npy
new file mode 100644
index 0000000000..1f72eb7185
Binary files /dev/null and b/examples/spin/data_reformat/data_1/set.000/box.npy differ
diff --git a/examples/spin/data_reformat/data_1/set.000/coord.npy b/examples/spin/data_reformat/data_1/set.000/coord.npy
new file mode 100644
index 0000000000..fc51107998
Binary files /dev/null and b/examples/spin/data_reformat/data_1/set.000/coord.npy differ
diff --git a/examples/spin/data_reformat/data_1/set.000/energy.npy b/examples/spin/data_reformat/data_1/set.000/energy.npy
new file mode 100644
index 0000000000..a0eecad8d8
Binary files /dev/null and b/examples/spin/data_reformat/data_1/set.000/energy.npy differ
diff --git a/examples/spin/data_reformat/data_1/set.000/force.npy b/examples/spin/data_reformat/data_1/set.000/force.npy
new file mode 100644
index 0000000000..ec4a05f8f2
Binary files /dev/null and b/examples/spin/data_reformat/data_1/set.000/force.npy differ
diff --git a/examples/spin/data_reformat/data_1/set.000/force_mag.npy b/examples/spin/data_reformat/data_1/set.000/force_mag.npy
new file mode 100644
index 0000000000..844df39b76
Binary files /dev/null and b/examples/spin/data_reformat/data_1/set.000/force_mag.npy differ
diff --git a/examples/spin/data_reformat/data_1/set.000/spin.npy b/examples/spin/data_reformat/data_1/set.000/spin.npy
new file mode 100644
index 0000000000..1444e35c5f
Binary files /dev/null and b/examples/spin/data_reformat/data_1/set.000/spin.npy differ
diff --git a/examples/spin/data_reformat/data_1/type.raw b/examples/spin/data_reformat/data_1/type.raw
new file mode 100644
index 0000000000..d9664c7a22
--- /dev/null
+++ b/examples/spin/data_reformat/data_1/type.raw
@@ -0,0 +1,32 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
diff --git a/examples/spin/data_reformat/data_1/type_map.raw b/examples/spin/data_reformat/data_1/type_map.raw
new file mode 100644
index 0000000000..7eca995c31
--- /dev/null
+++ b/examples/spin/data_reformat/data_1/type_map.raw
@@ -0,0 +1,2 @@
+Ni
+O
diff --git a/examples/spin/data_reformat/data_2/set.000/box.npy b/examples/spin/data_reformat/data_2/set.000/box.npy
new file mode 100644
index 0000000000..4e817ccff5
Binary files /dev/null and b/examples/spin/data_reformat/data_2/set.000/box.npy differ
diff --git a/examples/spin/data_reformat/data_2/set.000/coord.npy b/examples/spin/data_reformat/data_2/set.000/coord.npy
new file mode 100644
index 0000000000..aa515d0b6e
Binary files /dev/null and b/examples/spin/data_reformat/data_2/set.000/coord.npy differ
diff --git a/examples/spin/data_reformat/data_2/set.000/energy.npy b/examples/spin/data_reformat/data_2/set.000/energy.npy
new file mode 100644
index 0000000000..cd4efe3b55
Binary files /dev/null and b/examples/spin/data_reformat/data_2/set.000/energy.npy differ
diff --git a/examples/spin/data_reformat/data_2/set.000/force.npy b/examples/spin/data_reformat/data_2/set.000/force.npy
new file mode 100644
index 0000000000..5cf07333e0
Binary files /dev/null and b/examples/spin/data_reformat/data_2/set.000/force.npy differ
diff --git a/examples/spin/data_reformat/data_2/set.000/force_mag.npy b/examples/spin/data_reformat/data_2/set.000/force_mag.npy
new file mode 100644
index 0000000000..14b73ffb54
Binary files /dev/null and b/examples/spin/data_reformat/data_2/set.000/force_mag.npy differ
diff --git a/examples/spin/data_reformat/data_2/set.000/spin.npy b/examples/spin/data_reformat/data_2/set.000/spin.npy
new file mode 100644
index 0000000000..4bd1396c7d
Binary files /dev/null and b/examples/spin/data_reformat/data_2/set.000/spin.npy differ
diff --git a/examples/spin/data_reformat/data_2/type.raw b/examples/spin/data_reformat/data_2/type.raw
new file mode 100644
index 0000000000..d9664c7a22
--- /dev/null
+++ b/examples/spin/data_reformat/data_2/type.raw
@@ -0,0 +1,32 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
diff --git a/examples/spin/data_reformat/data_2/type_map.raw b/examples/spin/data_reformat/data_2/type_map.raw
new file mode 100644
index 0000000000..7eca995c31
--- /dev/null
+++ b/examples/spin/data_reformat/data_2/type_map.raw
@@ -0,0 +1,2 @@
+Ni
+O
diff --git a/examples/spin/se_e2_a/input.json b/examples/spin/se_e2_a/input_tf.json
similarity index 98%
rename from examples/spin/se_e2_a/input.json
rename to examples/spin/se_e2_a/input_tf.json
index f9e0988163..8d124d1fc4 100644
--- a/examples/spin/se_e2_a/input.json
+++ b/examples/spin/se_e2_a/input_tf.json
@@ -20,6 +20,7 @@
       ],
       "resnet_dt": false,
       "axis_neuron": 16,
+      "type_one_side": true,
       "precision": "float64",
       "seed": 1,
       "_comment2": " that's all"
diff --git a/examples/spin/se_e2_a/input_torch.json b/examples/spin/se_e2_a/input_torch.json
new file mode 100644
index 0000000000..37859b8402
--- /dev/null
+++ b/examples/spin/se_e2_a/input_torch.json
@@ -0,0 +1,90 @@
+{
+  "model": {
+    "type_map": [
+      "Ni",
+      "O"
+    ],
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        60,
+        60
+      ],
+      "rcut_smth": 5.4,
+      "rcut": 5.6,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "data_stat_nbatch": 10,
+    "spin": {
+      "use_spin": [
+        true,
+        false
+      ],
+      "virtual_scale": [
+        0.3140
+      ],
+      "_comment": " that's all"
+    },
+    "_comment": " that's all"
+  },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-8,
+    "_comment": "that's all"
+  },
+  "loss": {
+    "type": "ener_spin",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_fr": 1000,
+    "limit_pref_fr": 1,
+    "start_pref_fm": 1000,
+    "limit_pref_fm": 1,
+    "_comment": " that's all"
+  },
+  "training": {
+    "training_data": {
+      "systems": [
+        "../data_reformat/data_0",
+        "../data_reformat/data_1"
+      ],
+      "batch_size": 3,
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "../data_reformat/data_2"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment": "that's all"
+    },
+    "numb_steps": 100000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 10000,
+    "_comment": "that's all"
+  },
+  "_comment": "that's all"
+}
diff --git a/examples/water/d3/input.json b/examples/water/d3/input.json
index bbe7a2c8a9..e811920f5b 100644
--- a/examples/water/d3/input.json
+++ b/examples/water/d3/input.json
@@ -24,6 +24,7 @@
           ],
           "resnet_dt": false,
           "axis_neuron": 16,
+          "type_one_side": true,
           "precision": "float64",
           "seed": 1,
           "_comment2": " that's all"
diff --git a/examples/water/dpa2/input_torch.json b/examples/water/dpa2/input_torch.json
new file mode 100644
index 0000000000..108e75df62
--- /dev/null
+++ b/examples/water/dpa2/input_torch.json
@@ -0,0 +1,98 @@
+{
+  "_comment": "that's all",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "dpa2",
+      "tebd_dim": 8,
+      "repinit_rcut": 9.0,
+      "repinit_rcut_smth": 8.0,
+      "repinit_nsel": 120,
+      "repformer_rcut": 4.0,
+      "repformer_rcut_smth": 3.5,
+      "repformer_nsel": 40,
+      "repinit_neuron": [
+        25,
+        50,
+        100
+      ],
+      "repinit_axis_neuron": 12,
+      "repinit_activation": "tanh",
+      "repformer_nlayers": 12,
+      "repformer_g1_dim": 128,
+      "repformer_g2_dim": 32,
+      "repformer_attn2_hidden": 32,
+      "repformer_attn2_nhead": 4,
+      "repformer_attn1_hidden": 128,
+      "repformer_attn1_nhead": 4,
+      "repformer_axis_dim": 4,
+      "repformer_update_h2": false,
+      "repformer_update_g1_has_conv": true,
+      "repformer_update_g1_has_grrg": true,
+      "repformer_update_g1_has_drrd": true,
+      "repformer_update_g1_has_attn": true,
+      "repformer_update_g2_has_g1g1": true,
+      "repformer_update_g2_has_attn": true,
+      "repformer_attn2_has_gate": true,
+      "repformer_add_type_ebd_to_seq": false
+    },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "_comment": " that's all"
+  },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.0002,
+    "stop_lr": 3.51e-08,
+    "_comment": "that's all"
+  },
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0,
+    "_comment": " that's all"
+  },
+  "training": {
+    "stat_file": "./dpa2",
+    "training_data": {
+      "systems": [
+        "../data/data_0",
+        "../data/data_1",
+        "../data/data_2"
+      ],
+      "batch_size": 1,
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "../data/data_3"
+      ],
+      "batch_size": 1,
+      "_comment": "that's all"
+    },
+    "numb_steps": 1000000,
+    "warmup_steps": 0,
+    "gradient_max_norm": 5.0,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 2000,
+    "_comment": "that's all"
+  }
+}
diff --git a/examples/water/dplr/train/dw.json b/examples/water/dplr/train/dw.json
index 401e6272f5..038e07abef 100644
--- a/examples/water/dplr/train/dw.json
+++ b/examples/water/dplr/train/dw.json
@@ -20,6 +20,7 @@
       ],
       "resnet_dt": false,
       "axis_neuron": 8,
+      "type_one_side": true,
       "precision": "float64",
       "seed": 1,
       "_comment2": " that's all"
diff --git a/examples/water/dplr/train/ener.json b/examples/water/dplr/train/ener.json
index 7b47bfda55..809f1a5ece 100644
--- a/examples/water/dplr/train/ener.json
+++ b/examples/water/dplr/train/ener.json
@@ -20,6 +20,7 @@
       ],
       "resnet_dt": false,
       "axis_neuron": 8,
+      "type_one_side": true,
       "precision": "float64",
       "seed": 3458359619,
       "_comment2": " that's all"
diff --git a/examples/water/hybrid/input.json b/examples/water/hybrid/input.json
index 2315d26444..dd29c15d9d 100644
--- a/examples/water/hybrid/input.json
+++ b/examples/water/hybrid/input.json
@@ -23,6 +23,7 @@
           ],
           "resnet_dt": false,
           "axis_neuron": 4,
+          "type_one_side": true,
           "precision": "float64",
           "seed": 1,
           "_comment2": " that's all"
@@ -41,6 +42,7 @@
             20
           ],
           "resnet_dt": false,
+          "type_one_side": true,
           "precision": "float64",
           "seed": 1,
           "_comment3": " that's all"
diff --git a/examples/water/se_atten/input_torch.json b/examples/water/se_atten/input_torch.json
new file mode 100644
index 0000000000..7e9cf06f35
--- /dev/null
+++ b/examples/water/se_atten/input_torch.json
@@ -0,0 +1,89 @@
+{
+  "_comment": "that's all",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "dpa1",
+      "sel": 120,
+      "rcut_smth": 0.5,
+      "rcut": 6.0,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "tebd_dim": 8,
+      "axis_neuron": 16,
+      "type_one_side": true,
+      "attn": 128,
+      "attn_layer": 2,
+      "attn_dotr": true,
+      "attn_mask": false,
+      "post_ln": true,
+      "ffn": false,
+      "ffn_embed_dim": 1024,
+      "activation_function": "tanh",
+      "scaling_factor": 1.0,
+      "head_num": 1,
+      "normalize": true,
+      "temperature": 1.0
+    },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "_comment": " that's all"
+  },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-08,
+    "_comment": "that's all"
+  },
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0,
+    "_comment": " that's all"
+  },
+  "training": {
+    "stat_file": "./dpa1",
+    "training_data": {
+      "systems": [
+        "../data/data_0",
+        "../data/data_1",
+        "../data/data_2"
+      ],
+      "batch_size": 1,
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "../data/data_3"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment": "that's all"
+    },
+    "numb_steps": 1000000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "_comment": "that's all"
+  }
+}
diff --git a/examples/water/se_e2_a/input.json b/examples/water/se_e2_a/input.json
index 46c38ba834..0a24d11549 100644
--- a/examples/water/se_e2_a/input.json
+++ b/examples/water/se_e2_a/input.json
@@ -20,6 +20,7 @@
       ],
       "resnet_dt": false,
       "axis_neuron": 16,
+      "type_one_side": true,
       "precision": "float64",
       "seed": 1,
       "_comment2": " that's all"
diff --git a/examples/water/se_e2_a/input_torch.json b/examples/water/se_e2_a/input_torch.json
new file mode 100644
index 0000000000..fe424afed3
--- /dev/null
+++ b/examples/water/se_e2_a/input_torch.json
@@ -0,0 +1,81 @@
+{
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 0.50,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "type_one_side": true,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "data_stat_nbatch": 20,
+    "_comment": " that's all"
+  },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-8,
+    "_comment": "that's all"
+  },
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "_comment": " that's all"
+  },
+  "training": {
+    "stat_file": "./se_e2_a",
+    "training_data": {
+      "systems": [
+        "../data/data_0",
+        "../data/data_1",
+        "../data/data_2"
+      ],
+      "batch_size": 1,
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "../data/data_3"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment": "that's all"
+    },
+    "numb_steps": 100000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 10000,
+    "_comment": "that's all"
+  },
+  "_comment": "that's all"
+}
diff --git a/examples/water/se_e2_a_mixed_prec/input.json b/examples/water/se_e2_a_mixed_prec/input.json
index 0382b80b30..f1993f6cc0 100644
--- a/examples/water/se_e2_a_mixed_prec/input.json
+++ b/examples/water/se_e2_a_mixed_prec/input.json
@@ -20,6 +20,7 @@
       ],
       "resnet_dt": false,
       "axis_neuron": 16,
+      "type_one_side": true,
       "seed": 1,
       "_comment2": " that's all"
     },
diff --git a/examples/water/se_e2_r/input.json b/examples/water/se_e2_r/input.json
index 7fdd1835c6..783b4c7bdb 100644
--- a/examples/water/se_e2_r/input.json
+++ b/examples/water/se_e2_r/input.json
@@ -19,6 +19,7 @@
         20
       ],
       "resnet_dt": false,
+      "type_one_side": true,
       "precision": "float64",
       "seed": 1,
       "_comment2": " that's all"
diff --git a/examples/water/zbl/input.json b/examples/water/zbl/input.json
index 180a6cc8b5..cb5602d92d 100644
--- a/examples/water/zbl/input.json
+++ b/examples/water/zbl/input.json
@@ -24,6 +24,7 @@
       ],
       "resnet_dt": false,
       "axis_neuron": 16,
+      "type_one_side": true,
       "precision": "float64",
       "seed": 1,
       "_comment2": " that's all"
diff --git a/examples/water_multi_task/ener_dipole/input.json b/examples/water_multi_task/ener_dipole/input.json
index 9d00adac2e..45b49c5d90 100644
--- a/examples/water_multi_task/ener_dipole/input.json
+++ b/examples/water_multi_task/ener_dipole/input.json
@@ -20,6 +20,7 @@
       ],
       "resnet_dt": false,
       "axis_neuron": 16,
+      "type_one_side": true,
       "precision": "float64",
       "seed": 1,
       "_comment2": " that's all"
diff --git a/examples/water_multi_task/pytorch_example/input_torch.json b/examples/water_multi_task/pytorch_example/input_torch.json
new file mode 100644
index 0000000000..801848f077
--- /dev/null
+++ b/examples/water_multi_task/pytorch_example/input_torch.json
@@ -0,0 +1,133 @@
+{
+  "_comment": "that's all",
+  "model": {
+    "shared_dict": {
+      "type_map_all": [
+        "O",
+        "H"
+      ],
+      "sea_descriptor_1": {
+        "type": "se_e2_a",
+        "sel": [
+          46,
+          92
+        ],
+        "rcut_smth": 0.50,
+        "rcut": 6.00,
+        "neuron": [
+          25,
+          50,
+          100
+        ],
+        "resnet_dt": false,
+        "axis_neuron": 16,
+        "type_one_side": true,
+        "seed": 1,
+        "_comment": " that's all"
+      },
+      "_comment": "that's all"
+    },
+    "model_dict": {
+      "water_1": {
+        "type_map": "type_map_all",
+        "descriptor": "sea_descriptor_1",
+        "fitting_net": {
+          "neuron": [
+            240,
+            240,
+            240
+          ],
+          "resnet_dt": true,
+          "seed": 1,
+          "_comment": " that's all"
+        }
+      },
+      "water_2": {
+        "type_map": "type_map_all",
+        "descriptor": "sea_descriptor_1",
+        "fitting_net": {
+          "neuron": [
+            240,
+            240,
+            240
+          ],
+          "resnet_dt": true,
+          "seed": 1,
+          "_comment": " that's all"
+        }
+      }
+    }
+  },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.0002,
+    "decay_rate": 0.98,
+    "stop_lr": 3.51e-08,
+    "_comment": "that's all"
+  },
+  "loss_dict": {
+    "_comment": " that's all",
+    "water_1": {
+      "type": "ener",
+      "start_pref_e": 0.02,
+      "limit_pref_e": 1,
+      "start_pref_f": 1000,
+      "limit_pref_f": 1,
+      "start_pref_v": 0,
+      "limit_pref_v": 0
+    },
+    "water_2": {
+      "type": "ener",
+      "start_pref_e": 0.02,
+      "limit_pref_e": 1,
+      "start_pref_f": 1000,
+      "limit_pref_f": 1,
+      "start_pref_v": 0,
+      "limit_pref_v": 0
+    }
+  },
+  "training": {
+    "model_prob": {
+      "water_1": 0.5,
+      "water_2": 0.5
+    },
+    "data_dict": {
+      "water_1": {
+        "training_data": {
+          "systems": [
+            "../../water/data/data_0/",
+            "../../water/data/data_1/",
+            "../../water/data/data_2/"
+          ],
+          "batch_size": 1,
+          "_comment": "that's all"
+        },
+        "validation_data": {
+          "systems": [
+            "../../water/data/data_3/"
+          ],
+          "batch_size": 1,
+          "_comment": "that's all"
+        }
+      },
+      "water_2": {
+        "training_data": {
+          "systems": [
+            "../../water/data/data_0/",
+            "../../water/data/data_1/",
+            "../../water/data/data_2/"
+          ],
+          "batch_size": 1,
+          "_comment": "that's all"
+        }
+      }
+    },
+    "numb_steps": 100000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 100,
+    "_comment": "that's all"
+  }
+}
diff --git a/examples/water_tensor/dipole/dipole_input.json b/examples/water_tensor/dipole/dipole_input.json
index b42b9b8465..3feb1fbbc0 100644
--- a/examples/water_tensor/dipole/dipole_input.json
+++ b/examples/water_tensor/dipole/dipole_input.json
@@ -20,6 +20,7 @@
       ],
       "resnet_dt": false,
       "axis_neuron": 6,
+      "type_one_side": true,
       "precision": "float64",
       "seed": 1,
       "_comment2": " that's all"
@@ -57,16 +58,16 @@
   "training": {
     "training_data": {
       "systems": [
-        "./training_data/atomic_system",
-        "./training_data/global_system"
+        "./training_data_reformat/atomic_system",
+        "./training_data_reformat/global_system"
       ],
       "batch_size": "auto",
       "_comment8": "that's all"
     },
     "validation_data": {
       "systems": [
-        "./validation_data/atomic_system",
-        "./validation_data/global_system"
+        "./validation_data_reformat/atomic_system",
+        "./validation_data_reformat/global_system"
       ],
       "batch_size": 1,
       "numb_btch": 3,
diff --git a/examples/water_tensor/dipole/dipole_input_torch.json b/examples/water_tensor/dipole/dipole_input_torch.json
new file mode 100644
index 0000000000..f6903d3334
--- /dev/null
+++ b/examples/water_tensor/dipole/dipole_input_torch.json
@@ -0,0 +1,84 @@
+{
+  "_comment1": " model parameters",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "atom_exclude_types": [
+      1
+    ],
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 3.80,
+      "rcut": 4.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 6,
+      "type_one_side": true,
+      "precision": "float64",
+      "seed": 1,
+      "_comment2": " that's all"
+    },
+    "fitting_net": {
+      "type": "dipole",
+      "neuron": [
+        100,
+        100,
+        100
+      ],
+      "resnet_dt": true,
+      "precision": "float64",
+      "seed": 1,
+      "_comment3": " that's all"
+    },
+    "_comment4": " that's all"
+  },
+  "learning_rate": {
+    "type": "exp",
+    "start_lr": 0.01,
+    "decay_steps": 5000,
+    "_comment5": "that's all"
+  },
+  "loss": {
+    "type": "tensor",
+    "pref": 1.0,
+    "pref_atomic": 1.0,
+    "_comment6": " that's all"
+  },
+  "_comment7": " traing controls",
+  "training": {
+    "training_data": {
+      "systems": [
+        "./training_data_reformat/atomic_system",
+        "./training_data_reformat/global_system"
+      ],
+      "batch_size": "auto",
+      "_comment8": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "./validation_data_reformat/atomic_system",
+        "./validation_data_reformat/global_system"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment9": "that's all"
+    },
+    "numb_steps": 2000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "_comment10": "that's all"
+  },
+  "_comment11": "that's all"
+}
diff --git a/examples/water_tensor/dipole/training_data/atomic_system/set.000/atomic_dipole.npy b/examples/water_tensor/dipole/training_data/atomic_system/set.000/atomic_dipole.npy
deleted file mode 100644
index f88508c839..0000000000
Binary files a/examples/water_tensor/dipole/training_data/atomic_system/set.000/atomic_dipole.npy and /dev/null differ
diff --git a/examples/water_tensor/dipole/training_data/atomic_system/nopbc b/examples/water_tensor/dipole/training_data_reformat/atomic_system/nopbc
similarity index 100%
rename from examples/water_tensor/dipole/training_data/atomic_system/nopbc
rename to examples/water_tensor/dipole/training_data_reformat/atomic_system/nopbc
diff --git a/examples/water_tensor/dipole/training_data_reformat/atomic_system/set.000/atomic_dipole.npy b/examples/water_tensor/dipole/training_data_reformat/atomic_system/set.000/atomic_dipole.npy
new file mode 100644
index 0000000000..2cabc71e21
Binary files /dev/null and b/examples/water_tensor/dipole/training_data_reformat/atomic_system/set.000/atomic_dipole.npy differ
diff --git a/examples/water_tensor/dipole/training_data/atomic_system/set.000/box.npy b/examples/water_tensor/dipole/training_data_reformat/atomic_system/set.000/box.npy
similarity index 100%
rename from examples/water_tensor/dipole/training_data/atomic_system/set.000/box.npy
rename to examples/water_tensor/dipole/training_data_reformat/atomic_system/set.000/box.npy
diff --git a/examples/water_tensor/dipole/training_data/atomic_system/set.000/coord.npy b/examples/water_tensor/dipole/training_data_reformat/atomic_system/set.000/coord.npy
similarity index 100%
rename from examples/water_tensor/dipole/training_data/atomic_system/set.000/coord.npy
rename to examples/water_tensor/dipole/training_data_reformat/atomic_system/set.000/coord.npy
diff --git a/examples/water_tensor/dipole/training_data/atomic_system/type.raw b/examples/water_tensor/dipole/training_data_reformat/atomic_system/type.raw
similarity index 100%
rename from examples/water_tensor/dipole/training_data/atomic_system/type.raw
rename to examples/water_tensor/dipole/training_data_reformat/atomic_system/type.raw
diff --git a/examples/water_tensor/dipole/training_data/atomic_system/type_map.raw b/examples/water_tensor/dipole/training_data_reformat/atomic_system/type_map.raw
similarity index 100%
rename from examples/water_tensor/dipole/training_data/atomic_system/type_map.raw
rename to examples/water_tensor/dipole/training_data_reformat/atomic_system/type_map.raw
diff --git a/examples/water_tensor/dipole/training_data/global_system/nopbc b/examples/water_tensor/dipole/training_data_reformat/global_system/nopbc
similarity index 100%
rename from examples/water_tensor/dipole/training_data/global_system/nopbc
rename to examples/water_tensor/dipole/training_data_reformat/global_system/nopbc
diff --git a/examples/water_tensor/dipole/training_data/global_system/set.000/box.npy b/examples/water_tensor/dipole/training_data_reformat/global_system/set.000/box.npy
similarity index 100%
rename from examples/water_tensor/dipole/training_data/global_system/set.000/box.npy
rename to examples/water_tensor/dipole/training_data_reformat/global_system/set.000/box.npy
diff --git a/examples/water_tensor/dipole/training_data/global_system/set.000/coord.npy b/examples/water_tensor/dipole/training_data_reformat/global_system/set.000/coord.npy
similarity index 100%
rename from examples/water_tensor/dipole/training_data/global_system/set.000/coord.npy
rename to examples/water_tensor/dipole/training_data_reformat/global_system/set.000/coord.npy
diff --git a/examples/water_tensor/dipole/training_data/global_system/set.000/dipole.npy b/examples/water_tensor/dipole/training_data_reformat/global_system/set.000/dipole.npy
similarity index 100%
rename from examples/water_tensor/dipole/training_data/global_system/set.000/dipole.npy
rename to examples/water_tensor/dipole/training_data_reformat/global_system/set.000/dipole.npy
diff --git a/examples/water_tensor/dipole/training_data/global_system/type.raw b/examples/water_tensor/dipole/training_data_reformat/global_system/type.raw
similarity index 100%
rename from examples/water_tensor/dipole/training_data/global_system/type.raw
rename to examples/water_tensor/dipole/training_data_reformat/global_system/type.raw
diff --git a/examples/water_tensor/dipole/training_data/global_system/type_map.raw b/examples/water_tensor/dipole/training_data_reformat/global_system/type_map.raw
similarity index 100%
rename from examples/water_tensor/dipole/training_data/global_system/type_map.raw
rename to examples/water_tensor/dipole/training_data_reformat/global_system/type_map.raw
diff --git a/examples/water_tensor/dipole/validation_data/atomic_system/set.000/atomic_dipole.npy b/examples/water_tensor/dipole/validation_data/atomic_system/set.000/atomic_dipole.npy
deleted file mode 100644
index ebb484d7ff..0000000000
Binary files a/examples/water_tensor/dipole/validation_data/atomic_system/set.000/atomic_dipole.npy and /dev/null differ
diff --git a/examples/water_tensor/dipole/validation_data/atomic_system/nopbc b/examples/water_tensor/dipole/validation_data_reformat/atomic_system/nopbc
similarity index 100%
rename from examples/water_tensor/dipole/validation_data/atomic_system/nopbc
rename to examples/water_tensor/dipole/validation_data_reformat/atomic_system/nopbc
diff --git a/examples/water_tensor/dipole/validation_data_reformat/atomic_system/set.000/atomic_dipole.npy b/examples/water_tensor/dipole/validation_data_reformat/atomic_system/set.000/atomic_dipole.npy
new file mode 100644
index 0000000000..3a59af8138
Binary files /dev/null and b/examples/water_tensor/dipole/validation_data_reformat/atomic_system/set.000/atomic_dipole.npy differ
diff --git a/examples/water_tensor/dipole/validation_data/atomic_system/set.000/box.npy b/examples/water_tensor/dipole/validation_data_reformat/atomic_system/set.000/box.npy
similarity index 100%
rename from examples/water_tensor/dipole/validation_data/atomic_system/set.000/box.npy
rename to examples/water_tensor/dipole/validation_data_reformat/atomic_system/set.000/box.npy
diff --git a/examples/water_tensor/dipole/validation_data/atomic_system/set.000/coord.npy b/examples/water_tensor/dipole/validation_data_reformat/atomic_system/set.000/coord.npy
similarity index 100%
rename from examples/water_tensor/dipole/validation_data/atomic_system/set.000/coord.npy
rename to examples/water_tensor/dipole/validation_data_reformat/atomic_system/set.000/coord.npy
diff --git a/examples/water_tensor/dipole/validation_data/atomic_system/type.raw b/examples/water_tensor/dipole/validation_data_reformat/atomic_system/type.raw
similarity index 100%
rename from examples/water_tensor/dipole/validation_data/atomic_system/type.raw
rename to examples/water_tensor/dipole/validation_data_reformat/atomic_system/type.raw
diff --git a/examples/water_tensor/dipole/validation_data/atomic_system/type_map.raw b/examples/water_tensor/dipole/validation_data_reformat/atomic_system/type_map.raw
similarity index 100%
rename from examples/water_tensor/dipole/validation_data/atomic_system/type_map.raw
rename to examples/water_tensor/dipole/validation_data_reformat/atomic_system/type_map.raw
diff --git a/examples/water_tensor/dipole/validation_data/global_system/nopbc b/examples/water_tensor/dipole/validation_data_reformat/global_system/nopbc
similarity index 100%
rename from examples/water_tensor/dipole/validation_data/global_system/nopbc
rename to examples/water_tensor/dipole/validation_data_reformat/global_system/nopbc
diff --git a/examples/water_tensor/dipole/validation_data/global_system/set.000/box.npy b/examples/water_tensor/dipole/validation_data_reformat/global_system/set.000/box.npy
similarity index 100%
rename from examples/water_tensor/dipole/validation_data/global_system/set.000/box.npy
rename to examples/water_tensor/dipole/validation_data_reformat/global_system/set.000/box.npy
diff --git a/examples/water_tensor/dipole/validation_data/global_system/set.000/coord.npy b/examples/water_tensor/dipole/validation_data_reformat/global_system/set.000/coord.npy
similarity index 100%
rename from examples/water_tensor/dipole/validation_data/global_system/set.000/coord.npy
rename to examples/water_tensor/dipole/validation_data_reformat/global_system/set.000/coord.npy
diff --git a/examples/water_tensor/dipole/validation_data/global_system/set.000/dipole.npy b/examples/water_tensor/dipole/validation_data_reformat/global_system/set.000/dipole.npy
similarity index 100%
rename from examples/water_tensor/dipole/validation_data/global_system/set.000/dipole.npy
rename to examples/water_tensor/dipole/validation_data_reformat/global_system/set.000/dipole.npy
diff --git a/examples/water_tensor/dipole/validation_data/global_system/type.raw b/examples/water_tensor/dipole/validation_data_reformat/global_system/type.raw
similarity index 100%
rename from examples/water_tensor/dipole/validation_data/global_system/type.raw
rename to examples/water_tensor/dipole/validation_data_reformat/global_system/type.raw
diff --git a/examples/water_tensor/dipole/validation_data/global_system/type_map.raw b/examples/water_tensor/dipole/validation_data_reformat/global_system/type_map.raw
similarity index 100%
rename from examples/water_tensor/dipole/validation_data/global_system/type_map.raw
rename to examples/water_tensor/dipole/validation_data_reformat/global_system/type_map.raw
diff --git a/examples/water_tensor/polar/polar_input.json b/examples/water_tensor/polar/polar_input.json
index ca53182e79..66aea23a95 100644
--- a/examples/water_tensor/polar/polar_input.json
+++ b/examples/water_tensor/polar/polar_input.json
@@ -21,6 +21,7 @@
       ],
       "resnet_dt": false,
       "axis_neuron": 16,
+      "type_one_side": true,
       "precision": "float64",
       "seed": 1,
       "_comment2": " that's all"
@@ -62,16 +63,16 @@
   "training": {
     "training_data": {
       "systems": [
-        "./training_data/atomic_system",
-        "./training_data/global_system"
+        "./training_data_reformat/atomic_system",
+        "./training_data_reformat/global_system"
       ],
       "batch_size": "auto",
       "_comment8": "that's all"
     },
     "validation_data": {
       "systems": [
-        "./validation_data/atomic_system",
-        "./validation_data/global_system"
+        "./validation_data_reformat/atomic_system",
+        "./validation_data_reformat/global_system"
       ],
       "batch_size": 1,
       "numb_btch": 3,
diff --git a/examples/water_tensor/polar/polar_input_torch.json b/examples/water_tensor/polar/polar_input_torch.json
new file mode 100644
index 0000000000..b0329ef609
--- /dev/null
+++ b/examples/water_tensor/polar/polar_input_torch.json
@@ -0,0 +1,90 @@
+{
+  "_comment1": " model parameters",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "atom_exclude_types": [
+      1
+    ],
+    "data_stat_nbatch": 10,
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 5.80,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "type_one_side": true,
+      "precision": "float64",
+      "seed": 1,
+      "_comment2": " that's all"
+    },
+    "fitting_net": {
+      "type": "polar",
+      "fit_diag": false,
+      "neuron": [
+        100,
+        100,
+        100
+      ],
+      "resnet_dt": true,
+      "precision": "float64",
+      "seed": 1,
+      "_comment3": " that's all"
+    },
+    "_comment4": " that's all"
+  },
+
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.01,
+    "stop_lr": 3.51e-7,
+    "_comment5": "that's all"
+  },
+  "loss": {
+    "type": "tensor",
+    "pref_atomic": 1.0,
+    "pref": 1.0,
+    "_comment6": "that's all"
+  },
+
+  "_comment7": " traing controls",
+  "training": {
+    "training_data": {
+      "systems": [
+        "./training_data_reformat/atomic_system",
+        "./training_data_reformat/global_system"
+      ],
+      "batch_size": "auto",
+      "_comment8": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "./validation_data_reformat/atomic_system",
+        "./validation_data_reformat/global_system"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment9": "that's all"
+    },
+    "numb_steps": 2000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "_comment10": "that's all"
+  },
+
+  "_comment11": "that's all"
+}
diff --git a/examples/water_tensor/polar/training_data/atomic_system/set.000/atomic_polarizability.npy b/examples/water_tensor/polar/training_data/atomic_system/set.000/atomic_polarizability.npy
deleted file mode 100644
index 5d7f31e86d..0000000000
Binary files a/examples/water_tensor/polar/training_data/atomic_system/set.000/atomic_polarizability.npy and /dev/null differ
diff --git a/examples/water_tensor/polar/training_data_reformat/atomic_system/set.000/atomic_polarizability.npy b/examples/water_tensor/polar/training_data_reformat/atomic_system/set.000/atomic_polarizability.npy
new file mode 100644
index 0000000000..2aa2cdd4f2
Binary files /dev/null and b/examples/water_tensor/polar/training_data_reformat/atomic_system/set.000/atomic_polarizability.npy differ
diff --git a/examples/water_tensor/polar/training_data_reformat/atomic_system/set.000/box.npy b/examples/water_tensor/polar/training_data_reformat/atomic_system/set.000/box.npy
new file mode 100644
index 0000000000..a0ce7ef9a7
Binary files /dev/null and b/examples/water_tensor/polar/training_data_reformat/atomic_system/set.000/box.npy differ
diff --git a/examples/water_tensor/polar/training_data_reformat/atomic_system/set.000/coord.npy b/examples/water_tensor/polar/training_data_reformat/atomic_system/set.000/coord.npy
new file mode 100644
index 0000000000..baa2c0a7c3
Binary files /dev/null and b/examples/water_tensor/polar/training_data_reformat/atomic_system/set.000/coord.npy differ
diff --git a/examples/water_tensor/polar/training_data/atomic_system/type.raw b/examples/water_tensor/polar/training_data_reformat/atomic_system/type.raw
similarity index 100%
rename from examples/water_tensor/polar/training_data/atomic_system/type.raw
rename to examples/water_tensor/polar/training_data_reformat/atomic_system/type.raw
diff --git a/examples/water_tensor/polar/training_data/atomic_system/type_map.raw b/examples/water_tensor/polar/training_data_reformat/atomic_system/type_map.raw
similarity index 100%
rename from examples/water_tensor/polar/training_data/atomic_system/type_map.raw
rename to examples/water_tensor/polar/training_data_reformat/atomic_system/type_map.raw
diff --git a/examples/water_tensor/polar/training_data/global_system/set.000/box.npy b/examples/water_tensor/polar/training_data_reformat/global_system/set.000/box.npy
similarity index 100%
rename from examples/water_tensor/polar/training_data/global_system/set.000/box.npy
rename to examples/water_tensor/polar/training_data_reformat/global_system/set.000/box.npy
diff --git a/examples/water_tensor/polar/training_data/global_system/set.000/coord.npy b/examples/water_tensor/polar/training_data_reformat/global_system/set.000/coord.npy
similarity index 100%
rename from examples/water_tensor/polar/training_data/global_system/set.000/coord.npy
rename to examples/water_tensor/polar/training_data_reformat/global_system/set.000/coord.npy
diff --git a/examples/water_tensor/polar/training_data/global_system/set.000/polarizability.npy b/examples/water_tensor/polar/training_data_reformat/global_system/set.000/polarizability.npy
similarity index 100%
rename from examples/water_tensor/polar/training_data/global_system/set.000/polarizability.npy
rename to examples/water_tensor/polar/training_data_reformat/global_system/set.000/polarizability.npy
diff --git a/examples/water_tensor/polar/training_data/global_system/type.raw b/examples/water_tensor/polar/training_data_reformat/global_system/type.raw
similarity index 100%
rename from examples/water_tensor/polar/training_data/global_system/type.raw
rename to examples/water_tensor/polar/training_data_reformat/global_system/type.raw
diff --git a/examples/water_tensor/polar/training_data/global_system/type_map.raw b/examples/water_tensor/polar/training_data_reformat/global_system/type_map.raw
similarity index 100%
rename from examples/water_tensor/polar/training_data/global_system/type_map.raw
rename to examples/water_tensor/polar/training_data_reformat/global_system/type_map.raw
diff --git a/examples/water_tensor/polar/validation_data/atomic_system/set.000/atomic_polarizability.npy b/examples/water_tensor/polar/validation_data/atomic_system/set.000/atomic_polarizability.npy
deleted file mode 100644
index 4d0771e7e7..0000000000
Binary files a/examples/water_tensor/polar/validation_data/atomic_system/set.000/atomic_polarizability.npy and /dev/null differ
diff --git a/examples/water_tensor/polar/validation_data_reformat/atomic_system/set.000/atomic_polarizability.npy b/examples/water_tensor/polar/validation_data_reformat/atomic_system/set.000/atomic_polarizability.npy
new file mode 100644
index 0000000000..4cccd9c81a
Binary files /dev/null and b/examples/water_tensor/polar/validation_data_reformat/atomic_system/set.000/atomic_polarizability.npy differ
diff --git a/examples/water_tensor/polar/validation_data/atomic_system/set.000/box.npy b/examples/water_tensor/polar/validation_data_reformat/atomic_system/set.000/box.npy
similarity index 72%
rename from examples/water_tensor/polar/validation_data/atomic_system/set.000/box.npy
rename to examples/water_tensor/polar/validation_data_reformat/atomic_system/set.000/box.npy
index 382a14b7b6..a3809a0db5 100644
Binary files a/examples/water_tensor/polar/validation_data/atomic_system/set.000/box.npy and b/examples/water_tensor/polar/validation_data_reformat/atomic_system/set.000/box.npy differ
diff --git a/examples/water_tensor/polar/validation_data/atomic_system/set.000/coord.npy b/examples/water_tensor/polar/validation_data_reformat/atomic_system/set.000/coord.npy
similarity index 74%
rename from examples/water_tensor/polar/validation_data/atomic_system/set.000/coord.npy
rename to examples/water_tensor/polar/validation_data_reformat/atomic_system/set.000/coord.npy
index 779b44bad5..4e18cab336 100644
Binary files a/examples/water_tensor/polar/validation_data/atomic_system/set.000/coord.npy and b/examples/water_tensor/polar/validation_data_reformat/atomic_system/set.000/coord.npy differ
diff --git a/examples/water_tensor/polar/validation_data/atomic_system/type.raw b/examples/water_tensor/polar/validation_data_reformat/atomic_system/type.raw
similarity index 100%
rename from examples/water_tensor/polar/validation_data/atomic_system/type.raw
rename to examples/water_tensor/polar/validation_data_reformat/atomic_system/type.raw
diff --git a/examples/water_tensor/polar/validation_data/atomic_system/type_map.raw b/examples/water_tensor/polar/validation_data_reformat/atomic_system/type_map.raw
similarity index 100%
rename from examples/water_tensor/polar/validation_data/atomic_system/type_map.raw
rename to examples/water_tensor/polar/validation_data_reformat/atomic_system/type_map.raw
diff --git a/examples/water_tensor/polar/validation_data/global_system/set.000/box.npy b/examples/water_tensor/polar/validation_data_reformat/global_system/set.000/box.npy
similarity index 100%
rename from examples/water_tensor/polar/validation_data/global_system/set.000/box.npy
rename to examples/water_tensor/polar/validation_data_reformat/global_system/set.000/box.npy
diff --git a/examples/water_tensor/polar/validation_data/global_system/set.000/coord.npy b/examples/water_tensor/polar/validation_data_reformat/global_system/set.000/coord.npy
similarity index 100%
rename from examples/water_tensor/polar/validation_data/global_system/set.000/coord.npy
rename to examples/water_tensor/polar/validation_data_reformat/global_system/set.000/coord.npy
diff --git a/examples/water_tensor/polar/validation_data/global_system/set.000/polarizability.npy b/examples/water_tensor/polar/validation_data_reformat/global_system/set.000/polarizability.npy
similarity index 100%
rename from examples/water_tensor/polar/validation_data/global_system/set.000/polarizability.npy
rename to examples/water_tensor/polar/validation_data_reformat/global_system/set.000/polarizability.npy
diff --git a/examples/water_tensor/polar/validation_data/global_system/type.raw b/examples/water_tensor/polar/validation_data_reformat/global_system/type.raw
similarity index 100%
rename from examples/water_tensor/polar/validation_data/global_system/type.raw
rename to examples/water_tensor/polar/validation_data_reformat/global_system/type.raw
diff --git a/examples/water_tensor/polar/validation_data/global_system/type_map.raw b/examples/water_tensor/polar/validation_data_reformat/global_system/type_map.raw
similarity index 100%
rename from examples/water_tensor/polar/validation_data/global_system/type_map.raw
rename to examples/water_tensor/polar/validation_data_reformat/global_system/type_map.raw
diff --git a/examples/zinc_protein/zinc_se_a_mask.json b/examples/zinc_protein/zinc_se_a_mask.json
index 04f63aa4ed..8d3c747e08 100644
--- a/examples/zinc_protein/zinc_se_a_mask.json
+++ b/examples/zinc_protein/zinc_se_a_mask.json
@@ -27,6 +27,7 @@
       ],
       "resnet_dt": true,
       "axis_neuron": 16,
+      "type_one_side": true,
       "precision": "float64",
       "seed": 1,
       "_comment2": " that's all"
diff --git a/pyproject.toml b/pyproject.toml
index e91fd320f3..2eb4c2f3c9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,9 +1,10 @@
 [build-system]
 requires = [
+    # TODO: unpin the upper bound when scikit-build dynamic metadata API is stable
     # dynamic metadata API is still unstable
-    # TODO: unpin the upper bound when it is stable
-    "scikit-build-core>=0.5,<0.8,!=0.6.0",
+    "scikit-build-core>=0.5,<0.9,!=0.6.0",
     "packaging",
+    'tomli >= 1.1.0 ; python_version < "3.11"',
 ]
 build-backend = "backend.dp_backend"
 backend-path = ["."]
@@ -27,7 +28,7 @@ classifiers = [
     "Programming Language :: Python :: 3 :: Only",
     "Environment :: GPU :: NVIDIA CUDA :: 12 :: 12.2",
     "Intended Audience :: Science/Research",
-    "Programming Language :: Python :: 3.7",
+    "Programming Language :: Python :: 3.8",
     "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)",
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
     "Topic :: Scientific/Engineering :: Physics",
@@ -39,27 +40,95 @@ dependencies = [
     'scipy',
     'pyyaml',
     'dargs >= 0.4.1',
-    'python-hostlist >= 1.21',
     'typing_extensions; python_version < "3.8"',
     'importlib_metadata>=1.4; python_version < "3.8"',
     'h5py',
     'wcmatch',
     'packaging',
+    'ml_dtypes',
 ]
-requires-python = ">=3.7"
+requires-python = ">=3.8"
 keywords = ["deepmd"]
 
 [project.entry-points."lammps.plugins"]
-deepmd = "deepmd.lmp:get_op_dir"
+deepmd = "deepmd.tf.lmp:get_op_dir"
 
 [project.entry-points."dpgui"]
-"DeePMD-kit" = "deepmd_utils.utils.argcheck:gen_args"
+"DeePMD-kit" = "deepmd.utils.argcheck:gen_args"
+
+[project.entry-points."dpdata.plugins"]
+deepmd_driver = "deepmd.driver:DPDriver"
 
 [project.urls]
 Homepage = "https://github.com/deepmodeling/deepmd-kit"
 documentation = "https://docs.deepmodeling.com/projects/deepmd"
 repository = "https://github.com/deepmodeling/deepmd-kit"
 
+# Metadata below is dynamic. However, it still has static parts,
+# which can be read by the build backend.
+[tool.deepmd_build_backend.optional-dependencies]
+test = [
+    "dpdata>=0.2.7",
+    "ase",
+    "pytest",
+    "pytest-cov",
+    "pytest-sugar",
+    "dpgui",
+    "mendeleev",
+]
+docs = [
+    "sphinx>=3.1.1",
+    "sphinx_rtd_theme>=1.0.0rc1",
+    "sphinx_markdown_tables",
+    "myst-nb>=1.0.0rc0",
+    "myst-parser>=0.19.2",
+    "sphinx-design",
+    "breathe",
+    "exhale",
+    "numpydoc",
+    "ase",
+    "deepmodeling-sphinx>=0.1.0",
+    "dargs>=0.3.4",
+    "sphinx-argparse",
+    "pygments-lammps",
+    "sphinxcontrib-bibtex",
+]
+lmp = [
+    "lammps~=2023.8.2.3.0",
+]
+ipi = [
+    "i-PI",
+]
+gui = [
+    "dpgui",
+]
+cu11 = [
+    "nvidia-cuda-runtime-cu11",
+    "nvidia-cublas-cu11",
+    "nvidia-cufft-cu11",
+    "nvidia-curand-cu11",
+    "nvidia-cusolver-cu11",
+    "nvidia-cusparse-cu11",
+    "nvidia-cudnn-cu11<9",
+    "nvidia-cuda-nvcc-cu11",
+]
+cu12 = [
+    "nvidia-cuda-runtime-cu12",
+    "nvidia-cublas-cu12",
+    "nvidia-cufft-cu12",
+    "nvidia-curand-cu12",
+    "nvidia-cusolver-cu12",
+    "nvidia-cusparse-cu12",
+    "nvidia-cudnn-cu12<9",
+    "nvidia-cuda-nvcc-cu12",
+]
+torch = [
+    "torch>=2a",
+]
+
+[tool.deepmd_build_backend.scripts]
+dp = "deepmd.main:main"
+
 [tool.setuptools_scm]
 
 [tool.scikit-build]
@@ -82,7 +151,6 @@ sdist.exclude = [
 ]
 wheel.packages = [
     "deepmd",
-    "deepmd_utils",
 ]
 wheel.py-api = "py37"
 build-dir = "build/{wheel_tag}"
@@ -102,7 +170,7 @@ provider-path = "backend"
 provider = "scikit_build_core.metadata.fancy_pypi_readme"
 
 [[tool.scikit-build.generate]]
-path = "deepmd_utils/_version.py"
+path = "deepmd/_version.py"
 template = '''
 version = "${version}"
 '''
@@ -128,25 +196,22 @@ test-command = [
     "python -m deepmd -h",
     "dp -h",
     "dp_ipi",
-    "pytest {project}/source/tests/test_lammps.py"
+    "pytest {project}/source/tests/tf/test_lammps.py"
 ]
 test-extras = ["cpu", "test", "lmp", "ipi"]
 build = ["cp310-*"]
 skip = ["*-win32", "*-manylinux_i686", "*-musllinux*"]
-# TODO: uncomment when CUDA 11 is deprecated
+# TODO: uncomment to use the latest image when CUDA 11 is deprecated
 # manylinux-x86_64-image = "manylinux_2_28"
 manylinux-x86_64-image = "quay.io/pypa/manylinux_2_28_x86_64:2022-11-19-1b19e81"
 manylinux-aarch64-image = "manylinux_2_28"
 
 [tool.cibuildwheel.macos]
-environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update2", DP_ENABLE_IPI="1" }
+environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update3", DP_ENABLE_IPI="1" }
 before-all = [
-    """if [[ "$CIBW_BUILD" != *macosx_arm64* ]]; then brew install mpich; fi""",
-]
-before-build = [
-    """if [[ "$CIBW_BUILD" == *macosx_arm64* ]]; then python -m pip install "tensorflow-macos>=2.13.0rc0" --platform macosx_12_0_arm64 --no-deps --target=$RUNNER_TEMP/tensorflow; fi""",
+    """brew install mpich""",
 ]
-repair-wheel-command = """if [[ "$CIBW_BUILD" == *macosx_arm64* ]]; then rm -rf $RUNNER_TEMP/tensorflow; fi && delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel} --ignore-missing-dependencies"""
+repair-wheel-command = """delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel} --ignore-missing-dependencies"""
 
 [tool.cibuildwheel.linux]
 repair-wheel-command = "auditwheel repair --exclude libtensorflow_framework.so.2 --exclude libtensorflow_framework.so.1 --exclude libtensorflow_framework.so --exclude _pywrap_tensorflow_internal.so --exclude libtensorflow_cc.so.2 -w {dest_dir} {wheel}"
@@ -155,10 +220,13 @@ environment-pass = [
     "DP_VARIANT",
     "CUDA_VERSION",
     "DP_PKG_NAME",
+    "SETUPTOOLS_SCM_PRETEND_VERSION",
 ]
-environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update2", DP_ENABLE_IPI="1", MPI_HOME="/usr/lib64/mpich", PATH="/usr/lib64/mpich/bin:$PATH" }
+environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update3", DP_ENABLE_IPI="1", MPI_HOME="/usr/lib64/mpich", PATH="/usr/lib64/mpich/bin:$PATH" }
 before-all = [
     """if [ ! -z "${DP_PKG_NAME}" ]; then sed -i "s/name = \\"deepmd-kit\\"/name = \\"${DP_PKG_NAME}\\"/g" pyproject.toml; fi""",
+    # https://almalinux.org/blog/2023-12-20-almalinux-8-key-update/
+    """rpm --import https://repo.almalinux.org/almalinux/RPM-GPG-KEY-AlmaLinux""",
     """{ if [ "$(uname -m)" = "x86_64" ] ; then yum config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && yum install -y cuda-nvcc-${CUDA_VERSION/./-} cuda-cudart-devel-${CUDA_VERSION/./-}; fi }""",
     "yum install -y mpich-devel",
 ]
@@ -223,7 +291,10 @@ ignore = "D413, D416, D203, D107, D213"
 profile = "black"
 force_grid_wrap = 1
 
-[tool.ruff]
+[tool.ruff.format]
+docstring-code-format = true
+
+[tool.ruff.lint]
 select = [
     "E", # errors
     "F", # pyflakes
@@ -232,6 +303,9 @@ select = [
     "C4", # flake8-comprehensions
     "RUF", # ruff
     "NPY", # numpy
+    "TID251", # banned-api
+    "TID253", # banned-module-level-imports
+    "T20", # ban print
 ]
 
 ignore = [
@@ -251,8 +325,36 @@ ignore = [
 ]
 ignore-init-module-imports = true
 
-[tool.ruff.pydocstyle]
+exclude = [
+    "source/3rdparty/**",
+]
+
+[tool.ruff.lint.pydocstyle]
 convention = "numpy"
 
+[tool.ruff.lint.flake8-tidy-imports]
+banned-module-level-imports = [
+    "deepmd.tf",
+    "deepmd.pt",
+    "tensorflow",
+    "torch",
+]
+
+[tool.ruff.lint.flake8-tidy-imports.banned-api]
+"torch.testing.assert_allclose".msg = "Use `torch.testing.assert_close()` instead, see https://github.com/pytorch/pytorch/issues/61844."
+
+[tool.ruff.lint.extend-per-file-ignores]
+# Also ignore `E402` in all `__init__.py` files.
+"deepmd/tf/**" = ["TID253"]
+"deepmd/pt/**" = ["TID253"]
+"source/tests/tf/**" = ["TID253"]
+"source/tests/pt/**" = ["TID253"]
+"source/ipi/tests/**" = ["TID253"]
+"source/lmp/tests/**" = ["TID253"]
+"**/*.ipynb" = ["T20"]  # printing in a nb file is expected
+
 [tool.pytest.ini_options]
 markers = "run"
+
+[tool.coverage.run]
+plugins = ["source.3rdparty.coverage_plugins.jit_plugin"]
diff --git a/source/3rdparty/README.md b/source/3rdparty/README.md
new file mode 100644
index 0000000000..ac9cfd4edc
--- /dev/null
+++ b/source/3rdparty/README.md
@@ -0,0 +1,7 @@
+# 3rd-party source codes
+
+| Name                      | Repository                         | Version | License |
+| ------------------------- | ---------------------------------- | ------- | ------- |
+| json                      | https://github.com/nlohmann/json   | 3.9.1   | MIT     |
+| Implib.so                 | https://github.com/yugr/Implib.so  | 0ddaa71 | MIT     |
+| coverage_plugins          | https://github.com/pytorch/pytorch | 2.2.0   | BSD-3   |
diff --git a/source/tests/data_dp_mask/nopbc b/source/3rdparty/coverage_plugins/__init__.py
similarity index 100%
rename from source/tests/data_dp_mask/nopbc
rename to source/3rdparty/coverage_plugins/__init__.py
diff --git a/source/3rdparty/coverage_plugins/jit_plugin.py b/source/3rdparty/coverage_plugins/jit_plugin.py
new file mode 100644
index 0000000000..e6d0786a32
--- /dev/null
+++ b/source/3rdparty/coverage_plugins/jit_plugin.py
@@ -0,0 +1,80 @@
+"""
+This coverage plug-in attempts to cover JIT'd functions and methods that were previously missed in code coverage. Any
+function and method that was passed through/decorated with torch.jit.script or torch.jit.script_method should now be
+marked covered when coverage is run with this plug-in.
+
+DISCLAIMER: note that this will mark the entire JIT'd function/method as covered without seeking proof that the
+compiled code has been executed. This means that even if the code chunk is merely compiled and not run, it will get
+marked as covered.
+"""
+
+from inspect import (
+    getsourcefile,
+    getsourcelines,
+    isclass,
+    iscode,
+    isfunction,
+    ismethod,
+    ismodule,
+)
+from time import time
+from typing import Any
+
+from coverage import CoverageData, CoveragePlugin  # type: ignore[import]
+
+# All coverage stats resulting from this plug-in will be in a separate .coverage file that should be merged later with
+# `coverage combine`. The convention seems to be .coverage.dotted.suffix based on the following link:
+# https://coverage.readthedocs.io/en/coverage-5.5/cmd.html#combining-data-files-coverage-combine
+cov_data = CoverageData(basename=f".coverage.jit.{time()}")
+
+
+def is_not_builtin_class(obj: Any) -> bool:
+    return isclass(obj) and not type(obj).__module__ == "builtins"
+
+
+class JitPlugin(CoveragePlugin):  # type: ignore[misc, no-any-unimported]
+    """
+    dynamic_context is an overridden function that gives us access to every frame run during the coverage process. We
+    look for when the function being run is `should_drop`, as all functions that get passed into `should_drop` will be
+    compiled and thus should be marked as covered.
+    """
+
+    def dynamic_context(self, frame: Any) -> None:
+        if frame.f_code.co_name == "should_drop":
+            obj = frame.f_locals["fn"]
+            # The many conditions in the if statement below are based on the accepted arguments to getsourcefile. Based
+            # on its documentation (https://docs.python.org/3/library/inspect.html#inspect.getsourcefile), the argument
+            # must be a module, class, method, function, traceback, frame, or code object AND it cannot be a built-in
+            # module, class, or function.
+            # Currently, we DO NOT include tracebacks or frames as they should not be JIT'd, and we have not checked for
+            # built-in modules or functions as those do not seem to be JIT'd either.
+            if (
+                is_not_builtin_class(obj)
+                or ismodule(obj)
+                or ismethod(obj)
+                or isfunction(obj)
+                or iscode(obj)
+            ):
+                filename = getsourcefile(obj)
+                # We don't want to report for filename = None
+                if filename:
+                    # TODO: Because torch.jit._IgnoreContextManager relies on Python's `exec` method
+                    # which doesn't generate source codelines, getsourcelines(obj) fails. For now,
+                    # we just ignore the exception until we figure out a better way to
+                    # implement torch.jit._IgnoreContextManager.
+                    try:
+                        sourcelines, starting_lineno = getsourcelines(obj)
+                    except OSError:
+                        pass
+                    else:
+                        line_data = {
+                            filename: range(
+                                starting_lineno, starting_lineno + len(sourcelines)
+                            )
+                        }
+                        cov_data.add_lines(line_data)
+        super().dynamic_context(frame)
+
+
+def coverage_init(reg: Any, options: Any) -> None:
+    reg.add_dynamic_context(JitPlugin())
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index c1c9b8e7fe..2aa7bb93e1 100644
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -2,6 +2,8 @@
 cmake_minimum_required(VERSION 3.16)
 project(DeePMD)
 
+option(ENABLE_TENSORFLOW "Enable TensorFlow interface" OFF)
+option(ENABLE_PYTORCH "Enable PyTorch interface" OFF)
 option(BUILD_TESTING "Build test and enable converage" OFF)
 set(DEEPMD_C_ROOT
     ""
@@ -131,6 +133,7 @@ if(INSTALL_TENSORFLOW)
   set(USE_TF_PYTHON_LIBS TRUE)
 endif(INSTALL_TENSORFLOW)
 if(USE_TF_PYTHON_LIBS)
+  set(ENABLE_TENSORFLOW TRUE)
   if(NOT "$ENV{CIBUILDWHEEL}" STREQUAL "1")
     find_package(
       Python
@@ -141,11 +144,76 @@ if(USE_TF_PYTHON_LIBS)
     set(PYTHON_INCLUDE_DIRS ${PYTHON_INCLUDE_DIR})
   endif()
 endif(USE_TF_PYTHON_LIBS)
+if(TENSORFLOW_ROOT)
+  set(ENABLE_TENSORFLOW TRUE)
+endif()
 
 # find tensorflow, I need tf abi info
-if(NOT DEEPMD_C_ROOT)
+if(ENABLE_TENSORFLOW AND NOT DEEPMD_C_ROOT)
   find_package(tensorflow REQUIRED)
 endif()
+if(BUILD_CPP_IF
+   AND USE_PT_PYTHON_LIBS
+   AND NOT CMAKE_CROSSCOMPILING
+   AND NOT SKBUILD)
+  find_package(
+    Python
+    COMPONENTS Interpreter
+    REQUIRED)
+  execute_process(
+    COMMAND ${Python_EXECUTABLE} -c
+            "import torch;print(torch.utils.cmake_prefix_path)"
+    WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
+    OUTPUT_VARIABLE PYTORCH_CMAKE_PREFIX_PATH
+    RESULT_VARIABLE PYTORCH_CMAKE_PREFIX_PATH_RESULT_VAR
+    ERROR_VARIABLE PYTORCH_CMAKE_PREFIX_PATH_ERROR_VAR
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
+  if(NOT ${PYTORCH_CMAKE_PREFIX_PATH_RESULT_VAR} EQUAL 0)
+    message(
+      FATAL_ERROR
+        "Cannot determine PyTorch CMake prefix path, error code: $PYTORCH_CMAKE_PREFIX_PATH_RESULT_VAR}, error message: ${PYTORCH_CMAKE_PREFIX_PATH_ERROR_VAR}"
+    )
+  endif()
+  list(APPEND CMAKE_PREFIX_PATH ${PYTORCH_CMAKE_PREFIX_PATH})
+endif()
+if(ENABLE_PYTORCH AND NOT DEEPMD_C_ROOT)
+  find_package(Torch REQUIRED)
+  string(REGEX MATCH "_GLIBCXX_USE_CXX11_ABI=([0-9]+)" CXXABI_PT_MATCH
+               "${TORCH_CXX_FLAGS}")
+  if(CXXABI_PT_MATCH)
+    message(STATUS "PyTorch CXX11 ABI: ${CMAKE_MATCH_1}")
+    if(DEFINED OP_CXX_ABI)
+      if(NOT ${CMAKE_MATCH_1} EQUAL ${OP_CXX_ABI})
+        message(
+          FATAL_ERROR
+            "PyTorch CXX11 ABI mismatch TensorFlow: ${CMAKE_MATCH_1} != ${OP_CXX_ABI}"
+        )
+      endif()
+    else()
+      set(OP_CXX_ABI ${CMAKE_MATCH_1})
+      add_definitions(-D_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI})
+    endif()
+  endif()
+  # get torch directory
+  set(PyTorch_LIBRARY_PATH ${TORCH_INCLUDE_DIRS}/../lib)
+  # trick: use TensorFlow_LIBRARY_PATH as general backend library paths
+  list(APPEND TensorFlow_LIBRARY_PATH ${PyTorch_LIBRARY_PATH})
+endif()
+# log enabled backends
+if(NOT DEEPMD_C_ROOT)
+  message(STATUS "Enabled backends:")
+  if(ENABLE_TENSORFLOW)
+    message(STATUS "- TensorFlow")
+  endif()
+  if(ENABLE_PYTORCH)
+    message(STATUS "- PyTorch")
+  endif()
+  if(NOT ENABLE_TENSORFLOW
+     AND NOT ENABLE_PYTORCH
+     AND NOT BUILD_PY_IF)
+    message(FATAL_ERROR "No backend is enabled.")
+  endif()
+endif()
 
 # find threads
 find_package(Threads)
@@ -231,12 +299,19 @@ if(DEEPMD_C_ROOT)
                IMPORTED_LOCATION "${deepmd_c}"
                INTERFACE_INCLUDE_DIRECTORIES "${DEEPMD_INCLUDE_C_DIR}/deepmd")
   # use variable for TF path to set deepmd_c path
+  set(TENSORFLOW_ROOT "${DEEPMD_C_ROOT}")
   set(TensorFlow_LIBRARY_PATH "${DEEPMD_C_ROOT}/lib")
   set(TENSORFLOW_INCLUDE_DIRS "${DEEPMD_C_ROOT}/include")
+  set(TORCH_LIBRARIES "${DEEPMD_C_ROOT}/lib/libtorch.so")
 endif()
 
 if(NOT DEEPMD_C_ROOT)
-  add_subdirectory(op/)
+  if(ENABLE_TENSORFLOW)
+    add_subdirectory(op/tf/)
+  endif()
+  if(ENABLE_PYTORCH)
+    add_subdirectory(op/pt/)
+  endif()
   add_subdirectory(lib/)
 endif()
 if(BUILD_PY_IF)
@@ -253,10 +328,7 @@ if(BUILD_CPP_IF)
   endif()
   if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.8)
     # add_subdirectory (md/)
-    if(ENABLE_IPI
-       OR NOT BUILD_PY_IF
-       AND NOT DEEPMD_C_ROOT)
-      # ipi has a dependency on libdeepmd
+    if(ENABLE_IPI OR NOT BUILD_PY_IF)
       add_subdirectory(ipi/)
     endif()
     if(NOT BUILD_PY_IF)
diff --git a/source/api_c/include/c_api.h b/source/api_c/include/c_api.h
index d05f790bf9..911813e428 100644
--- a/source/api_c/include/c_api.h
+++ b/source/api_c/include/c_api.h
@@ -25,6 +25,13 @@ extern DP_Nlist* DP_NewNlist(int inum_,
                              int* numneigh_,
                              int** firstneigh_);
 
+/**
+ * @brief Delete a neighbor list.
+ *
+ * @param nl Neighbor list to delete.
+ */
+extern void DP_DeleteNlist(DP_Nlist* nl);
+
 /**
  * @brief Check if there is any exceptions throw.
  *
@@ -72,6 +79,13 @@ extern DP_DeepPot* DP_NewDeepPotWithParam2(const char* c_model,
                                            const char* c_file_content,
                                            const int size_file_content);
 
+/**
+ * @brief Delete a Deep Potential.
+ *
+ * @param dp Deep Potential to delete.
+ */
+extern void DP_DeleteDeepPot(DP_DeepPot* dp);
+
 /**
  * @brief Evaluate the energy, force and virial by using a DP. (double version)
  * @attention The number of frames is assumed to be 1.
@@ -491,6 +505,13 @@ extern DP_DeepPotModelDevi* DP_NewDeepPotModelDeviWithParam(
     const int n_file_contents,
     const int* size_file_contents);
 
+/**
+ * @brief Delete a Deep Potential Model Deviation.
+ *
+ * @param dp Deep Potential to delete.
+ */
+extern void DP_DeleteDeepPotModelDevi(DP_DeepPotModelDevi* dp);
+
 /**
  * @brief Evaluate the energy, force and virial by using a DP model deviation
  *with neighbor list. (double version)
@@ -792,6 +813,13 @@ extern DP_DeepTensor* DP_NewDeepTensorWithParam(const char* c_model,
                                                 const int gpu_rank,
                                                 const char* c_name_scope);
 
+/**
+ * @brief Delete a Deep Tensor.
+ *
+ * @param dp Deep Tensor to delete.
+ */
+extern void DP_DeleteDeepTensor(DP_DeepTensor* dt);
+
 /**
  * @brief Evaluate the tensor by using a DP. (double version)
  * @param[in] dt The Deep Tensor to use.
@@ -1094,25 +1122,32 @@ extern DP_DipoleChargeModifier* DP_NewDipoleChargeModifier(const char* c_model);
 extern DP_DipoleChargeModifier* DP_NewDipoleChargeModifierWithParam(
     const char* c_model, const int gpu_rank, const char* c_name_scope);
 
+/**
+ * @brief Delete a Dipole Charge Modifier.
+ *
+ * @param dp Dipole Charge Modifier to delete.
+ */
+extern void DP_DeleteDipoleChargeModifier(DP_DipoleChargeModifier* dcm);
+
 /**
  * @brief Evaluate the force and virial correction by using a dipole charge
  *modifier with the neighbor list. (double version)
  * @param[in] dcm The dipole charge modifier to use.
  * @param[in] natoms The number of atoms.
- * @param[in] coord The coordinates of atoms. The array should be of size natoms
+ * @param[in] coord The coordinates of atoms. The array should be of size nall
  *x 3.
- * @param[in] atype The atom types. The array should contain natoms ints.
+ * @param[in] atype The atom types. The array should contain nall ints.
  * @param[in] cell The cell of the region. The array should be of size 9. Pass
  *NULL if pbc is not used.
  * @param[in] pairs The pairs of atoms. The list should contain npairs pairs of
  *ints.
  * @param[in] npairs The number of pairs.
  * @param[in] delef_ The electric field on each atom. The array should be of
- *size nframes x natoms x 3.
+ *size nframes x nloc x 3.
  * @param[in] nghost The number of ghost atoms.
  * @param[in] nlist The neighbor list.
  * @param[out] dfcorr_ Output force correction. The array should be of size
- *natoms x 3.
+ *nall x 3.
  * @param[out] dvcorr_ Output virial correction. The array should be of size 9.
  * @warning The output arrays should be allocated before calling this function.
  *Pass NULL if not required.
diff --git a/source/api_c/include/deepmd.hpp b/source/api_c/include/deepmd.hpp
index 06a50ee3f0..16b8f08cad 100644
--- a/source/api_c/include/deepmd.hpp
+++ b/source/api_c/include/deepmd.hpp
@@ -522,6 +522,7 @@ struct InputNlist {
         nl(DP_NewNlist(inum_, ilist_, numneigh_, firstneigh_)) {
     DP_CHECK_OK(DP_NlistCheckOK, nl);
   };
+  ~InputNlist() { DP_DeleteNlist(nl); };
   /// @brief C API neighbor list.
   DP_Nlist *nl;
   /// @brief Number of core region atoms
@@ -556,6 +557,8 @@ void inline convert_nlist(InputNlist &to_nlist,
     to_nlist.numneigh[ii] = from_nlist[ii].size();
     to_nlist.firstneigh[ii] = &from_nlist[ii][0];
   }
+  // delete the original nl
+  DP_DeleteNlist(to_nlist.nl);
   to_nlist.nl = DP_NewNlist(to_nlist.inum, to_nlist.ilist, to_nlist.numneigh,
                             to_nlist.firstneigh);
 }
@@ -568,7 +571,7 @@ class DeepPot {
    * @brief DP constructor without initialization.
    **/
   DeepPot() : dp(nullptr){};
-  ~DeepPot(){};
+  ~DeepPot() { DP_DeleteDeepPot(dp); };
   /**
    * @brief DP constructor with initialization.
    * @param[in] model The name of the frozen model file.
@@ -579,7 +582,15 @@ class DeepPot {
           const int &gpu_rank = 0,
           const std::string &file_content = "")
       : dp(nullptr) {
-    init(model, gpu_rank, file_content);
+    try {
+      init(model, gpu_rank, file_content);
+    } catch (...) {
+      // Clean up and rethrow, as the destructor will not be called
+      if (dp) {
+        DP_DeleteDeepPot(dp);
+      }
+      throw;
+    }
   };
   /**
    * @brief Initialize the DP.
@@ -1100,13 +1111,21 @@ class DeepPotModelDevi {
    * @brief DP model deviation constructor without initialization.
    **/
   DeepPotModelDevi() : dp(nullptr){};
-  ~DeepPotModelDevi(){};
+  ~DeepPotModelDevi() { DP_DeleteDeepPotModelDevi(dp); };
   /**
    * @brief DP model deviation constructor with initialization.
    * @param[in] models The names of the frozen model file.
    **/
   DeepPotModelDevi(const std::vector<std::string> &models) : dp(nullptr) {
-    init(models);
+    try {
+      init(models);
+    } catch (...) {
+      // Clean up and rethrow, as the destructor will not be called
+      if (dp) {
+        DP_DeleteDeepPotModelDevi(dp);
+      }
+      throw;
+    }
   };
   /**
    * @brief Initialize the DP model deviation.
@@ -1523,7 +1542,7 @@ class DeepTensor {
    * @brief Deep Tensor constructor without initialization.
    **/
   DeepTensor() : dt(nullptr){};
-  ~DeepTensor(){};
+  ~DeepTensor() { DP_DeleteDeepTensor(dt); };
   /**
    * @brief DeepTensor constructor with initialization.
    * @param[in] model The name of the frozen model file.
@@ -1532,7 +1551,15 @@ class DeepTensor {
              const int &gpu_rank = 0,
              const std::string &name_scope = "")
       : dt(nullptr) {
-    init(model, gpu_rank, name_scope);
+    try {
+      init(model, gpu_rank, name_scope);
+    } catch (...) {
+      // Clean up and rethrow, as the destructor will not be called
+      if (dt) {
+        DP_DeleteDeepTensor(dt);
+      }
+      throw;
+    }
   };
   /**
    * @brief Initialize the DeepTensor.
@@ -1891,7 +1918,7 @@ class DipoleChargeModifier {
    * @brief DipoleChargeModifier constructor without initialization.
    **/
   DipoleChargeModifier() : dcm(nullptr){};
-  ~DipoleChargeModifier(){};
+  ~DipoleChargeModifier() { DP_DeleteDipoleChargeModifier(dcm); };
   /**
    * @brief DipoleChargeModifier constructor with initialization.
    * @param[in] model The name of the frozen model file.
@@ -1902,7 +1929,15 @@ class DipoleChargeModifier {
                        const int &gpu_rank = 0,
                        const std::string &name_scope = "")
       : dcm(nullptr) {
-    init(model, gpu_rank, name_scope);
+    try {
+      init(model, gpu_rank, name_scope);
+    } catch (...) {
+      // Clean up and rethrow, as the destructor will not be called
+      if (dcm) {
+        DP_DeleteDipoleChargeModifier(dcm);
+      }
+      throw;
+    }
   };
   /**
    * @brief Initialize the DipoleChargeModifier.
@@ -1930,13 +1965,13 @@ class DipoleChargeModifier {
    * @param[out] dfcorr_ The force correction on each atom.
    * @param[out] dvcorr_ The virial correction.
    * @param[in] dcoord_ The coordinates of atoms. The array should be of size
-   *natoms x 3.
-   * @param[in] datype_ The atom types. The list should contain natoms ints.
+   *nall x 3.
+   * @param[in] datype_ The atom types. The list should contain nall ints.
    * @param[in] dbox The cell of the region. The array should be of size 9.
    * @param[in] pairs The pairs of atoms. The list should contain npairs pairs
    *of ints.
    * @param[in] delef_ The electric field on each atom. The array should be of
-   *size natoms x 3.
+   *size nghost x 3.
    * @param[in] nghost The number of ghost atoms.
    * @param[in] lmp_list The neighbor list.
    **/
diff --git a/source/api_c/src/c_api.cc b/source/api_c/src/c_api.cc
index bc6178702f..79dc486e0d 100644
--- a/source/api_c/src/c_api.cc
+++ b/source/api_c/src/c_api.cc
@@ -25,6 +25,8 @@ DP_Nlist* DP_NewNlist(int inum_,
             DP_Nlist* new_nl = new DP_Nlist(nl); return new_nl;)
 }
 
+void DP_DeleteNlist(DP_Nlist* nl) { delete nl; }
+
 DP_DeepPot::DP_DeepPot() {}
 DP_DeepPot::DP_DeepPot(deepmd::DeepPot& dp) : dp(dp) {
   dfparam = dp.dim_fparam();
@@ -61,6 +63,8 @@ DP_DeepPot* DP_NewDeepPotWithParam2(const char* c_model,
             DP_DeepPot* new_dp = new DP_DeepPot(dp); return new_dp;)
 }
 
+void DP_DeleteDeepPot(DP_DeepPot* dp) { delete dp; }
+
 DP_DeepPotModelDevi::DP_DeepPotModelDevi() {}
 DP_DeepPotModelDevi::DP_DeepPotModelDevi(deepmd::DeepPotModelDevi& dp)
     : dp(dp) {
@@ -97,6 +101,8 @@ DP_DeepPotModelDevi* DP_NewDeepPotModelDeviWithParam(
             return new_dp;)
 }
 
+void DP_DeleteDeepPotModelDevi(DP_DeepPotModelDevi* dp) { delete dp; }
+
 DP_DeepTensor::DP_DeepTensor() {}
 DP_DeepTensor::DP_DeepTensor(deepmd::DeepTensor& dt) : dt(dt) {}
 
@@ -115,6 +121,8 @@ DP_DeepTensor* DP_NewDeepTensorWithParam(const char* c_model,
             DP_DeepTensor* new_dt = new DP_DeepTensor(dt); return new_dt;)
 }
 
+void DP_DeleteDeepTensor(DP_DeepTensor* dt) { delete dt; }
+
 DP_DipoleChargeModifier::DP_DipoleChargeModifier() {}
 DP_DipoleChargeModifier::DP_DipoleChargeModifier(
     deepmd::DipoleChargeModifier& dcm)
@@ -137,6 +145,8 @@ DP_DipoleChargeModifier* DP_NewDipoleChargeModifierWithParam(
             return new_dcm;)
 }
 
+void DP_DeleteDipoleChargeModifier(DP_DipoleChargeModifier* dcm) { delete dcm; }
+
 }  // extern "C"
 
 template <typename VALUETYPE>
@@ -775,7 +785,7 @@ inline void DP_DipoleChargeModifierComputeNList_variant(
   for (int i = 0; i < npairs; i++) {
     pairs_.push_back(std::make_pair(pairs[i * 2], pairs[i * 2 + 1]));
   }
-  std::vector<VALUETYPE> delef_(delef, delef + natoms * 3);
+  std::vector<VALUETYPE> delef_(delef, delef + (natoms - nghost) * 3);
   std::vector<VALUETYPE> df, dv;
 
   DP_REQUIRES_OK(dcm, dcm->dcm.compute(df, dv, coord_, atype_, cell_, pairs_,
diff --git a/source/api_c/tests/test_deeppot_a.cc b/source/api_c/tests/test_deeppot_a.cc
index 63f53e16e9..b4a9a81f92 100644
--- a/source/api_c/tests/test_deeppot_a.cc
+++ b/source/api_c/tests/test_deeppot_a.cc
@@ -86,7 +86,10 @@ class TestInferDeepPotA : public ::testing::Test {
     }
   };
 
-  void TearDown() override { remove("deeppot.pb"); };
+  void TearDown() override {
+    remove("deeppot.pb");
+    DP_DeleteDeepPot(dp);
+  };
 };
 
 TEST_F(TestInferDeepPotA, double_infer) {
@@ -119,6 +122,12 @@ TEST_F(TestInferDeepPotA, double_infer) {
   for (int ii = 0; ii < natoms * 9; ++ii) {
     EXPECT_LT(fabs(atomic_virial[ii] - expected_v[ii]), 1e-10);
   }
+
+  delete ener_;
+  delete[] force_;
+  delete[] virial_;
+  delete[] atomic_ener_;
+  delete[] atomic_virial_;
 }
 
 TEST_F(TestInferDeepPotA, float_infer) {
@@ -151,6 +160,11 @@ TEST_F(TestInferDeepPotA, float_infer) {
   for (int ii = 0; ii < natoms * 9; ++ii) {
     EXPECT_LT(fabs(atomic_virial[ii] - expected_v[ii]), 1e-6);
   }
+  delete ener_;
+  delete[] force_;
+  delete[] virial_;
+  delete[] atomic_ener_;
+  delete[] atomic_virial_;
 }
 
 TEST_F(TestInferDeepPotA, cutoff) {
@@ -253,7 +267,11 @@ class TestInferDeepPotANoPBC : public ::testing::Test {
     }
   };
 
-  void TearDown() override { remove("deeppot.pb"); };
+  void TearDown() override {
+    remove("deeppot.pb");
+
+    DP_DeleteDeepPot(dp);
+  };
 };
 
 TEST_F(TestInferDeepPotANoPBC, double_infer) {
@@ -286,6 +304,11 @@ TEST_F(TestInferDeepPotANoPBC, double_infer) {
   for (int ii = 0; ii < natoms * 9; ++ii) {
     EXPECT_LT(fabs(atomic_virial[ii] - expected_v[ii]), 1e-10);
   }
+  delete ener_;
+  delete[] force_;
+  delete[] virial_;
+  delete[] atomic_ener_;
+  delete[] atomic_virial_;
 }
 
 TEST_F(TestInferDeepPotANoPBC, float_infer) {
@@ -318,4 +341,9 @@ TEST_F(TestInferDeepPotANoPBC, float_infer) {
   for (int ii = 0; ii < natoms * 9; ++ii) {
     EXPECT_LT(fabs(atomic_virial[ii] - expected_v[ii]), 1e-6);
   }
+  delete ener_;
+  delete[] force_;
+  delete[] virial_;
+  delete[] atomic_ener_;
+  delete[] atomic_virial_;
 }
diff --git a/source/api_cc/CMakeLists.txt b/source/api_cc/CMakeLists.txt
index 2f296e3dfd..10a1dafca2 100644
--- a/source/api_cc/CMakeLists.txt
+++ b/source/api_cc/CMakeLists.txt
@@ -11,8 +11,16 @@ add_library(${libname} SHARED ${LIB_SRC})
 
 # link: libdeepmd libdeepmd_op libtensorflow_cc libtensorflow_framework
 target_link_libraries(${libname} PUBLIC ${LIB_DEEPMD})
-target_link_libraries(${libname} PRIVATE TensorFlow::tensorflow_cc
-                                         TensorFlow::tensorflow_framework)
+if(ENABLE_TENSORFLOW)
+  target_link_libraries(${libname} PRIVATE TensorFlow::tensorflow_cc
+                                           TensorFlow::tensorflow_framework)
+  target_compile_definitions(${libname} PRIVATE BUILD_TENSORFLOW)
+endif()
+if(ENABLE_PYTORCH)
+  target_link_libraries(${libname} PRIVATE "${TORCH_LIBRARIES}")
+  target_compile_definitions(${libname} PRIVATE BUILD_PYTORCH)
+endif()
+
 target_include_directories(
   ${libname}
   PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
@@ -25,8 +33,10 @@ if(Protobuf_LIBRARY)
 endif()
 
 set_target_properties(
-  ${libname} PROPERTIES INSTALL_RPATH "$ORIGIN;${TensorFlow_LIBRARY_PATH}"
-                        BUILD_RPATH "$ORIGIN/../op")
+  ${libname}
+  PROPERTIES INSTALL_RPATH "$ORIGIN;${TensorFlow_LIBRARY_PATH}"
+             INSTALL_RPATH_USE_LINK_PATH TRUE
+             BUILD_RPATH "$ORIGIN/../op/tf;$ORIGIN/../op/pt")
 target_compile_definitions(${libname} PRIVATE TF_PRIVATE)
 if(CMAKE_TESTING_ENABLED)
   target_link_libraries(${libname} PRIVATE coverage_config)
@@ -55,3 +65,14 @@ ${CMAKE_INSTALL_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${libname}${LOW_PREC_V
     add_subdirectory(tests)
   endif()
 endif(BUILD_PY_IF)
+
+if(BUILD_TESTING)
+  # A compilation test to make sure api_cc can compile without any backend
+  add_library(deepmd_cc_test_no_backend SHARED ${LIB_SRC})
+  target_link_libraries(deepmd_cc_test_no_backend PUBLIC ${LIB_DEEPMD})
+  target_include_directories(
+    deepmd_cc_test_no_backend
+    PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+           $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
+           $<INSTALL_INTERFACE:include>)
+endif()
diff --git a/source/api_cc/include/DataModifier.h b/source/api_cc/include/DataModifier.h
index 1e611a3930..0f46b5e0f8 100644
--- a/source/api_cc/include/DataModifier.h
+++ b/source/api_cc/include/DataModifier.h
@@ -84,7 +84,7 @@ class DipoleChargeModifierBase {
    * @brief Get the list of sel types.
    * @return The list of sel types.
    */
-  virtual std::vector<int> sel_types() const = 0;
+  virtual const std::vector<int>& sel_types() const = 0;
 };
 
 /**
@@ -127,13 +127,13 @@ class DipoleChargeModifier {
    * @param[out] dfcorr_ The force correction on each atom.
    * @param[out] dvcorr_ The virial correction.
    * @param[in] dcoord_ The coordinates of atoms. The array should be of size
-   *natoms x 3.
-   * @param[in] datype_ The atom types. The list should contain natoms ints.
+   *nall x 3.
+   * @param[in] datype_ The atom types. The list should contain nall ints.
    * @param[in] dbox The cell of the region. The array should be of size 9.
    * @param[in] pairs The pairs of atoms. The list should contain npairs pairs
    *of ints.
    * @param[in] delef_ The electric field on each atom. The array should be of
-   *size natoms x 3.
+   *size nloc x 3.
    * @param[in] nghost The number of ghost atoms.
    * @param[in] lmp_list The neighbor list.
    **/
@@ -161,7 +161,7 @@ class DipoleChargeModifier {
    * @brief Get the list of sel types.
    * @return The list of sel types.
    */
-  std::vector<int> sel_types() const;
+  const std::vector<int>& sel_types() const;
 
  private:
   bool inited;
diff --git a/source/api_cc/include/DataModifierTF.h b/source/api_cc/include/DataModifierTF.h
index 2ca3729525..b2f610db3c 100644
--- a/source/api_cc/include/DataModifierTF.h
+++ b/source/api_cc/include/DataModifierTF.h
@@ -3,6 +3,7 @@
 
 #include "DataModifier.h"
 #include "common.h"
+#include "commonTF.h"
 
 namespace deepmd {
 /**
@@ -41,13 +42,13 @@ class DipoleChargeModifierTF : public DipoleChargeModifierBase {
    * @param[out] dfcorr_ The force correction on each atom.
    * @param[out] dvcorr_ The virial correction.
    * @param[in] dcoord_ The coordinates of atoms. The array should be of size
-   *natoms x 3.
-   * @param[in] datype_ The atom types. The list should contain natoms ints.
+   *nall x 3.
+   * @param[in] datype_ The atom types. The list should contain nall ints.
    * @param[in] dbox The cell of the region. The array should be of size 9.
    * @param[in] pairs The pairs of atoms. The list should contain npairs pairs
    *of ints.
    * @param[in] delef_ The electric field on each atom. The array should be of
-   *size natoms x 3.
+   *size nloc x 3.
    * @param[in] nghost The number of ghost atoms.
    * @param[in] lmp_list The neighbor list.
    **/
@@ -83,7 +84,7 @@ class DipoleChargeModifierTF : public DipoleChargeModifierBase {
    * @brief Get the list of sel types.
    * @return The list of sel types.
    */
-  std::vector<int> sel_types() const {
+  const std::vector<int>& sel_types() const {
     assert(inited);
     return sel_type;
   };
diff --git a/source/api_cc/include/DeepPotPT.h b/source/api_cc/include/DeepPotPT.h
new file mode 100644
index 0000000000..a7fc910b46
--- /dev/null
+++ b/source/api_cc/include/DeepPotPT.h
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+#pragma once
+
+#include <torch/script.h>
+#include <torch/torch.h>
+
+#include "DeepPot.h"
+
+namespace deepmd {
+/**
+ * @brief PyTorch implementation for Deep Potential.
+ **/
+class DeepPotPT : public DeepPotBase {
+ public:
+  /**
+   * @brief DP constructor without initialization.
+   **/
+  DeepPotPT();
+  ~DeepPotPT();
+  /**
+   * @brief DP constructor with initialization.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] file_content The content of the model file. If it is not empty,
+   *DP will read from the string instead of the file.
+   **/
+  DeepPotPT(const std::string& model,
+            const int& gpu_rank = 0,
+            const std::string& file_content = "");
+  /**
+   * @brief Initialize the DP.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] file_content The content of the model file. If it is not empty,
+   *DP will read from the string instead of the file.
+   **/
+  void init(const std::string& model,
+            const int& gpu_rank = 0,
+            const std::string& file_content = "");
+
+ private:
+  /**
+   * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial
+   *by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[out] atom_energy The atomic energy.
+   * @param[out] atom_virial The atomic virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] fparam The frame parameter. The array can be of size :
+   * nframes x dim_fparam.
+   * dim_fparam. Then all frames are assumed to be provided with the same
+   *fparam.
+   * @param[in] aparam The atomic parameter The array can be of size :
+   * nframes x natoms x dim_aparam.
+   * natoms x dim_aparam. Then all frames are assumed to be provided with the
+   *same aparam.
+   **/
+  template <typename VALUETYPE, typename ENERGYVTYPE>
+  void compute(ENERGYVTYPE& ener,
+               std::vector<VALUETYPE>& force,
+               std::vector<VALUETYPE>& virial,
+               std::vector<VALUETYPE>& atom_energy,
+               std::vector<VALUETYPE>& atom_virial,
+               const std::vector<VALUETYPE>& coord,
+               const std::vector<int>& atype,
+               const std::vector<VALUETYPE>& box,
+               const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
+               const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  /**
+   * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial
+   *by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[out] atom_energy The atomic energy.
+   * @param[out] atom_virial The atomic virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] lmp_list The input neighbour list.
+   * @param[in] ago Update the internal neighbour list if ago is 0.
+   * @param[in] fparam The frame parameter. The array can be of size :
+   * nframes x dim_fparam.
+   * dim_fparam. Then all frames are assumed to be provided with the same
+   *fparam.
+   * @param[in] aparam The atomic parameter The array can be of size :
+   * nframes x natoms x dim_aparam.
+   * natoms x dim_aparam. Then all frames are assumed to be provided with the
+   *same aparam.
+   **/
+  template <typename VALUETYPE, typename ENERGYVTYPE>
+  void compute(ENERGYVTYPE& ener,
+               std::vector<VALUETYPE>& force,
+               std::vector<VALUETYPE>& virial,
+               std::vector<VALUETYPE>& atom_energy,
+               std::vector<VALUETYPE>& atom_virial,
+               const std::vector<VALUETYPE>& coord,
+               const std::vector<int>& atype,
+               const std::vector<VALUETYPE>& box,
+               const int nghost,
+               const InputNlist& lmp_list,
+               const int& ago,
+               const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
+               const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  /**
+   * @brief Evaluate the energy, force, and virial with the mixed type
+   *by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[in] nframes The number of frames.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The array should be of size nframes x
+   *natoms.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] fparam The frame parameter. The array can be of size :
+   * nframes x dim_fparam.
+   * dim_fparam. Then all frames are assumed to be provided with the same
+   *fparam.
+   * @param[in] aparam The atomic parameter The array can be of size :
+   * nframes x natoms x dim_aparam.
+   * natoms x dim_aparam. Then all frames are assumed to be provided with the
+   *same aparam.
+   **/
+  template <typename VALUETYPE, typename ENERGYVTYPE>
+  void compute_mixed_type(
+      ENERGYVTYPE& ener,
+      std::vector<VALUETYPE>& force,
+      std::vector<VALUETYPE>& virial,
+      const int& nframes,
+      const std::vector<VALUETYPE>& coord,
+      const std::vector<int>& atype,
+      const std::vector<VALUETYPE>& box,
+      const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
+      const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+  /**
+   * @brief Evaluate the energy, force, and virial with the mixed type
+   *by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[out] atom_energy The atomic energy.
+   * @param[out] atom_virial The atomic virial.
+   * @param[in] nframes The number of frames.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The array should be of size nframes x
+   *natoms.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] fparam The frame parameter. The array can be of size :
+   * nframes x dim_fparam.
+   * dim_fparam. Then all frames are assumed to be provided with the same
+   *fparam.
+   * @param[in] aparam The atomic parameter The array can be of size :
+   * nframes x natoms x dim_aparam.
+   * natoms x dim_aparam. Then all frames are assumed to be provided with the
+   *same aparam.
+   **/
+  template <typename VALUETYPE, typename ENERGYVTYPE>
+  void compute_mixed_type(
+      ENERGYVTYPE& ener,
+      std::vector<VALUETYPE>& force,
+      std::vector<VALUETYPE>& virial,
+      std::vector<VALUETYPE>& atom_energy,
+      std::vector<VALUETYPE>& atom_virial,
+      const int& nframes,
+      const std::vector<VALUETYPE>& coord,
+      const std::vector<int>& atype,
+      const std::vector<VALUETYPE>& box,
+      const std::vector<VALUETYPE>& fparam = std::vector<VALUETYPE>(),
+      const std::vector<VALUETYPE>& aparam = std::vector<VALUETYPE>());
+
+ public:
+  /**
+   * @brief Get the cutoff radius.
+   * @return The cutoff radius.
+   **/
+  double cutoff() const {
+    assert(inited);
+    return rcut;
+  };
+  /**
+   * @brief Get the number of types.
+   * @return The number of types.
+   **/
+  int numb_types() const {
+    assert(inited);
+    return ntypes;
+  };
+  /**
+   * @brief Get the number of types with spin.
+   * @return The number of types with spin.
+   **/
+  int numb_types_spin() const {
+    assert(inited);
+    return ntypes_spin;
+  };
+  /**
+   * @brief Get the dimension of the frame parameter.
+   * @return The dimension of the frame parameter.
+   **/
+  int dim_fparam() const {
+    assert(inited);
+    return dfparam;
+  };
+  /**
+   * @brief Get the dimension of the atomic parameter.
+   * @return The dimension of the atomic parameter.
+   **/
+  int dim_aparam() const {
+    assert(inited);
+    return daparam;
+  };
+  /**
+   * @brief Get the type map (element name of the atom types) of this model.
+   * @param[out] type_map The type map of this model.
+   **/
+  void get_type_map(std::string& type_map);
+
+  /**
+   * @brief Get whether the atom dimension of aparam is nall instead of fparam.
+   * @param[out] aparam_nall whether the atom dimension of aparam is nall
+   *instead of fparam.
+   **/
+  bool is_aparam_nall() const {
+    assert(inited);
+    return aparam_nall;
+  };
+
+  // forward to template class
+  void computew(std::vector<double>& ener,
+                std::vector<double>& force,
+                std::vector<double>& virial,
+                std::vector<double>& atom_energy,
+                std::vector<double>& atom_virial,
+                const std::vector<double>& coord,
+                const std::vector<int>& atype,
+                const std::vector<double>& box,
+                const std::vector<double>& fparam = std::vector<double>(),
+                const std::vector<double>& aparam = std::vector<double>());
+  void computew(std::vector<double>& ener,
+                std::vector<float>& force,
+                std::vector<float>& virial,
+                std::vector<float>& atom_energy,
+                std::vector<float>& atom_virial,
+                const std::vector<float>& coord,
+                const std::vector<int>& atype,
+                const std::vector<float>& box,
+                const std::vector<float>& fparam = std::vector<float>(),
+                const std::vector<float>& aparam = std::vector<float>());
+  void computew(std::vector<double>& ener,
+                std::vector<double>& force,
+                std::vector<double>& virial,
+                std::vector<double>& atom_energy,
+                std::vector<double>& atom_virial,
+                const std::vector<double>& coord,
+                const std::vector<int>& atype,
+                const std::vector<double>& box,
+                const int nghost,
+                const InputNlist& inlist,
+                const int& ago,
+                const std::vector<double>& fparam = std::vector<double>(),
+                const std::vector<double>& aparam = std::vector<double>());
+  void computew(std::vector<double>& ener,
+                std::vector<float>& force,
+                std::vector<float>& virial,
+                std::vector<float>& atom_energy,
+                std::vector<float>& atom_virial,
+                const std::vector<float>& coord,
+                const std::vector<int>& atype,
+                const std::vector<float>& box,
+                const int nghost,
+                const InputNlist& inlist,
+                const int& ago,
+                const std::vector<float>& fparam = std::vector<float>(),
+                const std::vector<float>& aparam = std::vector<float>());
+  void computew_mixed_type(
+      std::vector<double>& ener,
+      std::vector<double>& force,
+      std::vector<double>& virial,
+      std::vector<double>& atom_energy,
+      std::vector<double>& atom_virial,
+      const int& nframes,
+      const std::vector<double>& coord,
+      const std::vector<int>& atype,
+      const std::vector<double>& box,
+      const std::vector<double>& fparam = std::vector<double>(),
+      const std::vector<double>& aparam = std::vector<double>());
+  void computew_mixed_type(
+      std::vector<double>& ener,
+      std::vector<float>& force,
+      std::vector<float>& virial,
+      std::vector<float>& atom_energy,
+      std::vector<float>& atom_virial,
+      const int& nframes,
+      const std::vector<float>& coord,
+      const std::vector<int>& atype,
+      const std::vector<float>& box,
+      const std::vector<float>& fparam = std::vector<float>(),
+      const std::vector<float>& aparam = std::vector<float>());
+
+ private:
+  int num_intra_nthreads, num_inter_nthreads;
+  bool inited;
+  int ntypes;
+  int ntypes_spin;
+  int dfparam;
+  int daparam;
+  bool aparam_nall;
+  // copy neighbor list info from host
+  torch::jit::script::Module module;
+  double rcut;
+  NeighborListData nlist_data;
+  int max_num_neighbors;
+  int gpu_id;
+  bool gpu_enabled;
+  at::Tensor firstneigh_tensor;
+};
+
+}  // namespace deepmd
diff --git a/source/api_cc/include/DeepPotTF.h b/source/api_cc/include/DeepPotTF.h
index 0580c61da5..699b0ff7fe 100644
--- a/source/api_cc/include/DeepPotTF.h
+++ b/source/api_cc/include/DeepPotTF.h
@@ -3,6 +3,7 @@
 
 #include "DeepPot.h"
 #include "common.h"
+#include "commonTF.h"
 #include "neighbor_list.h"
 
 namespace deepmd {
diff --git a/source/api_cc/include/DeepTensorTF.h b/source/api_cc/include/DeepTensorTF.h
index 3c724dce88..3ca316a29f 100644
--- a/source/api_cc/include/DeepTensorTF.h
+++ b/source/api_cc/include/DeepTensorTF.h
@@ -3,6 +3,7 @@
 
 #include "DeepTensor.h"
 #include "common.h"
+#include "commonTF.h"
 #include "neighbor_list.h"
 
 namespace deepmd {
diff --git a/source/api_cc/include/common.h b/source/api_cc/include/common.h
index 7982c4f89d..2010780a6c 100644
--- a/source/api_cc/include/common.h
+++ b/source/api_cc/include/common.h
@@ -10,16 +10,9 @@
 #include "neighbor_list.h"
 #include "version.h"
 
-#ifdef TF_PRIVATE
-#include "tf_private.h"
-#else
-#include "tf_public.h"
-#endif
-
 namespace deepmd {
 
 typedef double ENERGYTYPE;
-// TODO: currently we only implement TF; reserve for future use
 enum DPBackend { TensorFlow, PyTorch, Paddle, Unknown };
 
 struct NeighborListData {
@@ -38,6 +31,7 @@ struct NeighborListData {
   void shuffle(const deepmd::AtomMap& map);
   void shuffle_exclude_empty(const std::vector<int>& fwd_map);
   void make_inlist(InputNlist& inlist);
+  void padding();
 };
 
 /**
@@ -150,9 +144,9 @@ void select_map_inv(typename std::vector<VT>::iterator out,
  * @brief Get the number of threads from the environment variable.
  * @details A warning will be thrown if environmental variables are not set.
  * @param[out] num_intra_nthreads The number of intra threads. Read from
- *TF_INTRA_OP_PARALLELISM_THREADS.
+ *DP_INTRA_OP_PARALLELISM_THREADS.
  * @param[out] num_inter_nthreads The number of inter threads. Read from
- *TF_INTER_OP_PARALLELISM_THREADS.
+ *DP_INTER_OP_PARALLELISM_THREADS.
  **/
 void get_env_nthreads(int& num_intra_nthreads, int& num_inter_nthreads);
 
@@ -175,143 +169,8 @@ struct tf_exception : public deepmd::deepmd_exception {
       : deepmd::deepmd_exception(std::string("TensorFlow Error: ") + msg){};
 };
 
-/**
- * @brief Check TensorFlow status. Exit if not OK.
- * @param[in] status TensorFlow status.
- **/
-void check_status(const tensorflow::Status& status);
-
 std::string name_prefix(const std::string& name_scope);
 
-/**
- * @brief Get the value of a tensor.
- * @param[in] session TensorFlow session.
- * @param[in] name The name of the tensor.
- * @param[in] scope The scope of the tensor.
- * @return The value of the tensor.
- **/
-template <typename VT>
-VT session_get_scalar(tensorflow::Session* session,
-                      const std::string name,
-                      const std::string scope = "");
-
-/**
- * @brief Get the vector of a tensor.
- * @param[out] o_vec The output vector.
- * @param[in] session TensorFlow session.
- * @param[in] name The name of the tensor.
- * @param[in] scope The scope of the tensor.
- **/
-template <typename VT>
-void session_get_vector(std::vector<VT>& o_vec,
-                        tensorflow::Session* session,
-                        const std::string name_,
-                        const std::string scope = "");
-
-/**
- * @brief Get the type of a tensor.
- * @param[in] session TensorFlow session.
- * @param[in] name The name of the tensor.
- * @param[in] scope The scope of the tensor.
- * @return The type of the tensor as int.
- **/
-int session_get_dtype(tensorflow::Session* session,
-                      const std::string name,
-                      const std::string scope = "");
-
-/**
- * @brief Get input tensors.
- * @param[out] input_tensors Input tensors.
- * @param[in] dcoord_ Coordinates of atoms.
- * @param[in] ntypes Number of atom types.
- * @param[in] datype_ Atom types.
- * @param[in] dbox Box matrix.
- * @param[in] cell_size Cell size.
- * @param[in] fparam_ Frame parameters.
- * @param[in] aparam_ Atom parameters.
- * @param[in] atommap Atom map.
- * @param[in] scope The scope of the tensors.
- * @param[in] aparam_nall Whether the atomic dimesion of atomic parameters is
- * nall.
- */
-template <typename MODELTYPE, typename VALUETYPE>
-int session_input_tensors(
-    std::vector<std::pair<std::string, tensorflow::Tensor>>& input_tensors,
-    const std::vector<VALUETYPE>& dcoord_,
-    const int& ntypes,
-    const std::vector<int>& datype_,
-    const std::vector<VALUETYPE>& dbox,
-    const double& cell_size,
-    const std::vector<VALUETYPE>& fparam_,
-    const std::vector<VALUETYPE>& aparam_,
-    const deepmd::AtomMap& atommap,
-    const std::string scope = "",
-    const bool aparam_nall = false);
-
-/**
- * @brief Get input tensors.
- * @param[out] input_tensors Input tensors.
- * @param[in] dcoord_ Coordinates of atoms.
- * @param[in] ntypes Number of atom types.
- * @param[in] datype_ Atom types.
- * @param[in] dlist Neighbor list.
- * @param[in] fparam_ Frame parameters.
- * @param[in] aparam_ Atom parameters.
- * @param[in] atommap Atom map.
- * @param[in] nghost Number of ghost atoms.
- * @param[in] ago Update the internal neighbour list if ago is 0.
- * @param[in] scope The scope of the tensors.
- * @param[in] aparam_nall Whether the atomic dimesion of atomic parameters is
- * nall.
- */
-template <typename MODELTYPE, typename VALUETYPE>
-int session_input_tensors(
-    std::vector<std::pair<std::string, tensorflow::Tensor>>& input_tensors,
-    const std::vector<VALUETYPE>& dcoord_,
-    const int& ntypes,
-    const std::vector<int>& datype_,
-    const std::vector<VALUETYPE>& dbox,
-    InputNlist& dlist,
-    const std::vector<VALUETYPE>& fparam_,
-    const std::vector<VALUETYPE>& aparam_,
-    const deepmd::AtomMap& atommap,
-    const int nghost,
-    const int ago,
-    const std::string scope = "",
-    const bool aparam_nall = false);
-
-/**
- * @brief Get input tensors for mixed type.
- * @param[out] input_tensors Input tensors.
- * @param[in] nframes Number of frames.
- * @param[in] dcoord_ Coordinates of atoms.
- * @param[in] ntypes Number of atom types.
- * @param[in] datype_ Atom types.
- * @param[in] dlist Neighbor list.
- * @param[in] fparam_ Frame parameters.
- * @param[in] aparam_ Atom parameters.
- * @param[in] atommap Atom map.
- * @param[in] nghost Number of ghost atoms.
- * @param[in] ago Update the internal neighbour list if ago is 0.
- * @param[in] scope The scope of the tensors.
- * @param[in] aparam_nall Whether the atomic dimesion of atomic parameters is
- * nall.
- */
-template <typename MODELTYPE, typename VALUETYPE>
-int session_input_tensors_mixed_type(
-    std::vector<std::pair<std::string, tensorflow::Tensor>>& input_tensors,
-    const int& nframes,
-    const std::vector<VALUETYPE>& dcoord_,
-    const int& ntypes,
-    const std::vector<int>& datype_,
-    const std::vector<VALUETYPE>& dbox,
-    const double& cell_size,
-    const std::vector<VALUETYPE>& fparam_,
-    const std::vector<VALUETYPE>& aparam_,
-    const deepmd::AtomMap& atommap,
-    const std::string scope = "",
-    const bool aparam_nall = false);
-
 /**
  * @brief Read model file to a string.
  * @param[in] model Path to the model.
diff --git a/source/api_cc/include/commonTF.h b/source/api_cc/include/commonTF.h
new file mode 100644
index 0000000000..0c14597e30
--- /dev/null
+++ b/source/api_cc/include/commonTF.h
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+#include <string>
+#include <vector>
+
+#ifdef TF_PRIVATE
+#include "tf_private.h"
+#else
+#include "tf_public.h"
+#endif
+
+namespace deepmd {
+/**
+ * @brief Check TensorFlow status. Exit if not OK.
+ * @param[in] status TensorFlow status.
+ **/
+void check_status(const tensorflow::Status& status);
+
+/**
+ * @brief Get the value of a tensor.
+ * @param[in] session TensorFlow session.
+ * @param[in] name The name of the tensor.
+ * @param[in] scope The scope of the tensor.
+ * @return The value of the tensor.
+ **/
+template <typename VT>
+VT session_get_scalar(tensorflow::Session* session,
+                      const std::string name,
+                      const std::string scope = "");
+
+/**
+ * @brief Get the vector of a tensor.
+ * @param[out] o_vec The output vector.
+ * @param[in] session TensorFlow session.
+ * @param[in] name The name of the tensor.
+ * @param[in] scope The scope of the tensor.
+ **/
+template <typename VT>
+void session_get_vector(std::vector<VT>& o_vec,
+                        tensorflow::Session* session,
+                        const std::string name_,
+                        const std::string scope = "");
+
+/**
+ * @brief Get the type of a tensor.
+ * @param[in] session TensorFlow session.
+ * @param[in] name The name of the tensor.
+ * @param[in] scope The scope of the tensor.
+ * @return The type of the tensor as int.
+ **/
+int session_get_dtype(tensorflow::Session* session,
+                      const std::string name,
+                      const std::string scope = "");
+
+/**
+ * @brief Get input tensors.
+ * @param[out] input_tensors Input tensors.
+ * @param[in] dcoord_ Coordinates of atoms.
+ * @param[in] ntypes Number of atom types.
+ * @param[in] datype_ Atom types.
+ * @param[in] dbox Box matrix.
+ * @param[in] cell_size Cell size.
+ * @param[in] fparam_ Frame parameters.
+ * @param[in] aparam_ Atom parameters.
+ * @param[in] atommap Atom map.
+ * @param[in] scope The scope of the tensors.
+ * @param[in] aparam_nall Whether the atomic dimesion of atomic parameters is
+ * nall.
+ */
+template <typename MODELTYPE, typename VALUETYPE>
+int session_input_tensors(
+    std::vector<std::pair<std::string, tensorflow::Tensor>>& input_tensors,
+    const std::vector<VALUETYPE>& dcoord_,
+    const int& ntypes,
+    const std::vector<int>& datype_,
+    const std::vector<VALUETYPE>& dbox,
+    const double& cell_size,
+    const std::vector<VALUETYPE>& fparam_,
+    const std::vector<VALUETYPE>& aparam_,
+    const deepmd::AtomMap& atommap,
+    const std::string scope = "",
+    const bool aparam_nall = false);
+
+/**
+ * @brief Get input tensors.
+ * @param[out] input_tensors Input tensors.
+ * @param[in] dcoord_ Coordinates of atoms.
+ * @param[in] ntypes Number of atom types.
+ * @param[in] datype_ Atom types.
+ * @param[in] dlist Neighbor list.
+ * @param[in] fparam_ Frame parameters.
+ * @param[in] aparam_ Atom parameters.
+ * @param[in] atommap Atom map.
+ * @param[in] nghost Number of ghost atoms.
+ * @param[in] ago Update the internal neighbour list if ago is 0.
+ * @param[in] scope The scope of the tensors.
+ * @param[in] aparam_nall Whether the atomic dimesion of atomic parameters is
+ * nall.
+ */
+template <typename MODELTYPE, typename VALUETYPE>
+int session_input_tensors(
+    std::vector<std::pair<std::string, tensorflow::Tensor>>& input_tensors,
+    const std::vector<VALUETYPE>& dcoord_,
+    const int& ntypes,
+    const std::vector<int>& datype_,
+    const std::vector<VALUETYPE>& dbox,
+    InputNlist& dlist,
+    const std::vector<VALUETYPE>& fparam_,
+    const std::vector<VALUETYPE>& aparam_,
+    const deepmd::AtomMap& atommap,
+    const int nghost,
+    const int ago,
+    const std::string scope = "",
+    const bool aparam_nall = false);
+
+/**
+ * @brief Get input tensors for mixed type.
+ * @param[out] input_tensors Input tensors.
+ * @param[in] nframes Number of frames.
+ * @param[in] dcoord_ Coordinates of atoms.
+ * @param[in] ntypes Number of atom types.
+ * @param[in] datype_ Atom types.
+ * @param[in] dlist Neighbor list.
+ * @param[in] fparam_ Frame parameters.
+ * @param[in] aparam_ Atom parameters.
+ * @param[in] atommap Atom map.
+ * @param[in] nghost Number of ghost atoms.
+ * @param[in] ago Update the internal neighbour list if ago is 0.
+ * @param[in] scope The scope of the tensors.
+ * @param[in] aparam_nall Whether the atomic dimesion of atomic parameters is
+ * nall.
+ */
+template <typename MODELTYPE, typename VALUETYPE>
+int session_input_tensors_mixed_type(
+    std::vector<std::pair<std::string, tensorflow::Tensor>>& input_tensors,
+    const int& nframes,
+    const std::vector<VALUETYPE>& dcoord_,
+    const int& ntypes,
+    const std::vector<int>& datype_,
+    const std::vector<VALUETYPE>& dbox,
+    const double& cell_size,
+    const std::vector<VALUETYPE>& fparam_,
+    const std::vector<VALUETYPE>& aparam_,
+    const deepmd::AtomMap& atommap,
+    const std::string scope = "",
+    const bool aparam_nall = false);
+
+}  // namespace deepmd
diff --git a/source/api_cc/include/version.h.in b/source/api_cc/include/version.h.in
index c6bf6cf491..26b0c1be48 100644
--- a/source/api_cc/include/version.h.in
+++ b/source/api_cc/include/version.h.in
@@ -9,4 +9,5 @@ const std::string global_git_date="@GIT_DATE@";
 const std::string global_git_branch="@GIT_BRANCH@";
 const std::string global_tf_include_dir="@TensorFlow_INCLUDE_DIRS@";
 const std::string global_tf_lib="@TensorFlow_LIBRARY@";
+const std::string global_pt_lib="@TORCH_LIBRARIES@";
 const std::string global_model_version="@MODEL_VERSION@";
diff --git a/source/api_cc/src/DataModifier.cc b/source/api_cc/src/DataModifier.cc
index 954c969c13..bac2e13da5 100644
--- a/source/api_cc/src/DataModifier.cc
+++ b/source/api_cc/src/DataModifier.cc
@@ -1,7 +1,9 @@
 // SPDX-License-Identifier: LGPL-3.0-or-later
 #include "DataModifier.h"
 
+#ifdef BUILD_TENSORFLOW
 #include "DataModifierTF.h"
+#endif
 #include "common.h"
 
 using namespace deepmd;
@@ -29,9 +31,12 @@ void DipoleChargeModifier::init(const std::string& model,
   // TODO: To implement detect_backend
   DPBackend backend = deepmd::DPBackend::TensorFlow;
   if (deepmd::DPBackend::TensorFlow == backend) {
-    // TODO: throw errors if TF backend is not built, without mentioning TF
+#ifdef BUILD_TENSORFLOW
     dcm = std::make_shared<deepmd::DipoleChargeModifierTF>(model, gpu_rank,
                                                            name_scope_);
+#else
+    throw deepmd::deepmd_exception("TensorFlow backend is not built");
+#endif
   } else if (deepmd::DPBackend::PyTorch == backend) {
     throw deepmd::deepmd_exception("PyTorch backend is not supported yet");
   } else if (deepmd::DPBackend::Paddle == backend) {
@@ -87,6 +92,6 @@ double DipoleChargeModifier::cutoff() const { return dcm->cutoff(); }
 
 int DipoleChargeModifier::numb_types() const { return dcm->numb_types(); }
 
-std::vector<int> DipoleChargeModifier::sel_types() const {
+const std::vector<int>& DipoleChargeModifier::sel_types() const {
   return dcm->sel_types();
 }
diff --git a/source/api_cc/src/DataModifierTF.cc b/source/api_cc/src/DataModifierTF.cc
index 219139cf89..324cb14098 100644
--- a/source/api_cc/src/DataModifierTF.cc
+++ b/source/api_cc/src/DataModifierTF.cc
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: LGPL-3.0-or-later
+#ifdef BUILD_TENSORFLOW
 #include "DataModifierTF.h"
 
 #include "common.h"
@@ -361,3 +362,4 @@ void DipoleChargeModifierTF::computew(
   compute(dfcorr_, dvcorr_, dcoord_, datype_, dbox, pairs, delef_, nghost,
           lmp_list);
 }
+#endif  // BUILD_TENSORFLOW
diff --git a/source/api_cc/src/DeepPot.cc b/source/api_cc/src/DeepPot.cc
index 083e9b091f..498f35f46b 100644
--- a/source/api_cc/src/DeepPot.cc
+++ b/source/api_cc/src/DeepPot.cc
@@ -1,13 +1,17 @@
 // SPDX-License-Identifier: LGPL-3.0-or-later
 #include "DeepPot.h"
 
-#include "common.h"
-// TODO: only include when TF backend is built
 #include <memory>
 #include <stdexcept>
 
 #include "AtomMap.h"
+#include "common.h"
+#ifdef BUILD_TENSORFLOW
 #include "DeepPotTF.h"
+#endif
+#ifdef BUILD_PYTORCH
+#include "DeepPotPT.h"
+#endif
 #include "device.h"
 
 using namespace deepmd;
@@ -32,13 +36,26 @@ void DeepPot::init(const std::string& model,
               << std::endl;
     return;
   }
-  // TODO: To implement detect_backend
-  DPBackend backend = deepmd::DPBackend::TensorFlow;
+  DPBackend backend;
+  if (model.length() >= 4 && model.substr(model.length() - 4) == ".pth") {
+    backend = deepmd::DPBackend::PyTorch;
+  } else if (model.length() >= 3 && model.substr(model.length() - 3) == ".pb") {
+    backend = deepmd::DPBackend::TensorFlow;
+  } else {
+    throw deepmd::deepmd_exception("Unsupported model file format");
+  }
   if (deepmd::DPBackend::TensorFlow == backend) {
-    // TODO: throw errors if TF backend is not built, without mentioning TF
+#ifdef BUILD_TENSORFLOW
     dp = std::make_shared<deepmd::DeepPotTF>(model, gpu_rank, file_content);
+#else
+    throw deepmd::deepmd_exception("TensorFlow backend is not built");
+#endif
   } else if (deepmd::DPBackend::PyTorch == backend) {
-    throw deepmd::deepmd_exception("PyTorch backend is not supported yet");
+#ifdef BUILD_PYTORCH
+    dp = std::make_shared<deepmd::DeepPotPT>(model, gpu_rank, file_content);
+#else
+    throw deepmd::deepmd_exception("PyTorch backend is not built");
+#endif
   } else if (deepmd::DPBackend::Paddle == backend) {
     throw deepmd::deepmd_exception("PaddlePaddle backend is not supported yet");
   } else {
diff --git a/source/api_cc/src/DeepPotPT.cc b/source/api_cc/src/DeepPotPT.cc
new file mode 100644
index 0000000000..b4631b5e46
--- /dev/null
+++ b/source/api_cc/src/DeepPotPT.cc
@@ -0,0 +1,476 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+#ifdef BUILD_PYTORCH
+#include "DeepPotPT.h"
+
+#include "common.h"
+#include "device.h"
+using namespace deepmd;
+torch::Tensor createNlistTensor(const std::vector<std::vector<int>>& data) {
+  std::vector<torch::Tensor> row_tensors;
+
+  for (const auto& row : data) {
+    torch::Tensor row_tensor = torch::tensor(row, torch::kInt32).unsqueeze(0);
+    row_tensors.push_back(row_tensor);
+  }
+
+  torch::Tensor tensor = torch::cat(row_tensors, 0).unsqueeze(0);
+  return tensor;
+}
+DeepPotPT::DeepPotPT() : inited(false) {}
+DeepPotPT::DeepPotPT(const std::string& model,
+                     const int& gpu_rank,
+                     const std::string& file_content)
+    : inited(false) {
+  try {
+    init(model, gpu_rank, file_content);
+  } catch (...) {
+    // Clean up and rethrow, as the destructor will not be called
+    throw;
+  }
+}
+void DeepPotPT::init(const std::string& model,
+                     const int& gpu_rank,
+                     const std::string& file_content) {
+  if (inited) {
+    std::cerr << "WARNING: deepmd-kit should not be initialized twice, do "
+                 "nothing at the second call of initializer"
+              << std::endl;
+    return;
+  }
+  deepmd::load_op_library();
+  int gpu_num = torch::cuda::device_count();
+  if (gpu_num > 0) {
+    gpu_id = gpu_rank % gpu_num;
+  } else {
+    gpu_id = 0;
+  }
+  torch::Device device(torch::kCUDA, gpu_id);
+  gpu_enabled = torch::cuda::is_available();
+  if (!gpu_enabled) {
+    device = torch::Device(torch::kCPU);
+    std::cout << "load model from: " << model << " to cpu " << std::endl;
+  } else {
+    std::cout << "load model from: " << model << " to gpu " << gpu_id
+              << std::endl;
+  }
+  module = torch::jit::load(model, device);
+
+  torch::jit::FusionStrategy strategy;
+  strategy = {{torch::jit::FusionBehavior::DYNAMIC, 10}};
+  torch::jit::setFusionStrategy(strategy);
+
+  get_env_nthreads(num_intra_nthreads,
+                   num_inter_nthreads);  // need to be fixed as
+                                         // DP_INTRA_OP_PARALLELISM_THREADS
+  if (num_inter_nthreads) {
+    try {
+      at::set_num_interop_threads(num_inter_nthreads);
+    } catch (...) {
+    }
+  }
+  if (num_intra_nthreads) {
+    try {
+      at::set_num_threads(num_intra_nthreads);
+    } catch (...) {
+    }
+  }
+
+  auto rcut_ = module.run_method("get_rcut").toDouble();
+  rcut = static_cast<double>(rcut_);
+  ntypes = module.run_method("get_ntypes").toInt();
+  ntypes_spin = 0;
+  dfparam = module.run_method("get_dim_fparam").toInt();
+  daparam = module.run_method("get_dim_aparam").toInt();
+  aparam_nall = module.run_method("is_aparam_nall").toBool();
+  inited = true;
+}
+DeepPotPT::~DeepPotPT() {}
+
+template <typename VALUETYPE, typename ENERGYVTYPE>
+void DeepPotPT::compute(ENERGYVTYPE& ener,
+                        std::vector<VALUETYPE>& force,
+                        std::vector<VALUETYPE>& virial,
+                        std::vector<VALUETYPE>& atom_energy,
+                        std::vector<VALUETYPE>& atom_virial,
+                        const std::vector<VALUETYPE>& coord,
+                        const std::vector<int>& atype,
+                        const std::vector<VALUETYPE>& box,
+                        const int nghost,
+                        const InputNlist& lmp_list,
+                        const int& ago,
+                        const std::vector<VALUETYPE>& fparam,
+                        const std::vector<VALUETYPE>& aparam) {
+  torch::Device device(torch::kCUDA, gpu_id);
+  if (!gpu_enabled) {
+    device = torch::Device(torch::kCPU);
+  }
+  int natoms = atype.size();
+  auto options = torch::TensorOptions().dtype(torch::kFloat64);
+  torch::ScalarType floatType = torch::kFloat64;
+  if (std::is_same_v<VALUETYPE, float>) {
+    options = torch::TensorOptions().dtype(torch::kFloat32);
+    floatType = torch::kFloat32;
+  }
+  auto int_options = torch::TensorOptions().dtype(torch::kInt64);
+  auto int32_options = torch::TensorOptions().dtype(torch::kInt32);
+  // select real atoms
+  std::vector<VALUETYPE> dcoord, dforce, aparam_, datom_energy, datom_virial;
+  std::vector<int> datype, fwd_map, bkw_map;
+  int nghost_real, nall_real, nloc_real;
+  int nall = natoms;
+  select_real_atoms_coord(dcoord, datype, aparam_, nghost_real, fwd_map,
+                          bkw_map, nall_real, nloc_real, coord, atype, aparam,
+                          nghost, ntypes, 1, daparam, nall, aparam_nall);
+  int nloc = nall_real - nghost_real;
+  int nframes = 1;
+  if (nloc == 0) {
+    // no backward map needed
+    ener.resize(nframes);
+    // dforce of size nall * 3
+    force.resize(static_cast<size_t>(nframes) * fwd_map.size() * 3);
+    fill(force.begin(), force.end(), (VALUETYPE)0.0);
+    // dvirial of size 9
+    virial.resize(static_cast<size_t>(nframes) * 9);
+    fill(virial.begin(), virial.end(), (VALUETYPE)0.0);
+    // datom_energy_ of size nall
+    atom_energy.resize(static_cast<size_t>(nframes) * fwd_map.size());
+    fill(atom_energy.begin(), atom_energy.end(), (VALUETYPE)0.0);
+    // datom_virial_ of size nall * 9
+    atom_virial.resize(static_cast<size_t>(nframes) * fwd_map.size() * 9);
+    fill(atom_virial.begin(), atom_virial.end(), (VALUETYPE)0.0);
+    return;
+  }
+  std::vector<VALUETYPE> coord_wrapped = dcoord;
+  at::Tensor coord_wrapped_Tensor =
+      torch::from_blob(coord_wrapped.data(), {1, nall_real, 3}, options)
+          .to(device);
+  std::vector<int64_t> atype_64(datype.begin(), datype.end());
+  at::Tensor atype_Tensor =
+      torch::from_blob(atype_64.data(), {1, nall_real}, int_options).to(device);
+  if (ago == 0) {
+    nlist_data.copy_from_nlist(lmp_list);
+    nlist_data.shuffle_exclude_empty(fwd_map);
+    nlist_data.padding();
+  }
+  at::Tensor firstneigh = createNlistTensor(nlist_data.jlist);
+  firstneigh_tensor = firstneigh.to(torch::kInt64).to(device);
+  bool do_atom_virial_tensor = true;
+  c10::optional<torch::Tensor> optional_tensor;
+  c10::optional<torch::Tensor> fparam_tensor;
+  if (!fparam.empty()) {
+    fparam_tensor =
+        torch::from_blob(const_cast<VALUETYPE*>(fparam.data()),
+                         {1, static_cast<long int>(fparam.size())}, options)
+            .to(device);
+  }
+  c10::optional<torch::Tensor> aparam_tensor;
+  if (!aparam_.empty()) {
+    aparam_tensor = torch::from_blob(
+                        const_cast<VALUETYPE*>(aparam_.data()),
+                        {1, lmp_list.inum,
+                         static_cast<long int>(aparam_.size()) / lmp_list.inum},
+                        options)
+                        .to(device);
+  }
+  c10::Dict<c10::IValue, c10::IValue> outputs =
+      module
+          .run_method("forward_lower", coord_wrapped_Tensor, atype_Tensor,
+                      firstneigh_tensor, optional_tensor, fparam_tensor,
+                      aparam_tensor, do_atom_virial_tensor)
+          .toGenericDict();
+  c10::IValue energy_ = outputs.at("energy");
+  c10::IValue force_ = outputs.at("extended_force");
+  c10::IValue virial_ = outputs.at("virial");
+  c10::IValue atom_virial_ = outputs.at("extended_virial");
+  c10::IValue atom_energy_ = outputs.at("atom_energy");
+  torch::Tensor flat_energy_ = energy_.toTensor().view({-1});
+  torch::Tensor cpu_energy_ = flat_energy_.to(torch::kCPU);
+  ener.assign(cpu_energy_.data_ptr<ENERGYTYPE>(),
+              cpu_energy_.data_ptr<ENERGYTYPE>() + cpu_energy_.numel());
+  torch::Tensor flat_atom_energy_ =
+      atom_energy_.toTensor().view({-1}).to(floatType);
+  torch::Tensor cpu_atom_energy_ = flat_atom_energy_.to(torch::kCPU);
+  datom_energy.resize(nall_real,
+                      0.0);  // resize to nall to be consistenet with TF.
+  datom_energy.assign(
+      cpu_atom_energy_.data_ptr<VALUETYPE>(),
+      cpu_atom_energy_.data_ptr<VALUETYPE>() + cpu_atom_energy_.numel());
+  torch::Tensor flat_force_ = force_.toTensor().view({-1}).to(floatType);
+  torch::Tensor cpu_force_ = flat_force_.to(torch::kCPU);
+  dforce.assign(cpu_force_.data_ptr<VALUETYPE>(),
+                cpu_force_.data_ptr<VALUETYPE>() + cpu_force_.numel());
+  torch::Tensor flat_virial_ = virial_.toTensor().view({-1}).to(floatType);
+  torch::Tensor cpu_virial_ = flat_virial_.to(torch::kCPU);
+  virial.assign(cpu_virial_.data_ptr<VALUETYPE>(),
+                cpu_virial_.data_ptr<VALUETYPE>() + cpu_virial_.numel());
+  torch::Tensor flat_atom_virial_ =
+      atom_virial_.toTensor().view({-1}).to(floatType);
+  torch::Tensor cpu_atom_virial_ = flat_atom_virial_.to(torch::kCPU);
+  datom_virial.assign(
+      cpu_atom_virial_.data_ptr<VALUETYPE>(),
+      cpu_atom_virial_.data_ptr<VALUETYPE>() + cpu_atom_virial_.numel());
+  // bkw map
+  force.resize(static_cast<size_t>(nframes) * fwd_map.size() * 3);
+  atom_energy.resize(static_cast<size_t>(nframes) * fwd_map.size());
+  atom_virial.resize(static_cast<size_t>(nframes) * fwd_map.size() * 9);
+  select_map<VALUETYPE>(force, dforce, bkw_map, 3, nframes, fwd_map.size(),
+                        nall_real);
+  select_map<VALUETYPE>(atom_energy, datom_energy, bkw_map, 1, nframes,
+                        fwd_map.size(), nall_real);
+  select_map<VALUETYPE>(atom_virial, datom_virial, bkw_map, 9, nframes,
+                        fwd_map.size(), nall_real);
+}
+template void DeepPotPT::compute<double, std::vector<ENERGYTYPE>>(
+    std::vector<ENERGYTYPE>& ener,
+    std::vector<double>& force,
+    std::vector<double>& virial,
+    std::vector<double>& atom_energy,
+    std::vector<double>& atom_virial,
+    const std::vector<double>& coord,
+    const std::vector<int>& atype,
+    const std::vector<double>& box,
+    const int nghost,
+    const InputNlist& lmp_list,
+    const int& ago,
+    const std::vector<double>& fparam,
+    const std::vector<double>& aparam);
+template void DeepPotPT::compute<float, std::vector<ENERGYTYPE>>(
+    std::vector<ENERGYTYPE>& ener,
+    std::vector<float>& force,
+    std::vector<float>& virial,
+    std::vector<float>& atom_energy,
+    std::vector<float>& atom_virial,
+    const std::vector<float>& coord,
+    const std::vector<int>& atype,
+    const std::vector<float>& box,
+    const int nghost,
+    const InputNlist& lmp_list,
+    const int& ago,
+    const std::vector<float>& fparam,
+    const std::vector<float>& aparam);
+template <typename VALUETYPE, typename ENERGYVTYPE>
+void DeepPotPT::compute(ENERGYVTYPE& ener,
+                        std::vector<VALUETYPE>& force,
+                        std::vector<VALUETYPE>& virial,
+                        std::vector<VALUETYPE>& atom_energy,
+                        std::vector<VALUETYPE>& atom_virial,
+                        const std::vector<VALUETYPE>& coord,
+                        const std::vector<int>& atype,
+                        const std::vector<VALUETYPE>& box,
+                        const std::vector<VALUETYPE>& fparam,
+                        const std::vector<VALUETYPE>& aparam) {
+  torch::Device device(torch::kCUDA, gpu_id);
+  if (!gpu_enabled) {
+    device = torch::Device(torch::kCPU);
+  }
+  std::vector<VALUETYPE> coord_wrapped = coord;
+  int natoms = atype.size();
+  auto options = torch::TensorOptions().dtype(torch::kFloat64);
+  torch::ScalarType floatType = torch::kFloat64;
+  if (std::is_same_v<VALUETYPE, float>) {
+    options = torch::TensorOptions().dtype(torch::kFloat32);
+    floatType = torch::kFloat32;
+  }
+  auto int_options = torch::TensorOptions().dtype(torch::kInt64);
+  int nframes = 1;
+  if (natoms == 0) {
+    // no backward map needed
+    ener.resize(nframes);
+    // dforce of size nall * 3
+    force.resize(static_cast<size_t>(nframes) * natoms * 3);
+    fill(force.begin(), force.end(), (VALUETYPE)0.0);
+    // dvirial of size 9
+    virial.resize(static_cast<size_t>(nframes) * 9);
+    fill(virial.begin(), virial.end(), (VALUETYPE)0.0);
+    // datom_energy_ of size nall
+    atom_energy.resize(static_cast<size_t>(nframes) * natoms);
+    fill(atom_energy.begin(), atom_energy.end(), (VALUETYPE)0.0);
+    // datom_virial_ of size nall * 9
+    atom_virial.resize(static_cast<size_t>(nframes) * natoms * 9);
+    fill(atom_virial.begin(), atom_virial.end(), (VALUETYPE)0.0);
+    return;
+  }
+  std::vector<torch::jit::IValue> inputs;
+  at::Tensor coord_wrapped_Tensor =
+      torch::from_blob(coord_wrapped.data(), {1, natoms, 3}, options)
+          .to(device);
+  inputs.push_back(coord_wrapped_Tensor);
+  std::vector<int64_t> atype_64(atype.begin(), atype.end());
+  at::Tensor atype_Tensor =
+      torch::from_blob(atype_64.data(), {1, natoms}, int_options).to(device);
+  inputs.push_back(atype_Tensor);
+  c10::optional<torch::Tensor> box_Tensor;
+  if (!box.empty()) {
+    box_Tensor =
+        torch::from_blob(const_cast<VALUETYPE*>(box.data()), {1, 9}, options)
+            .to(device);
+  }
+  inputs.push_back(box_Tensor);
+  c10::optional<torch::Tensor> fparam_tensor;
+  if (!fparam.empty()) {
+    fparam_tensor =
+        torch::from_blob(const_cast<VALUETYPE*>(fparam.data()),
+                         {1, static_cast<long int>(fparam.size())}, options)
+            .to(device);
+  }
+  inputs.push_back(fparam_tensor);
+  c10::optional<torch::Tensor> aparam_tensor;
+  if (!aparam.empty()) {
+    aparam_tensor =
+        torch::from_blob(
+            const_cast<VALUETYPE*>(aparam.data()),
+            {1, natoms, static_cast<long int>(aparam.size()) / natoms}, options)
+            .to(device);
+  }
+  inputs.push_back(aparam_tensor);
+  bool do_atom_virial_tensor = true;
+  inputs.push_back(do_atom_virial_tensor);
+  c10::Dict<c10::IValue, c10::IValue> outputs =
+      module.forward(inputs).toGenericDict();
+  c10::IValue energy_ = outputs.at("energy");
+  c10::IValue force_ = outputs.at("force");
+  c10::IValue virial_ = outputs.at("virial");
+  c10::IValue atom_virial_ = outputs.at("atom_virial");
+  c10::IValue atom_energy_ = outputs.at("atom_energy");
+  torch::Tensor flat_energy_ = energy_.toTensor().view({-1});
+  torch::Tensor cpu_energy_ = flat_energy_.to(torch::kCPU);
+  ener.assign(cpu_energy_.data_ptr<ENERGYTYPE>(),
+              cpu_energy_.data_ptr<ENERGYTYPE>() + cpu_energy_.numel());
+  torch::Tensor flat_atom_energy_ =
+      atom_energy_.toTensor().view({-1}).to(floatType);
+  torch::Tensor cpu_atom_energy_ = flat_atom_energy_.to(torch::kCPU);
+  atom_energy.assign(
+      cpu_atom_energy_.data_ptr<VALUETYPE>(),
+      cpu_atom_energy_.data_ptr<VALUETYPE>() + cpu_atom_energy_.numel());
+  torch::Tensor flat_force_ = force_.toTensor().view({-1}).to(floatType);
+  torch::Tensor cpu_force_ = flat_force_.to(torch::kCPU);
+  force.assign(cpu_force_.data_ptr<VALUETYPE>(),
+               cpu_force_.data_ptr<VALUETYPE>() + cpu_force_.numel());
+  torch::Tensor flat_virial_ = virial_.toTensor().view({-1}).to(floatType);
+  torch::Tensor cpu_virial_ = flat_virial_.to(torch::kCPU);
+  virial.assign(cpu_virial_.data_ptr<VALUETYPE>(),
+                cpu_virial_.data_ptr<VALUETYPE>() + cpu_virial_.numel());
+  torch::Tensor flat_atom_virial_ =
+      atom_virial_.toTensor().view({-1}).to(floatType);
+  torch::Tensor cpu_atom_virial_ = flat_atom_virial_.to(torch::kCPU);
+  atom_virial.assign(
+      cpu_atom_virial_.data_ptr<VALUETYPE>(),
+      cpu_atom_virial_.data_ptr<VALUETYPE>() + cpu_atom_virial_.numel());
+}
+
+template void DeepPotPT::compute<double, std::vector<ENERGYTYPE>>(
+    std::vector<ENERGYTYPE>& ener,
+    std::vector<double>& force,
+    std::vector<double>& virial,
+    std::vector<double>& atom_energy,
+    std::vector<double>& atom_virial,
+    const std::vector<double>& coord,
+    const std::vector<int>& atype,
+    const std::vector<double>& box,
+    const std::vector<double>& fparam,
+    const std::vector<double>& aparam);
+template void DeepPotPT::compute<float, std::vector<ENERGYTYPE>>(
+    std::vector<ENERGYTYPE>& ener,
+    std::vector<float>& force,
+    std::vector<float>& virial,
+    std::vector<float>& atom_energy,
+    std::vector<float>& atom_virial,
+    const std::vector<float>& coord,
+    const std::vector<int>& atype,
+    const std::vector<float>& box,
+    const std::vector<float>& fparam,
+    const std::vector<float>& aparam);
+void DeepPotPT::get_type_map(std::string& type_map) {
+  auto ret = module.run_method("get_type_map").toList();
+  for (const torch::IValue& element : ret) {
+    type_map += torch::str(element);  // Convert each element to a string
+    type_map += " ";                  // Add a space between elements
+  }
+}
+
+// forward to template method
+void DeepPotPT::computew(std::vector<double>& ener,
+                         std::vector<double>& force,
+                         std::vector<double>& virial,
+                         std::vector<double>& atom_energy,
+                         std::vector<double>& atom_virial,
+                         const std::vector<double>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<double>& box,
+                         const std::vector<double>& fparam,
+                         const std::vector<double>& aparam) {
+  compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box,
+          fparam, aparam);
+}
+void DeepPotPT::computew(std::vector<double>& ener,
+                         std::vector<float>& force,
+                         std::vector<float>& virial,
+                         std::vector<float>& atom_energy,
+                         std::vector<float>& atom_virial,
+                         const std::vector<float>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<float>& box,
+                         const std::vector<float>& fparam,
+                         const std::vector<float>& aparam) {
+  compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box,
+          fparam, aparam);
+}
+void DeepPotPT::computew(std::vector<double>& ener,
+                         std::vector<double>& force,
+                         std::vector<double>& virial,
+                         std::vector<double>& atom_energy,
+                         std::vector<double>& atom_virial,
+                         const std::vector<double>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<double>& box,
+                         const int nghost,
+                         const InputNlist& inlist,
+                         const int& ago,
+                         const std::vector<double>& fparam,
+                         const std::vector<double>& aparam) {
+  compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box,
+          nghost, inlist, ago, fparam, aparam);
+}
+void DeepPotPT::computew(std::vector<double>& ener,
+                         std::vector<float>& force,
+                         std::vector<float>& virial,
+                         std::vector<float>& atom_energy,
+                         std::vector<float>& atom_virial,
+                         const std::vector<float>& coord,
+                         const std::vector<int>& atype,
+                         const std::vector<float>& box,
+                         const int nghost,
+                         const InputNlist& inlist,
+                         const int& ago,
+                         const std::vector<float>& fparam,
+                         const std::vector<float>& aparam) {
+  compute(ener, force, virial, atom_energy, atom_virial, coord, atype, box,
+          nghost, inlist, ago, fparam, aparam);
+}
+void DeepPotPT::computew_mixed_type(std::vector<double>& ener,
+                                    std::vector<double>& force,
+                                    std::vector<double>& virial,
+                                    std::vector<double>& atom_energy,
+                                    std::vector<double>& atom_virial,
+                                    const int& nframes,
+                                    const std::vector<double>& coord,
+                                    const std::vector<int>& atype,
+                                    const std::vector<double>& box,
+                                    const std::vector<double>& fparam,
+                                    const std::vector<double>& aparam) {
+  throw deepmd::deepmd_exception("computew_mixed_type is not implemented");
+}
+void DeepPotPT::computew_mixed_type(std::vector<double>& ener,
+                                    std::vector<float>& force,
+                                    std::vector<float>& virial,
+                                    std::vector<float>& atom_energy,
+                                    std::vector<float>& atom_virial,
+                                    const int& nframes,
+                                    const std::vector<float>& coord,
+                                    const std::vector<int>& atype,
+                                    const std::vector<float>& box,
+                                    const std::vector<float>& fparam,
+                                    const std::vector<float>& aparam) {
+  throw deepmd::deepmd_exception("computew_mixed_type is not implemented");
+}
+#endif
diff --git a/source/api_cc/src/DeepPotTF.cc b/source/api_cc/src/DeepPotTF.cc
index ef348fe14c..7bf2bebce4 100644
--- a/source/api_cc/src/DeepPotTF.cc
+++ b/source/api_cc/src/DeepPotTF.cc
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: LGPL-3.0-or-later
+#ifdef BUILD_TENSORFLOW
 #include "DeepPotTF.h"
 
 #include <stdexcept>
@@ -1051,3 +1052,4 @@ void DeepPotTF::computew_mixed_type(std::vector<double>& ener,
   compute_mixed_type(ener, force, virial, atom_energy, atom_virial, nframes,
                      coord, atype, box, fparam, aparam);
 }
+#endif
diff --git a/source/api_cc/src/DeepTensor.cc b/source/api_cc/src/DeepTensor.cc
index 2c88ab2f4b..a0596e046f 100644
--- a/source/api_cc/src/DeepTensor.cc
+++ b/source/api_cc/src/DeepTensor.cc
@@ -3,7 +3,9 @@
 
 #include <memory>
 
+#ifdef BUILD_TENSORFLOW
 #include "DeepTensorTF.h"
+#endif
 #include "common.h"
 
 using namespace deepmd;
@@ -31,8 +33,11 @@ void DeepTensor::init(const std::string &model,
   // TODO: To implement detect_backend
   DPBackend backend = deepmd::DPBackend::TensorFlow;
   if (deepmd::DPBackend::TensorFlow == backend) {
-    // TODO: throw errors if TF backend is not built, without mentioning TF
+#ifdef BUILD_TENSORFLOW
     dt = std::make_shared<deepmd::DeepTensorTF>(model, gpu_rank, name_scope_);
+#else
+    throw deepmd::deepmd_exception("TensorFlow backend is not built.");
+#endif
   } else if (deepmd::DPBackend::PyTorch == backend) {
     throw deepmd::deepmd_exception("PyTorch backend is not supported yet");
   } else if (deepmd::DPBackend::Paddle == backend) {
diff --git a/source/api_cc/src/DeepTensorTF.cc b/source/api_cc/src/DeepTensorTF.cc
index 436e389ad2..34a47bc6f3 100644
--- a/source/api_cc/src/DeepTensorTF.cc
+++ b/source/api_cc/src/DeepTensorTF.cc
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: LGPL-3.0-or-later
+#ifdef BUILD_TENSORFLOW
 #include "DeepTensorTF.h"
 
 using namespace deepmd;
@@ -844,3 +845,4 @@ void DeepTensorTF::computew(std::vector<float> &global_tensor,
     atom_virial.clear();
   }
 }
+#endif
diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc
index 2f75aaa291..8f5e21c933 100644
--- a/source/api_cc/src/common.cc
+++ b/source/api_cc/src/common.cc
@@ -3,6 +3,11 @@
 
 #include <fcntl.h>
 
+#include <cstring>
+#include <fstream>
+#include <sstream>
+#include <string>
+
 #include "AtomMap.h"
 #include "device.h"
 #if defined(_WIN32)
@@ -20,10 +25,13 @@
 // not windows
 #include <dlfcn.h>
 #endif
+#ifdef BUILD_TENSORFLOW
+#include "commonTF.h"
 #include "google/protobuf/io/zero_copy_stream_impl.h"
 #include "google/protobuf/text_format.h"
 
 using namespace tensorflow;
+#endif
 
 static std::vector<std::string> split(const std::string& input_,
                                       const std::string& delimiter) {
@@ -285,6 +293,16 @@ void deepmd::NeighborListData::shuffle_exclude_empty(
   ilist = new_ilist;
   jlist = new_jlist;
 }
+void deepmd::NeighborListData::padding() {
+  size_t max_length = 0;
+  for (const auto& row : jlist) {
+    max_length = std::max(max_length, row.size());
+  }
+
+  for (int i = 0; i < jlist.size(); i++) {
+    jlist[i].resize(max_length, -1);
+  }
+}
 
 void deepmd::NeighborListData::make_inlist(InputNlist& inlist) {
   int nloc = ilist.size();
@@ -300,17 +318,19 @@ void deepmd::NeighborListData::make_inlist(InputNlist& inlist) {
   inlist.firstneigh = &firstneigh[0];
 }
 
+#ifdef BUILD_TENSORFLOW
 void deepmd::check_status(const tensorflow::Status& status) {
   if (!status.ok()) {
     std::cout << status.ToString() << std::endl;
     throw deepmd::tf_exception(status.ToString());
   }
 }
+#endif
 
 void throw_env_not_set_warning(std::string env_name) {
   std::cerr << "DeePMD-kit WARNING: Environmental variable " << env_name
-            << " is not set. "
-            << "Tune " << env_name << " for the best performance. "
+            << " is not set. " << "Tune " << env_name
+            << " for the best performance. "
             << "See https://deepmd.rtfd.io/parallelism/ for more information."
             << std::endl;
 }
@@ -320,23 +340,36 @@ void deepmd::get_env_nthreads(int& num_intra_nthreads,
   num_intra_nthreads = 0;
   num_inter_nthreads = 0;
   const char* env_intra_nthreads =
-      std::getenv("TF_INTRA_OP_PARALLELISM_THREADS");
+      std::getenv("DP_INTRA_OP_PARALLELISM_THREADS");
   const char* env_inter_nthreads =
+      std::getenv("DP_INTER_OP_PARALLELISM_THREADS");
+  // backward compatibility
+  const char* env_intra_nthreads_tf =
+      std::getenv("TF_INTRA_OP_PARALLELISM_THREADS");
+  const char* env_inter_nthreads_tf =
       std::getenv("TF_INTER_OP_PARALLELISM_THREADS");
   const char* env_omp_nthreads = std::getenv("OMP_NUM_THREADS");
   if (env_intra_nthreads &&
       std::string(env_intra_nthreads) != std::string("") &&
       atoi(env_intra_nthreads) >= 0) {
     num_intra_nthreads = atoi(env_intra_nthreads);
+  } else if (env_intra_nthreads_tf &&
+             std::string(env_intra_nthreads_tf) != std::string("") &&
+             atoi(env_intra_nthreads_tf) >= 0) {
+    num_intra_nthreads = atoi(env_intra_nthreads_tf);
   } else {
-    throw_env_not_set_warning("TF_INTRA_OP_PARALLELISM_THREADS");
+    throw_env_not_set_warning("DP_INTRA_OP_PARALLELISM_THREADS");
   }
   if (env_inter_nthreads &&
       std::string(env_inter_nthreads) != std::string("") &&
       atoi(env_inter_nthreads) >= 0) {
     num_inter_nthreads = atoi(env_inter_nthreads);
+  } else if (env_inter_nthreads_tf &&
+             std::string(env_inter_nthreads_tf) != std::string("") &&
+             atoi(env_inter_nthreads_tf) >= 0) {
+    num_inter_nthreads = atoi(env_inter_nthreads_tf);
   } else {
-    throw_env_not_set_warning("TF_INTER_OP_PARALLELISM_THREADS");
+    throw_env_not_set_warning("DP_INTER_OP_PARALLELISM_THREADS");
   }
   if (!(env_omp_nthreads && std::string(env_omp_nthreads) != std::string("") &&
         atoi(env_omp_nthreads) >= 0)) {
@@ -344,13 +377,12 @@ void deepmd::get_env_nthreads(int& num_intra_nthreads,
   }
 }
 
-void deepmd::load_op_library() {
-  tensorflow::Env* env = tensorflow::Env::Default();
+static inline void _load_single_op_library(std::string library_name) {
 #if defined(_WIN32)
-  std::string dso_path = "deepmd_op.dll";
+  std::string dso_path = library_name + ".dll";
   void* dso_handle = LoadLibrary(dso_path.c_str());
 #else
-  std::string dso_path = "libdeepmd_op.so";
+  std::string dso_path = "lib" + library_name + ".so";
   void* dso_handle = dlopen(dso_path.c_str(), RTLD_NOW | RTLD_LOCAL);
 #endif
   if (!dso_handle) {
@@ -360,6 +392,15 @@ void deepmd::load_op_library() {
   }
 }
 
+void deepmd::load_op_library() {
+#ifdef BUILD_TENSORFLOW
+  _load_single_op_library("deepmd_op");
+#endif
+#ifdef BUILD_PYTORCH
+  _load_single_op_library("deepmd_op_pt");
+#endif
+}
+
 std::string deepmd::name_prefix(const std::string& scope) {
   std::string prefix = "";
   if (scope != "") {
@@ -368,6 +409,7 @@ std::string deepmd::name_prefix(const std::string& scope) {
   return prefix;
 }
 
+#ifdef BUILD_TENSORFLOW
 template <typename MODELTYPE, typename VALUETYPE>
 int deepmd::session_input_tensors(
     std::vector<std::pair<std::string, Tensor>>& input_tensors,
@@ -850,6 +892,7 @@ int deepmd::session_get_dtype(tensorflow::Session* session,
   // cast enum to int
   return (int)output_rc.dtype();
 }
+#endif
 
 template <typename VT>
 void deepmd::select_map(std::vector<VT>& out,
@@ -940,6 +983,7 @@ void deepmd::select_map_inv(typename std::vector<VT>::iterator out,
   }
 }
 
+#ifdef BUILD_TENSORFLOW
 template int deepmd::session_get_scalar<int>(Session*,
                                              const std::string,
                                              const std::string);
@@ -989,6 +1033,7 @@ template void deepmd::session_get_vector<float>(std::vector<float>&,
                                                 Session*,
                                                 const std::string,
                                                 const std::string);
+#endif
 
 template void deepmd::select_map<float>(std::vector<float>& out,
                                         const std::vector<float>& in,
@@ -1018,6 +1063,7 @@ template void deepmd::select_map_inv<float>(
     const std::vector<int>& idx_map,
     const int& stride);
 
+#ifdef BUILD_TENSORFLOW
 template double deepmd::session_get_scalar<double>(Session*,
                                                    const std::string,
                                                    const std::string);
@@ -1026,6 +1072,7 @@ template void deepmd::session_get_vector<double>(std::vector<double>&,
                                                  Session*,
                                                  const std::string,
                                                  const std::string);
+#endif
 
 template void deepmd::select_map<double>(std::vector<double>& out,
                                          const std::vector<double>& in,
@@ -1055,6 +1102,7 @@ template void deepmd::select_map_inv<double>(
     const std::vector<int>& idx_map,
     const int& stride);
 
+#ifdef BUILD_TENSORFLOW
 template deepmd::STRINGTYPE deepmd::session_get_scalar<deepmd::STRINGTYPE>(
     Session*, const std::string, const std::string);
 
@@ -1093,13 +1141,23 @@ template void deepmd::select_map_inv<deepmd::STRINGTYPE>(
     const typename std::vector<deepmd::STRINGTYPE>::const_iterator in,
     const std::vector<int>& idx_map,
     const int& stride);
+#endif
 
 void deepmd::read_file_to_string(std::string model, std::string& file_content) {
-  deepmd::check_status(tensorflow::ReadFileToString(tensorflow::Env::Default(),
-                                                    model, &file_content));
+  // generated by GitHub Copilot
+  std::ifstream file(model);
+  if (file.is_open()) {
+    std::stringstream buffer;
+    buffer << file.rdbuf();
+    file_content = buffer.str();
+    file.close();
+  } else {
+    throw deepmd::deepmd_exception("Failed to open file: " + model);
+  }
 }
 
 void deepmd::convert_pbtxt_to_pb(std::string fn_pb_txt, std::string fn_pb) {
+#ifdef BUILD_TENSORFLOW
   int fd = open(fn_pb_txt.c_str(), O_RDONLY);
   tensorflow::protobuf::io::ZeroCopyInputStream* input =
       new tensorflow::protobuf::io::FileInputStream(fd);
@@ -1109,8 +1167,13 @@ void deepmd::convert_pbtxt_to_pb(std::string fn_pb_txt, std::string fn_pb) {
   std::fstream output(fn_pb,
                       std::ios::out | std::ios::trunc | std::ios::binary);
   graph_def.SerializeToOstream(&output);
+#else
+  throw deepmd::deepmd_exception(
+      "convert_pbtxt_to_pb: TensorFlow backend is not enabled.");
+#endif
 }
 
+#ifdef BUILD_TENSORFLOW
 template int deepmd::session_input_tensors<double, double>(
     std::vector<std::pair<std::string, tensorflow::Tensor>>& input_tensors,
     const std::vector<double>& dcoord_,
@@ -1272,6 +1335,7 @@ template int deepmd::session_input_tensors_mixed_type<float, float>(
     const deepmd::AtomMap& atommap,
     const std::string scope,
     const bool aparam_nall);
+#endif
 
 void deepmd::print_summary(const std::string& pre) {
   int num_intra_nthreads, num_inter_nthreads;
@@ -1283,17 +1347,19 @@ void deepmd::print_summary(const std::string& pre) {
   std::cout << pre << "source commit at:   " + global_git_date << "\n";
   std::cout << pre << "support model ver.: " + global_model_version << "\n";
 #if defined(GOOGLE_CUDA)
-  std::cout << pre << "build variant:      cuda"
-            << "\n";
+  std::cout << pre << "build variant:      cuda" << "\n";
 #elif defined(TENSORFLOW_USE_ROCM)
-  std::cout << pre << "build variant:      rocm"
-            << "\n";
+  std::cout << pre << "build variant:      rocm" << "\n";
 #else
-  std::cout << pre << "build variant:      cpu"
-            << "\n";
+  std::cout << pre << "build variant:      cpu" << "\n";
 #endif
+#ifdef BUILD_TENSORFLOW
   std::cout << pre << "build with tf inc:  " + global_tf_include_dir << "\n";
   std::cout << pre << "build with tf lib:  " + global_tf_lib << "\n";
+#endif
+#ifdef BUILD_PYTORCH
+  std::cout << pre << "build with pt lib:  " + global_pt_lib << "\n";
+#endif
   std::cout << pre
             << "set tf intra_op_parallelism_threads: " << num_intra_nthreads
             << "\n";
diff --git a/source/api_cc/tests/test_deeppot_a_fparam_aparam_pt.cc b/source/api_cc/tests/test_deeppot_a_fparam_aparam_pt.cc
new file mode 100644
index 0000000000..dfaf0abc06
--- /dev/null
+++ b/source/api_cc/tests/test_deeppot_a_fparam_aparam_pt.cc
@@ -0,0 +1,384 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+#include <fcntl.h>
+#include <gtest/gtest.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <cmath>
+#include <fstream>
+#include <vector>
+
+#include "DeepPot.h"
+#include "neighbor_list.h"
+#include "test_utils.h"
+
+// 1e-10 cannot pass; unclear bug or not
+#undef EPSILON
+#define EPSILON (std::is_same<VALUETYPE, double>::value ? 1e-7 : 1e-4)
+
+template <class VALUETYPE>
+class TestInferDeepPotAFParamAParamPt : public ::testing::Test {
+ protected:
+  std::vector<VALUETYPE> coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector<int> atype = {0, 0, 0, 0, 0, 0};
+  std::vector<VALUETYPE> box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
+  std::vector<VALUETYPE> fparam = {0.25852028};
+  std::vector<VALUETYPE> aparam = {0.25852028, 0.25852028, 0.25852028,
+                                   0.25852028, 0.25852028, 0.25852028};
+  std::vector<VALUETYPE> expected_e = {
+      -1.038271183039953804e-01, -7.285433575272914908e-02,
+      -9.467600174099155552e-02, -1.467050086239614082e-01,
+      -7.660561620618722145e-02, -7.277295998502930630e-02};
+  std::vector<VALUETYPE> expected_f = {
+      6.622266817497907132e-02,  5.278739055693523058e-02,
+      2.265727495541422845e-02,  -2.606047850915838363e-02,
+      -4.538811686410718776e-02, 1.058247569147072187e-02,
+      1.679392490937766935e-01,  -2.257828022687320690e-03,
+      -4.490145670355452645e-02, -1.148364103573685929e-01,
+      -1.169790466695089237e-02, 6.140402504113953025e-02,
+      -8.078778132132799494e-02, -5.838878056243369807e-02,
+      6.773639989682191109e-02,  -1.247724708090079161e-02,
+      6.494523955924384750e-02,  -1.174787188812918687e-01};
+  std::vector<VALUETYPE> expected_v = {
+      -1.589185553287162656e-01, 2.586163333170100279e-03,
+      -1.575127933809472624e-04, -1.855360380105876630e-02,
+      1.949822090859933826e-02,  -1.006552056166355388e-02,
+      3.177029853276916449e-02,  1.714349636720383010e-03,
+      -1.290389175187874483e-03, -8.553510339477603253e-02,
+      -5.654637257232508415e-03, -1.286954833787038420e-02,
+      2.464156457499515687e-02,  -2.398202886026797043e-02,
+      -1.957110465239037672e-02, 2.233492928605742764e-02,
+      6.107843207824020099e-03,  1.707078295947736047e-03,
+      -1.653994088976195043e-01, 3.894358678172111371e-02,
+      -2.169595969759342477e-02, 6.819704294738503786e-03,
+      -5.018242039618424008e-03, 2.640664428663210429e-03,
+      -1.985298275686078057e-03, -3.638421609610945767e-02,
+      2.342932331075030239e-02,  -8.501331914753691710e-02,
+      -2.181253413538992297e-03, 4.311300069651782287e-03,
+      -1.910329328333908129e-03, -1.808810159508548836e-03,
+      -1.540075281450827612e-03, -1.173703213175551763e-02,
+      -2.596306629910121507e-03, 6.705025662372287101e-03,
+      -9.038455005073858795e-02, 3.011717773578577451e-02,
+      -5.083054073419784880e-02, -2.951210292616929069e-03,
+      2.342445652898489383e-02,  -4.091207474993674431e-02,
+      -1.648470649301832236e-02, -2.872261885460645689e-02,
+      4.763924972552112391e-02,  -8.300036532764677732e-02,
+      1.020429228955421243e-03,  -1.026734151199098881e-03,
+      5.678534096113684732e-02,  1.273635718045938205e-02,
+      -1.530143225195957322e-02, -1.061671865629566225e-01,
+      -2.486859433265622629e-02, 2.875323131744185121e-02};
+  int natoms;
+  double expected_tot_e;
+  std::vector<VALUETYPE> expected_tot_v;
+
+  deepmd::DeepPot dp;
+
+  void SetUp() override {
+    dp.init("../../tests/infer/fparam_aparam.pth");
+
+    natoms = expected_e.size();
+    EXPECT_EQ(natoms * 3, expected_f.size());
+    EXPECT_EQ(natoms * 9, expected_v.size());
+    expected_tot_e = 0.;
+    expected_tot_v.resize(9);
+    std::fill(expected_tot_v.begin(), expected_tot_v.end(), 0.);
+    for (int ii = 0; ii < natoms; ++ii) {
+      expected_tot_e += expected_e[ii];
+    }
+    for (int ii = 0; ii < natoms; ++ii) {
+      for (int dd = 0; dd < 9; ++dd) {
+        expected_tot_v[dd] += expected_v[ii * 9 + dd];
+      }
+    }
+  };
+
+  void TearDown() override { remove("fparam_aparam.pb"); };
+};
+
+TYPED_TEST_SUITE(TestInferDeepPotAFParamAParamPt, ValueTypes);
+
+TYPED_TEST(TestInferDeepPotAFParamAParamPt, cpu_build_nlist) {
+  using VALUETYPE = TypeParam;
+  std::vector<VALUETYPE>& coord = this->coord;
+  std::vector<int>& atype = this->atype;
+  std::vector<VALUETYPE>& box = this->box;
+  std::vector<VALUETYPE>& fparam = this->fparam;
+  std::vector<VALUETYPE>& aparam = this->aparam;
+  std::vector<VALUETYPE>& expected_e = this->expected_e;
+  std::vector<VALUETYPE>& expected_f = this->expected_f;
+  std::vector<VALUETYPE>& expected_v = this->expected_v;
+  int& natoms = this->natoms;
+  double& expected_tot_e = this->expected_tot_e;
+  std::vector<VALUETYPE>& expected_tot_v = this->expected_tot_v;
+  deepmd::DeepPot& dp = this->dp;
+  double ener;
+  std::vector<VALUETYPE> force, virial;
+  dp.compute(ener, force, virial, coord, atype, box, fparam, aparam);
+
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+
+  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
+    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
+  }
+}
+
+TYPED_TEST(TestInferDeepPotAFParamAParamPt, cpu_build_nlist_atomic) {
+  using VALUETYPE = TypeParam;
+  std::vector<VALUETYPE>& coord = this->coord;
+  std::vector<int>& atype = this->atype;
+  std::vector<VALUETYPE>& box = this->box;
+  std::vector<VALUETYPE>& fparam = this->fparam;
+  std::vector<VALUETYPE>& aparam = this->aparam;
+  std::vector<VALUETYPE>& expected_e = this->expected_e;
+  std::vector<VALUETYPE>& expected_f = this->expected_f;
+  std::vector<VALUETYPE>& expected_v = this->expected_v;
+  int& natoms = this->natoms;
+  double& expected_tot_e = this->expected_tot_e;
+  std::vector<VALUETYPE>& expected_tot_v = this->expected_tot_v;
+  deepmd::DeepPot& dp = this->dp;
+  double ener;
+  std::vector<VALUETYPE> force, virial, atom_ener, atom_vir;
+  dp.compute(ener, force, virial, atom_ener, atom_vir, coord, atype, box,
+             fparam, aparam);
+
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+  EXPECT_EQ(atom_ener.size(), natoms);
+  EXPECT_EQ(atom_vir.size(), natoms * 9);
+
+  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
+    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < natoms; ++ii) {
+    EXPECT_LT(fabs(atom_ener[ii] - expected_e[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < natoms * 9; ++ii) {
+    EXPECT_LT(fabs(atom_vir[ii] - expected_v[ii]), EPSILON);
+  }
+}
+
+TYPED_TEST(TestInferDeepPotAFParamAParamPt, cpu_lmp_nlist) {
+  using VALUETYPE = TypeParam;
+  std::vector<VALUETYPE>& coord = this->coord;
+  std::vector<int>& atype = this->atype;
+  std::vector<VALUETYPE>& box = this->box;
+  std::vector<VALUETYPE>& fparam = this->fparam;
+  std::vector<VALUETYPE>& aparam = this->aparam;
+  std::vector<VALUETYPE>& expected_e = this->expected_e;
+  std::vector<VALUETYPE>& expected_f = this->expected_f;
+  std::vector<VALUETYPE>& expected_v = this->expected_v;
+  int& natoms = this->natoms;
+  double& expected_tot_e = this->expected_tot_e;
+  std::vector<VALUETYPE>& expected_tot_v = this->expected_tot_v;
+  deepmd::DeepPot& dp = this->dp;
+  float rc = dp.cutoff();
+  int nloc = coord.size() / 3;
+  std::vector<VALUETYPE> coord_cpy;
+  std::vector<int> atype_cpy, mapping;
+  std::vector<std::vector<int> > nlist_data;
+  _build_nlist<VALUETYPE>(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
+  int nall = coord_cpy.size() / 3;
+  std::vector<int> ilist(nloc), numneigh(nloc);
+  std::vector<int*> firstneigh(nloc);
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
+  convert_nlist(inlist, nlist_data);
+
+  double ener;
+  std::vector<VALUETYPE> force_, virial;
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 0, fparam, aparam);
+  std::vector<VALUETYPE> force;
+  _fold_back<VALUETYPE>(force, force_, mapping, nloc, nall, 3);
+
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+
+  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
+    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
+  }
+
+  ener = 0.;
+  std::fill(force_.begin(), force_.end(), 0.0);
+  std::fill(virial.begin(), virial.end(), 0.0);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 1, fparam, aparam);
+  _fold_back<VALUETYPE>(force, force_, mapping, nloc, nall, 3);
+
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+
+  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
+    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
+  }
+}
+
+TYPED_TEST(TestInferDeepPotAFParamAParamPt, cpu_lmp_nlist_atomic) {
+  using VALUETYPE = TypeParam;
+  std::vector<VALUETYPE>& coord = this->coord;
+  std::vector<int>& atype = this->atype;
+  std::vector<VALUETYPE>& box = this->box;
+  std::vector<VALUETYPE>& fparam = this->fparam;
+  std::vector<VALUETYPE>& aparam = this->aparam;
+  std::vector<VALUETYPE>& expected_e = this->expected_e;
+  std::vector<VALUETYPE>& expected_f = this->expected_f;
+  std::vector<VALUETYPE>& expected_v = this->expected_v;
+  int& natoms = this->natoms;
+  double& expected_tot_e = this->expected_tot_e;
+  std::vector<VALUETYPE>& expected_tot_v = this->expected_tot_v;
+  deepmd::DeepPot& dp = this->dp;
+  float rc = dp.cutoff();
+  int nloc = coord.size() / 3;
+  std::vector<VALUETYPE> coord_cpy;
+  std::vector<int> atype_cpy, mapping;
+  std::vector<std::vector<int> > nlist_data;
+  _build_nlist<VALUETYPE>(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
+  int nall = coord_cpy.size() / 3;
+  std::vector<int> ilist(nloc), numneigh(nloc);
+  std::vector<int*> firstneigh(nloc);
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
+  convert_nlist(inlist, nlist_data);
+
+  double ener;
+  std::vector<VALUETYPE> force_, atom_ener_, atom_vir_, virial;
+  std::vector<VALUETYPE> force, atom_ener, atom_vir;
+  dp.compute(ener, force_, virial, atom_ener_, atom_vir_, coord_cpy, atype_cpy,
+             box, nall - nloc, inlist, 0, fparam, aparam);
+  _fold_back<VALUETYPE>(force, force_, mapping, nloc, nall, 3);
+  _fold_back<VALUETYPE>(atom_ener, atom_ener_, mapping, nloc, nall, 1);
+  _fold_back<VALUETYPE>(atom_vir, atom_vir_, mapping, nloc, nall, 9);
+
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+  EXPECT_EQ(atom_ener.size(), natoms);
+  EXPECT_EQ(atom_vir.size(), natoms * 9);
+
+  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
+    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < natoms; ++ii) {
+    EXPECT_LT(fabs(atom_ener[ii] - expected_e[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < natoms * 9; ++ii) {
+    EXPECT_LT(fabs(atom_vir[ii] - expected_v[ii]), EPSILON);
+  }
+
+  ener = 0.;
+  std::fill(force_.begin(), force_.end(), 0.0);
+  std::fill(virial.begin(), virial.end(), 0.0);
+  std::fill(atom_ener_.begin(), atom_ener_.end(), 0.0);
+  std::fill(atom_vir_.begin(), atom_vir_.end(), 0.0);
+  dp.compute(ener, force_, virial, atom_ener_, atom_vir_, coord_cpy, atype_cpy,
+             box, nall - nloc, inlist, 1, fparam, aparam);
+  _fold_back<VALUETYPE>(force, force_, mapping, nloc, nall, 3);
+  _fold_back<VALUETYPE>(atom_ener, atom_ener_, mapping, nloc, nall, 1);
+  _fold_back<VALUETYPE>(atom_vir, atom_vir_, mapping, nloc, nall, 9);
+
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+  EXPECT_EQ(atom_ener.size(), natoms);
+  EXPECT_EQ(atom_vir.size(), natoms * 9);
+
+  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
+    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < natoms; ++ii) {
+    EXPECT_LT(fabs(atom_ener[ii] - expected_e[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < natoms * 9; ++ii) {
+    EXPECT_LT(fabs(atom_vir[ii] - expected_v[ii]), EPSILON);
+  }
+}
+
+TYPED_TEST(TestInferDeepPotAFParamAParamPt, cpu_lmp_nlist_2rc) {
+  using VALUETYPE = TypeParam;
+  std::vector<VALUETYPE>& coord = this->coord;
+  std::vector<int>& atype = this->atype;
+  std::vector<VALUETYPE>& box = this->box;
+  std::vector<VALUETYPE>& fparam = this->fparam;
+  std::vector<VALUETYPE>& aparam = this->aparam;
+  std::vector<VALUETYPE>& expected_e = this->expected_e;
+  std::vector<VALUETYPE>& expected_f = this->expected_f;
+  std::vector<VALUETYPE>& expected_v = this->expected_v;
+  int& natoms = this->natoms;
+  double& expected_tot_e = this->expected_tot_e;
+  std::vector<VALUETYPE>& expected_tot_v = this->expected_tot_v;
+  deepmd::DeepPot& dp = this->dp;
+  float rc = dp.cutoff();
+  int nloc = coord.size() / 3;
+  std::vector<VALUETYPE> coord_cpy;
+  std::vector<int> atype_cpy, mapping;
+  std::vector<std::vector<int> > nlist_data;
+  _build_nlist<VALUETYPE>(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc * 2);
+  int nall = coord_cpy.size() / 3;
+  std::vector<int> ilist(nloc), numneigh(nloc);
+  std::vector<int*> firstneigh(nloc);
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
+  convert_nlist(inlist, nlist_data);
+
+  double ener;
+  std::vector<VALUETYPE> force_(nall * 3, 0.0), virial(9, 0.0);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 0, fparam, aparam);
+  std::vector<VALUETYPE> force;
+  _fold_back<VALUETYPE>(force, force_, mapping, nloc, nall, 3);
+
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+
+  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
+    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
+  }
+
+  ener = 0.;
+  std::fill(force_.begin(), force_.end(), 0.0);
+  std::fill(virial.begin(), virial.end(), 0.0);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 1, fparam, aparam);
+  _fold_back<VALUETYPE>(force, force_, mapping, nloc, nall, 3);
+
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+
+  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
+    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
+  }
+}
diff --git a/source/api_cc/tests/test_deeppot_pt.cc b/source/api_cc/tests/test_deeppot_pt.cc
new file mode 100644
index 0000000000..cc30e606c0
--- /dev/null
+++ b/source/api_cc/tests/test_deeppot_pt.cc
@@ -0,0 +1,623 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+#include <fcntl.h>
+#include <gtest/gtest.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <cmath>
+#include <fstream>
+#include <vector>
+
+#include "DeepPot.h"
+#include "neighbor_list.h"
+#include "test_utils.h"
+
+template <class VALUETYPE>
+class TestInferDeepPotAPt : public ::testing::Test {
+ protected:
+  std::vector<VALUETYPE> coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector<int> atype = {0, 1, 1, 0, 1, 1};
+  std::vector<VALUETYPE> box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
+  std::vector<VALUETYPE> expected_e = {
+
+      -93.016873944029, -185.923296645958, -185.927096544970,
+      -93.019371018039, -185.926179995548, -185.924351901852};
+  std::vector<VALUETYPE> expected_f = {
+
+      0.006277522211,  -0.001117962774, 0.000618580445,  0.009928999655,
+      0.003026035654,  -0.006941982227, 0.000667853212,  -0.002449963843,
+      0.006506463508,  -0.007284129115, 0.000530662205,  -0.000028806821,
+      0.000068097781,  0.006121331983,  -0.009019754602, -0.009658343745,
+      -0.006110103225, 0.008865499697};
+  std::vector<VALUETYPE> expected_v = {
+      -0.000155238009, 0.000116605516,  -0.007869862476, 0.000465578340,
+      0.008182547185,  -0.002398713212, -0.008112887338, -0.002423738425,
+      0.007210716605,  -0.019203504012, 0.001724938709,  0.009909211091,
+      0.001153857542,  -0.001600015103, -0.000560024090, 0.010727836276,
+      -0.001034836404, -0.007973454377, -0.021517399106, -0.004064359664,
+      0.004866398692,  -0.003360038617, -0.007241406162, 0.005920941051,
+      0.004899151657,  0.006290788591,  -0.006478820311, 0.001921504710,
+      0.001313470921,  -0.000304091236, 0.001684345981,  0.004124109256,
+      -0.006396084465, -0.000701095618, -0.006356507032, 0.009818550859,
+      -0.015230664587, -0.000110244376, 0.000690319396,  0.000045953023,
+      -0.005726548770, 0.008769818495,  -0.000572380210, 0.008860603423,
+      -0.013819348050, -0.021227082558, -0.004977781343, 0.006646239696,
+      -0.005987066507, -0.002767831232, 0.003746502525,  0.007697590397,
+      0.003746130152,  -0.005172634748};
+  int natoms;
+  double expected_tot_e;
+  std::vector<VALUETYPE> expected_tot_v;
+
+  deepmd::DeepPot dp;
+
+  void SetUp() override {
+    std::string file_name = "../../tests/infer/deeppot_sea.pth";
+
+    dp.init(file_name);
+
+    natoms = expected_e.size();
+    EXPECT_EQ(natoms * 3, expected_f.size());
+    EXPECT_EQ(natoms * 9, expected_v.size());
+    expected_tot_e = 0.;
+    expected_tot_v.resize(9);
+    std::fill(expected_tot_v.begin(), expected_tot_v.end(), 0.);
+    for (int ii = 0; ii < natoms; ++ii) {
+      expected_tot_e += expected_e[ii];
+    }
+    for (int ii = 0; ii < natoms; ++ii) {
+      for (int dd = 0; dd < 9; ++dd) {
+        expected_tot_v[dd] += expected_v[ii * 9 + dd];
+      }
+    }
+  };
+
+  void TearDown() override { remove("deeppot.pb"); };
+};
+
+TYPED_TEST_SUITE(TestInferDeepPotAPt, ValueTypes);
+
+TYPED_TEST(TestInferDeepPotAPt, cpu_build_nlist) {
+  using VALUETYPE = TypeParam;
+  std::vector<VALUETYPE>& coord = this->coord;
+  std::vector<int>& atype = this->atype;
+  std::vector<VALUETYPE>& box = this->box;
+  std::vector<VALUETYPE>& expected_e = this->expected_e;
+  std::vector<VALUETYPE>& expected_f = this->expected_f;
+  std::vector<VALUETYPE>& expected_v = this->expected_v;
+  int& natoms = this->natoms;
+  double& expected_tot_e = this->expected_tot_e;
+  std::vector<VALUETYPE>& expected_tot_v = this->expected_tot_v;
+  deepmd::DeepPot& dp = this->dp;
+  double ener;
+  std::vector<VALUETYPE> force, virial;
+  dp.compute(ener, force, virial, coord, atype, box);
+
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+
+  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
+    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
+  }
+}
+
+TYPED_TEST(TestInferDeepPotAPt, cpu_build_nlist_numfv) {
+  using VALUETYPE = TypeParam;
+  std::vector<VALUETYPE>& coord = this->coord;
+  std::vector<int>& atype = this->atype;
+  std::vector<VALUETYPE>& box = this->box;
+  std::vector<VALUETYPE>& expected_e = this->expected_e;
+  std::vector<VALUETYPE>& expected_f = this->expected_f;
+  std::vector<VALUETYPE>& expected_v = this->expected_v;
+  int& natoms = this->natoms;
+  double& expected_tot_e = this->expected_tot_e;
+  std::vector<VALUETYPE>& expected_tot_v = this->expected_tot_v;
+  deepmd::DeepPot& dp = this->dp;
+  class MyModel : public EnergyModelTest<VALUETYPE> {
+    deepmd::DeepPot& mydp;
+    const std::vector<int> atype;
+
+   public:
+    MyModel(deepmd::DeepPot& dp_, const std::vector<int>& atype_)
+        : mydp(dp_), atype(atype_){};
+    virtual void compute(double& ener,
+                         std::vector<VALUETYPE>& force,
+                         std::vector<VALUETYPE>& virial,
+                         const std::vector<VALUETYPE>& coord,
+                         const std::vector<VALUETYPE>& box) {
+      mydp.compute(ener, force, virial, coord, atype, box);
+    }
+  };
+  MyModel model(dp, atype);
+  model.test_f(coord, box);
+  model.test_v(coord, box);
+  std::vector<VALUETYPE> box_(box);
+  box_[1] -= 0.4;
+  model.test_f(coord, box_);
+  model.test_v(coord, box_);
+  box_[2] += 0.5;
+  model.test_f(coord, box_);
+  model.test_v(coord, box_);
+  box_[4] += 0.2;
+  model.test_f(coord, box_);
+  model.test_v(coord, box_);
+  box_[3] -= 0.3;
+  model.test_f(coord, box_);
+  model.test_v(coord, box_);
+  box_[6] -= 0.7;
+  model.test_f(coord, box_);
+  model.test_v(coord, box_);
+  box_[7] += 0.6;
+  model.test_f(coord, box_);
+  model.test_v(coord, box_);
+}
+
+TYPED_TEST(TestInferDeepPotAPt, cpu_build_nlist_atomic) {
+  using VALUETYPE = TypeParam;
+  std::vector<VALUETYPE>& coord = this->coord;
+  std::vector<int>& atype = this->atype;
+  std::vector<VALUETYPE>& box = this->box;
+  std::vector<VALUETYPE>& expected_e = this->expected_e;
+  std::vector<VALUETYPE>& expected_f = this->expected_f;
+  std::vector<VALUETYPE>& expected_v = this->expected_v;
+  int& natoms = this->natoms;
+  double& expected_tot_e = this->expected_tot_e;
+  std::vector<VALUETYPE>& expected_tot_v = this->expected_tot_v;
+  deepmd::DeepPot& dp = this->dp;
+  double ener;
+  std::vector<VALUETYPE> force, virial, atom_ener, atom_vir;
+  dp.compute(ener, force, virial, atom_ener, atom_vir, coord, atype, box);
+
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+  EXPECT_EQ(atom_ener.size(), natoms);
+  EXPECT_EQ(atom_vir.size(), natoms * 9);
+
+  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
+    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < natoms; ++ii) {
+    EXPECT_LT(fabs(atom_ener[ii] - expected_e[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < natoms * 9; ++ii) {
+    EXPECT_LT(fabs(atom_vir[ii] - expected_v[ii]), EPSILON);
+  }
+}
+
+TYPED_TEST(TestInferDeepPotAPt, cpu_lmp_nlist) {
+  using VALUETYPE = TypeParam;
+  std::vector<VALUETYPE>& coord = this->coord;
+  std::vector<int>& atype = this->atype;
+  std::vector<VALUETYPE>& box = this->box;
+  std::vector<VALUETYPE>& expected_e = this->expected_e;
+  std::vector<VALUETYPE>& expected_f = this->expected_f;
+  std::vector<VALUETYPE>& expected_v = this->expected_v;
+  int& natoms = this->natoms;
+  double& expected_tot_e = this->expected_tot_e;
+  std::vector<VALUETYPE>& expected_tot_v = this->expected_tot_v;
+  deepmd::DeepPot& dp = this->dp;
+  float rc = dp.cutoff();
+  int nloc = coord.size() / 3;
+  std::vector<VALUETYPE> coord_cpy;
+  std::vector<int> atype_cpy, mapping;
+  std::vector<std::vector<int> > nlist_data;
+  _build_nlist<VALUETYPE>(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
+  int nall = coord_cpy.size() / 3;
+  std::vector<int> ilist(nloc), numneigh(nloc);
+  std::vector<int*> firstneigh(nloc);
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
+  convert_nlist(inlist, nlist_data);
+
+  double ener;
+  std::vector<VALUETYPE> force_, virial;
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 0);
+  std::vector<VALUETYPE> force;
+  _fold_back<VALUETYPE>(force, force_, mapping, nloc, nall, 3);
+
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+
+  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
+    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
+  }
+
+  ener = 0.;
+  std::fill(force_.begin(), force_.end(), 0.0);
+  std::fill(virial.begin(), virial.end(), 0.0);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 1);
+  _fold_back<VALUETYPE>(force, force_, mapping, nloc, nall, 3);
+
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+
+  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
+    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
+  }
+}
+
+TYPED_TEST(TestInferDeepPotAPt, cpu_lmp_nlist_atomic) {
+  using VALUETYPE = TypeParam;
+  std::vector<VALUETYPE>& coord = this->coord;
+  std::vector<int>& atype = this->atype;
+  std::vector<VALUETYPE>& box = this->box;
+  std::vector<VALUETYPE>& expected_e = this->expected_e;
+  std::vector<VALUETYPE>& expected_f = this->expected_f;
+  std::vector<VALUETYPE>& expected_v = this->expected_v;
+  int& natoms = this->natoms;
+  double& expected_tot_e = this->expected_tot_e;
+  std::vector<VALUETYPE>& expected_tot_v = this->expected_tot_v;
+  deepmd::DeepPot& dp = this->dp;
+  float rc = dp.cutoff();
+  int nloc = coord.size() / 3;
+  std::vector<VALUETYPE> coord_cpy;
+  std::vector<int> atype_cpy, mapping;
+  std::vector<std::vector<int> > nlist_data;
+  _build_nlist<VALUETYPE>(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
+  int nall = coord_cpy.size() / 3;
+  std::vector<int> ilist(nloc), numneigh(nloc);
+  std::vector<int*> firstneigh(nloc);
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
+  convert_nlist(inlist, nlist_data);
+  double ener;
+  std::vector<VALUETYPE> force_, atom_ener_, atom_vir_, virial;
+  std::vector<VALUETYPE> force, atom_ener, atom_vir;
+  dp.compute(ener, force_, virial, atom_ener_, atom_vir_, coord_cpy, atype_cpy,
+             box, nall - nloc, inlist, 0);
+  _fold_back<VALUETYPE>(force, force_, mapping, nloc, nall, 3);
+  _fold_back<VALUETYPE>(atom_ener, atom_ener_, mapping, nloc, nall, 1);
+  _fold_back<VALUETYPE>(atom_vir, atom_vir_, mapping, nloc, nall, 9);
+
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+  EXPECT_EQ(atom_ener.size(), natoms);
+  EXPECT_EQ(atom_vir.size(), natoms * 9);
+
+  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
+    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < natoms; ++ii) {
+    EXPECT_LT(fabs(atom_ener[ii] - expected_e[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < natoms * 9; ++ii) {
+    EXPECT_LT(fabs(atom_vir[ii] - expected_v[ii]), EPSILON);
+  }
+
+  ener = 0.;
+  std::fill(force_.begin(), force_.end(), 0.0);
+  std::fill(virial.begin(), virial.end(), 0.0);
+  std::fill(atom_ener_.begin(), atom_ener_.end(), 0.0);
+  std::fill(atom_vir_.begin(), atom_vir_.end(), 0.0);
+  dp.compute(ener, force_, virial, atom_ener_, atom_vir_, coord_cpy, atype_cpy,
+             box, nall - nloc, inlist, 1);
+  _fold_back<VALUETYPE>(force, force_, mapping, nloc, nall, 3);
+  _fold_back<VALUETYPE>(atom_ener, atom_ener_, mapping, nloc, nall, 1);
+  _fold_back<VALUETYPE>(atom_vir, atom_vir_, mapping, nloc, nall, 9);
+
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+  EXPECT_EQ(atom_ener.size(), natoms);
+  EXPECT_EQ(atom_vir.size(), natoms * 9);
+
+  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
+    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < natoms; ++ii) {
+    EXPECT_LT(fabs(atom_ener[ii] - expected_e[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < natoms * 9; ++ii) {
+    EXPECT_LT(fabs(atom_vir[ii] - expected_v[ii]), EPSILON);
+  }
+}
+
+TYPED_TEST(TestInferDeepPotAPt, cpu_lmp_nlist_2rc) {
+  using VALUETYPE = TypeParam;
+  std::vector<VALUETYPE>& coord = this->coord;
+  std::vector<int>& atype = this->atype;
+  std::vector<VALUETYPE>& box = this->box;
+  std::vector<VALUETYPE>& expected_e = this->expected_e;
+  std::vector<VALUETYPE>& expected_f = this->expected_f;
+  std::vector<VALUETYPE>& expected_v = this->expected_v;
+  int& natoms = this->natoms;
+  double& expected_tot_e = this->expected_tot_e;
+  std::vector<VALUETYPE>& expected_tot_v = this->expected_tot_v;
+  deepmd::DeepPot& dp = this->dp;
+  float rc = dp.cutoff();
+  int nloc = coord.size() / 3;
+  std::vector<VALUETYPE> coord_cpy;
+  std::vector<int> atype_cpy, mapping;
+  std::vector<std::vector<int> > nlist_data;
+  _build_nlist<VALUETYPE>(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc * 2);
+  int nall = coord_cpy.size() / 3;
+  std::vector<int> ilist(nloc), numneigh(nloc);
+  std::vector<int*> firstneigh(nloc);
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
+  convert_nlist(inlist, nlist_data);
+
+  double ener;
+  std::vector<VALUETYPE> force_(nall * 3, 0.0), virial(9, 0.0);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 0);
+  std::vector<VALUETYPE> force;
+  _fold_back<VALUETYPE>(force, force_, mapping, nloc, nall, 3);
+
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+
+  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
+    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
+  }
+
+  ener = 0.;
+  std::fill(force_.begin(), force_.end(), 0.0);
+  std::fill(virial.begin(), virial.end(), 0.0);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 1);
+  _fold_back<VALUETYPE>(force, force_, mapping, nloc, nall, 3);
+
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+
+  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
+    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
+  }
+}
+
+TYPED_TEST(TestInferDeepPotAPt, cpu_lmp_nlist_type_sel) {
+  using VALUETYPE = TypeParam;
+  std::vector<VALUETYPE>& coord = this->coord;
+  std::vector<int>& atype = this->atype;
+  std::vector<VALUETYPE>& box = this->box;
+  std::vector<VALUETYPE>& expected_e = this->expected_e;
+  std::vector<VALUETYPE>& expected_f = this->expected_f;
+  std::vector<VALUETYPE>& expected_v = this->expected_v;
+  int& natoms = this->natoms;
+  double& expected_tot_e = this->expected_tot_e;
+  std::vector<VALUETYPE>& expected_tot_v = this->expected_tot_v;
+  deepmd::DeepPot& dp = this->dp;
+  float rc = dp.cutoff();
+
+  // add vir atoms
+  int nvir = 2;
+  std::vector<VALUETYPE> coord_vir(nvir * 3);
+  std::vector<int> atype_vir(nvir, 2);
+  for (int ii = 0; ii < nvir; ++ii) {
+    coord_vir[ii] = coord[ii];
+  }
+  coord.insert(coord.begin(), coord_vir.begin(), coord_vir.end());
+  atype.insert(atype.begin(), atype_vir.begin(), atype_vir.end());
+  natoms += nvir;
+  std::vector<VALUETYPE> expected_f_vir(nvir * 3, 0.0);
+  expected_f.insert(expected_f.begin(), expected_f_vir.begin(),
+                    expected_f_vir.end());
+
+  // build nlist
+  int nloc = coord.size() / 3;
+  std::vector<VALUETYPE> coord_cpy;
+  std::vector<int> atype_cpy, mapping;
+  std::vector<std::vector<int> > nlist_data;
+  _build_nlist<VALUETYPE>(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
+  int nall = coord_cpy.size() / 3;
+  std::vector<int> ilist(nloc), numneigh(nloc);
+  std::vector<int*> firstneigh(nloc);
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
+  convert_nlist(inlist, nlist_data);
+
+  // dp compute
+  double ener;
+  std::vector<VALUETYPE> force_(nall * 3, 0.0), virial(9, 0.0);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 0);
+  // fold back
+  std::vector<VALUETYPE> force;
+  _fold_back<VALUETYPE>(force, force_, mapping, nloc, nall, 3);
+
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+
+  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
+    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
+  }
+}
+
+TYPED_TEST(TestInferDeepPotAPt, cpu_lmp_nlist_type_sel_atomic) {
+  using VALUETYPE = TypeParam;
+  std::vector<VALUETYPE>& coord = this->coord;
+  std::vector<int>& atype = this->atype;
+  std::vector<VALUETYPE>& box = this->box;
+  std::vector<VALUETYPE>& expected_e = this->expected_e;
+  std::vector<VALUETYPE>& expected_f = this->expected_f;
+  std::vector<VALUETYPE>& expected_v = this->expected_v;
+  int& natoms = this->natoms;
+  double& expected_tot_e = this->expected_tot_e;
+  std::vector<VALUETYPE>& expected_tot_v = this->expected_tot_v;
+  deepmd::DeepPot& dp = this->dp;
+  float rc = dp.cutoff();
+
+  // add vir atoms
+  int nvir = 2;
+  std::vector<VALUETYPE> coord_vir(nvir * 3);
+  std::vector<int> atype_vir(nvir, 2);
+  for (int ii = 0; ii < nvir; ++ii) {
+    coord_vir[ii] = coord[ii];
+  }
+  coord.insert(coord.begin(), coord_vir.begin(), coord_vir.end());
+  atype.insert(atype.begin(), atype_vir.begin(), atype_vir.end());
+  natoms += nvir;
+  std::vector<VALUETYPE> expected_f_vir(nvir * 3, 0.0);
+  expected_f.insert(expected_f.begin(), expected_f_vir.begin(),
+                    expected_f_vir.end());
+
+  // build nlist
+  int nloc = coord.size() / 3;
+  std::vector<VALUETYPE> coord_cpy;
+  std::vector<int> atype_cpy, mapping;
+  std::vector<std::vector<int> > nlist_data;
+  _build_nlist<VALUETYPE>(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
+  int nall = coord_cpy.size() / 3;
+  std::vector<int> ilist(nloc), numneigh(nloc);
+  std::vector<int*> firstneigh(nloc);
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
+  convert_nlist(inlist, nlist_data);
+
+  // dp compute
+  double ener;
+  std::vector<VALUETYPE> force_(nall * 3, 0.0), virial(9, 0.0), atomic_energy,
+      atomic_virial;
+  dp.compute(ener, force_, virial, atomic_energy, atomic_virial, coord_cpy,
+             atype_cpy, box, nall - nloc, inlist, 0);
+  // fold back
+  std::vector<VALUETYPE> force;
+  _fold_back<VALUETYPE>(force, force_, mapping, nloc, nall, 3);
+
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+
+  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
+    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
+  }
+}
+
+TYPED_TEST(TestInferDeepPotAPt, print_summary) {
+  deepmd::DeepPot& dp = this->dp;
+  dp.print_summary("");
+}
+
+template <class VALUETYPE>
+class TestInferDeepPotAPtNoPbc : public ::testing::Test {
+ protected:
+  std::vector<VALUETYPE> coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector<int> atype = {0, 1, 1, 0, 1, 1};
+  std::vector<VALUETYPE> box = {};
+  std::vector<VALUETYPE> expected_e = {-93.003304908874,  -185.915806542480,
+                                       -185.928116717624, -93.017934934346,
+                                       -185.924393412278, -185.923906740801};
+  std::vector<VALUETYPE> expected_f = {
+      0.000868182637,  -0.000363698132, -0.000657003077, -0.000868182637,
+      0.000363698132,  0.000657003077,  0.007932614680,  -0.001003609844,
+      0.000737731722,  -0.003883788858, 0.000686896282,  -0.000578400682,
+      0.004064895086,  0.006115547962,  -0.008747097814, -0.008113720908,
+      -0.005798834400, 0.008587766774};
+  std::vector<VALUETYPE> expected_v = {
+      0.007762485364,  -0.003251851977, -0.005874313248, -0.003251851977,
+      0.001362262315,  0.002460860955,  -0.005874313248, 0.002460860955,
+      0.004445426242,  -0.007120030212, 0.002982715359,  0.005388130971,
+      0.002982715359,  -0.001249515894, -0.002257190002, 0.005388130971,
+      -0.002257190002, -0.004077504519, -0.015805863589, 0.001952684835,
+      -0.001522876482, 0.001796574704,  -0.000358803950, 0.000369710813,
+      -0.001108943040, 0.000332585300,  -0.000395481309, 0.008873525623,
+      0.001919112114,  -0.001486235522, 0.002002929532,  0.004222469272,
+      -0.006517211126, -0.001656192522, -0.006501210045, 0.010118622295,
+      -0.006548889778, -0.000465126991, 0.001002876603,  0.000240398734,
+      -0.005794489784, 0.008940685179,  -0.000121727685, 0.008931999051,
+      -0.013852797563, -0.017962955675, -0.004645050453, 0.006214692837,
+      -0.005278283465, -0.002662692758, 0.003618275905,  0.007095320684,
+      0.003648086464,  -0.005023397513};
+  int natoms;
+  double expected_tot_e;
+  std::vector<VALUETYPE> expected_tot_v;
+
+  deepmd::DeepPot dp;
+
+  void SetUp() override {
+    std::string file_name = "../../tests/infer/deeppot_sea.pth";
+    dp.init(file_name);
+
+    natoms = expected_e.size();
+    EXPECT_EQ(natoms * 3, expected_f.size());
+    EXPECT_EQ(natoms * 9, expected_v.size());
+    expected_tot_e = 0.;
+    expected_tot_v.resize(9);
+    std::fill(expected_tot_v.begin(), expected_tot_v.end(), 0.);
+    for (int ii = 0; ii < natoms; ++ii) {
+      expected_tot_e += expected_e[ii];
+    }
+    for (int ii = 0; ii < natoms; ++ii) {
+      for (int dd = 0; dd < 9; ++dd) {
+        expected_tot_v[dd] += expected_v[ii * 9 + dd];
+      }
+    }
+  };
+
+  void TearDown() override { remove("deeppot.pb"); };
+};
+
+TYPED_TEST_SUITE(TestInferDeepPotAPtNoPbc, ValueTypes);
+
+TYPED_TEST(TestInferDeepPotAPtNoPbc, cpu_build_nlist) {
+  using VALUETYPE = TypeParam;
+  std::vector<VALUETYPE>& coord = this->coord;
+  std::vector<int>& atype = this->atype;
+  std::vector<VALUETYPE>& box = this->box;
+  std::vector<VALUETYPE>& expected_e = this->expected_e;
+  std::vector<VALUETYPE>& expected_f = this->expected_f;
+  std::vector<VALUETYPE>& expected_v = this->expected_v;
+  int& natoms = this->natoms;
+  double& expected_tot_e = this->expected_tot_e;
+  std::vector<VALUETYPE>& expected_tot_v = this->expected_tot_v;
+  deepmd::DeepPot& dp = this->dp;
+  double ener;
+  std::vector<VALUETYPE> force, virial;
+  dp.compute(ener, force, virial, coord, atype, box);
+
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+
+  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
+    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
+  }
+}
diff --git a/source/api_cc/tests/test_ewald.cc b/source/api_cc/tests/test_ewald.cc
index 7eb433816d..d5aa6993a9 100644
--- a/source/api_cc/tests/test_ewald.cc
+++ b/source/api_cc/tests/test_ewald.cc
@@ -18,8 +18,8 @@ class TestInferEwald : public ::testing::Test {
                                   3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
   std::vector<VALUETYPE> charge = {-2, 1, 1, -2, 1, 1};
   std::vector<VALUETYPE> box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
-  void SetUp() override{};
-  void TearDown() override{};
+  void SetUp() override {};
+  void TearDown() override {};
 };
 
 TYPED_TEST_SUITE(TestInferEwald, ValueTypes);
diff --git a/source/config/CMakeLists.txt b/source/config/CMakeLists.txt
index 5473b91f29..b1ce17566f 100644
--- a/source/config/CMakeLists.txt
+++ b/source/config/CMakeLists.txt
@@ -1,5 +1,19 @@
 # config
 
+# cmake will treat true, false, on, off, 1, 0 as booleans we hope an easy way to
+# check it
+if(ENABLE_TENSORFLOW)
+  set(ENABLE_TENSORFLOW 1)
+else()
+  set(ENABLE_TENSORFLOW 0)
+endif()
+
+if(ENABLE_PYTORCH)
+  set(ENABLE_PYTORCH 1)
+else()
+  set(ENABLE_PYTORCH 0)
+endif()
+
 configure_file("run_config.ini" "${CMAKE_CURRENT_BINARY_DIR}/run_config.ini"
                @ONLY)
 
diff --git a/source/config/run_config.ini b/source/config/run_config.ini
index 3f0a7a33a8..5cdaa35317 100644
--- a/source/config/run_config.ini
+++ b/source/config/run_config.ini
@@ -4,8 +4,10 @@ GIT_SUMM = @GIT_SUMM@
 GIT_HASH = @GIT_HASH@
 GIT_DATE = @GIT_DATE@
 GIT_BRANCH = @GIT_BRANCH@
+ENABLE_TENSORFLOW = @ENABLE_TENSORFLOW@
+ENABLE_PYTORCH = @ENABLE_PYTORCH@
 TF_INCLUDE_DIR = @TensorFlow_INCLUDE_DIRS@
-TF_LIBS = @TensorFlow_LIBRARY@
+TF_LIBS = @TensorFlow_LIBRARY_PATH@
 TF_VERSION = @TENSORFLOW_VERSION@
 TF_CXX11_ABI_FLAG = @OP_CXX_ABI@
 MODEL_VERSION=@MODEL_VERSION@
diff --git a/source/gmx/CMakeLists.txt b/source/gmx/CMakeLists.txt
index c119e4b212..d445479d39 100644
--- a/source/gmx/CMakeLists.txt
+++ b/source/gmx/CMakeLists.txt
@@ -19,6 +19,7 @@ else()
   target_link_libraries(${libgmxname} PUBLIC ${LIB_DEEPMD_CC})
   target_compile_definitions(${libgmxname} PUBLIC "DP_USE_CXX_API")
 endif()
+target_compile_definitions(${libgmxname} PRIVATE "DP_GMX_PLUGIN_INTERNAL")
 target_include_directories(${libgmxname}
                            PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
 target_include_directories(${libgmxname}
diff --git a/source/gmx/include/gmx_plugin.h b/source/gmx/include/gmx_plugin.h
index 51eae8ca7e..430ca2fe0d 100644
--- a/source/gmx/include/gmx_plugin.h
+++ b/source/gmx/include/gmx_plugin.h
@@ -5,7 +5,11 @@
 #include "DeepPot.h"
 namespace deepmd_compat = deepmd;
 #else
+#ifdef DP_GMX_PLUGIN_INTERNAL
 #include "deepmd.hpp"
+#else
+#include "deepmd/deepmd.hpp"
+#endif
 namespace deepmd_compat = deepmd::hpp;
 #endif
 
diff --git a/source/install/build_cc.sh b/source/install/build_cc.sh
index fef9e82ebc..3db4c92a3e 100755
--- a/source/install/build_cc.sh
+++ b/source/install/build_cc.sh
@@ -20,7 +20,13 @@ NPROC=$(nproc --all)
 BUILD_TMP_DIR=${SCRIPT_PATH}/../build
 mkdir -p ${BUILD_TMP_DIR}
 cd ${BUILD_TMP_DIR}
-cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DUSE_TF_PYTHON_LIBS=TRUE ${CUDA_ARGS} -DLAMMPS_VERSION=stable_2Aug2023_update2 ..
+cmake -D ENABLE_TENSORFLOW=ON \
+	-D ENABLE_PYTORCH=ON \
+	-D CMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
+	-D USE_TF_PYTHON_LIBS=TRUE \
+	${CUDA_ARGS} \
+	-D LAMMPS_VERSION=stable_2Aug2023_update3 \
+	..
 cmake --build . -j${NPROC}
 cmake --install .
 
diff --git a/source/install/build_from_c.sh b/source/install/build_from_c.sh
index c1188252ab..e8dcee945d 100755
--- a/source/install/build_from_c.sh
+++ b/source/install/build_from_c.sh
@@ -13,7 +13,7 @@ NPROC=$(nproc --all)
 BUILD_TMP_DIR=${SCRIPT_PATH}/../build
 mkdir -p ${BUILD_TMP_DIR}
 cd ${BUILD_TMP_DIR}
-cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DDEEPMD_C_ROOT=${DEEPMD_C_ROOT} -DLAMMPS_VERSION=stable_2Aug2023_update2 ..
+cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DDEEPMD_C_ROOT=${DEEPMD_C_ROOT} -DLAMMPS_VERSION=stable_2Aug2023_update3 ..
 cmake --build . -j${NPROC}
 cmake --install .
 cmake --build . --target=lammps
diff --git a/source/install/build_lammps.sh b/source/install/build_lammps.sh
index 2b5bf0a643..fca3b3e5ad 100755
--- a/source/install/build_lammps.sh
+++ b/source/install/build_lammps.sh
@@ -14,7 +14,7 @@ BUILD_TMP_DIR=${SCRIPT_PATH}/../build_lammps
 mkdir -p ${BUILD_TMP_DIR}
 cd ${BUILD_TMP_DIR}
 # download LAMMMPS
-LAMMPS_VERSION=stable_2Aug2023_update2
+LAMMPS_VERSION=stable_2Aug2023_update3
 if [ ! -d "lammps-${LAMMPS_VERSION}" ]; then
 	curl -L -o lammps.tar.gz https://github.com/lammps/lammps/archive/refs/tags/${LAMMPS_VERSION}.tar.gz
 	tar vxzf lammps.tar.gz
diff --git a/source/install/build_tf.py b/source/install/build_tf.py
index 15847d2c21..d639e2cf51 100755
--- a/source/install/build_tf.py
+++ b/source/install/build_tf.py
@@ -294,7 +294,7 @@ def set_directory(path: Path):
     Examples
     --------
     >>> with set_directory("some_path"):
-    ...    do_something()
+    ...     do_something()
     """
     cwd = Path().absolute()
     path.mkdir(exist_ok=True, parents=True)
@@ -423,14 +423,14 @@ def __init__(self, version="1.11.0") -> None:
         self.version = version
 
     @property
-    @lru_cache()
+    @lru_cache
     def resources(self) -> Dict[str, OnlineResource]:
         return {
             "bazelisk": RESOURCES["bazelisk-" + self.version],
         }
 
     @property
-    @lru_cache()
+    @lru_cache
     def dependencies(self) -> Dict[str, Build]:
         return {}
 
@@ -449,12 +449,12 @@ class BuildNumpy(Build):
     """Build NumPy."""
 
     @property
-    @lru_cache()
+    @lru_cache
     def resources(self) -> Dict[str, OnlineResource]:
         return {}
 
     @property
-    @lru_cache()
+    @lru_cache
     def dependencies(self) -> Dict[str, Build]:
         return {}
 
@@ -481,12 +481,12 @@ class BuildCUDA(Build):
     """Find CUDA."""
 
     @property
-    @lru_cache()
+    @lru_cache
     def resources(self) -> Dict[str, OnlineResource]:
         return {}
 
     @property
-    @lru_cache()
+    @lru_cache
     def dependencies(self) -> Dict[str, Build]:
         return {}
 
@@ -536,7 +536,7 @@ def cudnn_version(self):
         )
 
     @property
-    @lru_cache()
+    @lru_cache
     def cuda_compute_capabilities(self):
         """Get cuda compute capabilities."""
         cuda_version = tuple(map(int, self.cuda_version.split(".")))
@@ -554,12 +554,12 @@ class BuildROCM(Build):
     """Find ROCm."""
 
     @property
-    @lru_cache()
+    @lru_cache
     def resources(self) -> Dict[str, OnlineResource]:
         return {}
 
     @property
-    @lru_cache()
+    @lru_cache
     def dependencies(self) -> Dict[str, Build]:
         return {}
 
@@ -599,14 +599,14 @@ def __init__(
         self.enable_rocm = enable_rocm
 
     @property
-    @lru_cache()
+    @lru_cache
     def resources(self) -> Dict[str, OnlineResource]:
         return {
             "tensorflow": RESOURCES["tensorflow-" + self.version],
         }
 
     @property
-    @lru_cache()
+    @lru_cache
     def dependencies(self) -> Dict[str, Build]:
         optional_dep = {}
         if self.enable_cuda:
diff --git a/source/install/docker/Dockerfile b/source/install/docker/Dockerfile
index 26b7be9f19..1e25fbb6d3 100644
--- a/source/install/docker/Dockerfile
+++ b/source/install/docker/Dockerfile
@@ -6,7 +6,8 @@ RUN python -m venv /opt/deepmd-kit
 ENV PATH="/opt/deepmd-kit/bin:$PATH"
 # Install package
 COPY dist /dist
-RUN pip install "$(ls /dist/deepmd_kit${VARIANT}-*manylinux*_x86_64.whl)[gpu,cu${CUDA_VERSION},lmp,ipi]" \
+RUN if [ "${CUDA_VERSION}" = 11 ]; then pip install torch --index-url https://download.pytorch.org/whl/cu118; fi \
+    && pip install "$(ls /dist/deepmd_kit${VARIANT}-*manylinux*_x86_64.whl)[gpu,cu${CUDA_VERSION},lmp,ipi,torch]" \
     && dp -h \
     && lmp -h \
     && dp_ipi \
diff --git a/source/install/test_cc.sh b/source/install/test_cc.sh
index 0dd35f5615..8c75b00762 100755
--- a/source/install/test_cc.sh
+++ b/source/install/test_cc.sh
@@ -17,7 +17,7 @@ INSTALL_PREFIX=${SCRIPT_PATH}/../../dp_test
 BUILD_TMP_DIR=${SCRIPT_PATH}/../build_tests
 mkdir -p ${BUILD_TMP_DIR}
 cd ${BUILD_TMP_DIR}
-cmake -DINSTALL_TENSORFLOW=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DTENSORFLOW_ROOT=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_2Aug2023_update2 ${CUDA_ARGS} ..
+cmake -DINSTALL_TENSORFLOW=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DTENSORFLOW_ROOT=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_2Aug2023_update3 ${CUDA_ARGS} ..
 cmake --build . -j${NPROC}
 cmake --install .
 ctest --output-on-failure
diff --git a/source/install/test_cc_local.sh b/source/install/test_cc_local.sh
index 22d22a27f6..13a10d78e3 100755
--- a/source/install/test_cc_local.sh
+++ b/source/install/test_cc_local.sh
@@ -18,7 +18,15 @@ INSTALL_PREFIX=${SCRIPT_PATH}/../../dp_test
 BUILD_TMP_DIR=${SCRIPT_PATH}/../build_tests
 mkdir -p ${BUILD_TMP_DIR}
 cd ${BUILD_TMP_DIR}
-cmake -DINSTALL_TENSORFLOW=FALSE -DUSE_TF_PYTHON_LIBS=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_2Aug2023_update2 ${CUDA_ARGS} ..
+cmake \
+	-D ENABLE_TENSORFLOW=TRUE \
+	-D ENABLE_PYTORCH=TRUE \
+	-D INSTALL_TENSORFLOW=FALSE \
+	-D USE_TF_PYTHON_LIBS=TRUE \
+	-D CMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
+	-D BUILD_TESTING:BOOL=TRUE \
+	-D LAMMPS_VERSION=stable_2Aug2023_update3 \
+	${CUDA_ARGS} ..
 cmake --build . -j${NPROC}
 cmake --install .
 ctest --output-on-failure
diff --git a/source/ipi/CMakeLists.txt b/source/ipi/CMakeLists.txt
index 158f98aea5..afb8a03261 100644
--- a/source/ipi/CMakeLists.txt
+++ b/source/ipi/CMakeLists.txt
@@ -14,7 +14,7 @@ add_executable(${ipiname} ${DRIVER_SOURCE_FILES})
 # link: libdeepmd_cc
 if(DP_USING_C_API)
   # SimulationRegion.h
-  target_link_libraries(${ipiname} PRIVATE ${LIB_DEEPMD_C} ${LIB_DEEPMD})
+  target_link_libraries(${ipiname} PRIVATE ${LIB_DEEPMD_C})
   target_precompile_headers(${ipiname} PRIVATE [["deepmd.hpp"]])
   remove_definitions(-D_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI})
 else()
@@ -44,13 +44,13 @@ if(APPLE)
     ${ipiname}
     PROPERTIES LINK_FLAGS "${extra_link_flags}"
                INSTALL_RPATH
-               "@loader_path/../${LIB_DIR}:${TensorFlow_LIBRARY_PATH}")
+               "@loader_path/../${LIB_DIR};${TensorFlow_LIBRARY_PATH}")
 else()
   set_target_properties(
     ${ipiname}
     PROPERTIES LINK_FLAGS
                "-Wl,-rpath,'$ORIGIN'/../${LIB_DIR} ${extra_link_flags}"
-               INSTALL_RPATH "$ORIGIN/../${LIB_DIR}:${TensorFlow_LIBRARY_PATH}")
+               INSTALL_RPATH "$ORIGIN/../${LIB_DIR};${TensorFlow_LIBRARY_PATH}")
 endif()
 
 if(CMAKE_TESTING_ENABLED)
diff --git a/source/ipi/driver.cc b/source/ipi/driver.cc
index 1e3d92eb5e..977d76011a 100644
--- a/source/ipi/driver.cc
+++ b/source/ipi/driver.cc
@@ -12,7 +12,6 @@ namespace deepmd_compat = deepmd;
 #include "deepmd.hpp"
 namespace deepmd_compat = deepmd::hpp;
 #endif
-#include "SimulationRegion.h"
 #include "XyzFileManager.h"
 #include "json.hpp"
 #include "sockets.h"
@@ -49,25 +48,6 @@ char *trimwhitespace(char *str) {
   return str;
 }
 
-void normalize_coord(std::vector<double> &coord,
-                     const SimulationRegion<double> &region) {
-  int natoms = coord.size() / 3;
-
-  for (int ii = 0; ii < natoms; ++ii) {
-    double inter[3];
-    region.phys2Inter(inter, &coord[3 * ii]);
-    for (int dd = 0; dd < 3; ++dd) {
-      inter[dd] -= int(floor(inter[dd]));
-      if (inter[dd] < 0) {
-        inter[dd] += 1.;
-      } else if (inter[dd] >= 1) {
-        inter[dd] -= 1.;
-      }
-    }
-    region.inter2Phys(&coord[3 * ii], inter);
-  }
-}
-
 int main(int argc, char *argv[]) {
   if (argc == 1) {
     std::cerr << "usage " << std::endl;
@@ -122,7 +102,6 @@ int main(int argc, char *argv[]) {
   std::vector<double> dcoord_tmp;
   std::vector<int> dtype = cvt.get_type();
   std::vector<double> dbox(9, 0);
-  SimulationRegion<double> region;
   double *msg_buff = NULL;
   double ener;
   double virial[9];
@@ -147,20 +126,17 @@ int main(int argc, char *argv[]) {
       if (!isinit) {
         writebuffer_(&socket, msg_needinit, MSGLEN);
         if (b_verb) {
-          std::cout << "# send back  "
-                    << "NEEDINIT" << std::endl;
+          std::cout << "# send back  " << "NEEDINIT" << std::endl;
         }
       } else if (hasdata) {
         writebuffer_(&socket, msg_havedata, MSGLEN);
         if (b_verb) {
-          std::cout << "# send back  "
-                    << "HAVEDATA" << std::endl;
+          std::cout << "# send back  " << "HAVEDATA" << std::endl;
         }
       } else {
         writebuffer_(&socket, msg_ready, MSGLEN);
         if (b_verb) {
-          std::cout << "# send back  "
-                    << "READY" << std::endl;
+          std::cout << "# send back  " << "READY" << std::endl;
         }
       }
     } else if (header_str == "INIT") {
@@ -179,7 +155,6 @@ int main(int argc, char *argv[]) {
       for (int dd = 0; dd < 9; ++dd) {
         dbox[dd] = cell_h[(dd % 3) * 3 + (dd / 3)] * cvt_len;
       }
-      region.reinitBox(&dbox[0]);
 
       // get number of atoms
       readbuffer_(&socket, (char *)(&cbuf), sizeof(int32_t));
@@ -203,7 +178,6 @@ int main(int argc, char *argv[]) {
         dcoord_tmp[ii] = msg_buff[ii] * cvt_len;
       }
       cvt.forward(dcoord, dcoord_tmp, 3);
-      normalize_coord(dcoord, region);
 
       // nnp over writes ener, force and virial
       nnp_inter.compute(dener, dforce_tmp, dvirial, dcoord, dtype, dbox);
diff --git a/source/ipi/tests/test_driver.py b/source/ipi/tests/test_driver.py
index 9ab6ff53de..b0fbf53b01 100644
--- a/source/ipi/tests/test_driver.py
+++ b/source/ipi/tests/test_driver.py
@@ -18,7 +18,7 @@
     SocketIOCalculator,
 )
 
-from deepmd.utils.convert import (
+from deepmd.tf.utils.convert import (
     convert_pbtxt_to_pb,
 )
 
@@ -53,7 +53,7 @@ def write_input(self, atoms, **kwargs):
         atoms.write(self.xyz_file, format="xyz")
 
 
-class TestDeepPotALargeBoxNoPBC(unittest.TestCase):
+class TestDPIPI(unittest.TestCase):
     # copy from test_deeppot_a.py
     @classmethod
     def setUpClass(cls):
@@ -193,8 +193,8 @@ def test_ase_unix(self):
                 cell=self.box.reshape((3, 3)),
                 calculator=calc,
             )
-        ee = water.get_potential_energy()
-        ff = water.get_forces()
+            ee = water.get_potential_energy()
+            ff = water.get_forces()
         nframes = 1
         np.testing.assert_almost_equal(
             ff.ravel(), self.expected_f.ravel(), default_places
@@ -213,11 +213,146 @@ def test_ase_nounix(self):
                 cell=self.box.reshape((3, 3)),
                 calculator=calc,
             )
-        ee = water.get_potential_energy()
-        ff = water.get_forces()
+            ee = water.get_potential_energy()
+            ff = water.get_forces()
         nframes = 1
         np.testing.assert_almost_equal(
             ff.ravel(), self.expected_f.ravel(), default_places
         )
         expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
+
+    def test_normalize_coords(self):
+        # coordinate nomarlization should happen inside the interface
+        cell = self.box.reshape((3, 3))
+        coord = self.coords.reshape((-1, 3))
+        # random unwrap coords
+        coord[0] += np.array([3, 0, 0]) @ cell
+        coord[1] += np.array([0, -3, 0]) @ cell
+        coord[2] += np.array([0, 0, 3]) @ cell
+        coord[3] += np.array([-3, 0, 0]) @ cell
+        coord[4] += np.array([0, 3, 0]) @ cell
+        coord[5] += np.array([0, 0, -3]) @ cell
+        with SocketIOCalculator(
+            DPiPICalculator(self.model_file, use_unix=False),
+            log=sys.stdout,
+        ) as calc:
+            water = Atoms(
+                "OHHOHH",
+                positions=coord,
+                cell=cell,
+                calculator=calc,
+            )
+            ee = water.get_potential_energy()
+            ff = water.get_forces()
+        nframes = 1
+        np.testing.assert_almost_equal(
+            ff.ravel(), self.expected_f.ravel(), default_places
+        )
+        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
+        np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
+
+
+class TestDPIPIPt(TestDPIPI):
+    @classmethod
+    def setUpClass(cls):
+        cls.model_file = str(tests_path / "infer" / "deeppot_sea.pth")
+
+    def setUp(self):
+        super().setUp()
+
+        self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
+        self.expected_e = np.array(
+            [
+                -93.016873944029,
+                -185.923296645958,
+                -185.927096544970,
+                -93.019371018039,
+                -185.926179995548,
+                -185.924351901852,
+            ]
+        )
+        self.expected_f = np.array(
+            [
+                0.006277522211,
+                -0.001117962774,
+                0.000618580445,
+                0.009928999655,
+                0.003026035654,
+                -0.006941982227,
+                0.000667853212,
+                -0.002449963843,
+                0.006506463508,
+                -0.007284129115,
+                0.000530662205,
+                -0.000028806821,
+                0.000068097781,
+                0.006121331983,
+                -0.009019754602,
+                -0.009658343745,
+                -0.006110103225,
+                0.008865499697,
+            ]
+        )
+        self.expected_v = np.array(
+            [
+                -0.000155238009,
+                0.000116605516,
+                -0.007869862476,
+                0.000465578340,
+                0.008182547185,
+                -0.002398713212,
+                -0.008112887338,
+                -0.002423738425,
+                0.007210716605,
+                -0.019203504012,
+                0.001724938709,
+                0.009909211091,
+                0.001153857542,
+                -0.001600015103,
+                -0.000560024090,
+                0.010727836276,
+                -0.001034836404,
+                -0.007973454377,
+                -0.021517399106,
+                -0.004064359664,
+                0.004866398692,
+                -0.003360038617,
+                -0.007241406162,
+                0.005920941051,
+                0.004899151657,
+                0.006290788591,
+                -0.006478820311,
+                0.001921504710,
+                0.001313470921,
+                -0.000304091236,
+                0.001684345981,
+                0.004124109256,
+                -0.006396084465,
+                -0.000701095618,
+                -0.006356507032,
+                0.009818550859,
+                -0.015230664587,
+                -0.000110244376,
+                0.000690319396,
+                0.000045953023,
+                -0.005726548770,
+                0.008769818495,
+                -0.000572380210,
+                0.008860603423,
+                -0.013819348050,
+                -0.021227082558,
+                -0.004977781343,
+                0.006646239696,
+                -0.005987066507,
+                -0.002767831232,
+                0.003746502525,
+                0.007697590397,
+                0.003746130152,
+                -0.005172634748,
+            ]
+        )
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.dp = None
diff --git a/source/lib/src/gpu/CMakeLists.txt b/source/lib/src/gpu/CMakeLists.txt
index 3bd24cc620..804e1c0506 100644
--- a/source/lib/src/gpu/CMakeLists.txt
+++ b/source/lib/src/gpu/CMakeLists.txt
@@ -10,8 +10,10 @@ if(USE_CUDA_TOOLKIT)
   endif()
   enable_language(CUDA)
   set(CMAKE_CUDA_STANDARD 11)
-  add_compile_definitions(
-    "$<$<COMPILE_LANGUAGE:CUDA>:_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI}>")
+  if(DEFINED OP_CXX_ABI)
+    add_compile_definitions(
+      "$<$<COMPILE_LANGUAGE:CUDA>:_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI}>")
+  endif()
 
   find_package(CUDAToolkit REQUIRED)
 
diff --git a/source/lib/src/gpu/prod_env_mat.cu b/source/lib/src/gpu/prod_env_mat.cu
index a69e014272..e8909edb44 100644
--- a/source/lib/src/gpu/prod_env_mat.cu
+++ b/source/lib/src/gpu/prod_env_mat.cu
@@ -486,7 +486,6 @@ __global__ void compute_env_mat_a(FPTYPE* em,
             std[type[bid] * ndescrpt + idx_value + ii];
       }
     } else {
-      // TODO: move it to the memset.
       row_descript[idx_value] -= avg[type[bid] * ndescrpt + idx_value] /
                                  std[type[bid] * ndescrpt + idx_value];
     }
@@ -562,7 +561,6 @@ __global__ void compute_env_mat_r(FPTYPE* em,
       row_em[idx_value] = (dd - avg[type[bid] * ndescrpt + idx_value]) /
                           std[type[bid] * ndescrpt + idx_value];
     } else {
-      // TODO: move it to the memset.
       row_em[idx_value] -= avg[type[bid] * ndescrpt + idx_value] /
                            std[type[bid] * ndescrpt + idx_value];
     }
diff --git a/source/lib/tests/test_ewald.cc b/source/lib/tests/test_ewald.cc
index 45c8ea7bf1..ca6f3a845e 100644
--- a/source/lib/tests/test_ewald.cc
+++ b/source/lib/tests/test_ewald.cc
@@ -30,7 +30,7 @@ class TestEwald : public ::testing::Test {
       1.9076542856278367e+00,  1.3101841366497322e+00, 1.9794445391572657e-01,
       -9.8010077026955389e-01, 1.9794445391572657e-01, 1.9232614011636004e+00};
 
-  void SetUp() override{};
+  void SetUp() override {};
 };
 
 TEST_F(TestEwald, cpu) {
diff --git a/source/lmp/fix_dplr.cpp b/source/lmp/fix_dplr.cpp
index ea60023e26..57bbd5765a 100644
--- a/source/lmp/fix_dplr.cpp
+++ b/source/lmp/fix_dplr.cpp
@@ -313,11 +313,6 @@ void FixDPLR::setup(int vflag) {
   // else {
   //   error->all(FLERR, "respa is not supported by this fix");
   // }
-  if (vflag) {
-    v_setup(vflag);
-  } else {
-    evflag = 0;
-  }
 }
 
 /* ---------------------------------------------------------------------- */
@@ -527,6 +522,7 @@ void FixDPLR::pre_force(int vflag) {
     // int res_idx = sort_fwd_map[sel_fwd[idx0]];
     int res_idx = sel_fwd[idx0];
     // int ret_idx = dpl_bwd[res_idx];
+    atom->image[idx1] = atom->image[idx0];
     for (int dd = 0; dd < 3; ++dd) {
       x[idx1][dd] =
           x[idx0][dd] + tensor[res_idx * 3 + dd] * dist_unit_cvt_factor;
diff --git a/source/lmp/plugin/CMakeLists.txt b/source/lmp/plugin/CMakeLists.txt
index bfc2253412..4fdae7ac5b 100644
--- a/source/lmp/plugin/CMakeLists.txt
+++ b/source/lmp/plugin/CMakeLists.txt
@@ -126,7 +126,7 @@ if(DEFINED LAMMPS_SOURCE_ROOT OR DEFINED LAMMPS_VERSION)
       install(
         CODE "execute_process( \
         COMMAND ${CMAKE_COMMAND} -E create_symlink \
-		../${CMAKE_SHARED_LIBRARY_PREFIX}${libname}${CMAKE_SHARED_LIBRARY_SUFFIX} \
+		../${CMAKE_SHARED_MODULE_PREFIX}${libname}${CMAKE_SHARED_MODULE_SUFFIX} \
         ${CMAKE_INSTALL_PREFIX}/lib/${libname}/${PLUGINNAME}   \
         )")
     endif()
diff --git a/source/lmp/tests/run_mpi_pair_deepmd.py b/source/lmp/tests/run_mpi_pair_deepmd.py
new file mode 100644
index 0000000000..b27774ce11
--- /dev/null
+++ b/source/lmp/tests/run_mpi_pair_deepmd.py
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Use mpi4py to run a LAMMPS pair_deepmd + model deviation (atomic, relative) task."""
+
+import argparse
+
+import numpy as np
+from lammps import (
+    PyLammps,
+)
+from mpi4py import (
+    MPI,
+)
+
+comm = MPI.COMM_WORLD
+rank = comm.Get_rank()
+
+parser = argparse.ArgumentParser()
+parser.add_argument("DATAFILE", type=str)
+parser.add_argument("PBFILE", type=str)
+parser.add_argument("PBFILE2", type=str)
+parser.add_argument("MD_FILE", type=str)
+parser.add_argument("OUTPUT", type=str)
+parser.add_argument("--balance", action="store_true")
+
+args = parser.parse_args()
+data_file = args.DATAFILE
+pb_file = args.PBFILE
+pb_file2 = args.PBFILE2
+md_file = args.MD_FILE
+output = args.OUTPUT
+balance = args.balance
+
+lammps = PyLammps()
+if balance:
+    # 4 and 2 atoms
+    lammps.processors("2 1 1")
+else:
+    # 6 and 0 atoms
+    lammps.processors("1 2 1")
+lammps.units("metal")
+lammps.boundary("p p p")
+lammps.atom_style("atomic")
+lammps.neighbor("2.0 bin")
+lammps.neigh_modify("every 10 delay 0 check no")
+lammps.read_data(data_file)
+lammps.mass("1 16")
+lammps.mass("2 2")
+lammps.timestep(0.0005)
+lammps.fix("1 all nve")
+
+relative = 1.0
+lammps.pair_style(
+    f"deepmd {pb_file} {pb_file2} out_file {md_file} out_freq 1 atomic relative {relative}"
+)
+lammps.pair_coeff("* *")
+lammps.run(0)
+pe = lammps.eval("pe")
+if rank == 0:
+    arr = [pe]
+    np.savetxt(output, np.array(arr))
+MPI.Finalize()
diff --git a/source/lmp/tests/test_deeptensor.py b/source/lmp/tests/test_deeptensor.py
index 3e684b386e..6df0a8617a 100644
--- a/source/lmp/tests/test_deeptensor.py
+++ b/source/lmp/tests/test_deeptensor.py
@@ -57,19 +57,11 @@
 
 
 sp.check_output(
-    "{} -m deepmd convert-from pbtxt -i {} -o {}".format(
-        sys.executable,
-        pbtxt_file.resolve(),
-        pb_file.resolve(),
-    ).split()
+    f"{sys.executable} -m deepmd convert-from pbtxt -i {pbtxt_file.resolve()} -o {pb_file.resolve()}".split()
 )
 
 sp.check_output(
-    "{} -m deepmd convert-from pbtxt -i {} -o {}".format(
-        sys.executable,
-        pbtxt_file2.resolve(),
-        pb_file2.resolve(),
-    ).split()
+    f"{sys.executable} -m deepmd convert-from pbtxt -i {pbtxt_file2.resolve()} -o {pb_file2.resolve()}".split()
 )
 
 
diff --git a/source/lmp/tests/test_dplr.py b/source/lmp/tests/test_dplr.py
index 9c8f1c0d4f..2dd3531894 100644
--- a/source/lmp/tests/test_dplr.py
+++ b/source/lmp/tests/test_dplr.py
@@ -264,11 +264,7 @@
 
 
 sp.check_output(
-    "{} -m deepmd convert-from pbtxt -i {} -o {}".format(
-        sys.executable,
-        pbtxt_file.resolve(),
-        pb_file.resolve(),
-    ).split()
+    f"{sys.executable} -m deepmd convert-from pbtxt -i {pbtxt_file.resolve()} -o {pb_file.resolve()}".split()
 )
 
 
diff --git a/source/lmp/tests/test_lammps.py b/source/lmp/tests/test_lammps.py
index 028b403abf..0e7c289f24 100644
--- a/source/lmp/tests/test_lammps.py
+++ b/source/lmp/tests/test_lammps.py
@@ -1,7 +1,10 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import importlib
 import os
+import shutil
 import subprocess as sp
 import sys
+import tempfile
 from pathlib import (
     Path,
 )
@@ -219,18 +222,10 @@
 
 
 sp.check_output(
-    "{} -m deepmd convert-from pbtxt -i {} -o {}".format(
-        sys.executable,
-        pbtxt_file.resolve(),
-        pb_file.resolve(),
-    ).split()
+    f"{sys.executable} -m deepmd convert-from pbtxt -i {pbtxt_file.resolve()} -o {pb_file.resolve()}".split()
 )
 sp.check_output(
-    "{} -m deepmd convert-from pbtxt -i {} -o {}".format(
-        sys.executable,
-        pbtxt_file2.resolve(),
-        pb_file2.resolve(),
-    ).split()
+    f"{sys.executable} -m deepmd convert-from pbtxt -i {pbtxt_file2.resolve()} -o {pb_file2.resolve()}".split()
 )
 
 
@@ -348,9 +343,7 @@ def test_pair_deepmd_virial(lammps):
 
 def test_pair_deepmd_model_devi(lammps):
     lammps.pair_style(
-        "deepmd {} {} out_file {} out_freq 1 atomic".format(
-            pb_file.resolve(), pb_file2.resolve(), md_file.resolve()
-        )
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
     )
     lammps.pair_coeff("* *")
     lammps.run(0)
@@ -376,9 +369,7 @@ def test_pair_deepmd_model_devi(lammps):
 
 def test_pair_deepmd_model_devi_virial(lammps):
     lammps.pair_style(
-        "deepmd {} {} out_file {} out_freq 1 atomic".format(
-            pb_file.resolve(), pb_file2.resolve(), md_file.resolve()
-        )
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
     )
     lammps.pair_coeff("* *")
     lammps.compute("virial all centroid/stress/atom NULL pair")
@@ -417,9 +408,7 @@ def test_pair_deepmd_model_devi_virial(lammps):
 def test_pair_deepmd_model_devi_atomic_relative(lammps):
     relative = 1.0
     lammps.pair_style(
-        "deepmd {} {} out_file {} out_freq 1 atomic relative {}".format(
-            pb_file.resolve(), pb_file2.resolve(), md_file.resolve(), relative
-        )
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative {relative}"
     )
     lammps.pair_coeff("* *")
     lammps.run(0)
@@ -448,9 +437,7 @@ def test_pair_deepmd_model_devi_atomic_relative(lammps):
 def test_pair_deepmd_model_devi_atomic_relative_v(lammps):
     relative = 1.0
     lammps.pair_style(
-        "deepmd {} {} out_file {} out_freq 1 atomic relative_v {}".format(
-            pb_file.resolve(), pb_file2.resolve(), md_file.resolve(), relative
-        )
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative_v {relative}"
     )
     lammps.pair_coeff("* *")
     lammps.run(0)
@@ -535,9 +522,7 @@ def test_pair_deepmd_virial_real(lammps_real):
 
 def test_pair_deepmd_model_devi_real(lammps_real):
     lammps_real.pair_style(
-        "deepmd {} {} out_file {} out_freq 1 atomic".format(
-            pb_file.resolve(), pb_file2.resolve(), md_file.resolve()
-        )
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
     )
     lammps_real.pair_coeff("* *")
     lammps_real.run(0)
@@ -567,9 +552,7 @@ def test_pair_deepmd_model_devi_real(lammps_real):
 
 def test_pair_deepmd_model_devi_virial_real(lammps_real):
     lammps_real.pair_style(
-        "deepmd {} {} out_file {} out_freq 1 atomic".format(
-            pb_file.resolve(), pb_file2.resolve(), md_file.resolve()
-        )
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
     )
     lammps_real.pair_coeff("* *")
     lammps_real.compute("virial all centroid/stress/atom NULL pair")
@@ -614,12 +597,7 @@ def test_pair_deepmd_model_devi_virial_real(lammps_real):
 def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real):
     relative = 1.0
     lammps_real.pair_style(
-        "deepmd {} {} out_file {} out_freq 1 atomic relative {}".format(
-            pb_file.resolve(),
-            pb_file2.resolve(),
-            md_file.resolve(),
-            relative * constants.force_metal2real,
-        )
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative {relative * constants.force_metal2real}"
     )
     lammps_real.pair_coeff("* *")
     lammps_real.run(0)
@@ -652,12 +630,7 @@ def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real):
 def test_pair_deepmd_model_devi_atomic_relative_v_real(lammps_real):
     relative = 1.0
     lammps_real.pair_style(
-        "deepmd {} {} out_file {} out_freq 1 atomic relative_v {}".format(
-            pb_file.resolve(),
-            pb_file2.resolve(),
-            md_file.resolve(),
-            relative * constants.ener_metal2real,
-        )
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative_v {relative * constants.ener_metal2real}"
     )
     lammps_real.pair_coeff("* *")
     lammps_real.run(0)
@@ -701,3 +674,52 @@ def test_pair_deepmd_si(lammps_si):
             expected_f[lammps_si.atoms[ii].id - 1] * constants.force_metal2si
         )
     lammps_si.run(1)
+
+
+@pytest.mark.skipif(
+    shutil.which("mpirun") is None, reason="MPI is not installed on this system"
+)
+@pytest.mark.skipif(
+    importlib.util.find_spec("mpi4py") is None, reason="mpi4py is not installed"
+)
+@pytest.mark.parametrize(
+    ("balance_args",),
+    [(["--balance"],), ([],)],
+)
+def test_pair_deepmd_mpi(balance_args: list):
+    with tempfile.NamedTemporaryFile() as f:
+        sp.check_call(
+            [
+                "mpirun",
+                "-n",
+                "2",
+                sys.executable,
+                Path(__file__).parent / "run_mpi_pair_deepmd.py",
+                data_file,
+                pb_file,
+                pb_file2,
+                md_file,
+                f.name,
+                *balance_args,
+            ]
+        )
+        arr = np.loadtxt(f.name, ndmin=1)
+    pe = arr[0]
+
+    relative = 1.0
+    assert pe == pytest.approx(expected_e)
+    # load model devi
+    md = np.loadtxt(md_file.resolve())
+    norm = np.linalg.norm(np.mean([expected_f, expected_f2], axis=0), axis=1)
+    expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1)
+    expected_md_f /= norm + relative
+    assert md[7:] == pytest.approx(expected_md_f)
+    assert md[4] == pytest.approx(np.max(expected_md_f))
+    assert md[5] == pytest.approx(np.min(expected_md_f))
+    assert md[6] == pytest.approx(np.mean(expected_md_f))
+    expected_md_v = (
+        np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6
+    )
+    assert md[1] == pytest.approx(np.max(expected_md_v))
+    assert md[2] == pytest.approx(np.min(expected_md_v))
+    assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
diff --git a/source/lmp/tests/test_lammps_3types.py b/source/lmp/tests/test_lammps_3types.py
index 46e1a00c8f..e4e64d9ecf 100644
--- a/source/lmp/tests/test_lammps_3types.py
+++ b/source/lmp/tests/test_lammps_3types.py
@@ -245,18 +245,10 @@
 nktv2p = 1.6021765e6
 
 sp.check_output(
-    "{} -m deepmd convert-from pbtxt -i {} -o {}".format(
-        sys.executable,
-        pbtxt_file.resolve(),
-        pb_file.resolve(),
-    ).split()
+    f"{sys.executable} -m deepmd convert-from pbtxt -i {pbtxt_file.resolve()} -o {pb_file.resolve()}".split()
 )
 sp.check_output(
-    "{} -m deepmd convert-from pbtxt -i {} -o {}".format(
-        sys.executable,
-        pbtxt_file2.resolve(),
-        pb_file2.resolve(),
-    ).split()
+    f"{sys.executable} -m deepmd convert-from pbtxt -i {pbtxt_file2.resolve()} -o {pb_file2.resolve()}".split()
 )
 
 
@@ -337,9 +329,7 @@ def test_pair_deepmd_virial(lammps):
 
 def test_pair_deepmd_model_devi(lammps):
     lammps.pair_style(
-        "deepmd {} {} out_file {} out_freq 1 atomic".format(
-            pb_file.resolve(), pb_file2.resolve(), md_file.resolve()
-        )
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
     )
     lammps.pair_coeff("* *")
     lammps.run(0)
@@ -365,9 +355,7 @@ def test_pair_deepmd_model_devi(lammps):
 
 def test_pair_deepmd_model_devi_virial(lammps):
     lammps.pair_style(
-        "deepmd {} {} out_file {} out_freq 1 atomic".format(
-            pb_file.resolve(), pb_file2.resolve(), md_file.resolve()
-        )
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
     )
     lammps.pair_coeff("* *")
     lammps.compute("virial all centroid/stress/atom NULL pair")
@@ -406,9 +394,7 @@ def test_pair_deepmd_model_devi_virial(lammps):
 def test_pair_deepmd_model_devi_atomic_relative(lammps):
     relative = 1.0
     lammps.pair_style(
-        "deepmd {} {} out_file {} out_freq 1 atomic relative {}".format(
-            pb_file.resolve(), pb_file2.resolve(), md_file.resolve(), relative
-        )
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative {relative}"
     )
     lammps.pair_coeff("* *")
     lammps.run(0)
@@ -437,9 +423,7 @@ def test_pair_deepmd_model_devi_atomic_relative(lammps):
 def test_pair_deepmd_model_devi_atomic_relative_v(lammps):
     relative = 1.0
     lammps.pair_style(
-        "deepmd {} {} out_file {} out_freq 1 atomic relative_v {}".format(
-            pb_file.resolve(), pb_file2.resolve(), md_file.resolve(), relative
-        )
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative_v {relative}"
     )
     lammps.pair_coeff("* *")
     lammps.run(0)
diff --git a/source/lmp/tests/test_lammps_faparam.py b/source/lmp/tests/test_lammps_faparam.py
index 064928eeb1..f78639a96b 100644
--- a/source/lmp/tests/test_lammps_faparam.py
+++ b/source/lmp/tests/test_lammps_faparam.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 """Test LAMMPS fparam and aparam input."""
+
 import os
 import subprocess as sp
 import sys
@@ -134,11 +135,7 @@
 
 
 sp.check_output(
-    "{} -m deepmd convert-from pbtxt -i {} -o {}".format(
-        sys.executable,
-        pbtxt_file.resolve(),
-        pb_file.resolve(),
-    ).split()
+    f"{sys.executable} -m deepmd convert-from pbtxt -i {pbtxt_file.resolve()} -o {pb_file.resolve()}".split()
 )
 
 
diff --git a/source/lmp/tests/test_lammps_pt.py b/source/lmp/tests/test_lammps_pt.py
new file mode 100644
index 0000000000..245f3eaf6d
--- /dev/null
+++ b/source/lmp/tests/test_lammps_pt.py
@@ -0,0 +1,721 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import importlib
+import os
+import shutil
+import subprocess as sp
+import sys
+import tempfile
+from pathlib import (
+    Path,
+)
+
+import constants
+import numpy as np
+import pytest
+from lammps import (
+    PyLammps,
+)
+from write_lmp_data import (
+    write_lmp_data,
+)
+
+pbtxt_file2 = (
+    Path(__file__).parent.parent.parent / "tests" / "infer" / "deeppot-1.pbtxt"
+)
+pb_file = Path(__file__).parent.parent.parent / "tests" / "infer" / "deeppot_sea.pth"
+pb_file2 = Path(__file__).parent / "graph2.pb"
+system_file = Path(__file__).parent.parent.parent / "tests"
+data_file = Path(__file__).parent / "data.lmp"
+data_file_si = Path(__file__).parent / "data.si"
+data_type_map_file = Path(__file__).parent / "data_type_map.lmp"
+md_file = Path(__file__).parent / "md.out"
+
+# this is as the same as python and c++ tests, test_deeppot_a.py
+expected_ae = np.array(
+    [
+        -93.016873944029,
+        -185.923296645958,
+        -185.927096544970,
+        -93.019371018039,
+        -185.926179995548,
+        -185.924351901852,
+    ]
+)
+expected_e = np.sum(expected_ae)
+expected_f = np.array(
+    [
+        0.006277522211,
+        -0.001117962774,
+        0.000618580445,
+        0.009928999655,
+        0.003026035654,
+        -0.006941982227,
+        0.000667853212,
+        -0.002449963843,
+        0.006506463508,
+        -0.007284129115,
+        0.000530662205,
+        -0.000028806821,
+        0.000068097781,
+        0.006121331983,
+        -0.009019754602,
+        -0.009658343745,
+        -0.006110103225,
+        0.008865499697,
+    ]
+).reshape(6, 3)
+
+expected_f2 = np.array(
+    [
+        [-0.6454949, 1.72457783, 0.18897958],
+        [1.68936514, -0.36995299, -1.36044464],
+        [-1.09902692, -1.35487928, 1.17416702],
+        [1.68426111, -0.50835585, 0.98340415],
+        [0.05771758, 1.12515818, -1.77561531],
+        [-1.686822, -0.61654789, 0.78950921],
+    ]
+)
+
+expected_v = -np.array(
+    [
+        -0.000155238009,
+        0.000116605516,
+        -0.007869862476,
+        0.000465578340,
+        0.008182547185,
+        -0.002398713212,
+        -0.008112887338,
+        -0.002423738425,
+        0.007210716605,
+        -0.019203504012,
+        0.001724938709,
+        0.009909211091,
+        0.001153857542,
+        -0.001600015103,
+        -0.000560024090,
+        0.010727836276,
+        -0.001034836404,
+        -0.007973454377,
+        -0.021517399106,
+        -0.004064359664,
+        0.004866398692,
+        -0.003360038617,
+        -0.007241406162,
+        0.005920941051,
+        0.004899151657,
+        0.006290788591,
+        -0.006478820311,
+        0.001921504710,
+        0.001313470921,
+        -0.000304091236,
+        0.001684345981,
+        0.004124109256,
+        -0.006396084465,
+        -0.000701095618,
+        -0.006356507032,
+        0.009818550859,
+        -0.015230664587,
+        -0.000110244376,
+        0.000690319396,
+        0.000045953023,
+        -0.005726548770,
+        0.008769818495,
+        -0.000572380210,
+        0.008860603423,
+        -0.013819348050,
+        -0.021227082558,
+        -0.004977781343,
+        0.006646239696,
+        -0.005987066507,
+        -0.002767831232,
+        0.003746502525,
+        0.007697590397,
+        0.003746130152,
+        -0.005172634748,
+    ]
+).reshape(6, 9)
+expected_v2 = -np.array(
+    [
+        [
+            -0.70008436,
+            -0.06399891,
+            0.63678391,
+            -0.07642171,
+            -0.70580035,
+            0.20506145,
+            0.64098364,
+            0.20305781,
+            -0.57906794,
+        ],
+        [
+            -0.6372635,
+            0.14315552,
+            0.51952246,
+            0.04604049,
+            -0.06003681,
+            -0.02688702,
+            0.54489318,
+            -0.10951559,
+            -0.43730539,
+        ],
+        [
+            -0.25090748,
+            -0.37466262,
+            0.34085833,
+            -0.26690852,
+            -0.37676917,
+            0.29080825,
+            0.31600481,
+            0.37558276,
+            -0.33251064,
+        ],
+        [
+            -0.80195614,
+            -0.10273138,
+            0.06935364,
+            -0.10429256,
+            -0.29693811,
+            0.45643496,
+            0.07247872,
+            0.45604679,
+            -0.71048816,
+        ],
+        [
+            -0.03840668,
+            -0.07680205,
+            0.10940472,
+            -0.02374189,
+            -0.27610266,
+            0.4336071,
+            0.02465248,
+            0.4290638,
+            -0.67496763,
+        ],
+        [
+            -0.61475065,
+            -0.21163135,
+            0.26652929,
+            -0.26134659,
+            -0.11560267,
+            0.15415902,
+            0.34343952,
+            0.1589482,
+            -0.21370642,
+        ],
+    ]
+).reshape(6, 9)
+
+box = np.array([0, 13, 0, 13, 0, 13, 0, 0, 0])
+coord = np.array(
+    [
+        [12.83, 2.56, 2.18],
+        [12.09, 2.87, 2.74],
+        [0.25, 3.32, 1.68],
+        [3.36, 3.00, 1.81],
+        [3.51, 2.51, 2.60],
+        [4.27, 3.22, 1.56],
+    ]
+)
+type_OH = np.array([1, 2, 2, 1, 2, 2])
+type_HO = np.array([2, 1, 1, 2, 1, 1])
+
+
+sp.check_output(
+    f"{sys.executable} -m deepmd convert-from pbtxt -i {pbtxt_file2.resolve()} -o {pb_file2.resolve()}".split()
+)
+
+
+def setup_module():
+    write_lmp_data(box, coord, type_OH, data_file)
+    write_lmp_data(box, coord, type_HO, data_type_map_file)
+    write_lmp_data(
+        box * constants.dist_metal2si,
+        coord * constants.dist_metal2si,
+        type_OH,
+        data_file_si,
+    )
+
+
+def teardown_module():
+    os.remove(data_file)
+    os.remove(data_type_map_file)
+
+
+def _lammps(data_file, units="metal") -> PyLammps:
+    lammps = PyLammps()
+    lammps.units(units)
+    lammps.boundary("p p p")
+    lammps.atom_style("atomic")
+    if units == "metal" or units == "real":
+        lammps.neighbor("2.0 bin")
+    elif units == "si":
+        lammps.neighbor("2.0e-10 bin")
+    else:
+        raise ValueError("units should be metal, real, or si")
+    lammps.neigh_modify("every 10 delay 0 check no")
+    lammps.read_data(data_file.resolve())
+    if units == "metal" or units == "real":
+        lammps.mass("1 16")
+        lammps.mass("2 2")
+    elif units == "si":
+        lammps.mass("1 %.10e" % (16 * constants.mass_metal2si))
+        lammps.mass("2 %.10e" % (2 * constants.mass_metal2si))
+    else:
+        raise ValueError("units should be metal, real, or si")
+    if units == "metal":
+        lammps.timestep(0.0005)
+    elif units == "real":
+        lammps.timestep(0.5)
+    elif units == "si":
+        lammps.timestep(5e-16)
+    else:
+        raise ValueError("units should be metal, real, or si")
+    lammps.fix("1 all nve")
+    return lammps
+
+
+@pytest.fixture
+def lammps():
+    lmp = _lammps(data_file=data_file)
+    yield lmp
+    lmp.close()
+
+
+@pytest.fixture
+def lammps_type_map():
+    lmp = _lammps(data_file=data_type_map_file)
+    yield lmp
+    lmp.close()
+
+
+@pytest.fixture
+def lammps_real():
+    lmp = _lammps(data_file=data_file, units="real")
+    yield lmp
+    lmp.close()
+
+
+@pytest.fixture
+def lammps_si():
+    lmp = _lammps(data_file=data_file_si, units="si")
+    yield lmp
+    lmp.close()
+
+
+def test_pair_deepmd(lammps):
+    lammps.pair_style(f"deepmd {pb_file.resolve()}")
+    lammps.pair_coeff("* *")
+    lammps.run(0)
+    assert lammps.eval("pe") == pytest.approx(expected_e)
+    for ii in range(6):
+        assert lammps.atoms[ii].force == pytest.approx(
+            expected_f[lammps.atoms[ii].id - 1]
+        )
+    lammps.run(1)
+
+
+def test_pair_deepmd_virial(lammps):
+    lammps.pair_style(f"deepmd {pb_file.resolve()}")
+    lammps.pair_coeff("* *")
+    lammps.compute("virial all centroid/stress/atom NULL pair")
+    for ii in range(9):
+        jj = [0, 4, 8, 3, 6, 7, 1, 2, 5][ii]
+        lammps.variable(f"virial{jj} atom c_virial[{ii+1}]")
+    lammps.dump(
+        "1 all custom 1 dump id " + " ".join([f"v_virial{ii}" for ii in range(9)])
+    )
+    lammps.run(0)
+    assert lammps.eval("pe") == pytest.approx(expected_e)
+    for ii in range(6):
+        assert lammps.atoms[ii].force == pytest.approx(
+            expected_f[lammps.atoms[ii].id - 1]
+        )
+    idx_map = lammps.lmp.numpy.extract_atom("id") - 1
+    for ii in range(9):
+        assert np.array(
+            lammps.variables[f"virial{ii}"].value
+        ) / constants.nktv2p == pytest.approx(expected_v[idx_map, ii])
+
+
+def test_pair_deepmd_model_devi(lammps):
+    lammps.pair_style(
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
+    )
+    lammps.pair_coeff("* *")
+    lammps.run(0)
+    assert lammps.eval("pe") == pytest.approx(expected_e)
+    for ii in range(6):
+        assert lammps.atoms[ii].force == pytest.approx(
+            expected_f[lammps.atoms[ii].id - 1]
+        )
+    # load model devi
+    md = np.loadtxt(md_file.resolve())
+    expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1)
+    assert md[7:] == pytest.approx(expected_md_f)
+    assert md[4] == pytest.approx(np.max(expected_md_f))
+    assert md[5] == pytest.approx(np.min(expected_md_f))
+    assert md[6] == pytest.approx(np.mean(expected_md_f))
+    expected_md_v = (
+        np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6
+    )
+    assert md[1] == pytest.approx(np.max(expected_md_v))
+    assert md[2] == pytest.approx(np.min(expected_md_v))
+    assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
+
+
+def test_pair_deepmd_model_devi_virial(lammps):
+    lammps.pair_style(
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
+    )
+    lammps.pair_coeff("* *")
+    lammps.compute("virial all centroid/stress/atom NULL pair")
+    for ii in range(9):
+        jj = [0, 4, 8, 3, 6, 7, 1, 2, 5][ii]
+        lammps.variable(f"virial{jj} atom c_virial[{ii+1}]")
+    lammps.dump(
+        "1 all custom 1 dump id " + " ".join([f"v_virial{ii}" for ii in range(9)])
+    )
+    lammps.run(0)
+    assert lammps.eval("pe") == pytest.approx(expected_e)
+    for ii in range(6):
+        assert lammps.atoms[ii].force == pytest.approx(
+            expected_f[lammps.atoms[ii].id - 1]
+        )
+    idx_map = lammps.lmp.numpy.extract_atom("id") - 1
+    for ii in range(9):
+        assert np.array(
+            lammps.variables[f"virial{ii}"].value
+        ) / constants.nktv2p == pytest.approx(expected_v[idx_map, ii])
+    # load model devi
+    md = np.loadtxt(md_file.resolve())
+    expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1)
+    assert md[7:] == pytest.approx(expected_md_f)
+    assert md[4] == pytest.approx(np.max(expected_md_f))
+    assert md[5] == pytest.approx(np.min(expected_md_f))
+    assert md[6] == pytest.approx(np.mean(expected_md_f))
+    expected_md_v = (
+        np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6
+    )
+    assert md[1] == pytest.approx(np.max(expected_md_v))
+    assert md[2] == pytest.approx(np.min(expected_md_v))
+    assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
+
+
+def test_pair_deepmd_model_devi_atomic_relative(lammps):
+    relative = 1.0
+    lammps.pair_style(
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative {relative}"
+    )
+    lammps.pair_coeff("* *")
+    lammps.run(0)
+    assert lammps.eval("pe") == pytest.approx(expected_e)
+    for ii in range(6):
+        assert lammps.atoms[ii].force == pytest.approx(
+            expected_f[lammps.atoms[ii].id - 1]
+        )
+    # load model devi
+    md = np.loadtxt(md_file.resolve())
+    norm = np.linalg.norm(np.mean([expected_f, expected_f2], axis=0), axis=1)
+    expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1)
+    expected_md_f /= norm + relative
+    assert md[7:] == pytest.approx(expected_md_f)
+    assert md[4] == pytest.approx(np.max(expected_md_f))
+    assert md[5] == pytest.approx(np.min(expected_md_f))
+    assert md[6] == pytest.approx(np.mean(expected_md_f))
+    expected_md_v = (
+        np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6
+    )
+    assert md[1] == pytest.approx(np.max(expected_md_v))
+    assert md[2] == pytest.approx(np.min(expected_md_v))
+    assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
+
+
+def test_pair_deepmd_model_devi_atomic_relative_v(lammps):
+    relative = 1.0
+    lammps.pair_style(
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative_v {relative}"
+    )
+    lammps.pair_coeff("* *")
+    lammps.run(0)
+    assert lammps.eval("pe") == pytest.approx(expected_e)
+    for ii in range(6):
+        assert lammps.atoms[ii].force == pytest.approx(
+            expected_f[lammps.atoms[ii].id - 1]
+        )
+    md = np.loadtxt(md_file.resolve())
+    expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1)
+    assert md[7:] == pytest.approx(expected_md_f)
+    assert md[4] == pytest.approx(np.max(expected_md_f))
+    assert md[5] == pytest.approx(np.min(expected_md_f))
+    assert md[6] == pytest.approx(np.mean(expected_md_f))
+    expected_md_v = (
+        np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6
+    )
+    norm = (
+        np.abs(
+            np.mean([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0)
+        )
+        / 6
+    )
+    expected_md_v /= norm + relative
+    assert md[1] == pytest.approx(np.max(expected_md_v))
+    assert md[2] == pytest.approx(np.min(expected_md_v))
+    assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
+
+
+def test_pair_deepmd_type_map(lammps_type_map):
+    lammps_type_map.pair_style(f"deepmd {pb_file.resolve()}")
+    lammps_type_map.pair_coeff("* * H O")
+    lammps_type_map.run(0)
+    assert lammps_type_map.eval("pe") == pytest.approx(expected_e)
+    for ii in range(6):
+        assert lammps_type_map.atoms[ii].force == pytest.approx(
+            expected_f[lammps_type_map.atoms[ii].id - 1]
+        )
+    lammps_type_map.run(1)
+
+
+def test_pair_deepmd_real(lammps_real):
+    lammps_real.pair_style(f"deepmd {pb_file.resolve()}")
+    lammps_real.pair_coeff("* *")
+    lammps_real.run(0)
+    assert lammps_real.eval("pe") == pytest.approx(
+        expected_e * constants.ener_metal2real
+    )
+    for ii in range(6):
+        assert lammps_real.atoms[ii].force == pytest.approx(
+            expected_f[lammps_real.atoms[ii].id - 1] * constants.force_metal2real
+        )
+    lammps_real.run(1)
+
+
+def test_pair_deepmd_virial_real(lammps_real):
+    lammps_real.pair_style(f"deepmd {pb_file.resolve()}")
+    lammps_real.pair_coeff("* *")
+    lammps_real.compute("virial all centroid/stress/atom NULL pair")
+    for ii in range(9):
+        jj = [0, 4, 8, 3, 6, 7, 1, 2, 5][ii]
+        lammps_real.variable(f"virial{jj} atom c_virial[{ii+1}]")
+    lammps_real.dump(
+        "1 all custom 1 dump id " + " ".join([f"v_virial{ii}" for ii in range(9)])
+    )
+    lammps_real.run(0)
+    assert lammps_real.eval("pe") == pytest.approx(
+        expected_e * constants.ener_metal2real
+    )
+    for ii in range(6):
+        assert lammps_real.atoms[ii].force == pytest.approx(
+            expected_f[lammps_real.atoms[ii].id - 1] * constants.force_metal2real
+        )
+    idx_map = lammps_real.lmp.numpy.extract_atom("id") - 1
+    for ii in range(9):
+        assert np.array(
+            lammps_real.variables[f"virial{ii}"].value
+        ) / constants.nktv2p_real == pytest.approx(
+            expected_v[idx_map, ii] * constants.ener_metal2real
+        )
+
+
+def test_pair_deepmd_model_devi_real(lammps_real):
+    lammps_real.pair_style(
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
+    )
+    lammps_real.pair_coeff("* *")
+    lammps_real.run(0)
+    assert lammps_real.eval("pe") == pytest.approx(
+        expected_e * constants.ener_metal2real
+    )
+    for ii in range(6):
+        assert lammps_real.atoms[ii].force == pytest.approx(
+            expected_f[lammps_real.atoms[ii].id - 1] * constants.force_metal2real
+        )
+    # load model devi
+    md = np.loadtxt(md_file.resolve())
+    expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1)
+    assert md[7:] == pytest.approx(expected_md_f * constants.force_metal2real)
+    assert md[4] == pytest.approx(np.max(expected_md_f) * constants.force_metal2real)
+    assert md[5] == pytest.approx(np.min(expected_md_f) * constants.force_metal2real)
+    assert md[6] == pytest.approx(np.mean(expected_md_f) * constants.force_metal2real)
+    expected_md_v = (
+        np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6
+    )
+    assert md[1] == pytest.approx(np.max(expected_md_v) * constants.ener_metal2real)
+    assert md[2] == pytest.approx(np.min(expected_md_v) * constants.ener_metal2real)
+    assert md[3] == pytest.approx(
+        np.sqrt(np.mean(np.square(expected_md_v))) * constants.ener_metal2real
+    )
+
+
+def test_pair_deepmd_model_devi_virial_real(lammps_real):
+    lammps_real.pair_style(
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic"
+    )
+    lammps_real.pair_coeff("* *")
+    lammps_real.compute("virial all centroid/stress/atom NULL pair")
+    for ii in range(9):
+        jj = [0, 4, 8, 3, 6, 7, 1, 2, 5][ii]
+        lammps_real.variable(f"virial{jj} atom c_virial[{ii+1}]")
+    lammps_real.dump(
+        "1 all custom 1 dump id " + " ".join([f"v_virial{ii}" for ii in range(9)])
+    )
+    lammps_real.run(0)
+    assert lammps_real.eval("pe") == pytest.approx(
+        expected_e * constants.ener_metal2real
+    )
+    for ii in range(6):
+        assert lammps_real.atoms[ii].force == pytest.approx(
+            expected_f[lammps_real.atoms[ii].id - 1] * constants.force_metal2real
+        )
+    idx_map = lammps_real.lmp.numpy.extract_atom("id") - 1
+    for ii in range(9):
+        assert np.array(
+            lammps_real.variables[f"virial{ii}"].value
+        ) / constants.nktv2p_real == pytest.approx(
+            expected_v[idx_map, ii] * constants.ener_metal2real
+        )
+    # load model devi
+    md = np.loadtxt(md_file.resolve())
+    expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1)
+    assert md[7:] == pytest.approx(expected_md_f * constants.force_metal2real)
+    assert md[4] == pytest.approx(np.max(expected_md_f) * constants.force_metal2real)
+    assert md[5] == pytest.approx(np.min(expected_md_f) * constants.force_metal2real)
+    assert md[6] == pytest.approx(np.mean(expected_md_f) * constants.force_metal2real)
+    expected_md_v = (
+        np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6
+    )
+    assert md[1] == pytest.approx(np.max(expected_md_v) * constants.ener_metal2real)
+    assert md[2] == pytest.approx(np.min(expected_md_v) * constants.ener_metal2real)
+    assert md[3] == pytest.approx(
+        np.sqrt(np.mean(np.square(expected_md_v))) * constants.ener_metal2real
+    )
+
+
+def test_pair_deepmd_model_devi_atomic_relative_real(lammps_real):
+    relative = 1.0
+    lammps_real.pair_style(
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative {relative * constants.force_metal2real}"
+    )
+    lammps_real.pair_coeff("* *")
+    lammps_real.run(0)
+    assert lammps_real.eval("pe") == pytest.approx(
+        expected_e * constants.ener_metal2real
+    )
+    for ii in range(6):
+        assert lammps_real.atoms[ii].force == pytest.approx(
+            expected_f[lammps_real.atoms[ii].id - 1] * constants.force_metal2real
+        )
+    # load model devi
+    md = np.loadtxt(md_file.resolve())
+    norm = np.linalg.norm(np.mean([expected_f, expected_f2], axis=0), axis=1)
+    expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1)
+    expected_md_f /= norm + relative
+    assert md[7:] == pytest.approx(expected_md_f * constants.force_metal2real)
+    assert md[4] == pytest.approx(np.max(expected_md_f) * constants.force_metal2real)
+    assert md[5] == pytest.approx(np.min(expected_md_f) * constants.force_metal2real)
+    assert md[6] == pytest.approx(np.mean(expected_md_f) * constants.force_metal2real)
+    expected_md_v = (
+        np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6
+    )
+    assert md[1] == pytest.approx(np.max(expected_md_v) * constants.ener_metal2real)
+    assert md[2] == pytest.approx(np.min(expected_md_v) * constants.ener_metal2real)
+    assert md[3] == pytest.approx(
+        np.sqrt(np.mean(np.square(expected_md_v))) * constants.ener_metal2real
+    )
+
+
+def test_pair_deepmd_model_devi_atomic_relative_v_real(lammps_real):
+    relative = 1.0
+    lammps_real.pair_style(
+        f"deepmd {pb_file.resolve()} {pb_file2.resolve()} out_file {md_file.resolve()} out_freq 1 atomic relative_v {relative * constants.ener_metal2real}"
+    )
+    lammps_real.pair_coeff("* *")
+    lammps_real.run(0)
+    assert lammps_real.eval("pe") == pytest.approx(
+        expected_e * constants.ener_metal2real
+    )
+    for ii in range(6):
+        assert lammps_real.atoms[ii].force == pytest.approx(
+            expected_f[lammps_real.atoms[ii].id - 1] * constants.force_metal2real
+        )
+    md = np.loadtxt(md_file.resolve())
+    expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1)
+    assert md[7:] == pytest.approx(expected_md_f * constants.force_metal2real)
+    assert md[4] == pytest.approx(np.max(expected_md_f) * constants.force_metal2real)
+    assert md[5] == pytest.approx(np.min(expected_md_f) * constants.force_metal2real)
+    assert md[6] == pytest.approx(np.mean(expected_md_f) * constants.force_metal2real)
+    expected_md_v = (
+        np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6
+    )
+    norm = (
+        np.abs(
+            np.mean([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0)
+        )
+        / 6
+    )
+    expected_md_v /= norm + relative
+    assert md[1] == pytest.approx(np.max(expected_md_v) * constants.ener_metal2real)
+    assert md[2] == pytest.approx(np.min(expected_md_v) * constants.ener_metal2real)
+    assert md[3] == pytest.approx(
+        np.sqrt(np.mean(np.square(expected_md_v))) * constants.ener_metal2real
+    )
+
+
+def test_pair_deepmd_si(lammps_si):
+    lammps_si.pair_style(f"deepmd {pb_file.resolve()}")
+    lammps_si.pair_coeff("* *")
+    lammps_si.run(0)
+    assert lammps_si.eval("pe") == pytest.approx(expected_e * constants.ener_metal2si)
+    for ii in range(6):
+        assert lammps_si.atoms[ii].force == pytest.approx(
+            expected_f[lammps_si.atoms[ii].id - 1] * constants.force_metal2si
+        )
+    lammps_si.run(1)
+
+
+@pytest.mark.skipif(
+    shutil.which("mpirun") is None, reason="MPI is not installed on this system"
+)
+@pytest.mark.skipif(
+    importlib.util.find_spec("mpi4py") is None, reason="mpi4py is not installed"
+)
+@pytest.mark.parametrize(
+    ("balance_args",),
+    [(["--balance"],), ([],)],
+)
+def test_pair_deepmd_mpi(balance_args: list):
+    with tempfile.NamedTemporaryFile() as f:
+        sp.check_call(
+            [
+                "mpirun",
+                "-n",
+                "2",
+                sys.executable,
+                Path(__file__).parent / "run_mpi_pair_deepmd.py",
+                data_file,
+                pb_file,
+                pb_file2,
+                md_file,
+                f.name,
+                *balance_args,
+            ]
+        )
+        arr = np.loadtxt(f.name, ndmin=1)
+    pe = arr[0]
+
+    relative = 1.0
+    assert pe == pytest.approx(expected_e)
+    # load model devi
+    md = np.loadtxt(md_file.resolve())
+    norm = np.linalg.norm(np.mean([expected_f, expected_f2], axis=0), axis=1)
+    expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1)
+    expected_md_f /= norm + relative
+    assert md[7:] == pytest.approx(expected_md_f)
+    assert md[4] == pytest.approx(np.max(expected_md_f))
+    assert md[5] == pytest.approx(np.min(expected_md_f))
+    assert md[6] == pytest.approx(np.mean(expected_md_f))
+    expected_md_v = (
+        np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6
+    )
+    assert md[1] == pytest.approx(np.max(expected_md_v))
+    assert md[2] == pytest.approx(np.min(expected_md_v))
+    assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
diff --git a/source/md/CMakeLists.txt b/source/md/CMakeLists.txt
deleted file mode 100644
index dfd1c547be..0000000000
--- a/source/md/CMakeLists.txt
+++ /dev/null
@@ -1,82 +0,0 @@
-# md
-set(MAKE_FF_AD FALSE)
-
-list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/)
-find_package(xdrfile REQUIRED)
-
-list(APPEND MD_INCLUDE_PATH "include")
-list(APPEND MD_INCLUDE_PATH ${XDRFILE_INCLUDE_DIRS})
-
-file(GLOB MD_SRC src/*.cc src/*.cpp)
-
-set(MDNN_SOURCE_FILES mdnn.cc)
-if(MAKE_FF_AD)
-  set(MDAD_SOURCE_FILES mdad.cc)
-  set(MDFF_SOURCE_FILES mdff.cc)
-endif()
-
-function(_add_md_variant variant_name prec_def)
-  set(libname "${LIB_DEEPMD_NATIVE}${variant_name}")
-  set(dp_mdnn_name "dp_mdnn${variant_name}")
-  set(dp_mdff_name "dp_mdff${variant_name}")
-  set(dp_mdad_name "dp_mdad${variant_name}")
-
-  add_library(${libname} SHARED ${MD_SRC})
-  target_link_libraries(${libname} PRIVATE ${LIB_DEEPMD})
-  target_include_directories(${libname} PUBLIC ${MD_INCLUDE_PATH})
-  set_target_properties(${libname} PROPERTIES COMPILE_DEFINITIONS ${prec_def}
-                                              INSTALL_RPATH "$ORIGIN")
-
-  add_executable(${dp_mdnn_name} ${MDNN_SOURCE_FILES})
-  if(MAKE_FF_AD)
-    add_executable(${dp_mdff_name} ${MDFF_SOURCE_FILES})
-    add_executable(${dp_mdad_name} ${MDAD_SOURCE_FILES})
-  endif()
-
-  # link: libdeepmd_native libdeepmd_cc libxdr
-  target_link_libraries(
-    ${dp_mdnn_name} PRIVATE ${libname} ${LIB_DEEPMD_CC}${variant_name}
-                            ${XDRFILE_LIBRARIES})
-  target_include_directories(${dp_mdnn_name}
-                             PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../3rdparty/)
-  if(MAKE_FF_AD)
-    target_link_libraries(
-      ${dp_mdad_name} PRIVATE ${libname} ${LIB_DEEPMD_CC}${variant_name}
-                              ${XDRFILE_LIBRARIES})
-    target_include_directories(${dp_mdad_name}
-                               PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../3rdparty/)
-    target_link_libraries(
-      ${dp_mdff_name} PRIVATE ${libname} ${LIB_DEEPMD_CC}${variant_name}
-                              ${XDRFILE_LIBRARIES})
-    target_include_directories(${dp_mdff_name}
-                               PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../3rdparty/)
-  endif()
-
-  set_target_properties(
-    ${dp_mdnn_name}
-    PROPERTIES COMPILE_DEFINITIONS ${prec_def}
-               LINK_FLAGS "-Wl,-rpath,'$ORIGIN'/../lib -Wl,-z,defs"
-               INSTALL_RPATH "$ORIGIN/../lib:${TensorFlow_LIBRARY_PATH}")
-  if(MAKE_FF_AD)
-    set_target_properties(
-      ${dp_mdad_name}
-      PROPERTIES COMPILE_DEFINITIONS ${prec_def}
-                 LINK_FLAGS "-Wl,-rpath,'$ORIGIN'/../lib -Wl,-z,defs"
-                 INSTALL_RPATH "$ORIGIN/../lib:${TensorFlow_LIBRARY_PATH}")
-    set_target_properties(
-      ${dp_mdff_name}
-      PROPERTIES COMPILE_DEFINITIONS ${prec_def}
-                 LINK_FLAGS "-Wl,-rpath,'$ORIGIN'/../lib -Wl,-z,defs"
-                 INSTALL_RPATH "$ORIGIN/../lib:${TensorFlow_LIBRARY_PATH}")
-  endif()
-
-  install(TARGETS ${LIB_DEEPMD_NATIVE} DESTINATION lib/)
-  install(TARGETS ${dp_mdnn_name} DESTINATION bin/)
-  if(MAKE_FF_AD)
-    install(TARGETS ${dp_mdad_name} DESTINATION bin/)
-    install(TARGETS ${dp_mdff_name} DESTINATION bin/)
-  endif()
-endfunction()
-_add_md_variant("${HIGH_PREC_VARIANT}" "${HIGH_PREC_DEF}")
-# TODO: there is hard-code `DOUBLE` in the code
-# _add_md_variant("${LOW_PREC_VARIANT}" "${LOW_PREC_DEF}")
diff --git a/source/md/include/AdWeight.h b/source/md/include/AdWeight.h
deleted file mode 100644
index 921185bf9f..0000000000
--- a/source/md/include/AdWeight.h
+++ /dev/null
@@ -1,63 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#pragma once
-
-#include <vector>
-
-#ifdef HIGH_PREC
-typedef double VALUETYPE;
-#else
-typedef float VALUETYPE;
-#endif
-
-class AdWeight {
- public:
-  AdWeight(const VALUETYPE& pl);
-  virtual void zone_tag(std::vector<int>& tag,
-                        const std::vector<VALUETYPE>& coord) const = 0;
-  virtual void atom_weight(std::vector<VALUETYPE>& weight,
-                           std::vector<VALUETYPE>& weight_x,
-                           const std::vector<VALUETYPE>& coord) const = 0;
-  virtual std::vector<VALUETYPE> get_center() const = 0;
-  void sel_nn_atom(std::vector<VALUETYPE>& nn_coord,
-                   std::vector<int>& nn_type,
-                   std::vector<int>& nn_idx,
-                   std::vector<int>& nn_tag,
-                   const std::vector<VALUETYPE>& dcoord,
-                   const std::vector<int>& dtype) const;
-  void force_intpl(std::vector<VALUETYPE>& of,
-                   const std::vector<VALUETYPE>& dcoord,
-                   const std::vector<VALUETYPE>& ff_force,
-                   const std::vector<VALUETYPE>& nn_force,
-                   const std::vector<int>& nn_idx) const;
-  void force_intpl(std::vector<VALUETYPE>& of,
-                   const std::vector<VALUETYPE>& dcoord,
-                   const std::vector<VALUETYPE>& ff_bd_force,
-                   const std::vector<VALUETYPE>& ff_nb_force,
-                   const std::vector<VALUETYPE>& nn_force,
-                   const std::vector<int>& nn_idx) const;
-
- private:
-  VALUETYPE protect_level;
-};
-
-// slab model, axis x
-class SlabWeight : public AdWeight {
- public:
-  SlabWeight(const std::vector<VALUETYPE>& box,
-             const VALUETYPE& rnn,
-             const VALUETYPE& rhy,
-             const VALUETYPE& rc,
-             const VALUETYPE& protect_level = 1e-3);
-  virtual void zone_tag(std::vector<int>& tag,
-                        const std::vector<VALUETYPE>& coord) const;
-  virtual void atom_weight(std::vector<VALUETYPE>& weight,
-                           std::vector<VALUETYPE>& weight_x,
-                           const std::vector<VALUETYPE>& coord) const;
-  virtual std::vector<VALUETYPE> get_center() const { return center; }
-
- private:
-  std::vector<VALUETYPE> center;
-  VALUETYPE rnn;
-  VALUETYPE rhy;
-  VALUETYPE rc;
-};
diff --git a/source/md/include/Convert.h b/source/md/include/Convert.h
deleted file mode 100644
index a0a11884e3..0000000000
--- a/source/md/include/Convert.h
+++ /dev/null
@@ -1,40 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#pragma once
-
-#include <map>
-#include <string>
-#include <vector>
-
-template <typename VALUETYPE>
-class Convert {
- public:
-  Convert(const std::vector<std::string>& atomname,
-          std::map<std::string, int>& name_type_map,
-          std::map<std::string, VALUETYPE>& name_mass_map,
-          std::map<std::string, VALUETYPE>& name_charge_map,
-          const bool sort = true);
-  void gro2nnp(std::vector<VALUETYPE>& coord,
-               std::vector<VALUETYPE>& veloc,
-               std::vector<VALUETYPE>& box,
-               const std::vector<std::vector<double> >& posi,
-               const std::vector<std::vector<double> >& velo,
-               const std::vector<double>& box_size) const;
-  void nnp2gro(std::vector<std::vector<double> >& posi,
-               std::vector<std::vector<double> >& velo,
-               std::vector<double>& box_size,
-               const std::vector<VALUETYPE>& coord,
-               const std::vector<VALUETYPE>& veloc,
-               const std::vector<VALUETYPE>& box) const;
-  void idx_gro2nnp(std::vector<int>& out, const std::vector<int>& in) const;
-  void idx_nnp2gro(std::vector<int>& out, const std::vector<int>& in) const;
-  const std::vector<int>& get_type() const { return atype; }
-  const std::vector<VALUETYPE>& get_mass() const { return amass; }
-  const std::vector<VALUETYPE>& get_charge() const { return acharge; }
-
- private:
-  std::vector<int> idx_map_nnp2gro;
-  std::vector<int> idx_map_gro2nnp;
-  std::vector<int> atype;
-  std::vector<VALUETYPE> amass;
-  std::vector<VALUETYPE> acharge;
-};
diff --git a/source/md/include/CosSwitch.h b/source/md/include/CosSwitch.h
deleted file mode 100644
index 150cf1a8ba..0000000000
--- a/source/md/include/CosSwitch.h
+++ /dev/null
@@ -1,57 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#pragma once
-#include <cmath>
-
-#ifdef HIGH_PREC
-typedef double VALUETYPE;
-#else
-typedef float VALUETYPE;
-#endif
-
-class CosSwitch {
- public:
-  CosSwitch(const VALUETYPE& rmin_ = 0, const VALUETYPE& rmax_ = 0) {
-    reinit(rmin_, rmax_);
-  }
-  void reinit(const VALUETYPE& rmin_, const VALUETYPE& rmax_);
-
- public:
-  void eval(VALUETYPE& vv, const VALUETYPE xx) const;
-
- private:
-  VALUETYPE rmin, rmax;
-};
-
-void CosSwitch::reinit(const VALUETYPE& rmin_, const VALUETYPE& rmax_) {
-  rmin = rmin_;
-  rmax = rmax_;
-}
-
-void CosSwitch::eval(VALUETYPE& vv, const VALUETYPE xx) const {
-  VALUETYPE dd;
-  if (xx >= 0) {
-    if (xx < rmin) {
-      dd = 0;
-      vv = 1;
-    } else if (xx < rmax) {
-      VALUETYPE value = (xx - rmin) / (rmax - rmin) * M_PI;
-      dd = -0.5 * sin(value) * M_PI / (rmax - rmin);
-      vv = 0.5 * (cos(value) + 1);
-    } else {
-      dd = 0;
-      vv = 0;
-    }
-  } else {
-    if (xx > -rmin) {
-      dd = 0;
-      vv = 1;
-    } else if (xx > -rmax) {
-      VALUETYPE value = (-xx - rmin) / (rmax - rmin) * M_PI;
-      dd = 0.5 * sin(value) * M_PI / (rmax - rmin);
-      vv = 0.5 * (cos(value) + 1);
-    } else {
-      dd = 0;
-      vv = 0;
-    }
-  }
-}
diff --git a/source/md/include/Gaussian.h b/source/md/include/Gaussian.h
deleted file mode 100644
index c10cc11c2a..0000000000
--- a/source/md/include/Gaussian.h
+++ /dev/null
@@ -1,13 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#pragma once
-
-#include <cmath>
-#include <limits>
-
-#include "RandomGenerator.h"
-
-class Gaussian {
- public:
-  void set_seed(unsigned long seed);
-  void gen(double* vec, const int numb_gen);
-};
diff --git a/source/md/include/GroFileManager.h b/source/md/include/GroFileManager.h
deleted file mode 100644
index f80c3b57fe..0000000000
--- a/source/md/include/GroFileManager.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#ifndef __GroFileManager_wanghan__
-#define __GroFileManager_wanghan__
-
-#include <iostream>
-#include <string>
-#include <vector>
-
-namespace GroFileManager {
-void read(const std::string& name,
-          std::vector<int>& resdindex,
-          std::vector<std::string>& resdname,
-          std::vector<std::string>& atomname,
-          std::vector<int>& atomindex,
-          std::vector<std::vector<double> >& posi,
-          std::vector<std::vector<double> >& velo,
-          std::vector<double>& boxsize);
-void write(const std::string& name,
-           const std::vector<int>& resdindex,
-           const std::vector<std::string>& resdname,
-           const std::vector<std::string>& atomname,
-           const std::vector<int>& atomindex,
-           const std::vector<std::vector<double> >& posi,
-           const std::vector<std::vector<double> >& velo,
-           const std::vector<double>& boxsize);
-
-bool readTop(const std::string& filename,
-             std::vector<std::string>& molnames,
-             std::vector<int>& nmols);
-
-template <typename UnitaryFunction1,
-          typename UnitaryFunction2,
-          typename UnitaryFunction3,
-          typename UnitaryFunction4,
-          typename UnitaryFunction5,
-          typename UnitaryFunction6>
-bool writePotenFile(const double& rmin,
-                    const double& rcut,
-                    const double& interval,
-                    UnitaryFunction1& f,
-                    UnitaryFunction2& fp,
-                    UnitaryFunction3& g,
-                    UnitaryFunction4& gp,
-                    UnitaryFunction5& h,
-                    UnitaryFunction6& hp,
-                    const std::string& filename);
-
-};  // namespace GroFileManager
-
-#endif
diff --git a/source/md/include/HarmonicAngle.h b/source/md/include/HarmonicAngle.h
deleted file mode 100644
index b20430f599..0000000000
--- a/source/md/include/HarmonicAngle.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#pragma once
-
-#include <vector>
-
-#include "SimulationRegion.h"
-
-#ifdef HIGH_PREC
-typedef double VALUETYPE;
-#else
-typedef float VALUETYPE;
-#endif
-
-class HarmonicAngle {
- public:
-  HarmonicAngle(const VALUETYPE& kk, const VALUETYPE& tt);
-
- public:
-  void compute(VALUETYPE& ener,
-               std::vector<VALUETYPE>& force,
-               std::vector<VALUETYPE>& virial,
-               const std::vector<VALUETYPE>& coord,
-               const std::vector<int>& atype,
-               const SimulationRegion<VALUETYPE>& region,
-               const std::vector<int>& alist);
-
- private:
-  VALUETYPE ka, tt;
-  void hb_inner(VALUETYPE& ae, VALUETYPE& af, const VALUETYPE& r2);
-};
diff --git a/source/md/include/HarmonicBond.h b/source/md/include/HarmonicBond.h
deleted file mode 100644
index dd13491cc6..0000000000
--- a/source/md/include/HarmonicBond.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#pragma once
-
-#include <vector>
-
-#include "SimulationRegion.h"
-
-#ifdef HIGH_PREC
-typedef double VALUETYPE;
-#else
-typedef float VALUETYPE;
-#endif
-
-class HarmonicBond {
- public:
-  HarmonicBond(const VALUETYPE& kk, const VALUETYPE& bb);
-
- public:
-  void compute(VALUETYPE& ener,
-               std::vector<VALUETYPE>& force,
-               std::vector<VALUETYPE>& virial,
-               const std::vector<VALUETYPE>& coord,
-               const std::vector<int>& atype,
-               const SimulationRegion<VALUETYPE>& region,
-               const std::vector<int>& blist);
-
- private:
-  VALUETYPE kk, bb;
-  void hb_inner(VALUETYPE& ae, VALUETYPE& af, const VALUETYPE& r2);
-};
diff --git a/source/md/include/Integrator.h b/source/md/include/Integrator.h
deleted file mode 100644
index 73e84e7097..0000000000
--- a/source/md/include/Integrator.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#pragma once
-
-#include <vector>
-
-#include "Gaussian.h"
-#include "UnitManager.h"
-
-template <typename VALUETYPE>
-class Integrator {
- public:
-  Integrator() : massConst(UnitManager::IntegratorMassConstant){};
-
- public:
-  void stepVeloc(std::vector<VALUETYPE>& vv,
-                 const std::vector<VALUETYPE>& ff,
-                 const std::vector<VALUETYPE>& mass,
-                 const double& dt,
-                 const std::vector<int>& freez = std::vector<int>()) const;
-  void stepCoord(std::vector<VALUETYPE>& rr,
-                 const std::vector<VALUETYPE>& vv,
-                 const double& dt) const;
-
- private:
-  VALUETYPE massConst;
-};
-
-template <typename VALUETYPE>
-class ThermostatLangevin {
- public:
-  ThermostatLangevin(const VALUETYPE T = 300.,
-                     const VALUETYPE tau = 1.,
-                     const long long int seed = 0);
-  void reinit(const VALUETYPE T = 300.,
-              const VALUETYPE tau = 1.,
-              const long long int seed = 0);
-  void stepOU(std::vector<VALUETYPE>& vv,
-              const std::vector<VALUETYPE>& mass,
-              const double& dt,
-              const std::vector<int>& freez = std::vector<int>()) const;
-
- private:
-  mutable Gaussian gaussian;
-  std::string scheme;
-  VALUETYPE temperature;
-  VALUETYPE gamma;
-  VALUETYPE sigma;
-  VALUETYPE kT;
-  VALUETYPE sigmainvsqrt2gamma;
-};
diff --git a/source/md/include/Interpolation.h b/source/md/include/Interpolation.h
deleted file mode 100644
index ad64d6114e..0000000000
--- a/source/md/include/Interpolation.h
+++ /dev/null
@@ -1,70 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#ifndef __wanghan__Interpolation_h__
-#define __wanghan__Interpolation_h__
-
-#include <algorithm>
-#include <iostream>
-#include <string>
-#include <vector>
-
-#include "Poly.h"
-
-namespace Interpolation {
-// linear interpolations
-void pieceLinearInterpol(const double& a,
-                         const double& b,
-                         const double& va,
-                         const double& vb,
-                         Poly& p);
-void piecewiseLinear(const std::vector<double>& x,
-                     const std::vector<double>& y,
-                     PiecewisePoly& ps);
-// spline interpolations
-void pieceHermiteInterpol(const double& a,
-                          const double& b,
-                          const double& va,
-                          const double& vb,
-                          const double& da,
-                          const double& db,
-                          Poly& p);
-void pieceSecondDerivativeInterpol(const double& a,
-                                   const double& b,
-                                   const double& va,
-                                   const double& vb,
-                                   const double& dda,
-                                   const double& ddb,
-                                   Poly& p);
-void piece6OrderInterpol(const double& a,
-                         const double& b,
-                         const double& va,
-                         const double& vb,
-                         const double& da,
-                         const double& db,
-                         const double& dda,
-                         const double& ddb,
-                         Poly& p);
-
-bool spline(const std::vector<double>& x,
-            const std::vector<double>& y,
-            PiecewisePoly& ps);
-bool spline(const std::vector<double>::const_iterator xbegin,
-            const std::vector<double>::const_iterator xend,
-            const std::vector<double>::const_iterator ybegin,
-            PiecewisePoly& ps);
-bool splinePeriodic(const std::vector<double>& x,
-                    const std::vector<double>& y,
-                    PiecewisePoly& ps);
-bool solverForSplinePeriodic(const std::vector<double>::const_iterator& lbegin,
-                             const std::vector<double>::const_iterator& lend,
-                             const std::vector<double>::iterator& ubegin,
-                             const std::vector<double>::iterator& uend);
-void secondDerivativeInterpol(
-    const std::vector<double>::const_iterator& xbegin,
-    const std::vector<double>::const_iterator& xend,
-    const std::vector<double>::const_iterator& vbegin,
-    const std::vector<double>::const_iterator& ddbegin,
-    PiecewisePoly& ps);
-
-}  // namespace Interpolation
-
-#endif
diff --git a/source/md/include/LJInter.h b/source/md/include/LJInter.h
deleted file mode 100644
index fe507a0e90..0000000000
--- a/source/md/include/LJInter.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#pragma once
-
-#include <vector>
-
-#include "SimulationRegion.h"
-
-#ifdef HIGH_PREC
-typedef double VALUETYPE;
-#else
-typedef float VALUETYPE;
-#endif
-
-class LJInter {
- public:
-  LJInter(const VALUETYPE& c6, const VALUETYPE& c12, const VALUETYPE& rc);
-
- public:
-  void compute(VALUETYPE& ener,
-               std::vector<VALUETYPE>& force,
-               std::vector<VALUETYPE>& virial,
-               const std::vector<VALUETYPE>& coord,
-               const std::vector<int>& atype,
-               const SimulationRegion<VALUETYPE>& region,
-               const std::vector<std::vector<int> >& nlist);
-
- private:
-  VALUETYPE c6, c12, rc, rc2, one_over_6, one_over_12, one_over_rc6,
-      one_over_rc12;
-  void lj_inner(VALUETYPE& ae, VALUETYPE& af, const VALUETYPE& r2);
-};
diff --git a/source/md/include/LJTab.h b/source/md/include/LJTab.h
deleted file mode 100644
index 73f46697ba..0000000000
--- a/source/md/include/LJTab.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#pragma once
-
-#include <vector>
-
-#include "Tabulated.h"
-
-#ifdef HIGH_PREC
-typedef double VALUETYPE;
-#else
-typedef float VALUETYPE;
-#endif
-
-class LJTab {
- public:
-  LJTab(const VALUETYPE& c6, const VALUETYPE& c12, const VALUETYPE& rc);
-
- public:
-  void compute(VALUETYPE& ener,
-               std::vector<VALUETYPE>& force,
-               std::vector<VALUETYPE>& virial,
-               const std::vector<VALUETYPE>& coord,
-               const std::vector<int>& atype,
-               const SimulationRegion<VALUETYPE>& region,
-               const std::vector<std::vector<int> >& nlist) {
-    lj_tab.compute(ener, force, virial, coord, atype, region, nlist);
-  };
-
- private:
-  Tabulated lj_tab;
-};
diff --git a/source/md/include/MaxShift.h b/source/md/include/MaxShift.h
deleted file mode 100644
index 21634df671..0000000000
--- a/source/md/include/MaxShift.h
+++ /dev/null
@@ -1,28 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#pragma once
-
-#include <vector>
-
-#include "SimulationRegion.h"
-
-#ifdef HIGH_PREC
-typedef double VALUETYPE;
-#else
-typedef float VALUETYPE;
-#endif
-
-class MaxShift {
- public:
-  MaxShift(const std::vector<VALUETYPE>& dcoord, const VALUETYPE& shell);
-
-  bool rebuild(const std::vector<VALUETYPE>& coord,
-               const SimulationRegion<VALUETYPE>& region);
-
- private:
-  VALUETYPE
-  max_shift2(const std::vector<VALUETYPE>& coord,
-             const SimulationRegion<VALUETYPE>& region);
-  std::vector<VALUETYPE> record;
-  VALUETYPE shell;
-  VALUETYPE max_allow2;
-};
diff --git a/source/md/include/Poly.h b/source/md/include/Poly.h
deleted file mode 100644
index 1b3c3d2e15..0000000000
--- a/source/md/include/Poly.h
+++ /dev/null
@@ -1,88 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#ifndef __wanghan_Poly_h__
-#define __wanghan_Poly_h__
-
-#include <iostream>
-#include <string>
-#include <vector>
-
-class Poly {
-  std::vector<double> a;
-  unsigned order;
-
- public:
-  Poly();
-  Poly(const std::vector<double>& out);
-  void reinit(const std::vector<double>& out);
-  void zero() {
-    a.clear();
-    a.resize(1, 0);
-    order = 0;
-  }
-  void one() {
-    a.clear();
-    a.resize(1, 1);
-    order = 0;
-  }
-
- public:
-  Poly& operator=(const Poly& poly);
-  Poly& operator+=(const Poly& poly);
-  Poly& operator+=(const double& b);
-  Poly& operator*=(const Poly& poly);
-  Poly& operator*=(const double& scale);
-  Poly& derivative();
-
- public:
-  unsigned& getOrder() { return order; }
-  const unsigned& getOrder() const { return order; }
-  std::vector<double>& getCoeffs() { return a; }
-  const std::vector<double>& getCoeffs() const { return a; }
-
- public:
-  void print();
-  void print(const std::string& x);
-  void printCode(const std::string& x);
-
- public:
-  double value(const double& x) const;
-
- public:
-  // p = f(ax + b)
-  Poly& valueLinearPoly(const double& a, const double& b, Poly& p);
-};
-
-class PiecewisePoly {
- public:
-  std::vector<double>& get_x() { return x; }
-  std::vector<Poly>& get_p() { return p; }
-  const std::vector<double>& get_x() const { return x; }
-  const std::vector<Poly>& get_p() const { return p; }
-
- public:
-  void clear() {
-    x.clear();
-    p.clear();
-  }
-  bool valid() const;
-
- public:
-  double value(const double& r) const;
-  void value(const std::vector<double>& r, std::vector<double>& y) const;
-  double value_periodic(const double& r) const;
-  void value_periodic(const std::vector<double>& r,
-                      std::vector<double>& y) const;
-
- private:
-  std::vector<double> x;
-  std::vector<Poly> p;
-  void value(const unsigned& xbegin,
-             const unsigned& xend,
-             const std::vector<double>& r,
-             const unsigned& rbegin,
-             const unsigned& rend,
-             std::vector<double>& y) const;
-  double value(const double& xx, unsigned& begin, unsigned& end) const;
-};
-
-#endif
diff --git a/source/md/include/RandomGenerator.h b/source/md/include/RandomGenerator.h
deleted file mode 100644
index 0f14648e8d..0000000000
--- a/source/md/include/RandomGenerator.h
+++ /dev/null
@@ -1,13 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#pragma once
-
-namespace RandomGenerator_MT19937 {
-void init_by_array(unsigned long init_key[], int key_length);
-void init_genrand(unsigned long s);
-unsigned long genrand_int32(void);
-long genrand_int31(void);
-double genrand_real1(void);  // in [0,1]
-double genrand_real2(void);  // in [0,1)
-double genrand_real3(void);  // in (0,1)
-double genrand_res53(void);
-}  // namespace RandomGenerator_MT19937
diff --git a/source/md/include/Statistics.h b/source/md/include/Statistics.h
deleted file mode 100644
index 19e8896cbd..0000000000
--- a/source/md/include/Statistics.h
+++ /dev/null
@@ -1,40 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#pragma once
-
-#include <vector>
-
-#include "SimulationRegion.h"
-
-template <typename VALUETYPE>
-class Statistics {
- public:
-  Statistics(const VALUETYPE e_corr = 0, const VALUETYPE p_corr = 0);
-  void record(const VALUETYPE& ener,
-              const std::vector<VALUETYPE>& virial,
-              const std::vector<VALUETYPE>& veloc,
-              const std::vector<VALUETYPE>& mass,
-              const SimulationRegion<VALUETYPE>& region);
-
- public:
-  double get_T() const;
-  double get_V() const;
-  double get_P() const;
-  double get_E() const { return get_ekin() + get_epot(); };
-  double get_ekin() const { return r_kin_ener; }
-  double get_epot() const { return r_pot_ener + e_corr; }
-
- public:
-  void print(std::ostream& os, const int& step, const double time) const;
-  void print_head(std::ostream& os) const;
-
- private:
-  int natoms;
-  double r_ener;
-  double r_pot_ener;
-  double r_kin_ener;
-  // std::vector<double> r_box;
-  SimulationRegion<double> region;
-  std::vector<double> r_vir;
-  double e_corr;
-  double p_corr;
-};
diff --git a/source/md/include/StringSplit.h b/source/md/include/StringSplit.h
deleted file mode 100644
index 9243e8ec6d..0000000000
--- a/source/md/include/StringSplit.h
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#ifndef __StringSplit_h_wanghan__
-#define __StringSplit_h_wanghan__
-
-#include <algorithm>
-#include <iterator>
-#include <sstream>
-#include <string>
-#include <vector>
-
-namespace StringOperation {
-void split(const std::string& in, std::vector<std::string>& out);
-void split(const std::string& in,
-           const std::string& delimiter,
-           std::vector<std::string>& out);
-}  // namespace StringOperation
-
-#endif
diff --git a/source/md/include/TF.h b/source/md/include/TF.h
deleted file mode 100644
index be12a4f6ac..0000000000
--- a/source/md/include/TF.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#pragma once
-
-#include <string>
-#include <vector>
-
-#include "AdWeight.h"
-#include "common.h"
-
-#ifdef HIGH_PREC
-typedef double VALUETYPE;
-#else
-typedef float VALUETYPE;
-#endif
-
-class TF {
- public:
-  TF(const std::string& filename);
-
- public:
-  void apply(std::vector<VALUETYPE>& force,
-             const std::vector<VALUETYPE>& coord,
-             const AdWeight& weight) const;
-
- private:
-  VALUETYPE meas(const VALUETYPE& xx) const;
-  std::vector<double> data;
-  double hh;
-  double xup;
-};
diff --git a/source/md/include/TableFileLoader.h b/source/md/include/TableFileLoader.h
deleted file mode 100644
index 31461f5014..0000000000
--- a/source/md/include/TableFileLoader.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#ifndef __TableFileLoader_h_wanghan__
-#define __TableFileLoader_h_wanghan__
-
-#include <fstream>
-#include <vector>
-
-class TableFileLoader {
- public:
-  unsigned getNumbColumns();
-
- public:
-  TableFileLoader(const char* file);
-  void reinit(const char* file);
-  void setColumns(const std::vector<unsigned>& cols);
-  void setEvery(const unsigned every);
-
- public:
-  void loadAll(std::vector<std::vector<double> >& data);
-  bool loadLine(std::vector<double>& data);
-
- private:
-  std::ifstream data;
-  std::string file;
-  unsigned count_read;
-  unsigned every;
-  std::vector<unsigned> inter_cols;
-};
-
-#endif
diff --git a/source/md/include/Tabulated.h b/source/md/include/Tabulated.h
deleted file mode 100644
index 5ab6e02bc3..0000000000
--- a/source/md/include/Tabulated.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#pragma once
-
-#include <vector>
-
-#include "SimulationRegion.h"
-
-#ifdef HIGH_PREC
-typedef double VALUETYPE;
-#else
-typedef float VALUETYPE;
-#endif
-
-class Tabulated {
- public:
-  Tabulated(){};
-  Tabulated(const VALUETYPE rc,
-            const VALUETYPE hh,
-            const std::vector<VALUETYPE>& tab);
-  void reinit(const VALUETYPE rc,
-              const VALUETYPE hh,
-              const std::vector<VALUETYPE>& tab);
-
- public:
-  void compute(VALUETYPE& ener,
-               std::vector<VALUETYPE>& force,
-               std::vector<VALUETYPE>& virial,
-               const std::vector<VALUETYPE>& coord,
-               const std::vector<int>& atype,
-               const SimulationRegion<VALUETYPE>& region,
-               const std::vector<std::vector<int> >& nlist);
-  void compute(VALUETYPE& ener,
-               std::vector<VALUETYPE>& force,
-               std::vector<VALUETYPE>& virial,
-               const std::vector<VALUETYPE>& coord,
-               const std::vector<VALUETYPE>& charge,
-               const std::vector<int>& atype,
-               const SimulationRegion<VALUETYPE>& region,
-               const std::vector<std::vector<int> >& nlist);
-  void tb_inner(VALUETYPE& ae, VALUETYPE& af, const VALUETYPE& r2);
-
- private:
-  VALUETYPE rc2, hi;
-  std::vector<VALUETYPE> data;
-  void compute_posi(int& idx, VALUETYPE& eps, const VALUETYPE& rr);
-};
diff --git a/source/md/include/Trajectory.h b/source/md/include/Trajectory.h
deleted file mode 100644
index 862b393ea4..0000000000
--- a/source/md/include/Trajectory.h
+++ /dev/null
@@ -1,58 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#ifndef __MDFileManager_Trajectory_h_wanghan__
-#define __MDFileManager_Trajectory_h_wanghan__
-
-// #include "Defines.h"
-#include <vector>
-
-#include "xdrfile/xdrfile.h"
-#include "xdrfile/xdrfile_trr.h"
-#include "xdrfile/xdrfile_xtc.h"
-
-class XtcSaver {
- public:
-  XtcSaver() : inited(false), prec(1000){};
-  ~XtcSaver();
-  XtcSaver(const char *filename, const int &natoms);
-  bool reinit(const char *filename, const int &natoms);
-
- public:
-  void save(const int &step,
-            const double &time,
-            const std::vector<std::vector<double> > &frame,
-            const std::vector<double> &box);
-
- private:
-  XDRFILE *xd;
-  int natoms;
-  rvec *xx;
-  float prec;
-  bool inited;
-  void clear();
-};
-
-class TrrSaver {
- public:
-  TrrSaver() : inited(false), lambda(0){};
-  ~TrrSaver();
-  TrrSaver(const char *filename, const int &natoms);
-  bool reinit(const char *filename, const int &natoms);
-
- public:
-  void save(const int &step,
-            const double &time,
-            const std::vector<std::vector<double> > &ixx,
-            const std::vector<std::vector<double> > &ivv,
-            const std::vector<std::vector<double> > &iff,
-            const std::vector<double> &box);
-
- private:
-  XDRFILE *xd;
-  int natoms;
-  rvec *xx, *vv, *ff;
-  float lambda;
-  bool inited;
-  void clear();
-};
-
-#endif
diff --git a/source/md/include/UnitManager.h b/source/md/include/UnitManager.h
deleted file mode 100644
index 70393c406e..0000000000
--- a/source/md/include/UnitManager.h
+++ /dev/null
@@ -1,26 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#pragma once
-
-#include <string>
-
-class UnitManager {
- protected:
-  UnitManager(){};
-
- public:
-  static double Degree2Radian;
-  static double Radian2Degree;
-
-  static double IntegratorMassConstant;
-  static double PressureConstant;
-  static double BoltzmannConstant;
-  static double ElectrostaticConvertion;
-
-  static double DefaultTableUpperLimit;
-  static double DefaultTableStep;
-  static double DefaultTableExtension;
-  static void set(const std::string& name_of_system);
-
- private:
-  static std::string unit_names[];
-};
diff --git a/source/md/include/XyzFileManager.h b/source/md/include/XyzFileManager.h
deleted file mode 100644
index d557f0f771..0000000000
--- a/source/md/include/XyzFileManager.h
+++ /dev/null
@@ -1,20 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#ifndef __XyzFileManager_h_wanghan__
-#define __XyzFileManager_h_wanghan__
-
-#include <vector>
-
-namespace XyzFileManager {
-
-void read(const std::string& file,
-          std::vector<std::string>& atom_name,
-          std::vector<std::vector<double> >& posi,
-          std::vector<std::vector<double> >& velo,
-          std::vector<std::vector<double> >& forc,
-          std::vector<double>& boxsize);
-
-void getBoxSize(const std::string& name, std::vector<double>& boxsize);
-
-};  // namespace XyzFileManager
-
-#endif
diff --git a/source/md/include/ZM.h b/source/md/include/ZM.h
deleted file mode 100644
index 9255b0c17d..0000000000
--- a/source/md/include/ZM.h
+++ /dev/null
@@ -1,45 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#pragma once
-
-#include <vector>
-
-#include "SimulationRegion.h"
-#include "Tabulated.h"
-#include "ZMFunctions.h"
-
-#ifdef HIGH_PREC
-typedef double VALUETYPE;
-#else
-typedef float VALUETYPE;
-#endif
-
-class ZM {
- public:
-  ZM(const int& order, const VALUETYPE& alpha, const VALUETYPE& rc);
-
- public:
-  void compute(VALUETYPE& ener,
-               std::vector<VALUETYPE>& force,
-               std::vector<VALUETYPE>& virial,
-               const std::vector<VALUETYPE>& coord,
-               const std::vector<VALUETYPE>& charge,
-               const std::vector<int>& atype,
-               const SimulationRegion<VALUETYPE>& region,
-               const std::vector<std::vector<int> >& nlist) {
-    zm_tab.compute(ener, force, virial, coord, charge, atype, region, nlist);
-  };
-  void exclude(VALUETYPE& ener,
-               std::vector<VALUETYPE>& force,
-               std::vector<VALUETYPE>& virial,
-               const std::vector<VALUETYPE>& coord,
-               const std::vector<VALUETYPE>& charge,
-               const std::vector<int>& atype,
-               const SimulationRegion<VALUETYPE>& region,
-               const std::vector<int>& elist);
-  VALUETYPE e_corr(const std::vector<VALUETYPE>& charge) const;
-
- private:
-  Tabulated zm_tab;
-  void ex_inner(VALUETYPE& ae, VALUETYPE& af, const VALUETYPE& r2);
-  ZeroMultipole::Potential potzm;
-};
diff --git a/source/md/include/ZMFunctions.h b/source/md/include/ZMFunctions.h
deleted file mode 100644
index aba6ce34d9..0000000000
--- a/source/md/include/ZMFunctions.h
+++ /dev/null
@@ -1,38 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#ifndef __Functions_h_ZM_wanghan__
-#define __Functions_h_ZM_wanghan__
-
-#include <vector>
-
-namespace ZeroMultipole {
-double funcV(const double& alpha, const double& r);
-double funcD1V(const double& alpha, const double& r);
-double funcD2V(const double& alpha, const double& r);
-double funcD3V(const double& alpha, const double& r);
-double funcD4V(const double& alpha, const double& r);
-
-void calCoefficients(const int& ll,
-                     const double& alpha,
-                     const double& rc,
-                     std::vector<double>& coeff);
-
-class Potential {
-  double alpha, rc;
-  int ll;
-  std::vector<double> coeff;
-
- public:
-  Potential();
-  Potential(const int& ll, const double& alpha, const double& rc);
-  void reinit(const int& ll, const double& alpha, const double& rc);
-  double pot(const double& rr);
-  double ulpot(const double& rr);
-  double mpotp(const double& rr);
-  double mulpotp(const double& rr);
-
- public:
-  double energyCorr(const std::vector<double>& charges) const;
-};
-}  // namespace ZeroMultipole
-
-#endif
diff --git a/source/md/include/common.h b/source/md/include/common.h
deleted file mode 100644
index f1662f1206..0000000000
--- a/source/md/include/common.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#pragma once
-
-#include <vector>
-
-#include "SimulationRegion.h"
-
-const double b2m_l = 10;
-const double b2m_e = 1.660539040e-21 / 1.602176621e-19;
-
-template <typename VALUETYPE>
-void clear(VALUETYPE& ener,
-           std::vector<VALUETYPE>& force,
-           std::vector<VALUETYPE>& virial) {
-  ener = 0;
-  std::fill(force.begin(), force.end(), 0.);
-  std::fill(virial.begin(), virial.end(), 0.);
-}
-
-template <typename VALUETYPE>
-void normalize_coord(std::vector<VALUETYPE>& coord,
-                     const SimulationRegion<VALUETYPE>& region) {
-  int natoms = coord.size() / 3;
-  for (int ii = 0; ii < natoms; ++ii) {
-    double phys[3];
-    for (int dd = 0; dd < 3; ++dd) {
-      phys[dd] = coord[ii * 3 + dd];
-    }
-    double inter[3];
-    region.phys2Inter(inter, phys);
-    for (int dd = 0; dd < 3; ++dd) {
-      if (inter[dd] < 0) {
-        inter[dd] += 1.;
-      } else if (inter[dd] >= 1) {
-        inter[dd] -= 1.;
-      }
-    }
-    region.inter2Phys(phys, inter);
-    for (int dd = 0; dd < 3; ++dd) {
-      coord[ii * 3 + dd] = phys[dd];
-    }
-  }
-}
diff --git a/source/md/include/mymath.h b/source/md/include/mymath.h
deleted file mode 100644
index aaae92704e..0000000000
--- a/source/md/include/mymath.h
+++ /dev/null
@@ -1,73 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#pragma once
-
-template <typename VALUETYPE>
-inline VALUETYPE dot(const VALUETYPE* r0, const VALUETYPE* r1) {
-  return (r0[0] * r1[0] + r0[1] * r1[1] + r0[2] * r1[2]);
-}
-
-template <typename TYPE>
-inline TYPE dot(const TYPE& x0,
-                const TYPE& y0,
-                const TYPE& z0,
-                const TYPE& x1,
-                const TYPE& y1,
-                const TYPE& z1) {
-  return x0 * x1 + y0 * y1 + z0 * z1;
-}
-
-template <typename VALUETYPE>
-inline VALUETYPE cos(const VALUETYPE* r0, const VALUETYPE* r1) {
-  double ip = dot<VALUETYPE>(r0, r1);
-  double ip0 = dot<VALUETYPE>(r0, r0);
-  double ip1 = dot<VALUETYPE>(r1, r1);
-  double ip01 = ip0 * ip1;
-
-  double cosval;
-  if (ip01 > 0) {
-    cosval = ip / sqrt(ip01);
-  } else {
-    cosval = 1.0;
-  }
-  if (cosval > 1.0) {
-    return 1.0;
-  }
-  if (cosval < -1.0) {
-    return -1.0;
-  }
-  return cosval;
-}
-
-template <typename TYPE>
-inline TYPE cos(const TYPE& x0,
-                const TYPE& y0,
-                const TYPE& z0,
-                const TYPE& x1,
-                const TYPE& y1,
-                const TYPE& z1) {
-  double dblx0 = (double)(x0);
-  double dblx1 = (double)(x1);
-  double dbly0 = (double)(y0);
-  double dbly1 = (double)(y1);
-  double dblz0 = (double)(z0);
-  double dblz1 = (double)(z1);
-
-  double ip = dot<double>(dblx0, dbly0, dblz0, dblx1, dbly1, dblz1);
-  double ip0 = dot<double>(dblx0, dbly0, dblz0, dblx0, dbly0, dblz0);
-  double ip1 = dot<double>(dblx1, dbly1, dblz1, dblx1, dbly1, dblz1);
-  double ip01 = ip0 * ip1;
-
-  double cosval;
-  if (ip01 > 0) {
-    cosval = ip / sqrt(ip01);
-  } else {
-    cosval = 1.0;
-  }
-  if (cosval > 1.0) {
-    return 1.0;
-  }
-  if (cosval < -1.0) {
-    return -1.0;
-  }
-  return cosval;
-}
diff --git a/source/md/mdnn.cc b/source/md/mdnn.cc
deleted file mode 100644
index 80287db891..0000000000
--- a/source/md/mdnn.cc
+++ /dev/null
@@ -1,219 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "Convert.h"
-#include "DeepPot.h"
-#include "GroFileManager.h"
-#include "Integrator.h"
-#include "Statistics.h"
-#include "Trajectory.h"
-#include "XyzFileManager.h"
-#include "common.h"
-#include "json.hpp"
-using json = nlohmann::json;
-
-#include <fstream>
-
-#ifdef HIGH_PREC
-typedef double VALUETYPE;
-#else
-typedef float VALUETYPE;
-#endif
-
-void print_vec(const vector<VALUETYPE>& vec) {
-  int nloc = vec.size() / 3;
-  for (int kk = 0; kk < nloc; ++kk) {
-    for (int dd = 0; dd < 3; ++dd) {
-      cout << vec[kk * 3 + dd] << " \t ";
-    }
-    cout << endl;
-  }
-}
-
-int main(int argc, char* argv[]) {
-  UnitManager::set("metal");
-
-  if (argc == 0) {
-    cerr << "usage " << endl;
-    cerr << argv[0] << " input_script " << endl;
-    return 1;
-  }
-
-  ifstream fp(argv[1]);
-  json jdata;
-  fp >> jdata;
-  cout << "# using data base" << endl;
-  cout << setw(4) << jdata << endl;
-
-  int nframes = 1;
-
-  vector<VALUETYPE> dcoord, dveloc, dbox, dmass;
-  vector<int> dtype;
-  vector<int> freez;
-
-  // load_raw (dcoord, dtype, dbox);
-  // dveloc.resize(dcoord.size(), 0.);
-  string conf_format = jdata["conf_format"];
-  string conf_file = jdata["conf_file"];
-  vector<int> resdindex, atomindex;
-  vector<string> resdname, atomname;
-  vector<vector<double> > posi, velo, tmp_forc;
-  vector<double> boxsize;
-  if (conf_format == "gro") {
-    GroFileManager::read(conf_file, resdindex, resdname, atomname, atomindex,
-                         posi, velo, boxsize);
-  } else if (conf_format == "xyz") {
-    XyzFileManager::read(conf_file, atomname, posi, velo, tmp_forc, boxsize);
-    if (velo.size() == 0) {
-      for (unsigned ii = 0; ii < posi.size(); ++ii) {
-        velo.push_back(vector<double>(3, 0.));
-      }
-    }
-    // convert to nanometer
-    for (unsigned ii = 0; ii < posi.size(); ++ii) {
-      for (unsigned dd = 0; dd < 3; ++dd) {
-        posi[ii][dd] *= .1;
-        velo[ii][dd] *= .1;
-      }
-    }
-    for (unsigned dd = 0; dd < 9; ++dd) {
-      boxsize[dd] *= .1;
-    }
-    for (unsigned ii = 0; ii < posi.size(); ++ii) {
-      resdindex.push_back(ii + 1);
-      atomindex.push_back(ii + 1);
-    }
-    resdname = atomname;
-  } else {
-    cerr << "unknown conf file format: " << conf_format << endl;
-    return 1;
-  }
-  map<string, int> name_type_map = jdata["atom_type"];
-  map<string, VALUETYPE> name_mass_map = jdata["atom_mass"];
-  map<string, VALUETYPE> name_charge_map;
-  if (jdata.find("atom_charge") == jdata.end()) {
-    for (map<string, VALUETYPE>::iterator iter = name_mass_map.begin();
-         iter != name_mass_map.end(); ++iter) {
-      name_charge_map[iter->first] = 0.;
-    }
-  } else {
-    map<string, VALUETYPE> name_charge_map_tmp = jdata["atom_charge"];
-    name_charge_map = name_charge_map_tmp;
-  }
-  if (jdata.find("freeze_atoms") != jdata.end()) {
-    freez = jdata["freeze_atoms"].get<vector<int> >();
-  }
-
-  // convert but do not sort
-  Convert<VALUETYPE> cvt(atomname, name_type_map, name_mass_map,
-                         name_charge_map, false);
-  cvt.gro2nnp(dcoord, dveloc, dbox, posi, velo, boxsize);
-  dtype = cvt.get_type();
-  dmass = cvt.get_mass();
-
-  int nloc = dtype.size();
-  SimulationRegion<double> region;
-  region.reinitBox(&dbox[0]);
-  normalize_coord<VALUETYPE>(dcoord, region);
-
-  vector<VALUETYPE> dforce(nloc * 3, 0.);
-  vector<VALUETYPE> dae(nloc * 1, 0.);
-  vector<VALUETYPE> dav(nloc * 9, 0.);
-  vector<VALUETYPE> dvirial(9, 0.0);
-  VALUETYPE dener = 0;
-
-  string graph_file = jdata["graph_file"];
-  VALUETYPE dt = jdata["dt"];
-  int nsteps = jdata["nsteps"];
-  int nener = jdata["ener_freq"];
-  int nxtc = jdata["xtc_freq"];
-  int ntrr = jdata["trr_freq"];
-  string ener_file = jdata["ener_file"];
-  string xtc_file = jdata["xtc_file"];
-  string trr_file = jdata["trr_file"];
-  double temperature = jdata["T"];
-  double tau_t = jdata["tau_T"];
-  long long int seed = 0;
-  if (jdata.find("rand_seed") != jdata.end()) {
-    seed = jdata["rand_seed"];
-  }
-  bool print_f = false;
-  if (jdata.find("print_force") != jdata.end()) {
-    print_f = jdata["print_force"];
-  }
-
-  Integrator<VALUETYPE> inte;
-  ThermostatLangevin<VALUETYPE> thm(temperature, tau_t, seed);
-  deepmd::DeepPot nnp(graph_file);
-
-  Statistics<VALUETYPE> st;
-  XtcSaver sxtc(xtc_file.c_str(), nloc);
-  TrrSaver strr(trr_file.c_str(), nloc);
-
-  // compute force at step 0
-  nnp.compute(dener, dforce, dvirial, dcoord, dtype, dbox);
-  // change virial to gromacs convention
-  for (int ii = 0; ii < 9; ++ii) {
-    dvirial[ii] *= -0.5;
-  }
-  st.record(dener, dvirial, dveloc, dmass, region);
-  ofstream efout(ener_file);
-  ofstream pforce;
-  if (print_f) {
-    pforce.open("force.out");
-  }
-  st.print_head(efout);
-  st.print(efout, 0, 0);
-
-  for (int ii = 0; ii < nsteps; ++ii) {
-    inte.stepVeloc(dveloc, dforce, dmass, 0.5 * dt, freez);
-    inte.stepCoord(dcoord, dveloc, 0.5 * dt);
-    thm.stepOU(dveloc, dmass, dt, freez);
-    inte.stepCoord(dcoord, dveloc, 0.5 * dt);
-    normalize_coord<VALUETYPE>(dcoord, region);
-    nnp.compute(dener, dforce, dvirial, dae, dav, dcoord, dtype, dbox);
-    // change virial to gromacs convention
-    for (int ii = 0; ii < 9; ++ii) {
-      dvirial[ii] *= -0.5;
-    }
-    inte.stepVeloc(dveloc, dforce, dmass, 0.5 * dt, freez);
-    if ((ii + 1) % nener == 0) {
-      st.record(dener, dvirial, dveloc, dmass, region);
-      st.print(efout, ii + 1, (ii + 1) * dt);
-      efout.flush();
-    }
-    if (nxtc > 0 && (ii + 1) % nxtc == 0) {
-      cvt.nnp2gro(posi, velo, boxsize, dcoord, dveloc, dbox);
-      sxtc.save(ii + 1, (ii + 1) * dt, posi, boxsize);
-    }
-    if (ntrr > 0 && (ii + 1) % ntrr == 0) {
-      cvt.nnp2gro(posi, velo, boxsize, dcoord, dveloc, dbox);
-      strr.save(ii + 1, (ii + 1) * dt, posi, velo, vector<vector<VALUETYPE> >(),
-                boxsize);
-      if (print_f) {
-        for (int jj = 0; jj < dforce.size(); ++jj) {
-          pforce << dforce[jj] << " ";
-        }
-        pforce << endl;
-      }
-    }
-  }
-
-  cvt.nnp2gro(posi, velo, boxsize, dcoord, dveloc, dbox);
-  GroFileManager::write("out.gro", resdindex, resdname, atomname, atomindex,
-                        posi, velo, boxsize);
-  // ofstream oxyz ("out.xyz");
-  // oxyz << nloc << endl;
-  // oxyz << setprecision(12) ;
-  // for (int ii = 0; ii < dbox.size(); ++ii) {
-  //   oxyz << dbox[ii] * 1 << " " ;
-  // }
-  // oxyz << endl;
-  // for (int ii = 0; ii < posi.size(); ++ii){
-  //   oxyz << atomname[ii] << " \t" ;
-  //   for (int dd = 0; dd < 3; ++dd){
-  //     oxyz << posi[ii][dd] * 10 << " ";
-  //   }
-  //   oxyz << endl;
-  // }
-
-  return 0;
-}
diff --git a/source/md/src/AdWeight.cc b/source/md/src/AdWeight.cc
deleted file mode 100644
index f1b9922f94..0000000000
--- a/source/md/src/AdWeight.cc
+++ /dev/null
@@ -1,258 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "AdWeight.h"
-
-#include <cassert>
-#include <cmath>
-#include <iostream>
-
-#include "CosSwitch.h"
-
-AdWeight::AdWeight(const VALUETYPE& pl) { protect_level = pl; }
-
-void AdWeight::sel_nn_atom(vector<VALUETYPE>& nn_coord,
-                           vector<int>& nn_type,
-                           vector<int>& nn_idx,
-                           vector<int>& nn_tag,
-                           const vector<VALUETYPE>& dcoord,
-                           const vector<int>& dtype) const {
-  nn_coord.clear();
-  nn_type.clear();
-  nn_idx.clear();
-
-  vector<int>& tag(nn_tag);
-  zone_tag(tag, dcoord);
-  for (int ii = 0; ii < tag.size(); ++ii) {
-    if (tag[ii] != 0) {
-      nn_coord.push_back(dcoord[3 * ii + 0]);
-      nn_coord.push_back(dcoord[3 * ii + 1]);
-      nn_coord.push_back(dcoord[3 * ii + 2]);
-      nn_type.push_back(dtype[ii]);
-      nn_idx.push_back(ii);
-    }
-  }
-}
-
-void AdWeight::force_intpl(vector<VALUETYPE>& of,
-                           const vector<VALUETYPE>& dcoord,
-                           const vector<VALUETYPE>& ff_force,
-                           const vector<VALUETYPE>& nn_force,
-                           const vector<int>& nn_idx) const {
-  int nall = dcoord.size() / 3;
-
-  vector<VALUETYPE> weight, weight_x;
-  atom_weight(weight, weight_x, dcoord);
-  assert(nall == weight.size());
-  // for (unsigned ii = 0; ii < weight.size(); ++ii){
-  //   cout << ii << " " << weight[ii] << " " << dcoord[ii*3] << endl;
-  // }
-
-  // cout << "of " << of.size() <<  endl;
-  // cout << "dcoord " << dcoord.size() <<  endl;
-  // cout << "ff_f " << ff_force.size() <<  endl;
-  // cout << "nn_f " << nn_force.size() <<  endl;
-  // cout << "nn_i " << nn_idx.size() <<  endl;
-  // cout << "w " << weight.size() <<  endl;
-  vector<VALUETYPE> nn_sum(3, 0.);
-  vector<VALUETYPE> ff_sum(3, 0.);
-  // for (int ii = 0; ii < ff_force.size() / 3; ++ii){
-  //   for (int dd = 0; dd < 3; ++dd){
-  //     ff_sum[dd] += ff_force[ii*3+dd];
-  //   }
-  // }
-  // for (int ii = 0; ii < nn_force.size() / 3; ++ii){
-  //   for (int dd = 0; dd < 3; ++dd){
-  //     nn_sum[dd] += nn_force[ii*3+dd];
-  //   }
-  // }
-  // cout << ff_sum[0]   << " "  << ff_sum[1]   << " "  << ff_sum[2]   << " "
-  // <<endl; cout << nn_sum[0]   << " "  << nn_sum[1]   << " "  << nn_sum[2] <<
-  // " " <<endl;
-
-  for (int ii = 0; ii < nn_idx.size(); ++ii) {
-    int idx = nn_idx[ii];
-    for (int dd = 0; dd < 3; ++dd) {
-      // nn_sum[dd] += weight[idx] * nn_force[ii*3+dd];
-      nn_sum[dd] += 1 * nn_force[ii * 3 + dd];
-      of[idx * 3 + dd] += weight[idx] * nn_force[ii * 3 + dd];
-    }
-    // cout << "nn " << dcoord[idx*3] << " " << weight[idx] << endl;
-  }
-  for (int ii = 0; ii < nall; ++ii) {
-    for (int dd = 0; dd < 3; ++dd) {
-      of[ii * 3 + dd] += (1 - weight[ii]) * ff_force[ii * 3 + dd];
-    }
-    // cout << "ff " << dcoord[ii*3] << " " << 1-weight[ii] << endl;
-  }
-
-  for (int ii = 0; ii < of.size() / 3; ++ii) {
-    for (int dd = 0; dd < 3; ++dd) {
-      ff_sum[dd] += ff_force[ii * 3 + dd];
-    }
-  }
-  // cout << ff_sum[0]   << " "  << ff_sum[1]   << " "  << ff_sum[2]   << " "
-  // <<endl; cout << nn_sum[0]   << " "  << nn_sum[1]   << " "  << nn_sum[2] <<
-  // " " <<endl; cout << endl;
-}
-
-void AdWeight::force_intpl(vector<VALUETYPE>& of,
-                           const vector<VALUETYPE>& dcoord,
-                           const vector<VALUETYPE>& ff_bd_force,
-                           const vector<VALUETYPE>& ff_nb_force,
-                           const vector<VALUETYPE>& nn_force,
-                           const vector<int>& nn_idx) const {
-  int nall = dcoord.size() / 3;
-
-  vector<VALUETYPE> weight, weight_x;
-  atom_weight(weight, weight_x, dcoord);
-  assert(nall == weight.size());
-
-  vector<VALUETYPE> nn_sum(3, 0.);
-  vector<VALUETYPE> ff_sum(3, 0.);
-
-  for (int ii = 0; ii < nn_idx.size(); ++ii) {
-    int idx = nn_idx[ii];
-    for (int dd = 0; dd < 3; ++dd) {
-      // nn_sum[dd] += weight[idx] * nn_force[ii*3+dd];
-      // nn_sum[dd] +=  1 * nn_force[ii*3+dd];
-      of[idx * 3 + dd] += weight[idx] * nn_force[ii * 3 + dd];
-      // if (fabs(nn_force[ii*3+dd]) > 1e6) {
-      // 	cout << " ii " << ii
-      // 	     << " dd " << dd
-      // 	     << " coord " << dcoord[ii*3+dd]
-      // 	     << " nn_f " << nn_force[ii*3+dd]
-      // 	     << " ww " << weight[ii]
-      // 	     << endl;
-      // }
-    }
-    // cout << "nn " << dcoord[idx*3] << " " << weight[idx] << endl;
-  }
-
-  // double protect_level = 1e-3;
-  // cout << "with protect_level " << protect_level << endl;
-  for (int ii = 0; ii < nall; ++ii) {
-    for (int dd = 0; dd < 3; ++dd) {
-      double pref = (1 - weight[ii]);
-      if (fabs(pref) < protect_level) {
-        pref = protect_level;
-      }
-      of[ii * 3 + dd] += pref * ff_bd_force[ii * 3 + dd];
-      // if (fabs(ff_bd_force[ii*3+dd]) > 1e6) {
-      // 	cout << " ii " << ii
-      // 	     << " dd " << dd
-      // 	     << " coord " << dcoord[ii*3+dd]
-      // 	     << " ff_f " << ff_bd_force[ii*3+dd]
-      // 	     << " ww " << 1 - weight[ii]
-      // 	     << endl;
-      // }
-    }
-    // cout << "ff " << dcoord[ii*3] << " " << 1-weight[ii] << endl;
-  }
-  for (int ii = 0; ii < nall; ++ii) {
-    for (int dd = 0; dd < 3; ++dd) {
-      of[ii * 3 + dd] += (1 - weight[ii]) * ff_nb_force[ii * 3 + dd];
-      // if (fabs(ff_nb_force[ii*3+dd]) > 1e6) {
-      // 	cout << " ii " << ii
-      // 	     << " dd " << dd
-      // 	     << " coord " << dcoord[ii*3+dd]
-      // 	     << " ff_f " << ff_nb_force[ii*3+dd]
-      // 	     << " ww " << 1 - weight[ii]
-      // 	     << endl;
-      // }
-    }
-    // cout << "ff " << dcoord[ii*3] << " " << 1-weight[ii] << endl;
-  }
-
-  for (int ii = 0; ii < of.size() / 3; ++ii) {
-    for (int dd = 0; dd < 3; ++dd) {
-      ff_sum[dd] += ff_bd_force[ii * 3 + dd];
-    }
-  }
-  // cout << ff_sum[0]   << " "  << ff_sum[1]   << " "  << ff_sum[2]   << " "
-  // <<endl; cout << nn_sum[0]   << " "  << nn_sum[1]   << " "  << nn_sum[2] <<
-  // " " <<endl; cout << endl;
-}
-
-SlabWeight::SlabWeight(const vector<VALUETYPE>& box,
-                       const VALUETYPE& rnn_,
-                       const VALUETYPE& rhy_,
-                       const VALUETYPE& rc_,
-                       const VALUETYPE& protect_level_)
-    : AdWeight(protect_level_) {
-  assert(box.size() == 9);
-  center.resize(3);
-  for (int ii = 0; ii < 3; ++ii) {
-    center[ii] = 0.5 * box[3 * ii + ii];
-  }
-  rnn = rnn_;
-  rhy = rhy_;
-  rc = rc_;
-}
-
-void SlabWeight::zone_tag(vector<int>& tag,
-                          const vector<VALUETYPE>& coord) const {
-  int natoms = coord.size() / 3;
-  tag.resize(natoms, 0);
-
-  // slab axis x
-  VALUETYPE radius = rnn + rhy;
-  for (int ii = 0; ii < natoms; ++ii) {
-    VALUETYPE posi = fabs(coord[ii * 3] - center[0]);
-    if (posi < radius) {
-      tag[ii] = 3;
-    } else if (posi < radius + rc) {
-      tag[ii] = 2;
-    } else if (posi < radius + rc * 2) {
-      tag[ii] = 1;
-    } else {
-      tag[ii] = 0;
-    }
-  }
-}
-
-// dirty hacking
-void SlabWeight::atom_weight(vector<VALUETYPE>& weight,
-                             vector<VALUETYPE>& weight_x,
-                             const vector<VALUETYPE>& coord) const {
-  CosSwitch cs(rnn, rnn + rhy);
-
-  int natoms = coord.size() / 3;
-  weight.resize(natoms, 0);
-  weight_x.resize(natoms, 0);
-  // slab axis x
-  // for (int ii = 0; ii < natoms; ++ii){
-  //   VALUETYPE posi = fabs(coord[ii*3] - center[0]);
-  //   cs.eval (weight[ii], posi);
-  //   // if (posi < radius){
-  //   //   weight[ii] = 1.;
-  //   // }
-  //   // else {
-  //   //   weight[ii] = 0.;
-  //   // }
-  // }
-  // for (int ii = 0; ii < natoms/3; ++ii){
-  //   VALUETYPE posi = fabs(coord[ii*3] - center[0]);
-  //   cs.eval (weight[ii], posi);
-  //   weight[natoms/3 + ii*2 + 0] = weight[ii];
-  //   weight[natoms/3 + ii*2 + 1] = weight[ii];
-  //   // weight_x
-  //   weight_x[ii] = posi;
-  //   weight_x[natoms/3 + ii*2 + 0] = posi;
-  //   weight_x[natoms/3 + ii*2 + 1] = posi;
-  //   // if (posi < radius){
-  //   //   weight[ii] = 1.;
-  //   // }
-  //   // else {
-  //   //   weight[ii] = 0.;
-  //   // }
-  // }
-  for (int ii = 0; ii < natoms; ii += 3) {
-    VALUETYPE posi = fabs(coord[ii * 3] - center[0]);
-    cs.eval(weight[ii], posi);
-    weight[ii + 1] = weight[ii];
-    weight[ii + 2] = weight[ii];
-    // weight_x
-    weight_x[ii] = posi;
-    weight_x[ii + 1] = posi;
-    weight_x[ii + 2] = posi;
-  }
-}
diff --git a/source/md/src/Convert.cc b/source/md/src/Convert.cc
deleted file mode 100644
index b8014bf974..0000000000
--- a/source/md/src/Convert.cc
+++ /dev/null
@@ -1,115 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "Convert.h"
-
-#include <algorithm>
-#include <cassert>
-#include <iostream>
-
-template <typename VALUETYPE>
-Convert<VALUETYPE>::Convert(const vector<string>& atomname,
-                            map<string, int>& name_type_map,
-                            map<string, VALUETYPE>& name_mass_map,
-                            map<string, VALUETYPE>& name_charge_map,
-                            const bool sort_) {
-  int natoms = atomname.size();
-  atype.resize(natoms);
-  amass.resize(natoms);
-  vector<VALUETYPE> tmp_charge(natoms);
-  for (unsigned ii = 0; ii < atype.size(); ++ii) {
-    atype[ii] = name_type_map[atomname[ii]];
-    amass[ii] = name_mass_map[atomname[ii]];
-    tmp_charge[ii] = name_charge_map[atomname[ii]];
-  }
-  vector<pair<int, pair<int, VALUETYPE> > > sorting(natoms);
-  for (unsigned ii = 0; ii < sorting.size(); ++ii) {
-    sorting[ii] = pair<int, pair<int, VALUETYPE> >(
-        atype[ii], pair<int, VALUETYPE>(ii, amass[ii]));
-  }
-  if (sort_) {
-    sort(sorting.begin(), sorting.end());
-  }
-  idx_map_nnp2gro.resize(natoms);
-  idx_map_gro2nnp.resize(natoms);
-  for (unsigned ii = 0; ii < idx_map_nnp2gro.size(); ++ii) {
-    idx_map_nnp2gro[ii] = sorting[ii].second.first;
-    idx_map_gro2nnp[sorting[ii].second.first] = ii;
-    atype[ii] = sorting[ii].first;
-    amass[ii] = sorting[ii].second.second;
-  }
-  acharge.resize(natoms);
-  for (int ii = 0; ii < natoms; ++ii) {
-    int gro_i = idx_map_nnp2gro[ii];
-    acharge[ii] = tmp_charge[gro_i];
-  }
-}
-
-template <typename VALUETYPE>
-void Convert<VALUETYPE>::gro2nnp(vector<VALUETYPE>& coord,
-                                 vector<VALUETYPE>& veloc,
-                                 vector<VALUETYPE>& box,
-                                 const vector<vector<double> >& posi,
-                                 const vector<vector<double> >& velo,
-                                 const vector<double>& box_size) const {
-  assert(posi.size() == idx_map_nnp2gro.size());
-  assert(velo.size() == idx_map_nnp2gro.size());
-  int natoms = idx_map_nnp2gro.size();
-  coord.resize(3 * static_cast<size_t>(natoms));
-  veloc.resize(3 * static_cast<size_t>(natoms));
-  for (unsigned ii = 0; ii < natoms; ++ii) {
-    int gro_i = idx_map_nnp2gro[ii];
-    for (int dd = 0; dd < 3; ++dd) {
-      coord[ii * 3 + dd] = posi[gro_i][dd] * 10;
-      veloc[ii * 3 + dd] = velo[gro_i][dd] * 10;
-    }
-  }
-  box.resize(9);
-  for (int dd = 0; dd < 9; ++dd) {
-    box[dd] = box_size[dd] * 10;
-  }
-}
-
-template <typename VALUETYPE>
-void Convert<VALUETYPE>::nnp2gro(vector<vector<double> >& posi,
-                                 vector<vector<double> >& velo,
-                                 vector<double>& box_size,
-                                 const vector<VALUETYPE>& coord,
-                                 const vector<VALUETYPE>& veloc,
-                                 const vector<VALUETYPE>& box) const {
-  int natoms = idx_map_nnp2gro.size();
-  posi.resize(natoms);
-  velo.resize(natoms);
-  for (unsigned ii = 0; ii < posi.size(); ++ii) {
-    posi[ii].resize(3);
-    velo[ii].resize(3);
-  }
-  for (unsigned ii = 0; ii < posi.size(); ++ii) {
-    int gro_i = idx_map_nnp2gro[ii];
-    for (int dd = 0; dd < 3; ++dd) {
-      posi[gro_i][dd] = coord[ii * 3 + dd] * 0.1;
-      velo[gro_i][dd] = veloc[ii * 3 + dd] * 0.1;
-    }
-  }
-  box_size.resize(9);
-  for (int dd = 0; dd < 9; ++dd) {
-    box_size[dd] = box[dd] * 0.1;
-  }
-}
-
-template <typename VALUETYPE>
-void Convert<VALUETYPE>::idx_gro2nnp(vector<int>& out,
-                                     const vector<int>& in) const {
-  for (unsigned ii = 0; ii < in.size(); ++ii) {
-    out[ii] = idx_map_gro2nnp[in[ii]];
-  }
-}
-
-template <typename VALUETYPE>
-void Convert<VALUETYPE>::idx_nnp2gro(vector<int>& out,
-                                     const vector<int>& in) const {
-  for (unsigned ii = 0; ii < in.size(); ++ii) {
-    out[ii] = idx_map_nnp2gro[in[ii]];
-  }
-}
-
-template class Convert<float>;
-template class Convert<double>;
diff --git a/source/md/src/Gaussian.cc b/source/md/src/Gaussian.cc
deleted file mode 100644
index 80d78c9385..0000000000
--- a/source/md/src/Gaussian.cc
+++ /dev/null
@@ -1,20 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "Gaussian.h"
-
-void Gaussian::set_seed(unsigned long s) {
-  RandomGenerator_MT19937::init_genrand(s);
-}
-
-void Gaussian::gen(double* vec, const int numb_gen) {
-  const double epsilon = std::numeric_limits<double>::min();
-  const double two_pi = 2.0 * M_PI;
-
-  for (int ii = 0; ii < numb_gen; ++ii) {
-    double u0, u1;
-    do {
-      u0 = RandomGenerator_MT19937::genrand_real3();
-      u1 = RandomGenerator_MT19937::genrand_real3();
-    } while (u0 < epsilon);
-    vec[ii] = sqrt(-2.0 * log(u0)) * cos(two_pi * u1);
-  }
-}
diff --git a/source/md/src/GroFileManager.cc b/source/md/src/GroFileManager.cc
deleted file mode 100644
index 5969168a72..0000000000
--- a/source/md/src/GroFileManager.cc
+++ /dev/null
@@ -1,286 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "GroFileManager.h"
-
-#include <cassert>
-#include <fstream>
-#include <iostream>
-#include <iterator>
-
-using namespace std;
-
-class WrongFileFormat {};
-
-bool GroFileManager::readTop(const std::string &filename,
-                             std::vector<std::string> &molnames,
-                             std::vector<int> &nmols) {
-  molnames.clear();
-  nmols.clear();
-
-  std::ifstream in(filename.c_str());
-  if (in.bad()) {
-    std::cerr << "cannot open file " << filename << std::endl;
-    return false;
-  }
-  char line[1024];
-  std::string target("[ molecules ]");
-  bool find = false;
-  while (!in.eof()) {
-    in.getline(line, 1024, '\n');
-    if (target == std::string(line)) {
-      find = true;
-      break;
-    }
-  }
-  if (!find) {
-    std::cerr << "cannot find [ molecules ] in file " << filename
-              << ". please check there is no space after \"]\"\n";
-    return false;
-  }
-
-  //   while (!(in.getline (line, 1024, '\n')).eof()){
-  // //     if (line[0] == '['){
-  // //       break;
-  // //     }
-  // //     char name[1024];
-  // //     int number;
-  // //     sscanf (line, "%s%d", name, &number);
-  // //     molnames.push_back (std::string(name));
-  // //     nmols.push_back (number);
-  //   }
-
-  std::string name;
-  int number;
-  while (!(in >> name >> number).eof()) {
-    if (name[0] == '[') {
-      break;
-    }
-    if (name.empty()) {
-      break;
-    }
-    //     std::cout << name << std::endl;
-    molnames.push_back(name);
-    nmols.push_back(number);
-  }
-
-  return true;
-}
-
-template <typename UnitaryFunction1,
-          typename UnitaryFunction2,
-          typename UnitaryFunction3,
-          typename UnitaryFunction4,
-          typename UnitaryFunction5,
-          typename UnitaryFunction6>
-bool GroFileManager::writePotenFile(const double &rmin,
-                                    const double &rcut,
-                                    const double &interval,
-                                    UnitaryFunction1 &f,
-                                    UnitaryFunction2 &fp,
-                                    UnitaryFunction3 &g,
-                                    UnitaryFunction4 &gp,
-                                    UnitaryFunction5 &h,
-                                    UnitaryFunction6 &hp,
-                                    const std::string &filename) {
-  FILE *filep = fopen(filename.c_str(), "w");
-  if (filep == NULL) {
-    std::cerr << "cannot open file " << filename << std::endl;
-    return false;
-  }
-
-  double upper = rcut + 1;
-  double nx;
-  if (int(upper / interval) != upper / interval) {
-    nx = int(upper / interval) + 1;
-  } else {
-    nx = int(upper / interval);
-  }
-  upper = interval * nx;
-
-  int i = 0;
-  for (i = 0; i <= nx + 1; ++i) {
-    double x = i * interval;
-    if (x < rmin) {
-      fprintf(filep, "%.12e\t%.12e\t%.12e\t%.12e\t%.12e\t%.12e\t%.12e\n", x, 0.,
-              0., 0., 0., 0., 0.);
-    } else {
-      fprintf(filep, "%.12e\t%.12e\t%.12e\t%.12e\t%.12e\t%.12e\t%.12e\n", x,
-              f(x), -fp(x), g(x), -gp(x), h(x), -hp(x));
-    }
-  }
-
-  fclose(filep);
-  return true;
-}
-
-void GroFileManager::read(const std::string &name,
-                          std::vector<int> &resdindex,
-                          std::vector<std::string> &resdname,
-                          std::vector<std::string> &atomname,
-                          std::vector<int> &atomindex,
-                          std::vector<std::vector<double> > &posi,
-                          std::vector<std::vector<double> > &velo,
-                          std::vector<double> &boxsize_) {
-  FILE *fp = fopen(name.c_str(), "r");
-  if (fp == NULL) {
-    std::cerr << "cannot open file " << name << std::endl;
-    return;
-  }
-  while (fgetc(fp) != '\n')
-    ;
-  int npart;
-  fscanf(fp, "%d\n", &npart);
-  fclose(fp);
-
-  resdindex.clear();
-  resdname.clear();
-  atomname.clear();
-  atomindex.clear();
-  posi.clear();
-  velo.clear();
-  vector<double> boxsize;
-  boxsize.resize(3);
-
-  fp = fopen(name.c_str(), "r");
-  while (fgetc(fp) != '\n')
-    ;
-  while (fgetc(fp) != '\n')
-    ;
-  char line[1024];
-  for (int i = 0; i < npart; ++i) {
-    fgets(line, 1024, fp);
-    char tmp[1024];
-    int tmpd;
-    char tmps[1024];
-    for (unsigned j = 0; j < 5; ++j) {
-      tmp[j] = line[j];
-    }
-    tmp[5] = '\0';
-    if (sscanf(tmp, "%d", &tmpd) != 1) {
-      throw WrongFileFormat();
-    }
-    resdindex.push_back(tmpd);
-
-    for (unsigned j = 0; j < 5; ++j) {
-      tmp[j] = line[j + 5];
-    }
-    tmp[5] = '\0';
-    if (sscanf(tmp, "%s", tmps) != 1) {
-      throw WrongFileFormat();
-    }
-    resdname.push_back(tmps);
-
-    for (unsigned j = 0; j < 5; ++j) {
-      tmp[j] = line[j + 10];
-    }
-    tmp[5] = '\0';
-    if (sscanf(tmp, "%s", tmps) != 1) {
-      throw WrongFileFormat();
-    }
-    atomname.push_back(tmps);
-
-    for (unsigned j = 0; j < 5; ++j) {
-      tmp[j] = line[j + 15];
-    }
-    tmp[5] = '\0';
-    if (sscanf(tmp, "%d", &tmpd) != 1) {
-      throw WrongFileFormat();
-    }
-    atomindex.push_back(tmpd);
-
-    double a, b, c;
-    double d, e, f;
-    std::vector<double> tmpp(3);
-    std::vector<double> tmpv(3);
-
-    int tag = sscanf(&line[20], "%lf%lf%lf%lf%lf%lf", &a, &b, &c, &d, &e, &f);
-    tmpp[0] = a;
-    tmpp[1] = b;
-    tmpp[2] = c;
-    switch (tag) {
-      case 6:
-        tmpv[0] = d;
-        tmpv[1] = e;
-        tmpv[2] = f;
-        break;
-      case 3:
-        tmpv[0] = 0.f;
-        tmpv[1] = 0.f;
-        tmpv[2] = 0.f;
-        break;
-      default:
-        throw WrongFileFormat();
-    }
-
-    posi.push_back(tmpp);
-    velo.push_back(tmpv);
-  }
-  int tag = 0;
-  double rbox[9];
-  tag = fscanf(fp, "%lf%lf%lf%lf%lf%lf%lf%lf%lf", rbox + 0, rbox + 1, rbox + 2,
-               rbox + 3, rbox + 4, rbox + 5, rbox + 6, rbox + 7, rbox + 8);
-  fclose(fp);
-
-  boxsize_.resize(9, 0.);
-  fill(boxsize_.begin(), boxsize_.end(), 0.);
-
-  if (tag == 9) {
-    boxsize_[0] = rbox[0];
-    boxsize_[4] = rbox[1];
-    boxsize_[8] = rbox[2];
-    boxsize_[0 * 3 + 1] = rbox[3];
-    boxsize_[0 * 3 + 2] = rbox[4];
-    boxsize_[1 * 3 + 0] = rbox[5];
-    boxsize_[1 * 3 + 2] = rbox[6];
-    boxsize_[2 * 3 + 0] = rbox[7];
-    boxsize_[2 * 3 + 1] = rbox[8];
-  } else {
-    assert(tag == 3);
-    boxsize_[0] = rbox[0];
-    boxsize_[4] = rbox[1];
-    boxsize_[8] = rbox[2];
-  }
-}
-
-void GroFileManager::write(const std::string &name,
-                           const std::vector<int> &resdindex,
-                           const std::vector<std::string> &resdname,
-                           const std::vector<std::string> &atomname,
-                           const std::vector<int> &atomindex,
-                           const std::vector<std::vector<double> > &posi,
-                           const std::vector<std::vector<double> > &velo,
-                           const std::vector<double> &boxsize) {
-  FILE *fp = fopen(name.c_str(), "w");
-  if (fp == NULL) {
-    std::cerr << "cannot open file " << name << std::endl;
-    return;
-  }
-  // std::copy (atomname.begin(), atomname.end(),
-  // std::ostream_iterator<std::string>(std::cout, "\n"));
-
-  fprintf(fp, "\n%d\n", int(resdindex.size()));
-  for (int i = 0; i < int(resdindex.size()); ++i) {
-    fprintf(fp, "%5d%5s%5s%5d%8.3f%8.3f%8.3f%8.4f%8.4f%8.4f\n",
-            resdindex[i] % 100000, (char *)(resdname[i].c_str()),
-            (char *)(atomname[i].c_str()), atomindex[i] % 100000, posi[i][0],
-            posi[i][1], posi[i][2], velo[i][0], velo[i][1], velo[i][2]);
-  }
-  // vector<double > box(3);
-  // for (int ii = 0; ii < 3; ++ii) box[ii] = boxsize[3*ii+ii];
-  if (boxsize.size() == 3) {
-    fprintf(fp, "%f %f %f\n", boxsize[0], boxsize[1], boxsize[2]);
-  } else if (boxsize.size() == 9) {
-    fprintf(fp, "%f %f %f %f %f %f %f %f %f \n", boxsize[0 * 3 + 0],
-            boxsize[1 * 3 + 1], boxsize[2 * 3 + 2], boxsize[0 * 3 + 1],
-            boxsize[0 * 3 + 2], boxsize[1 * 3 + 0], boxsize[1 * 3 + 2],
-            boxsize[2 * 3 + 0], boxsize[2 * 3 + 1]);
-  }
-
-  fclose(fp);
-}
-
-struct F {
-  double operator()(double x) { return 1. / x; }
-};
-struct Zero {
-  double operator()(double x) { return 0; }
-};
diff --git a/source/md/src/HarmonicAngle.cc b/source/md/src/HarmonicAngle.cc
deleted file mode 100644
index 71915683ab..0000000000
--- a/source/md/src/HarmonicAngle.cc
+++ /dev/null
@@ -1,99 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "HarmonicAngle.h"
-
-#include <cmath>
-#include <iostream>
-
-#include "common.h"
-#include "mymath.h"
-
-HarmonicAngle::HarmonicAngle(const VALUETYPE& ka_, const VALUETYPE& tt_)
-    : ka(ka_), tt(tt_) {}
-
-inline bool compute_variable(const VALUETYPE* rij,
-                             const VALUETYPE* rkj,
-                             VALUETYPE* var,
-                             VALUETYPE* dvardcos,
-                             VALUETYPE* cos_theta) {
-  *cos_theta = cos<VALUETYPE>(rij[0], rij[1], rij[2], rkj[0], rkj[1], rkj[2]);
-  *var = acos(*cos_theta);
-
-  VALUETYPE cos_theta2 = *cos_theta * *cos_theta;
-  if (cos_theta2 >= 1) {
-    *dvardcos = 1.;
-    return false;
-  }
-  *dvardcos = -1. / sqrt(1. - cos_theta2);
-  return true;
-}
-
-void HarmonicAngle::compute(VALUETYPE& ener,
-                            vector<VALUETYPE>& force,
-                            vector<VALUETYPE>& virial,
-                            const vector<VALUETYPE>& coord,
-                            const vector<int>& atype,
-                            const SimulationRegion<VALUETYPE>& region,
-                            const vector<int>& alist) {
-  // all set zeros
-  for (unsigned _ = 0; _ < alist.size(); _ += 3) {
-    int ii = alist[_];
-    int jj = alist[_ + 1];
-    int kk = alist[_ + 2];
-
-    VALUETYPE rij[3], rkj[3];
-    region.diffNearestNeighbor(&coord[ii * 3], &coord[jj * 3], rij);
-    region.diffNearestNeighbor(&coord[kk * 3], &coord[jj * 3], rkj);
-
-    VALUETYPE var(0), dvardcos(0), cos_theta(0);
-    bool apply_force = compute_variable(rij, rkj, &var, &dvardcos, &cos_theta);
-
-    VALUETYPE dudvar(0), angle_energy(0);
-    VALUETYPE diff = var - tt;
-    VALUETYPE pdiff = ka * diff;
-    dudvar = -pdiff;
-    angle_energy = VALUETYPE(0.5) * pdiff * diff;
-
-    ener += angle_energy;
-
-    // VALUETYPE fijx, fijy, fijz;
-    // VALUETYPE fkjx, fkjy, fkjz;
-    VALUETYPE fij[3];
-    VALUETYPE fkj[3];
-
-    if (apply_force) {
-      VALUETYPE dudcos = dudvar * dvardcos;
-      VALUETYPE rij2 = dot<VALUETYPE>(rij, rij);
-      VALUETYPE rkj2 = dot<VALUETYPE>(rkj, rkj);
-      VALUETYPE invrij = 1. / sqrt(rij2);
-      VALUETYPE invrkj = 1. / sqrt(rkj2);
-      VALUETYPE invrij2 = invrij * invrij;
-      VALUETYPE invrkj2 = invrkj * invrkj;
-      VALUETYPE invrijrkj = invrij * invrkj;
-      // can be further optimized:
-      fij[0] = dudcos * (rkj[0] * invrijrkj - rij[0] * invrij2 * cos_theta);
-      fij[1] = dudcos * (rkj[1] * invrijrkj - rij[1] * invrij2 * cos_theta);
-      fij[2] = dudcos * (rkj[2] * invrijrkj - rij[2] * invrij2 * cos_theta);
-      fkj[0] = dudcos * (rij[0] * invrijrkj - rkj[0] * invrkj2 * cos_theta);
-      fkj[1] = dudcos * (rij[1] * invrijrkj - rkj[1] * invrkj2 * cos_theta);
-      fkj[2] = dudcos * (rij[2] * invrijrkj - rkj[2] * invrkj2 * cos_theta);
-    } else {
-      fij[0] = fij[1] = fij[2] = fkj[0] = fkj[1] = fkj[2] = VALUETYPE(0);
-    }
-
-    force[3 * ii + 0] += fij[0];
-    force[3 * ii + 1] += fij[1];
-    force[3 * ii + 2] += fij[2];
-    force[3 * kk + 0] += fkj[0];
-    force[3 * kk + 1] += fkj[1];
-    force[3 * kk + 2] += fkj[2];
-    force[3 * jj + 0] -= fij[0] + fkj[0];
-    force[3 * jj + 1] -= fij[1] + fkj[1];
-    force[3 * jj + 2] -= fij[2] + fkj[2];
-    for (int dd0 = 0; dd0 < 3; ++dd0) {
-      for (int dd1 = 0; dd1 < 3; ++dd1) {
-        virial[dd0 * 3 + dd1] -= 0.5 * fij[dd0] * rij[dd1];
-        virial[dd0 * 3 + dd1] -= 0.5 * fkj[dd0] * rkj[dd1];
-      }
-    }
-  }
-}
diff --git a/source/md/src/HarmonicBond.cc b/source/md/src/HarmonicBond.cc
deleted file mode 100644
index cadbf46b53..0000000000
--- a/source/md/src/HarmonicBond.cc
+++ /dev/null
@@ -1,50 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "HarmonicBond.h"
-
-#include <cmath>
-#include <iostream>
-
-#include "common.h"
-
-HarmonicBond::HarmonicBond(const VALUETYPE& kk_, const VALUETYPE& bb_)
-    : kk(kk_), bb(bb_) {}
-
-void HarmonicBond::hb_inner(VALUETYPE& ae, VALUETYPE& af, const VALUETYPE& r1) {
-  VALUETYPE diff = r1 - bb;
-  // cout << bb << " " << r1 << endl;
-  VALUETYPE pdiff = kk * diff;
-  af = -pdiff / r1;
-  ae = 0.5 * pdiff * diff;
-}
-
-void HarmonicBond::compute(VALUETYPE& ener,
-                           vector<VALUETYPE>& force,
-                           vector<VALUETYPE>& virial,
-                           const vector<VALUETYPE>& coord,
-                           const vector<int>& atype,
-                           const SimulationRegion<VALUETYPE>& region,
-                           const vector<int>& blist) {
-  // all set zeros
-  for (unsigned _ = 0; _ < blist.size(); _ += 2) {
-    int ii = blist[_];
-    int jj = blist[_ + 1];
-    VALUETYPE diff[3];
-    region.diffNearestNeighbor(&coord[ii * 3], &coord[jj * 3], diff);
-    VALUETYPE r2 = diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2];
-    VALUETYPE r1 = sqrt(r2);
-    VALUETYPE ae, af;
-    hb_inner(ae, af, r1);
-    for (int dd = 0; dd < 3; ++dd) {
-      force[ii * 3 + dd] += af * diff[dd];
-    }
-    for (int dd = 0; dd < 3; ++dd) {
-      force[jj * 3 + dd] -= af * diff[dd];
-    }
-    ener += ae;
-    for (int dd0 = 0; dd0 < 3; ++dd0) {
-      for (int dd1 = 0; dd1 < 3; ++dd1) {
-        virial[dd0 * 3 + dd1] -= 0.5 * diff[dd0] * af * diff[dd1];
-      }
-    }
-  }
-}
diff --git a/source/md/src/Integrator.cc b/source/md/src/Integrator.cc
deleted file mode 100644
index 333776c7f2..0000000000
--- a/source/md/src/Integrator.cc
+++ /dev/null
@@ -1,94 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "Integrator.h"
-
-#include <cassert>
-
-template <typename VALUETYPE>
-void Integrator<VALUETYPE>::stepVeloc(vector<VALUETYPE> &vv,
-                                      const vector<VALUETYPE> &ff,
-                                      const vector<VALUETYPE> &mass,
-                                      const double &dt,
-                                      const vector<int> &freez) const {
-  int natoms = ff.size() / 3;
-  for (int kk = 0; kk < natoms; ++kk) {
-    VALUETYPE invmdt = dt / (mass[kk] * massConst);
-    vv[kk * 3 + 0] += ff[kk * 3 + 0] * invmdt;
-    vv[kk * 3 + 1] += ff[kk * 3 + 1] * invmdt;
-    vv[kk * 3 + 2] += ff[kk * 3 + 2] * invmdt;
-  }
-  for (unsigned ii = 0; ii < freez.size(); ++ii) {
-    int kk = freez[ii];
-    vv[kk * 3 + 0] = 0;
-    vv[kk * 3 + 1] = 0;
-    vv[kk * 3 + 2] = 0;
-  }
-}
-
-template <typename VALUETYPE>
-void Integrator<VALUETYPE>::stepCoord(vector<VALUETYPE> &rr,
-                                      const vector<VALUETYPE> &vv,
-                                      const double &dt) const {
-  for (unsigned kk = 0; kk < vv.size(); ++kk) {
-    rr[kk] += dt * vv[kk];
-  }
-}
-
-template <typename VALUETYPE>
-ThermostatLangevin<VALUETYPE>::ThermostatLangevin(const VALUETYPE T_,
-                                                  const VALUETYPE tau_,
-                                                  const long long int seed) {
-  reinit(T_, tau_, seed);
-}
-
-template <typename VALUETYPE>
-void ThermostatLangevin<VALUETYPE>::reinit(const VALUETYPE T_,
-                                           const VALUETYPE tau_,
-                                           const long long int seed) {
-  gaussian.set_seed(seed);
-  temperature = T_;
-  kT = UnitManager::BoltzmannConstant * T_;
-  gamma = 1. / tau_;
-  VALUETYPE twogammakT = 2. * gamma * kT;
-  sigma = 1. / sqrt(twogammakT) * twogammakT;
-  sigmainvsqrt2gamma = VALUETYPE(sigma / sqrt(2. * gamma));
-}
-
-template <typename VALUETYPE>
-void ThermostatLangevin<VALUETYPE>::stepOU(vector<VALUETYPE> &vv,
-                                           const vector<VALUETYPE> &mass,
-                                           const double &dt,
-                                           const vector<int> &freez) const {
-  VALUETYPE emgammat = exp(-gamma * dt);
-  VALUETYPE sqrt1memgammat2 = sqrt(1. - emgammat * emgammat);
-  VALUETYPE prefR = sigmainvsqrt2gamma * sqrt1memgammat2;
-
-  int numb_part = mass.size();
-  assert(int(vv.size()) == 3 * numb_part);
-
-  double *all_rands = (double *)malloc(sizeof(double) * numb_part * 3);
-  gaussian.gen(all_rands, numb_part * 3);
-
-  for (int kk = 0; kk < numb_part; ++kk) {
-    VALUETYPE sm = mass[kk] * UnitManager::IntegratorMassConstant;
-    VALUETYPE invsqrtm = 1. / sqrt(sm);
-    vv[kk * 3 + 0] =
-        emgammat * vv[kk * 3 + 0] + prefR * invsqrtm * all_rands[kk * 3 + 0];
-    vv[kk * 3 + 1] =
-        emgammat * vv[kk * 3 + 1] + prefR * invsqrtm * all_rands[kk * 3 + 1];
-    vv[kk * 3 + 2] =
-        emgammat * vv[kk * 3 + 2] + prefR * invsqrtm * all_rands[kk * 3 + 2];
-  }
-  for (unsigned ii = 0; ii < freez.size(); ++ii) {
-    int kk = freez[ii];
-    vv[kk * 3 + 0] = 0;
-    vv[kk * 3 + 1] = 0;
-    vv[kk * 3 + 2] = 0;
-  }
-
-  free(all_rands);
-}
-
-template class Integrator<float>;
-template class Integrator<double>;
-template class ThermostatLangevin<float>;
-template class ThermostatLangevin<double>;
diff --git a/source/md/src/Interpolation.cpp b/source/md/src/Interpolation.cpp
deleted file mode 100644
index 0e5e7a9214..0000000000
--- a/source/md/src/Interpolation.cpp
+++ /dev/null
@@ -1,553 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "Interpolation.h"
-
-#include <iterator>
-
-void Interpolation::piece6OrderInterpol(const double& a,
-                                        const double& b,
-                                        const double& va,
-                                        const double& vb,
-                                        const double& da,
-                                        const double& db,
-                                        const double& dda,
-                                        const double& ddb,
-                                        Poly& p) {
-  std::vector<Poly> standardPolys(6);
-  for (unsigned i = 0; i < 6; ++i) {
-    standardPolys[i].getOrder() = 5;
-    standardPolys[i].getCoeffs().resize(6);
-  }
-  standardPolys[0].getCoeffs()[0] = 1;
-  standardPolys[0].getCoeffs()[1] = 0;
-  standardPolys[0].getCoeffs()[2] = 0;
-  standardPolys[0].getCoeffs()[3] = -10;
-  standardPolys[0].getCoeffs()[4] = 15;
-  standardPolys[0].getCoeffs()[5] = -6;
-
-  standardPolys[1].getCoeffs()[0] = 0;
-  standardPolys[1].getCoeffs()[1] = 0;
-  standardPolys[1].getCoeffs()[2] = 0;
-  standardPolys[1].getCoeffs()[3] = 10;
-  standardPolys[1].getCoeffs()[4] = -15;
-  standardPolys[1].getCoeffs()[5] = 6;
-
-  standardPolys[2].getCoeffs()[0] = 0;
-  standardPolys[2].getCoeffs()[1] = 1;
-  standardPolys[2].getCoeffs()[2] = 0;
-  standardPolys[2].getCoeffs()[3] = -6;
-  standardPolys[2].getCoeffs()[4] = 8;
-  standardPolys[2].getCoeffs()[5] = -3;
-
-  standardPolys[3].getCoeffs()[0] = 0;
-  standardPolys[3].getCoeffs()[1] = 0;
-  standardPolys[3].getCoeffs()[2] = 0;
-  standardPolys[3].getCoeffs()[3] = -4;
-  standardPolys[3].getCoeffs()[4] = 7;
-  standardPolys[3].getCoeffs()[5] = -3;
-
-  standardPolys[4].getCoeffs()[0] = 0;
-  standardPolys[4].getCoeffs()[1] = 0;
-  standardPolys[4].getCoeffs()[2] = 0.5;
-  standardPolys[4].getCoeffs()[3] = -1.5;
-  standardPolys[4].getCoeffs()[4] = 1.5;
-  standardPolys[4].getCoeffs()[5] = -0.5;
-
-  standardPolys[5].getCoeffs()[0] = 0;
-  standardPolys[5].getCoeffs()[1] = 0;
-  standardPolys[5].getCoeffs()[2] = 0;
-  standardPolys[5].getCoeffs()[3] = 0.5;
-  standardPolys[5].getCoeffs()[4] = -1;
-  standardPolys[5].getCoeffs()[5] = 0.5;
-
-  std::vector<Poly> scaledPolys(6);
-  double tmpa(1. / (b - a));
-  double tmpb(-a / (b - a));
-  for (unsigned i = 0; i < 6; ++i) {
-    standardPolys[i].valueLinearPoly(tmpa, tmpb, scaledPolys[i]);
-  }
-  scaledPolys[2] *= 1. / tmpa;
-  scaledPolys[3] *= 1. / tmpa;
-  scaledPolys[4] *= 1. / tmpa / tmpa;
-  scaledPolys[5] *= 1. / tmpa / tmpa;
-
-  p.zero();
-  p += (scaledPolys[0] *= va);
-  p += (scaledPolys[1] *= vb);
-  p += (scaledPolys[2] *= da);
-  p += (scaledPolys[3] *= db);
-  p += (scaledPolys[4] *= dda);
-  p += (scaledPolys[5] *= ddb);
-
-  return;
-}
-
-void Interpolation::pieceLinearInterpol(const double& a,
-                                        const double& b,
-                                        const double& va,
-                                        const double& vb,
-                                        Poly& p) {
-  double k = (vb - va) / (b - a);
-  std::vector<double> tmp(2, 0);
-  tmp[0] += va;
-  tmp[0] += k * (-a);
-  tmp[1] = k;
-  p.reinit(tmp);
-}
-
-void Interpolation::piecewiseLinear(const std::vector<double>& x,
-                                    const std::vector<double>& y,
-                                    PiecewisePoly& ps) {
-  std::vector<double>::const_iterator pxp1 = x.begin();
-  std::vector<double>::const_iterator px = (pxp1++);
-  std::vector<double>::const_iterator pyp1 = y.begin();
-  std::vector<double>::const_iterator py = (pyp1++);
-  ps.clear();
-  Poly tmpp;
-  for (; pxp1 != x.end(); ++pxp1, ++pyp1, ++px, ++py) {
-    pieceLinearInterpol(*px, *pxp1, *py, *pyp1, tmpp);
-    ps.get_x().push_back(*px);
-    ps.get_p().push_back(tmpp);
-  }
-  ps.get_x().push_back(*px);
-}
-
-void Interpolation::pieceSecondDerivativeInterpol(const double& a,
-                                                  const double& b,
-                                                  const double& va,
-                                                  const double& vb,
-                                                  const double& dda,
-                                                  const double& ddb,
-                                                  Poly& p) {
-  std::vector<double> tmp(2, 0);
-  double k = (vb - va) / (b - a);
-  tmp[0] += va;
-  tmp[0] += k * (-a);
-  tmp[1] = k;
-  p.reinit(tmp);
-
-  tmp[1] = 1;
-  tmp[0] = -a;
-  Poly l1(tmp);
-  tmp[0] = -b;
-  Poly l2(tmp);
-  l1 *= l2;
-
-  tmp[1] = 1. / 6. / (a - b);
-  tmp[0] = 1. / 6. * (a - 2 * b) / (a - b);
-  Poly p1(tmp);
-  p1 *= l1;
-  p1 *= dda;
-
-  tmp[1] *= -1;
-  tmp[0] = 1. / 6. * (b - 2 * a) / (b - a);
-  Poly p2(tmp);
-  p2 *= l1;
-  p2 *= ddb;
-
-  p += p1;
-  p += p2;
-}
-
-void Interpolation::secondDerivativeInterpol(
-    const std::vector<double>::const_iterator& xbegin,
-    const std::vector<double>::const_iterator& xend,
-    const std::vector<double>::const_iterator& vbegin,
-    const std::vector<double>::const_iterator& ddbegin,
-    PiecewisePoly& ps) {
-  ps.clear();
-  std::vector<double>::const_iterator xb(xbegin), vb(vbegin), ddb(ddbegin);
-  std::vector<double>::const_iterator xp(xbegin), vp(vbegin), ddp(ddbegin);
-  ++xp, ++vp, ++ddp;
-  while (xp != xend) {
-    ps.get_x().push_back(*xb);
-    Poly tmpp;
-    pieceSecondDerivativeInterpol(*(xb++), *(xp++), *(vb++), *(vp++), *(ddb++),
-                                  *(ddp++), tmpp);
-    ps.get_p().push_back(tmpp);
-  }
-  ps.get_x().push_back(*xb);
-}
-
-void Interpolation::pieceHermiteInterpol(const double& a,
-                                         const double& b,
-                                         const double& va,
-                                         const double& vb,
-                                         const double& da,
-                                         const double& db,
-                                         Poly& p) {
-  std::vector<double> tmp(2, 0);
-  Poly t;
-  tmp[0] = (-2 * a / (b - a) + 1);
-  tmp[1] = (2 / (b - a));
-  Poly a0(tmp);
-  tmp[0] = -b / (a - b);
-  tmp[1] = 1 / (a - b);
-  t.reinit(tmp);
-  a0 *= t;
-  a0 *= t;
-  tmp[0] = -2 * b / (a - b) + 1;
-  tmp[1] = 2 / (a - b);
-  Poly a1(tmp);
-  tmp[0] = -a / (b - a);
-  tmp[1] = 1 / (b - a);
-  t.reinit(tmp);
-  a1 *= t;
-  a1 *= t;
-
-  tmp[0] = -a;
-  tmp[1] = 1;
-  Poly b0(tmp);
-  tmp[0] = -b / (a - b);
-  tmp[1] = 1 / (a - b);
-  t.reinit(tmp);
-  b0 *= t;
-  b0 *= t;
-  tmp[0] = -b;
-  tmp[1] = 1;
-  Poly b1(tmp);
-  tmp[0] = -a / (b - a);
-  tmp[1] = 1 / (b - a);
-  t.reinit(tmp);
-  b1 *= t;
-  b1 *= t;
-
-  p.zero();
-  a0 *= va;
-  a1 *= vb;
-  b0 *= da;
-  b1 *= db;
-  p += a0;
-  p += a1;
-  p += b0;
-  p += b1;
-}
-
-// lbegin--lend, stores lambda
-// ubegin--uend, stores mu
-bool Interpolation::solverForSplinePeriodic(
-    const std::vector<double>::const_iterator& lbegin,
-    const std::vector<double>::const_iterator& lend,
-    const std::vector<double>::iterator& ubegin,
-    const std::vector<double>::iterator& uend) {
-  std::vector<double> la, lb, lc, ld;
-  for (std::vector<double>::const_iterator i = lbegin; i != lend; ++i) {
-    la.push_back(1 - *i);
-    lb.push_back(2);
-    lc.push_back(*i);
-    ld.push_back(0);
-  }
-  //  ld.front() = 1 - *lbegin;
-  ld[0] = 1 - lc[0];
-  int num = ld.size();
-  ld[num - 2] = lc[num - 2];
-  ld[num - 1] = lb[num - 1];
-
-  std::vector<double>::iterator pu = ubegin;
-  std::vector<double>::iterator pu_1 = pu++;
-  for (int i = 1; i < num - 1; ++i, ++pu, ++pu_1) {
-    if (lb[i - 1] == 0) {
-      return false;
-    }
-    double ratio = -la[i] / lb[i - 1];
-    lb[i] += lc[i - 1] * ratio;
-    ld[i] += ld[i - 1] * ratio;
-    *pu += *pu_1 * ratio;
-  }
-  int i = num - 1;
-  if (lb[i - 1] == 0) {
-    return false;
-  }
-  double ratio = -la[i] / lb[i - 1];
-  lb[i] += ld[i - 1] * ratio;
-  ld[i] = lb[i];
-  *pu += *pu_1 * ratio;
-
-  //   std::cout << lc.back() << std::endl;
-  //   std::cout << lc.front() << std::endl;
-  ratio = -lb[0] / lc.back();
-  ld[0] += ratio * ld[num - 1];
-  *ubegin += ratio * *pu;
-  lb[0] = 0;
-
-  //   std::cout << ld.size() << std::endl;
-  ld.insert(ld.begin(), ld.back());
-  //   std::cout << ld.size() << std::endl;
-  ld.pop_back();
-  //   std::cout << ld.size() << std::endl;
-  double before = 0.;
-  //   std::cout << "##############################" << std::endl;
-  //   std::copy(ubegin, uend, std::ostream_iterator<double >(std::cout, "\n"));
-  //   std::cout << "##############################" << std::endl;
-  for (std::vector<double>::iterator tmpu = ubegin; tmpu != uend; ++tmpu) {
-    if (tmpu == ubegin) {
-      before = *tmpu;
-      *tmpu = *pu;
-    } else {
-      double beforetmp = *tmpu;
-      *tmpu = before;
-      before = beforetmp;
-    }
-  }
-  //   std::copy(ubegin, uend, std::ostream_iterator<double >(std::cout, "\n"));
-  //   std::cout << "##############################" << std::endl;
-  lc.insert(lc.begin(), *lbegin);
-  lc.pop_back();
-  lc.back() = ld.back();
-  lb.insert(lb.begin(), 0.);
-  lb.pop_back();
-
-  pu = ubegin;
-  pu++;
-  pu_1 = pu++;
-  for (int i = 2; i < num - 1; ++i, ++pu, ++pu_1) {
-    if (lc[i - 1] == 0) {
-      return false;
-    }
-    double ratio = -lb[i] / lc[i - 1];
-    ld[i] += ld[i - 1] * ratio;
-    *pu += *pu_1 * ratio;
-  }
-  i = num - 1;
-  if (lc[i - 1] == 0) {
-    return false;
-  }
-  ratio = -lb[i] / lc[i - 1];
-  lc[i] += ld[i - 1] * ratio;
-  ld[i] = lc[i];
-  *pu += *pu_1 * ratio;
-
-  *pu /= lc[num - 1];
-  for (int i = num - 2; i >= 0; --i, --pu_1) {
-    *pu_1 = (*pu_1 - *pu * ld[i]) / lc[i];
-  }
-
-  return true;
-}
-
-bool Interpolation::splinePeriodic(const std::vector<double>& x,
-                                   const std::vector<double>& y,
-                                   PiecewisePoly& ps) {
-  std::vector<double> lambda(x.size() - 1);
-  std::vector<double> mu(x.size() - 1);
-  std::vector<double> dx;
-
-  std::vector<double>::const_iterator i = x.begin();
-  std::vector<double>::const_iterator j = i;
-  for (++j; j != x.end(); ++i, ++j) {
-    dx.push_back(*j - *i);
-  }
-  lambda[0] = dx.back() / (dx.back() + dx.front());
-  mu[0] = 3 * ((1 - lambda.front()) / dx.back() * (y[0] - y[y.size() - 2]) +
-               lambda.front() / dx.front() * (y[1] - y[0]));
-  for (unsigned i = 1; i < lambda.size(); ++i) {
-    lambda[i] = dx[i - 1] / (dx[i - 1] + dx[i]);
-    mu[i] = 3 * ((1 - lambda[i]) / dx[i - 1] * (y[i] - y[i - 1]) +
-                 lambda[i] / dx[i] * (y[i + 1] - y[i]));
-  }
-
-  bool tag = solverForSplinePeriodic(lambda.begin(), lambda.end(), mu.begin(),
-                                     mu.end());
-  if (!tag) {
-    return false;
-  }
-
-  ps.get_x() = x;
-  ps.get_p().clear();
-  for (unsigned i = 0; i < x.size() - 2; ++i) {
-    Poly tmpp;
-    pieceHermiteInterpol(x[i], x[i + 1], y[i], y[i + 1], mu[i], mu[i + 1],
-                         tmpp);
-    ps.get_p().push_back(tmpp);
-  }
-  Poly tmpp;
-  pieceHermiteInterpol(x[x.size() - 2], x[x.size() - 2 + 1], y[x.size() - 2],
-                       y[x.size() - 2 + 1], mu[x.size() - 2], mu[0], tmpp);
-  ps.get_p().push_back(tmpp);
-  return true;
-}
-
-bool Interpolation::spline(const std::vector<double>& x,
-                           const std::vector<double>& y,
-                           PiecewisePoly& ps) {
-  std::vector<double> lambda(x.size());
-  std::vector<double> mu(x.size());
-  std::vector<double> m(x.size());
-  std::vector<double> dx;
-
-  std::vector<double>::const_iterator i = x.begin();
-  std::vector<double>::const_iterator j = i;
-  for (++j; j != x.end(); ++i, ++j) {
-    dx.push_back(*j - *i);
-  }
-
-  lambda.front() = 1;
-  lambda.back() = 0;
-  mu.front() = 3 * ((*(++(y.begin()))) - y.front()) / dx.front();
-  mu.back() = 3 * (y.back() - (*(++(y.rbegin())))) / dx.back();
-  std::vector<double>::iterator pdx0 = dx.begin();
-  std::vector<double>::iterator pdx1 = pdx0;
-  ++pdx1;
-  std::vector<double>::const_iterator py0 = y.begin();
-  std::vector<double>::const_iterator py1 = py0;
-  ++py1;
-  std::vector<double>::const_iterator py2 = py1;
-  ++py2;
-  std::vector<double>::iterator plambda = lambda.begin();
-  ++plambda;
-  std::vector<double>::iterator pmu = mu.begin();
-  ++pmu;
-  for (; py2 != y.end();
-       ++pdx0, ++pdx1, ++py0, ++py1, ++py2, ++plambda, ++pmu) {
-    *plambda = *pdx0 / (*pdx0 + *pdx1);
-    *pmu = 3 * ((1 - *plambda) / *pdx0 * (*py1 - *py0) +
-                *plambda / *pdx1 * (*py2 - *py1));
-  }
-
-  //   for (unsigned i = 1; i < x.size()-1; ++i){
-  //     lambda[i] = dx[i-1] / (dx[i-1] + dx[i]);
-  //     mu[i] = 3 * ((1-lambda[i]) / dx[i-1] * (y[i] - y[i-1]) +
-  // 		 lambda[i] / dx[i] * (y[i+1] - y[i]));
-  //   }
-
-  double bet;
-  std::vector<double> gam(x.size());
-  m[0] = mu[0] / (bet = 2);
-  for (unsigned j = 1; j < x.size(); ++j) {
-    gam[j] = lambda[j - 1] / bet;
-    bet = 2 - (1 - lambda[j]) * gam[j];
-    if (bet == 0) {
-      std::cerr << "a error in triangle solver\n";
-      return false;
-    }
-    m[j] = (mu[j] - (1 - lambda[j]) * m[j - 1]) / bet;
-  }
-  for (int j = x.size() - 2; j >= 0; --j) {
-    m[j] -= gam[j + 1] * m[j + 1];
-  }
-
-  ps.clear();
-  ps.get_x() = x;
-  std::vector<double>::const_iterator px0 = x.begin();
-  std::vector<double>::const_iterator px1 = px0;
-  ++px1;
-  py0 = y.begin();
-  py1 = py0;
-  ++py1;
-  std::vector<double>::iterator pm0 = m.begin();
-  std::vector<double>::iterator pm1 = pm0;
-  ++pm1;
-  for (; px1 != x.end(); ++px0, ++px1, ++py0, ++py1, ++pm0, ++pm1) {
-    Poly tmpp;
-    pieceHermiteInterpol(*px0, *px1, *py0, *py1, *pm0, *pm1, tmpp);
-    ps.get_p().push_back(tmpp);
-  }
-
-  return true;
-}
-
-bool Interpolation::spline(const std::vector<double>::const_iterator xbegin,
-                           const std::vector<double>::const_iterator xend,
-                           const std::vector<double>::const_iterator ybegin,
-                           PiecewisePoly& ps) {
-  int xsize = 0;
-  std::vector<double>::const_iterator itmp = xbegin;
-  while (itmp++ != xend) {
-    ++xsize;
-  }
-
-  std::vector<double> lambda(xsize);
-  std::vector<double> mu(xsize);
-  std::vector<double> m(xsize);
-  std::vector<double> dx;
-
-  // setup linear system
-  std::vector<double>::const_iterator i = xbegin;
-  std::vector<double>::const_iterator j = i;
-  for (++j; j != xend; ++i, ++j) {
-    dx.push_back(*j - *i);
-  }
-  lambda.front() = 1;
-  lambda.back() = 0;
-  mu.front() = 3 * ((*(++(itmp = ybegin))) - *ybegin) / dx.front();
-  std::vector<double>::iterator pdx0 = dx.begin();
-  std::vector<double>::iterator pdx1 = pdx0;
-  ++pdx1;
-  std::vector<double>::const_iterator py0 = ybegin;
-  std::vector<double>::const_iterator py1 = py0;
-  ++py1;
-  std::vector<double>::const_iterator py2 = py1;
-  ++py2;
-  std::vector<double>::iterator plambda = lambda.begin();
-  ++plambda;
-  std::vector<double>::iterator pmu = mu.begin();
-  ++pmu;
-  for (; pdx1 != dx.end();
-       ++pdx0, ++pdx1, ++py0, ++py1, ++py2, ++plambda, ++pmu) {
-    *plambda = *pdx0 / (*pdx0 + *pdx1);
-    *pmu = 3 * ((1 - *plambda) / *pdx0 * (*py1 - *py0) +
-                *plambda / *pdx1 * (*py2 - *py1));
-  }
-  mu.back() = 3 * (*py1 - *py0) / dx.back();
-
-  // solve tridiangonal linear system
-  double bet;
-  std::vector<double> gam(xsize);
-  m[0] = mu[0] / (bet = 2);
-  for (int j = 1; j < xsize; ++j) {
-    gam[j] = lambda[j - 1] / bet;
-    bet = 2 - (1 - lambda[j]) * gam[j];
-    if (bet == 0) {
-      std::cerr << "a error in triangle solver\n";
-      return false;
-    }
-    m[j] = (mu[j] - (1 - lambda[j]) * m[j - 1]) / bet;
-  }
-  for (int j = xsize - 2; j >= 0; --j) {
-    m[j] -= gam[j + 1] * m[j + 1];
-  }
-
-  // make piecewise polynominal
-  ps.get_p().clear();
-  ps.get_x().resize(xsize);
-  std::copy(xbegin, xend, ps.get_x().begin());
-  std::vector<double>::const_iterator px0 = xbegin;
-  std::vector<double>::const_iterator px1 = px0;
-  ++px1;
-  py0 = ybegin;
-  py1 = py0;
-  ++py1;
-  std::vector<double>::iterator pm0 = m.begin();
-  std::vector<double>::iterator pm1 = pm0;
-  ++pm1;
-  for (; px1 != xend; ++px0, ++px1, ++py0, ++py1, ++pm0, ++pm1) {
-    Poly tmpp;
-    pieceHermiteInterpol(*px0, *px1, *py0, *py1, *pm0, *pm1, tmpp);
-    ps.get_p().push_back(tmpp);
-  }
-
-  return true;
-}
-
-// void tridag(float a[], float b[], float c[], float r[], float u[],
-// 	    unsigned long n)
-// //Solves for a vector u[1..n] the tridiagonal linear set given by equation
-// (2.4.1). a[1..n],
-// //  b[1..n], c[1..n], and r[1..n] are input vectors and are not modified.
-// {
-//   unsigned long j;
-//   float bet,*gam;
-//   gam=vector(1,n); //One vector of workspace, gam is needed.
-//   //If this happens then you should rewrite your equations as a set of order
-//   N-1, w ith u2
-//   //trivially eliminated.
-//   u[0]=r[0]/(bet=2);
-//   for (j=1;j<=n;j++) { //Decomposition and forward substitution.
-//     gam[j]=c[j-1]/bet;
-//     bet=2-a[j]*gam[j];
-//     if (bet == 0.0) //nrerror("Error 2 in tridag"); //Algorithm fails; see be
-//     u[j]=(r[j]-a[j]*u[j-1])/bet; //low.
-//   }
-//   for (j=(n-1);j>=1;j--)
-//     u[j] -= gam[j+1]*u[j+1]; //Backsubstitution.
-//   free_vector(gam,1,n);
-// }
diff --git a/source/md/src/LJInter.cc b/source/md/src/LJInter.cc
deleted file mode 100644
index e4c4f1097e..0000000000
--- a/source/md/src/LJInter.cc
+++ /dev/null
@@ -1,79 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "LJInter.h"
-
-#include <cmath>
-
-#include "common.h"
-
-LJInter::LJInter(const VALUETYPE& c6_,
-                 const VALUETYPE& c12_,
-                 const VALUETYPE& rc_)
-    : c6(6. * c6_), c12(12. * c12_), rc(rc_), rc2(rc * rc) {
-  one_over_6 = 1. / 6.;
-  one_over_12 = 1. / 12.;
-  VALUETYPE rc6 = rc2 * rc2 * rc2;
-  one_over_rc6 = 1. / rc6;
-  one_over_rc12 = 1. / rc6 / rc6;
-}
-
-void LJInter::lj_inner(VALUETYPE& ae, VALUETYPE& af, const VALUETYPE& r2) {
-  VALUETYPE rinv = 1. / sqrt(r2);
-  VALUETYPE rinv2 = rinv * rinv;
-  VALUETYPE rinv6 = rinv2 * rinv2 * rinv2;
-  VALUETYPE vvdw6 = c6 * rinv6;
-  VALUETYPE vvdw12 = c12 * rinv6 * rinv6;
-  ae = (vvdw12 - c12 * one_over_rc12) * one_over_12 -
-       (vvdw6 - c6 * one_over_rc6) * one_over_6;
-  af = (vvdw12 - vvdw6) * rinv2;
-}
-
-void LJInter::compute(VALUETYPE& ener,
-                      vector<VALUETYPE>& force,
-                      vector<VALUETYPE>& virial,
-                      const vector<VALUETYPE>& coord,
-                      const vector<int>& atype,
-                      const SimulationRegion<VALUETYPE>& region,
-                      const vector<vector<int> >& nlist) {
-  for (unsigned ii = 0; ii < nlist.size(); ++ii) {
-    for (unsigned _ = 0; _ < nlist[ii].size(); ++_) {
-      int jj = nlist[ii][_];
-      if (jj < ii) {
-        continue;
-      }
-      VALUETYPE diff[3];
-      region.diffNearestNeighbor(&coord[ii * 3], &coord[jj * 3], diff);
-      VALUETYPE r2 = diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2];
-      if (r2 < rc2) {
-        VALUETYPE ae, af;
-        lj_inner(ae, af, r2);
-        for (int dd = 0; dd < 3; ++dd) {
-          force[ii * 3 + dd] += af * diff[dd];
-        }
-        for (int dd = 0; dd < 3; ++dd) {
-          force[jj * 3 + dd] -= af * diff[dd];
-        }
-        ener += ae;
-        for (int dd0 = 0; dd0 < 3; ++dd0) {
-          for (int dd1 = 0; dd1 < 3; ++dd1) {
-            virial[dd0 * 3 + dd1] -= 0.5 * diff[dd0] * af * diff[dd1];
-          }
-        }
-      }
-    }
-  }
-
-  // for (int ii = 0; ii < natoms; ++ii){
-  //   for (int jj = ii+1; jj < natoms; ++jj){
-  //     VALUETYPE diff[3];
-  //     for (int dd = 0; dd < 3; ++dd) diff[dd] = coord[ii*3+dd] -
-  //     coord[jj*3+dd]; diff_pbc (diff, box); VALUETYPE r2 = diff[0] * diff[0]
-  //     + diff[1] * diff[1] + diff[2] * diff[2]; if (r2 < rc2) {
-  // 	VALUETYPE ae, af;
-  // 	lj_inner (ae, af, r2);
-  // 	for (int dd = 0; dd < 3; ++dd) force[ii*3+dd] += af * diff[dd];
-  // 	for (int dd = 0; dd < 3; ++dd) force[jj*3+dd] -= af * diff[dd];
-  // 	ener += ae;
-  //     }
-  //   }
-  // }
-}
diff --git a/source/md/src/LJTab.cc b/source/md/src/LJTab.cc
deleted file mode 100644
index a605c172fe..0000000000
--- a/source/md/src/LJTab.cc
+++ /dev/null
@@ -1,28 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "LJTab.h"
-
-LJTab::LJTab(const VALUETYPE& c6, const VALUETYPE& c12, const VALUETYPE& rc) {
-  VALUETYPE rcp = rc + 1;
-  VALUETYPE hh = 2e-3;
-  int nn = rcp / hh;
-  vector<VALUETYPE> tab;
-  VALUETYPE rc6 = rc * rc * rc * rc * rc * rc;
-  VALUETYPE one_over_rc6 = 1. / rc6;
-  VALUETYPE one_over_rc12 = 1. / rc6 / rc6;
-  for (int ii = 0; ii < nn; ++ii) {
-    VALUETYPE xx = ii * hh;
-    VALUETYPE value, deriv;
-    if (xx <= rc) {
-      VALUETYPE xx3 = xx * xx * xx;
-      VALUETYPE xx6 = xx3 * xx3;
-      VALUETYPE xx12 = xx6 * xx6;
-      value = -c6 / xx6 + c12 / xx12 + c6 * one_over_rc6 - c12 * one_over_rc12;
-      deriv = -(6. * c6 / xx6 - 12. * c12 / xx12) / xx;
-    } else {
-      value = deriv = 0;
-    }
-    tab.push_back(value);
-    tab.push_back(deriv);
-  }
-  lj_tab.reinit(rcp, hh, tab);
-}
diff --git a/source/md/src/MaxShift.cc b/source/md/src/MaxShift.cc
deleted file mode 100644
index 484078d51a..0000000000
--- a/source/md/src/MaxShift.cc
+++ /dev/null
@@ -1,43 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "MaxShift.h"
-
-#include <cassert>
-
-#include "common.h"
-
-MaxShift::MaxShift(const vector<VALUETYPE>& dcoord, const VALUETYPE& shell_) {
-  record = dcoord;
-  shell = shell_;
-  max_allow2 = shell * 0.5 * shell * 0.5;
-}
-
-VALUETYPE
-MaxShift::max_shift2(const vector<VALUETYPE>& coord,
-                     const SimulationRegion<VALUETYPE>& region) {
-  assert(coord.size() == record.size());
-  int natoms = coord.size() / 3;
-
-  VALUETYPE maxv = 0;
-
-  for (int ii = 0; ii < natoms; ++ii) {
-    VALUETYPE diff[3];
-    region.diffNearestNeighbor(&coord[ii * 3], &record[ii * 3], diff);
-    VALUETYPE r2 = diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2];
-    if (r2 > maxv) {
-      maxv = r2;
-    }
-  }
-
-  return maxv;
-}
-
-bool MaxShift::rebuild(const vector<VALUETYPE>& coord,
-                       const SimulationRegion<VALUETYPE>& region) {
-  VALUETYPE maxv2 = max_shift2(coord, region);
-  if (maxv2 > max_allow2) {
-    record = coord;
-    return true;
-  } else {
-    return false;
-  }
-}
diff --git a/source/md/src/Poly.cpp b/source/md/src/Poly.cpp
deleted file mode 100644
index 49d2897f14..0000000000
--- a/source/md/src/Poly.cpp
+++ /dev/null
@@ -1,288 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "Poly.h"
-
-bool PiecewisePoly::valid() const {
-  if (x.size() != p.size() + 1) {
-    return false;
-  }
-  std::vector<double>::const_iterator i = x.begin();
-  std::vector<double>::const_iterator j = x.begin();
-  for (++j; j != x.end(); ++i, ++j) {
-    if (*i > *j) {
-      return false;
-    }
-  }
-  return true;
-}
-
-double PiecewisePoly::value(const double& xx) const {
-  unsigned begin = 0;
-  unsigned end = x.size() - 1;
-  unsigned mid = end / 2;
-  if (end == begin) {
-    return 0;
-  }
-  while (end - begin > 1) {
-    if (xx < x[mid]) {
-      end = mid;
-      mid = (begin + end) / 2;
-    } else {
-      begin = mid;
-      mid = (begin + end) / 2;
-    }
-  }
-  return p[begin].value(xx);
-}
-
-double PiecewisePoly::value_periodic(const double& xx_) const {
-  double xx(xx_);
-  double T = x.back() - x.front();
-  if (xx < x.front()) {
-    while ((xx += T) < x.front())
-      ;
-  } else if (xx >= x.back()) {
-    while ((xx -= T) >= x.back())
-      ;
-  }
-  unsigned begin = 0;
-  unsigned end = x.size() - 1;
-  unsigned mid = end / 2;
-  if (end == begin) {
-    return 0;
-  }
-  while (end - begin > 1) {
-    if (xx < x[mid]) {
-      end = mid;
-      mid = (begin + end) / 2;
-    } else {
-      begin = mid;
-      mid = (begin + end) / 2;
-    }
-  }
-  return p[begin].value(xx);
-}
-
-double PiecewisePoly::value(const double& xx,
-                            unsigned& begin,
-                            unsigned& end) const {
-  if (end <= begin) {
-    return 0;
-  }
-  if (end - begin == 1) {
-    return p[begin].value(xx);
-  }
-  unsigned mid = (begin + end) / 2;
-  while (end - begin > 1) {
-    if (xx < x[mid]) {
-      end = mid;
-      mid = (begin + end) / 2;
-    } else {
-      begin = mid;
-      mid = (begin + end) / 2;
-    }
-  }
-  return p[begin].value(xx);
-}
-
-void PiecewisePoly::value(const unsigned& xbegin,
-                          const unsigned& xend,
-                          const std::vector<double>& r,
-                          const unsigned& rbegin,
-                          const unsigned& rend,
-                          std::vector<double>& y) const {
-  unsigned xbegin1 = xbegin;
-  unsigned xend1 = xend;
-  if (rend - rbegin <= 1) {
-    y[rbegin] = value(r[rbegin], xbegin1, xend1);
-    xbegin1 = xbegin;
-    xend1 = xend;
-    y[rend] = value(r[rend], xbegin1, xend1);
-  } else {
-    unsigned rmid = (rbegin + rend) / 2;
-    y[rmid] = value(r[rmid], xbegin1, xend1);
-    value(xbegin, xend1, r, rbegin, rmid - 1, y);
-    value(xbegin1, xend, r, rmid + 1, rend, y);
-  }
-}
-
-// suppose that
-void PiecewisePoly::value(const std::vector<double>& r,
-                          std::vector<double>& y) const {
-  y.resize(r.size());
-  value(0, x.size() - 1, r, 0, r.size() - 1, y);
-}
-
-// suppose that
-void PiecewisePoly::value_periodic(const std::vector<double>& r,
-                                   std::vector<double>& y) const {
-  std::vector<double> tmpr;
-  std::vector<double> tmpy;
-  std::vector<std::vector<double> > values;
-  unsigned presentEnd(0), presentStart(0);
-  double T = x.back() - x.front();
-
-  while (presentEnd < r.size()) {
-    tmpr.clear();
-    presentStart = presentEnd;
-    double shift = 0;
-    if (r[presentStart] < x.front()) {
-      while (r[presentStart] + (shift += T) < x.front())
-        ;
-    } else if (r[presentStart] >= x.back()) {
-      while (r[presentStart] + (shift -= T) >= x.back())
-        ;
-    }
-    while (presentEnd < r.size() && r[presentEnd] + shift >= x.front() &&
-           r[presentEnd] + shift < x.back()) {
-      tmpr.push_back(r[presentEnd++] + shift);
-    }
-    // while (presentEnd < r.size() && r[presentEnd] - r[presentStart] < T){
-    //   tmpr.push_back (r[presentEnd++]);
-    // }
-    // for (unsigned i = 0; i < tmpr.size(); ++i){
-    //   tmpr[i] += shift;
-    // }
-    value(tmpr, tmpy);
-    values.push_back(tmpy);
-  }
-
-  y.clear();
-  for (unsigned i = 0; i < values.size(); ++i) {
-    y.insert(y.end(), values[i].begin(), values[i].end());
-  }
-}
-
-Poly& Poly::valueLinearPoly(const double& a_, const double& b_, Poly& p) {
-  std::vector<double> tmp(2, a_);
-  tmp[0] = b_;
-  Poly axb(tmp);
-  p.one();
-  p *= a.back();
-  for (int i = order - 1; i >= 0; i--) {
-    (p *= axb) += a[i];
-  }
-  return p;
-}
-
-double Poly::value(const double& x) const {
-  double value = a[a.size() - 1];
-  for (int i = a.size() - 2; i >= 0; --i) {
-    value = value * x + a[i];
-  }
-  return value;
-}
-
-Poly::Poly() : a(1, 0.), order(0.) {}
-
-Poly::Poly(const std::vector<double>& out) : a(out) { order = out.size() - 1; }
-
-void Poly::reinit(const std::vector<double>& out) {
-  a = out;
-  order = out.size() - 1;
-}
-
-Poly& Poly::operator=(const Poly& p) {
-  a = p.a;
-  order = p.order;
-  return *this;
-}
-
-Poly& Poly::operator+=(const Poly& p) {
-  if (p.a.size() > a.size()) {
-    a.resize(p.a.size(), 0);
-    order = p.order;
-    for (unsigned i = 0; i <= order; i++) {
-      a[i] += p.a[i];
-    }
-  } else {
-    for (unsigned i = 0; i <= p.order; i++) {
-      a[i] += p.a[i];
-    }
-  }
-  return *this;
-}
-
-Poly& Poly::operator+=(const double& b) {
-  a[0] += b;
-  return *this;
-}
-
-Poly& Poly::derivative() {
-  if (order == 0) {
-    a[0] = 0;
-    return *this;
-  }
-  for (unsigned i = 0; i < order; i++) {
-    a[i] = a[i + 1] * (i + 1);
-  }
-  order--;
-  a.pop_back();
-  return *this;
-}
-
-Poly& Poly::operator*=(const double& scale) {
-  if (scale == 0) {
-    order = 0;
-    a.resize(1);
-    a[0] = 0;
-  } else {
-    for (std::vector<double>::iterator i = a.begin(); i != a.end(); i++) {
-      *i *= scale;
-    }
-  }
-  return *this;
-}
-
-Poly& Poly::operator*=(const Poly& p) {
-  std::vector<double> a1(a);
-  unsigned order1(order);
-
-  order += p.order;
-  a.resize(order + 1, 0);
-
-  for (std::vector<double>::iterator i = a.begin(); i != a.end(); i++) {
-    *i *= p.a[0];
-  }
-  if (p.order >= 1) {
-    for (unsigned i = 1; i <= p.order; i++) {
-      for (unsigned j = 0; j <= order1; j++) {
-        a[i + j] += a1[j] * p.a[i];
-      }
-    }
-  }
-  return *this;
-}
-
-void Poly::print() {
-  for (unsigned i = 0; i <= order; i++) {
-    std::cout << a[i] << '\t';
-  }
-  std::cout << std::endl;
-}
-
-void Poly::print(const std::string& x) {
-  std::cout << a[0];
-  for (unsigned i = 1; i <= order; i++) {
-    std::cout << " + " << a[i] << x << "^" << i;
-  }
-  std::cout << std::endl;
-}
-
-void Poly::printCode(const std::string& x) {
-  std::cout.precision(16);
-  if (order == 0) {
-    std::cout << a[0] << std::endl;
-    return;
-  }
-
-  for (unsigned i = 0; i < order - 1; i++) {
-    std::cout << "(";
-  }
-  std::vector<double>::reverse_iterator p = a.rbegin();
-  std::cout << *(p++) << " * " << x << " + ";
-  std::cout << *(p++);
-  for (; p != a.rend(); p++) {
-    std::cout << ") * " << x << " + " << *p;
-  }
-  std::cout << std::endl;
-}
diff --git a/source/md/src/RandomGenerator_MT19937.cc b/source/md/src/RandomGenerator_MT19937.cc
deleted file mode 100644
index 12f445874a..0000000000
--- a/source/md/src/RandomGenerator_MT19937.cc
+++ /dev/null
@@ -1,180 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include <cmath>
-
-#include "RandomGenerator.h"
-
-/*
-   A C-program for MT19937, with initialization improved 2002/1/26.
-   Coded by Takuji Nishimura and Makoto Matsumoto.
-
-   Before using, initialize the state by using init_genrand(seed)
-   or init_by_array(init_key, key_length).
-
-   Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
-   All rights reserved.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions
-   are met:
-
-     1. Redistributions of source code must retain the above copyright
-        notice, this list of conditions and the following disclaimer.
-
-     2. Redistributions in binary form must reproduce the above copyright
-        notice, this list of conditions and the following disclaimer in the
-        documentation and/or other materials provided with the distribution.
-
-     3. The names of its contributors may not be used to endorse or promote
-        products derived from this software without specific prior written
-        permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER
-   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-   Any feedback is very welcome.
-   http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
-   email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
-*/
-
-/* Period parameters */
-#define N 624
-#define M 397
-#define MATRIX_A 0x9908b0dfUL   /* constant vector a */
-#define UPPER_MASK 0x80000000UL /* most significant w-r bits */
-#define LOWER_MASK 0x7fffffffUL /* least significant r bits */
-
-static unsigned long mt[N]; /* the array for the state vector  */
-static int mti = N + 1;     /* mti==N+1 means mt[N] is not initialized */
-
-// using namespace RandomGenerator_MT19937;
-
-/* initializes mt[N] with a seed */
-void RandomGenerator_MT19937::init_genrand(unsigned long s) {
-  mt[0] = s & 0xffffffffUL;
-  for (mti = 1; mti < N; mti++) {
-    mt[mti] = (1812433253UL * (mt[mti - 1] ^ (mt[mti - 1] >> 30)) + mti);
-    /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
-    /* In the previous versions, MSBs of the seed affect   */
-    /* only MSBs of the array mt[].                        */
-    /* 2002/01/09 modified by Makoto Matsumoto             */
-    mt[mti] &= 0xffffffffUL;
-    /* for >32 bit machines */
-  }
-}
-
-/* initialize by an array with array-length */
-/* init_key is the array for initializing keys */
-/* key_length is its length */
-/* slight change for C++, 2004/2/26 */
-void RandomGenerator_MT19937::init_by_array(unsigned long init_key[],
-                                            int key_length) {
-  int i, j, k;
-  init_genrand(19650218UL);
-  i = 1;
-  j = 0;
-  k = (N > key_length ? N : key_length);
-  for (; k; k--) {
-    mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >> 30)) * 1664525UL)) +
-            init_key[j] + j; /* non linear */
-    mt[i] &= 0xffffffffUL;   /* for WORDSIZE > 32 machines */
-    i++;
-    j++;
-    if (i >= N) {
-      mt[0] = mt[N - 1];
-      i = 1;
-    }
-    if (j >= key_length) {
-      j = 0;
-    }
-  }
-  for (k = N - 1; k; k--) {
-    mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >> 30)) * 1566083941UL)) -
-            i;             /* non linear */
-    mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
-    i++;
-    if (i >= N) {
-      mt[0] = mt[N - 1];
-      i = 1;
-    }
-  }
-
-  mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */
-}
-
-/* generates a random number on [0,0xffffffff]-interval */
-unsigned long RandomGenerator_MT19937::genrand_int32(void) {
-  unsigned long y;
-  static unsigned long mag01[2] = {0x0UL, MATRIX_A};
-  /* mag01[x] = x * MATRIX_A  for x=0,1 */
-
-  if (mti >= N) { /* generate N words at one time */
-    int kk;
-
-    if (mti == N + 1) {     /* if init_genrand() has not been called, */
-      init_genrand(5489UL); /* a default initial seed is used */
-    }
-
-    for (kk = 0; kk < N - M; kk++) {
-      y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
-      mt[kk] = mt[kk + M] ^ (y >> 1) ^ mag01[y & 0x1UL];
-    }
-    for (; kk < N - 1; kk++) {
-      y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
-      mt[kk] = mt[kk + (M - N)] ^ (y >> 1) ^ mag01[y & 0x1UL];
-    }
-    y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
-    mt[N - 1] = mt[M - 1] ^ (y >> 1) ^ mag01[y & 0x1UL];
-
-    mti = 0;
-  }
-
-  y = mt[mti++];
-
-  /* Tempering */
-  y ^= (y >> 11);
-  y ^= (y << 7) & 0x9d2c5680UL;
-  y ^= (y << 15) & 0xefc60000UL;
-  y ^= (y >> 18);
-
-  return y;
-}
-
-/* generates a random number on [0,0x7fffffff]-interval */
-long RandomGenerator_MT19937::genrand_int31(void) {
-  return (long)(genrand_int32() >> 1);
-}
-
-/* generates a random number on [0,1]-real-interval */
-double RandomGenerator_MT19937::genrand_real1(void) {
-  return genrand_int32() * (1.0 / 4294967295.0);
-  /* divided by 2^32-1 */
-}
-
-/* generates a random number on [0,1)-real-interval */
-double RandomGenerator_MT19937::genrand_real2(void) {
-  return genrand_int32() * (1.0 / 4294967296.0);
-  /* divided by 2^32 */
-}
-
-/* generates a random number on (0,1)-real-interval */
-double RandomGenerator_MT19937::genrand_real3(void) {
-  return (((double)genrand_int32()) + 0.5) * (1.0 / 4294967296.0);
-  /* divided by 2^32 */
-}
-
-/* generates a random number on [0,1) with 53-bit resolution*/
-double RandomGenerator_MT19937::genrand_res53(void) {
-  unsigned long a = genrand_int32() >> 5, b = genrand_int32() >> 6;
-  return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0);
-}
-/* These real versions are due to Isaku Wada, 2002/01/09 added */
diff --git a/source/md/src/Statistics.cc b/source/md/src/Statistics.cc
deleted file mode 100644
index 6241d43d06..0000000000
--- a/source/md/src/Statistics.cc
+++ /dev/null
@@ -1,104 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include <fstream>
-#include <iostream>
-// #include <iomanip>
-#include <stdio.h>
-
-#include "Statistics.h"
-#include "UnitManager.h"
-
-template <typename VALUETYPE>
-Statistics<VALUETYPE>::Statistics(const VALUETYPE e_corr_,
-                                  const VALUETYPE p_corr_)
-    : e_corr(e_corr_), p_corr(p_corr_) {}
-
-template <typename VALUETYPE>
-void Statistics<VALUETYPE>::record(const VALUETYPE& ener,
-                                   const vector<VALUETYPE>& virial,
-                                   const vector<VALUETYPE>& veloc,
-                                   const vector<VALUETYPE>& mass,
-                                   const SimulationRegion<VALUETYPE>& region_) {
-  r_pot_ener = ener;
-  r_vir.resize(9);
-  for (unsigned ii = 0; ii < 9; ++ii) {
-    r_vir[ii] = virial[ii];
-  }
-  // r_box.resize(6);
-  // for (unsigned ii = 0; ii < 6; ++ii){
-  //   r_box[ii] = box[ii];
-  // }
-  region.reinitBox(region_.getBoxTensor());
-  natoms = mass.size();
-  r_kin_ener = 0;
-  double pref = 0.5 * UnitManager::IntegratorMassConstant;
-  for (int ii = 0; ii < natoms; ++ii) {
-    r_kin_ener += pref * mass[ii] * veloc[3 * ii + 0] * veloc[3 * ii + 0];
-    r_kin_ener += pref * mass[ii] * veloc[3 * ii + 1] * veloc[3 * ii + 1];
-    r_kin_ener += pref * mass[ii] * veloc[3 * ii + 2] * veloc[3 * ii + 2];
-  }
-}
-
-template <typename VALUETYPE>
-double Statistics<VALUETYPE>::get_T() const {
-  return get_ekin() / (natoms * 3. * UnitManager::BoltzmannConstant) * 2.;
-}
-
-template <typename VALUETYPE>
-double Statistics<VALUETYPE>::get_V() const {
-  // return (r_box[1] - r_box[0]) * (r_box[3] - r_box[2]) * (r_box[5] -
-  // r_box[4]);
-  return region.getVolume();
-}
-
-template <typename VALUETYPE>
-double Statistics<VALUETYPE>::get_P() const {
-  return (get_ekin() - (r_vir[0] + r_vir[4] + r_vir[8])) * 2. / 3. / get_V() *
-             UnitManager::PressureConstant +
-         p_corr;
-}
-
-template <typename VALUETYPE>
-void Statistics<VALUETYPE>::print(ostream& os,
-                                  const int& step,
-                                  const double time) const {
-  char tmps[65536];
-  sprintf(tmps,
-          "%13.4f %15.6f %15.6f %15.6f %15.6f %15.6f %15.6f %15.6f %15.6f\n",
-          time, get_ekin(), get_epot(), get_ekin() + get_epot(), get_T(),
-          get_P(), r_vir[0], r_vir[4], r_vir[8]);
-  os << tmps;
-  // os << setw(7) << setprecision(6) << time << setprecision (8) << setfill ('
-  // ')
-  //    << setw(15) << get_ekin() << " "
-  //    << setw(15) << get_epot() << " "
-  //    << setw(15) << get_ekin() + get_epot() << " "
-  //    << setw(15) << get_T() << " "
-  //    << setw(15) << get_P() << " "
-  //    << setw(15) << r_vir[0] << " "
-  //    << setw(15) << r_vir[4] << " "
-  //    << setw(15) << r_vir[8] << " "
-  //    << endl;
-}
-
-template <typename VALUETYPE>
-void Statistics<VALUETYPE>::print_head(ostream& os) const {
-  char tmps[65536];
-  sprintf(tmps, "#%12s %15s %15s %15s %15s %15s %15s %15s %15s\n", "time",
-          "Kinetic", "Potential", "E_tot", "Temperature", "Pressure", "Vxx",
-          "Vyy", "Vzz");
-  os << tmps;
-  // os << "#";
-  // os << setw(6) <<  "time" << setfill (' ')
-  //    << setw(15) << "Kinetic" << " "
-  //    << setw(15) << "Potential" << " "
-  //    << setw(15) << "E_tot" << " "
-  //    << setw(15) << "Temperature" << " "
-  //    << setw(15) << "Pressure" << " "
-  //    << setw(15) << "Vxx" << " "
-  //    << setw(15) << "Vyy" << " "
-  //    << setw(15) << "Vzz" << " "
-  //    << endl;
-}
-
-template class Statistics<float>;
-template class Statistics<double>;
diff --git a/source/md/src/StringSplit.cpp b/source/md/src/StringSplit.cpp
deleted file mode 100644
index a34c6a43ab..0000000000
--- a/source/md/src/StringSplit.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "StringSplit.h"
-
-void StringOperation::split(const std::string& in,
-                            std::vector<std::string>& out) {
-  std::istringstream iss(in);
-  out.clear();
-
-  do {
-    std::string sub;
-    iss >> sub;
-    out.push_back(sub);
-    // std::vector<std::string > tokens;
-    // tokens.push_back (" ");
-    // tokens.push_back ("\t");
-    // std::copy(std::istream_iterator<std::string>(iss),
-    // 	    std::istream_iterator<std::string>(),
-    // 	    std::back_inserter<std::vector<std::string> >(tokens));
-  } while (iss);
-
-  out.pop_back();
-}
-
-void StringOperation::split(const std::string& in,
-                            const std::string& delimiter,
-                            std::vector<std::string>& out) {
-  size_t pos = 0;
-  size_t len = delimiter.length();
-  std::string s(in);
-  std::string token;
-
-  while ((pos = s.find(delimiter)) != std::string::npos) {
-    token = s.substr(0, pos);
-    out.push_back(token);
-    s.erase(0, pos + len);
-  }
-  if (!s.empty()) {
-    out.push_back(s);
-  }
-}
diff --git a/source/md/src/TF.cc b/source/md/src/TF.cc
deleted file mode 100644
index dc37f34c26..0000000000
--- a/source/md/src/TF.cc
+++ /dev/null
@@ -1,58 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "TF.h"
-
-#include <iostream>
-
-#include "Interpolation.h"
-#include "TableFileLoader.h"
-
-TF::TF(const string& filename) {
-  vector<vector<double> > tmpdata;
-  TableFileLoader tfl(filename.c_str());
-  tfl.setColumns({1, 3});
-  tfl.loadAll(tmpdata);
-  data = tmpdata[1];
-  hh = tmpdata[0][1] - tmpdata[0][0];
-  xup = tmpdata[0].back();
-  xup *= b2m_l;
-  hh *= b2m_l;
-  for (unsigned ii = 0; ii < data.size(); ++ii) {
-    data[ii] *= b2m_e / b2m_l;
-  }
-}
-
-VALUETYPE
-TF::meas(const VALUETYPE& xx) const {
-  VALUETYPE ff = 0;
-  if (xx >= xup) {
-    ff = 0;
-  } else {
-    int posi = int(xx / hh);
-    if (posi < 0) {
-      posi = 0;
-    } else if (posi >= data.size() - 1) {
-      posi = data.size() - 2;
-    }
-    Poly p;
-    Interpolation::pieceLinearInterpol(posi * hh, (posi + 1) * hh, data[posi],
-                                       data[posi + 1], p);
-    ff = p.value(xx);
-  }
-  return ff;
-}
-
-void TF::apply(vector<VALUETYPE>& dforce,
-               const vector<VALUETYPE>& dcoord,
-               const AdWeight& adw) const {
-  vector<VALUETYPE> weight, weight_x;
-  adw.atom_weight(weight, weight_x, dcoord);
-  vector<VALUETYPE> center = adw.get_center();
-
-  for (unsigned ii = 0; ii < weight_x.size(); ++ii) {
-    VALUETYPE ff = meas(weight_x[ii]);
-    if (dcoord[ii * 3] < center[0]) {
-      ff = -ff;
-    }
-    dforce[ii * 3] += ff;
-  }
-}
diff --git a/source/md/src/TableFileLoader.cpp b/source/md/src/TableFileLoader.cpp
deleted file mode 100644
index 99fec6f9cb..0000000000
--- a/source/md/src/TableFileLoader.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "TableFileLoader.h"
-
-#include <algorithm>
-#include <iostream>
-
-#include "StringSplit.h"
-
-#define MaxLineLength 65536
-
-using namespace std;
-
-TableFileLoader::TableFileLoader(const char* file) : every(1) { reinit(file); }
-
-unsigned TableFileLoader::getNumbColumns() {
-  char valueline[MaxLineLength];
-  while (data.getline(valueline, MaxLineLength)) {
-    if (valueline[0] == '#' || valueline[0] == '@') {
-      continue;
-    }
-    break;
-  }
-  if (data.eof()) {
-    return 0;
-  } else if (!data.good()) {
-    cerr << "error file reading state!" << endl;
-    throw;
-  }
-  vector<string> words;
-  StringOperation::split(string(valueline), words);
-
-  data.close();
-  reinit(file.c_str());
-  return words.size();
-}
-
-void TableFileLoader::reinit(const char* file_) {
-  file = string(file_);
-  data.open(file.c_str());
-  if (!data) {
-    cerr << "cannot open file \"" << file << "\"" << endl;
-    throw;
-  }
-  count_read = 0;
-  // inter_cols.push_back (0);
-}
-
-void TableFileLoader::setColumns(const vector<unsigned>& cols) {
-  inter_cols = cols;
-  for (unsigned ii = 0; ii < inter_cols.size(); ++ii) {
-    if (inter_cols[ii] == 0) {
-      cerr << "invalid col index, should be larger than 0" << endl;
-      throw;
-    }
-    inter_cols[ii] -= 1;
-  }
-}
-
-void TableFileLoader::setEvery(const unsigned every_) { every = every_; }
-
-bool TableFileLoader::loadLine(vector<double>& odata) {
-  char valueline[MaxLineLength];
-
-  while (data.getline(valueline, MaxLineLength)) {
-    if (valueline[0] == '#' || valueline[0] == '@') {
-      continue;
-    } else if (count_read++ % every == 0) {
-      break;
-    }
-  }
-
-  if (data.eof()) {
-    return false;
-  } else if (!data.good()) {
-    cerr << "error file reading state!" << endl;
-    throw;
-  }
-
-  vector<string> words;
-  StringOperation::split(string(valueline), words);
-  odata.resize(inter_cols.size());
-
-  for (unsigned ii = 0; ii < inter_cols.size(); ++ii) {
-    odata[ii] = atof(words[inter_cols[ii]].c_str());
-  }
-
-  return true;
-}
-
-void TableFileLoader::loadAll(vector<vector<double> >& odata) {
-  odata.resize(inter_cols.size());
-  vector<double> line;
-  while (loadLine(line)) {
-    for (unsigned ii = 0; ii < inter_cols.size(); ++ii) {
-      odata[ii].push_back(line[ii]);
-    }
-  }
-}
diff --git a/source/md/src/Tabulated.cc b/source/md/src/Tabulated.cc
deleted file mode 100644
index 6e9777ea29..0000000000
--- a/source/md/src/Tabulated.cc
+++ /dev/null
@@ -1,175 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "Tabulated.h"
-
-#include <cmath>
-#include <iostream>
-
-#include "UnitManager.h"
-#include "common.h"
-
-Tabulated::Tabulated(const VALUETYPE rc,
-                     const VALUETYPE hh,
-                     const vector<VALUETYPE> &tab) {
-  reinit(rc, hh, tab);
-}
-
-void Tabulated::reinit(const VALUETYPE rc,
-                       const VALUETYPE hh,
-                       const vector<VALUETYPE> &tab) {
-  int numbFunc = 1;
-  int stride = numbFunc * 4;
-  int mystride = numbFunc * 2;
-  unsigned tableLength = tab.size() / mystride;
-
-  hi = 1. / hh;
-  rc2 = rc * rc;
-
-  data.resize(static_cast<size_t>(tableLength) * stride);
-
-  int ii;
-  for (ii = 0; ii < tableLength - 1; ++ii) {
-    const double &v0(tab[ii * mystride + 0]);
-    const double &f0(tab[ii * mystride + 1]);
-    const double &v1(tab[(ii + 1) * mystride + 0]);
-    const double &f1(tab[(ii + 1) * mystride + 1]);
-    VALUETYPE &dv(data[ii * stride + 0]);
-    VALUETYPE &df(data[ii * stride + 1]);
-    VALUETYPE &dg(data[ii * stride + 2]);
-    VALUETYPE &dh(data[ii * stride + 3]);
-    dv = v0;
-    df = -f0 * hh;
-    dg = 3 * (v1 - v0) + (f1 + 2 * f0) * hh;
-    dh = -2 * (v1 - v0) - (f1 + f0) * hh;
-  }
-  {
-    const double &v0(tab[ii * mystride + 0]);
-    const double &f0(tab[ii * mystride + 1]);
-    VALUETYPE &dv(data[ii * stride + 0]);
-    VALUETYPE &df(data[ii * stride + 1]);
-    VALUETYPE &dg(data[ii * stride + 2]);
-    VALUETYPE &dh(data[ii * stride + 3]);
-    dv = v0;
-    df = -f0 * hh;
-    dg = 0;
-    dh = 0;
-  }
-}
-
-inline void Tabulated::compute_posi(int &idx,
-                                    VALUETYPE &eps,
-                                    const VALUETYPE &rr) {
-  VALUETYPE rt = rr * hi;
-  idx = int(rt);
-  eps = rt - idx;
-}
-
-inline void Tabulated::tb_inner(VALUETYPE &ae,
-                                VALUETYPE &af,
-                                const VALUETYPE &r2) {
-  if (r2 > rc2) {
-    ae = af = 0;
-    return;
-  }
-
-  VALUETYPE rr = sqrt(r2);
-  int idx;
-  VALUETYPE eps;
-  compute_posi(idx, eps, rr);
-  idx *= 4;
-
-  VALUETYPE table_param[4];
-  for (int ii = 0; ii < 4; ++ii) {
-    table_param[ii] = data[ii + idx];
-  }
-  const VALUETYPE &Y(table_param[0]);
-  const VALUETYPE &F(table_param[1]);
-  const VALUETYPE &G(table_param[2]);
-  const VALUETYPE &H(table_param[3]);
-
-  VALUETYPE Heps = eps * H;
-  VALUETYPE Fp = (F + eps * (G + Heps));
-  VALUETYPE FF = (Fp + (eps * (G + (Heps + Heps))));
-
-  af = FF * hi;
-  af = -af / rr;
-  ae = (Y + (eps * Fp));
-}
-
-void Tabulated::compute(VALUETYPE &ener,
-                        vector<VALUETYPE> &force,
-                        vector<VALUETYPE> &virial,
-                        const vector<VALUETYPE> &coord,
-                        const vector<int> &atype,
-                        const SimulationRegion<VALUETYPE> &region,
-                        const vector<vector<int> > &nlist) {
-  for (unsigned ii = 0; ii < nlist.size(); ++ii) {
-    for (unsigned _ = 0; _ < nlist[ii].size(); ++_) {
-      int jj = nlist[ii][_];
-      if (jj < ii) {
-        continue;
-      }
-      VALUETYPE diff[3];
-      region.diffNearestNeighbor(&coord[ii * 3], &coord[jj * 3], diff);
-      VALUETYPE r2 = diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2];
-      if (r2 < rc2) {
-        VALUETYPE ae, af;
-        tb_inner(ae, af, r2);
-        for (int dd = 0; dd < 3; ++dd) {
-          force[ii * 3 + dd] += af * diff[dd];
-        }
-        for (int dd = 0; dd < 3; ++dd) {
-          force[jj * 3 + dd] -= af * diff[dd];
-        }
-        ener += ae;
-        for (int dd0 = 0; dd0 < 3; ++dd0) {
-          for (int dd1 = 0; dd1 < 3; ++dd1) {
-            virial[dd0 * 3 + dd1] -= 0.5 * diff[dd0] * af * diff[dd1];
-          }
-        }
-      }
-    }
-  }
-}
-
-void Tabulated::compute(VALUETYPE &ener,
-                        vector<VALUETYPE> &force,
-                        vector<VALUETYPE> &virial,
-                        const vector<VALUETYPE> &coord,
-                        const vector<VALUETYPE> &charge,
-                        const vector<int> &atype,
-                        const SimulationRegion<VALUETYPE> &region,
-                        const vector<vector<int> > &nlist) {
-  for (unsigned ii = 0; ii < nlist.size(); ++ii) {
-    for (unsigned _ = 0; _ < nlist[ii].size(); ++_) {
-      int jj = nlist[ii][_];
-      if (jj < ii) {
-        continue;
-      }
-      VALUETYPE diff[3];
-      region.diffNearestNeighbor(&coord[ii * 3], &coord[jj * 3], diff);
-      VALUETYPE r2 = diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2];
-      if (r2 < rc2) {
-        VALUETYPE ae, af;
-        tb_inner(ae, af, r2);
-        {
-          VALUETYPE qiqj =
-              charge[ii] * charge[jj] * UnitManager::ElectrostaticConvertion;
-          ae *= qiqj;
-          af *= qiqj;
-        }
-        for (int dd = 0; dd < 3; ++dd) {
-          force[ii * 3 + dd] += af * diff[dd];
-        }
-        for (int dd = 0; dd < 3; ++dd) {
-          force[jj * 3 + dd] -= af * diff[dd];
-        }
-        ener += ae;
-        for (int dd0 = 0; dd0 < 3; ++dd0) {
-          for (int dd1 = 0; dd1 < 3; ++dd1) {
-            virial[dd0 * 3 + dd1] -= 0.5 * diff[dd0] * af * diff[dd1];
-          }
-        }
-      }
-    }
-  }
-}
diff --git a/source/md/src/Trajectory.cc b/source/md/src/Trajectory.cc
deleted file mode 100644
index 8da1cff67e..0000000000
--- a/source/md/src/Trajectory.cc
+++ /dev/null
@@ -1,141 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "Trajectory.h"
-
-#include <stdlib.h>
-#include <string.h>
-
-#include <cassert>
-#include <iostream>
-
-bool XtcSaver::reinit(const char *filename, const int &natoms_) {
-  char tmpname[2048];
-  strncpy(tmpname, filename, 2047);
-
-  xd = xdrfile_open(filename, "w");
-  if (xd == NULL) {
-    std::cerr << "cannot open file " << filename << std::endl;
-    return false;
-  }
-  natoms = natoms_;
-
-  xx = (rvec *)malloc(sizeof(rvec) * natoms);
-  inited = true;
-  return true;
-}
-
-XtcSaver::~XtcSaver() { clear(); }
-
-XtcSaver::XtcSaver(const char *filename, const int &natoms_)
-    : inited(false), prec(1000) {
-  reinit(filename, natoms_);
-}
-
-void XtcSaver::clear() {
-  if (inited) {
-    free(xx);
-    xdrfile_close(xd);
-    inited = false;
-  }
-}
-
-void XtcSaver::save(const int &step,
-                    const double &time,
-                    const vector<vector<double> > &frame,
-                    const vector<double> &box) {
-  assert(box.size() == 9);
-  assert(inited);
-  matrix tmpBox;
-  for (int dd0 = 0; dd0 < 3; ++dd0) {
-    for (int dd1 = 0; dd1 < 3; ++dd1) {
-      tmpBox[dd0][dd1] = 0;
-    }
-  }
-  for (int dd = 0; dd < 3; ++dd) {
-    tmpBox[dd][dd] = box[3 * dd + dd];
-  }
-  for (int ii = 0; ii < frame.size(); ++ii) {
-    for (int dd = 0; dd < 3; ++dd) {
-      xx[ii][dd] = frame[ii][dd];
-    }
-  }
-  write_xtc(xd, natoms, step, time, tmpBox, xx, prec);
-}
-
-bool TrrSaver::reinit(const char *filename, const int &natoms_) {
-  char tmpname[2048];
-  strncpy(tmpname, filename, 2047);
-
-  xd = xdrfile_open(filename, "w");
-  if (xd == NULL) {
-    std::cerr << "cannot open file " << filename << std::endl;
-    return false;
-  }
-  natoms = natoms_;
-
-  xx = (rvec *)malloc(sizeof(rvec) * natoms);
-  vv = (rvec *)malloc(sizeof(rvec) * natoms);
-  ff = (rvec *)malloc(sizeof(rvec) * natoms);
-  for (int ii = 0; ii < natoms; ++ii) {
-    for (int dd = 0; dd < 3; ++dd) {
-      vv[ii][dd] = 0;
-      ff[ii][dd] = 0;
-    }
-  }
-  inited = true;
-  return true;
-}
-
-TrrSaver::~TrrSaver() { clear(); }
-
-TrrSaver::TrrSaver(const char *filename, const int &natoms_)
-    : inited(false), lambda(0) {
-  reinit(filename, natoms_);
-}
-
-void TrrSaver::clear() {
-  if (inited) {
-    free(xx);
-    free(vv);
-    free(ff);
-    xdrfile_close(xd);
-    inited = false;
-  }
-}
-
-void TrrSaver::save(const int &step,
-                    const double &time,
-                    const vector<vector<double> > &ixx,
-                    const vector<vector<double> > &ivv,
-                    const vector<vector<double> > &iff,
-                    const vector<double> &box) {
-  assert(box.size() == 9);
-  assert(inited);
-  matrix tmpBox;
-  for (int dd0 = 0; dd0 < 3; ++dd0) {
-    for (int dd1 = 0; dd1 < 3; ++dd1) {
-      tmpBox[dd0][dd1] = box[3 * dd0 + dd1];
-    }
-  }
-  for (int ii = 0; ii < ixx.size(); ++ii) {
-    for (int dd = 0; dd < 3; ++dd) {
-      xx[ii][dd] = ixx[ii][dd];
-    }
-  }
-  for (int ii = 0; ii < natoms; ++ii) {
-    for (int dd = 0; dd < 3; ++dd) {
-      vv[ii][dd] = 0;
-      ff[ii][dd] = 0;
-    }
-  }
-  for (int ii = 0; ii < ivv.size(); ++ii) {
-    for (int dd = 0; dd < 3; ++dd) {
-      vv[ii][dd] = ivv[ii][dd];
-    }
-  }
-  for (int ii = 0; ii < iff.size(); ++ii) {
-    for (int dd = 0; dd < 3; ++dd) {
-      ff[ii][dd] = iff[ii][dd];
-    }
-  }
-  write_trr(xd, natoms, step, time, lambda, tmpBox, xx, vv, ff);
-}
diff --git a/source/md/src/UnitManager.cc b/source/md/src/UnitManager.cc
deleted file mode 100644
index 1982b67de3..0000000000
--- a/source/md/src/UnitManager.cc
+++ /dev/null
@@ -1,29 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "UnitManager.h"
-
-#include <cmath>
-
-// unit independent constants
-double UnitManager::Degree2Radian = M_PI / 180.;
-double UnitManager::Radian2Degree = 180. / M_PI;
-// unit dependent
-double UnitManager::IntegratorMassConstant = 1.;
-double UnitManager::PressureConstant = 16.60539040;
-double UnitManager::BoltzmannConstant = 8.31445986144858164e-3;
-double UnitManager::ElectrostaticConvertion = 138.93545756169981341199;
-
-string UnitManager::unit_names[] = {"biology", "metal", "unitless"};
-
-void UnitManager::set(const string& unit) {
-  if (unit == "metal") {
-    IntegratorMassConstant = 1.03642695707516506071e-4;
-    PressureConstant = 1.602176621e6;
-    BoltzmannConstant = 8.6173303e-5;
-    ElectrostaticConvertion = 14.39964535475696995031;
-  } else if (unit == "unitless") {
-    IntegratorMassConstant = 1.;
-    PressureConstant = 1.;
-    BoltzmannConstant = 1.;
-    ElectrostaticConvertion = 1.;
-  }
-}
diff --git a/source/md/src/XyzFileManager.cc b/source/md/src/XyzFileManager.cc
deleted file mode 100644
index c29a456b9f..0000000000
--- a/source/md/src/XyzFileManager.cc
+++ /dev/null
@@ -1,108 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "XyzFileManager.h"
-
-#include <iostream>
-
-#include "StringSplit.h"
-// #include <iomanip>
-#include <assert.h>
-
-#include <fstream>
-
-void XyzFileManager::read(const string& file,
-                          vector<string>& atom_name,
-                          vector<vector<double> >& posi,
-                          vector<vector<double> >& velo,
-                          vector<vector<double> >& forc,
-                          vector<double>& boxsize) {
-  getBoxSize(file, boxsize);
-
-  posi.clear();
-  velo.clear();
-
-  ifstream data0(file.c_str());
-  if (!data0.is_open()) {
-    cerr << "cannot open file " << file << endl;
-    exit(1);
-  }
-
-  string valueline;
-  vector<string> words;
-  words.reserve(10);
-  string tmpname;
-  vector<double> tmpp(3);
-  vector<double> tmpv(3);
-  vector<double> tmpf(3);
-  std::getline(data0, valueline);
-  long long int numb_atom = atoll(valueline.c_str());
-  std::getline(data0, valueline);
-
-  for (long long int ii = 0; ii < numb_atom; ++ii) {
-    std::getline(data0, valueline);
-    StringOperation::split(string(valueline), words);
-    if (words.size() == 10) {
-      tmpp[0] = atof(words[1 + 0].c_str());
-      tmpp[1] = atof(words[1 + 1].c_str());
-      tmpp[2] = atof(words[1 + 2].c_str());
-      tmpv[0] = atof(words[1 + 3].c_str());
-      tmpv[1] = atof(words[1 + 4].c_str());
-      tmpv[2] = atof(words[1 + 5].c_str());
-      tmpf[0] = atof(words[1 + 6].c_str());
-      tmpf[1] = atof(words[1 + 7].c_str());
-      tmpf[2] = atof(words[1 + 8].c_str());
-      posi.push_back(tmpp);
-      velo.push_back(tmpv);
-      forc.push_back(tmpf);
-      atom_name.push_back(words[0]);
-    } else if (words.size() == 7) {
-      tmpp[0] = atof(words[1 + 0].c_str());
-      tmpp[1] = atof(words[1 + 1].c_str());
-      tmpp[2] = atof(words[1 + 2].c_str());
-      tmpv[0] = atof(words[1 + 3].c_str());
-      tmpv[1] = atof(words[1 + 4].c_str());
-      tmpv[2] = atof(words[1 + 5].c_str());
-      posi.push_back(tmpp);
-      velo.push_back(tmpv);
-      atom_name.push_back(words[0]);
-    } else if (words.size() == 4) {
-      tmpp[0] = atof(words[1 + 0].c_str());
-      tmpp[1] = atof(words[1 + 1].c_str());
-      tmpp[2] = atof(words[1 + 2].c_str());
-      posi.push_back(tmpp);
-      atom_name.push_back(words[0]);
-    } else {
-      cerr << "XyzFileManager::read: wrong format, line has " << words.size()
-           << " words" << endl;
-      exit(1);
-    }
-  }
-}
-
-void XyzFileManager::getBoxSize(const string& file, vector<double>& boxsize) {
-  ifstream data0(file.c_str());
-  if (!data0.is_open()) {
-    cerr << "cannot open file " << file << endl;
-  }
-  string valueline;
-  vector<string> words;
-  words.reserve(9);
-  std::getline(data0, valueline);
-  std::getline(data0, valueline);
-  StringOperation::split(valueline, words);
-
-  boxsize.resize(9);
-  fill(boxsize.begin(), boxsize.end(), 0.);
-  if (words.size() == 3) {
-    for (int ii = 0; ii < 3; ++ii) {
-      boxsize[3 * ii + ii] = atof(words[ii].c_str());
-    }
-  } else if (words.size() == 9) {
-    for (int ii = 0; ii < 9; ++ii) {
-      boxsize[ii] = atof(words[ii].c_str());
-    }
-  } else {
-    cerr << "XyzFileManager::getBoxSize: wrong format, line has "
-         << words.size() << " words" << endl;
-    exit(1);
-  }
-}
diff --git a/source/md/src/ZM.cc b/source/md/src/ZM.cc
deleted file mode 100644
index a1495ee650..0000000000
--- a/source/md/src/ZM.cc
+++ /dev/null
@@ -1,83 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "ZM.h"
-
-#include <cmath>
-#include <iostream>
-
-#include "UnitManager.h"
-#include "common.h"
-
-ZM::ZM(const int& order, const VALUETYPE& alpha, const VALUETYPE& rc)
-    : potzm(order, alpha, rc) {
-  VALUETYPE rcp = rc + 2;
-  VALUETYPE hh = 2e-3;
-  int nn = rcp / hh;
-  vector<VALUETYPE> tab;
-
-  for (int ii = 0; ii < nn; ++ii) {
-    VALUETYPE xx = ii * hh;
-    VALUETYPE value, deriv;
-    if (xx <= rc) {
-      value = potzm.pot(xx);
-      deriv = potzm.mpotp(xx);
-    } else {
-      value = deriv = 0;
-    }
-    tab.push_back(value);
-    tab.push_back(deriv);
-  }
-  zm_tab.reinit(rcp, hh, tab);
-}
-
-VALUETYPE
-ZM::e_corr(const vector<VALUETYPE>& charge) const {
-  double sum = 0;
-  sum += potzm.energyCorr(charge);
-  return sum;
-}
-
-inline void ZM::ex_inner(VALUETYPE& ae, VALUETYPE& af, const VALUETYPE& r2) {
-  VALUETYPE r1 = sqrt(r2);
-  ae = 1. / r1;
-  af = 1. / (r2 * r1);
-}
-
-void ZM::exclude(VALUETYPE& ener,
-                 vector<VALUETYPE>& force,
-                 vector<VALUETYPE>& virial,
-                 const vector<VALUETYPE>& coord,
-                 const vector<VALUETYPE>& charge,
-                 const vector<int>& atype,
-                 const SimulationRegion<VALUETYPE>& region,
-                 const vector<int>& elist) {
-  for (unsigned _ = 0; _ < elist.size(); _ += 2) {
-    int ii = elist[_];
-    int jj = elist[_ + 1];
-    VALUETYPE diff[3];
-    region.diffNearestNeighbor(&coord[ii * 3], &coord[jj * 3], diff);
-    VALUETYPE r2 = diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2];
-    VALUETYPE ae, af;
-    ex_inner(ae, af, r2);
-    // VALUETYPE ae1, af1;
-    // zm_tab.tb_inner (ae1, af1, r2);
-    // cout << ae << " " << ae1 << endl;
-    {
-      VALUETYPE qiqj =
-          charge[ii] * charge[jj] * UnitManager::ElectrostaticConvertion;
-      ae *= qiqj;
-      af *= qiqj;
-    }
-    for (int dd = 0; dd < 3; ++dd) {
-      force[ii * 3 + dd] -= af * diff[dd];
-    }
-    for (int dd = 0; dd < 3; ++dd) {
-      force[jj * 3 + dd] += af * diff[dd];
-    }
-    ener -= ae;
-    for (int dd0 = 0; dd0 < 3; ++dd0) {
-      for (int dd1 = 0; dd1 < 3; ++dd1) {
-        virial[dd0 * 3 + dd1] += 0.5 * diff[dd0] * af * diff[dd1];
-      }
-    }
-  }
-}
diff --git a/source/md/src/ZMFunctions.cpp b/source/md/src/ZMFunctions.cpp
deleted file mode 100644
index fd85e04bdd..0000000000
--- a/source/md/src/ZMFunctions.cpp
+++ /dev/null
@@ -1,213 +0,0 @@
-// SPDX-License-Identifier: LGPL-3.0-or-later
-#include "ZMFunctions.h"
-
-#include <cmath>
-#include <iostream>
-
-#include "UnitManager.h"
-
-#define M_inv2 (0.5)
-#define M_inv4 (0.25)
-#define M_inv8 (0.125)
-#define M_inv16 (0.06250000000000000000)
-#define M_inv48 (.02083333333333333333)
-
-static double f(const double& r) { return 1. / r; }
-
-static double D1f(const double& r) { return -1. / (r * r); }
-
-static double D2f(const double& r) { return 2. / (r * r * r); }
-
-static double D3f(const double& r) { return -6. / (r * r * r * r); }
-
-static double D4f(const double& r) { return 24. / (r * r * r * r * r); }
-
-static double g(const double& alpha, const double& r) {
-  return erfc(alpha * r);
-}
-
-static double D1g(const double& alpha, const double& r) {
-  double tmp = alpha * r;
-  return -M_2_SQRTPI * alpha * exp(-tmp * tmp);
-}
-
-static double D2g(const double& alpha, const double& r) {
-  double tmp = alpha * r;
-  return M_2_SQRTPI * 2 * alpha * alpha * alpha * r * exp(-tmp * tmp);
-}
-
-static double D3g(const double& alpha, const double& r) {
-  double tmp = alpha * r;
-  return M_2_SQRTPI * 2 * alpha * alpha * alpha * (1. - 2. * tmp * tmp) *
-         exp(-tmp * tmp);
-}
-
-static double D4g(const double& alpha, const double& r) {
-  double tmp = alpha * r;
-  double alpha5 = alpha * alpha;
-  alpha5 = alpha5 * alpha5 * alpha;
-  return M_2_SQRTPI * 4. * alpha5 * (-3. + 2. * tmp * tmp) * r *
-         exp(-tmp * tmp);
-}
-
-double ZeroMultipole::funcV(const double& alpha, const double& r) {
-  return f(r) * g(alpha, r);
-}
-
-double ZeroMultipole::funcD1V(const double& alpha, const double& r) {
-  return D1f(r) * g(alpha, r) + f(r) * D1g(alpha, r);
-}
-
-double ZeroMultipole::funcD2V(const double& alpha, const double& r) {
-  return D2f(r) * g(alpha, r) + 2. * D1f(r) * D1g(alpha, r) +
-         f(r) * D2g(alpha, r);
-}
-
-double ZeroMultipole::funcD3V(const double& alpha, const double& r) {
-  return D3f(r) * g(alpha, r) + 3. * D2f(r) * D1g(alpha, r) +
-         3. * D1f(r) * D2g(alpha, r) + f(r) * D3g(alpha, r);
-}
-
-double ZeroMultipole::funcD4V(const double& alpha, const double& r) {
-  return D4f(r) * g(alpha, r) + 4. * D3f(r) * D1g(alpha, r) +
-         6. * D2f(r) * D2g(alpha, r) + 4. * D1f(r) * D3g(alpha, r) +
-         f(r) * D4g(alpha, r);
-}
-
-void ZeroMultipole::calCoefficients(const int& ll,
-                                    const double& alpha,
-                                    const double& rc,
-                                    vector<double>& coeff) {
-  coeff.clear();
-  coeff.resize(ll + 1);
-  double b0, b1, b2, b3, b4;
-  double invrc, invrc2, invrc3, invrc4;
-  double rc2;
-
-  switch (ll) {
-    case 0:
-      b0 = funcV(alpha, rc);
-      coeff[0] = b0;
-      break;
-    case 1:
-      b0 = funcV(alpha, rc);
-      b1 = funcD1V(alpha, rc);
-      coeff[0] = b0 - M_inv2 * b1 * rc;
-      coeff[1] = M_inv2 * b1 / rc;
-      break;
-    case 2:
-      b0 = funcV(alpha, rc);
-      b1 = funcD1V(alpha, rc);
-      b2 = funcD2V(alpha, rc);
-      invrc = 1. / rc;
-      coeff[0] = M_inv8 * b2 * rc * rc - 5. * M_inv8 * b1 * rc + b0;
-      coeff[1] = 3. * M_inv4 * b1 * invrc - M_inv4 * b2;
-      coeff[2] =
-          M_inv8 * b2 * invrc * invrc - M_inv8 * b1 * invrc * invrc * invrc;
-      break;
-    case 3:
-      b0 = funcV(alpha, rc);
-      b1 = funcD1V(alpha, rc);
-      b2 = funcD2V(alpha, rc);
-      b3 = funcD3V(alpha, rc);
-      invrc = 1. / rc;
-      invrc2 = invrc * invrc;
-      coeff[0] = -M_inv48 * b3 * rc * rc * rc + 3. * M_inv16 * b2 * rc * rc -
-                 11. * M_inv16 * b1 * rc + b0;
-      coeff[1] =
-          15. * M_inv16 * b1 * invrc - 7. * M_inv16 * b2 + M_inv16 * b3 * rc;
-      coeff[2] = 5. * M_inv16 * b2 * invrc2 -
-                 5. * M_inv16 * b1 * invrc2 * invrc - M_inv16 * b3 * invrc;
-      coeff[3] = M_inv16 * b1 * invrc2 * invrc2 * invrc -
-                 M_inv16 * b2 * invrc2 * invrc2 + M_inv48 * b3 * invrc2 * invrc;
-      break;
-    case 4:
-      b0 = funcV(alpha, rc);
-      b1 = funcD1V(alpha, rc);
-      b2 = funcD2V(alpha, rc);
-      b3 = funcD3V(alpha, rc);
-      b4 = funcD4V(alpha, rc);
-      rc2 = rc * rc;
-      invrc = 1. / rc;
-      invrc2 = invrc * invrc;
-      invrc3 = invrc2 * invrc;
-      invrc4 = invrc2 * invrc2;
-      coeff[0] = 1. / 384. * b4 * rc2 * rc2 - 7. / 192. * b3 * rc2 * rc +
-                 29. / 128. * b2 * rc2 - 93. / 128. * b1 * rc + b0;
-      coeff[1] = 35. / 32. * b1 * invrc - 19. / 32. * b2 - 1. / 96. * b4 * rc2 +
-                 M_inv8 * b3 * rc;
-      coeff[2] = 1. / 64. * b4 - 35. / 64. * b1 * invrc3 +
-                 35. / 64. * b2 * invrc2 - 5. / 32. * b3 * invrc;
-      coeff[3] = 7. / 32. * b1 * invrc4 * invrc - 7. / 32. * b2 * invrc4 +
-                 1. / 12. * b3 * invrc3 - 1. / 96. * b4 * invrc2;
-      coeff[4] = 5. / 128. * b2 * invrc4 * invrc2 -
-                 5. / 128. * b1 * invrc4 * invrc3 -
-                 1. / 64. * b3 * invrc4 * invrc + 1. / 384 * b4 * invrc4;
-      break;
-    default:
-      cerr << "ll larger than 4 is not implemented" << endl;
-      break;
-  }
-}
-
-ZeroMultipole::Potential::Potential() : alpha(0), rc(1.0), ll(0) {
-  calCoefficients(ll, alpha, rc, coeff);
-}
-
-ZeroMultipole::Potential::Potential(const int& ll,
-                                    const double& alpha,
-                                    const double& rc) {
-  reinit(ll, alpha, rc);
-}
-
-void ZeroMultipole::Potential::reinit(const int& ll_,
-                                      const double& alpha_,
-                                      const double& rc_) {
-  ll = ll_;
-  alpha = alpha_;
-  rc = rc_;
-  calCoefficients(ll, alpha, rc, coeff);
-}
-
-double ZeroMultipole::Potential::pot(const double& rr) {
-  if (rr > rc) {
-    return 0.;
-  }
-  double tmp0 = funcV(alpha, rr);
-  // double tmp0 = 0.;
-  double tmp1 = coeff.back();
-  for (int ii = ll - 1; ii >= 0; --ii) {
-    tmp1 = tmp1 * rr * rr + coeff[ii];
-  }
-  return tmp0 - tmp1;
-}
-
-double ZeroMultipole::Potential::ulpot(const double& rr) {
-  return pot(rr) + coeff[0];
-}
-
-double ZeroMultipole::Potential::mpotp(const double& rr) {
-  if (rr > rc) {
-    return 0.;
-  }
-  double tmp0 = -funcD1V(alpha, rr);
-  double tmp1 = 2 * ll * coeff[ll];
-  for (int ii = ll - 1; ii >= 1; --ii) {
-    tmp1 = tmp1 * rr * rr + coeff[ii] * 2 * ii;
-  }
-  return tmp0 + tmp1 * rr;
-}
-
-double ZeroMultipole::Potential::mulpotp(const double& rr) { return mpotp(rr); }
-
-double ZeroMultipole::Potential::energyCorr(
-    const vector<double>& charges) const {
-  double sum = 0.;
-  double factor = UnitManager::ElectrostaticConvertion;
-  for (unsigned ii = 0; ii < charges.size(); ++ii) {
-    sum += charges[ii] * charges[ii];
-  }
-
-  // return - (coeff[0] * 0.5 + alpha / sqrt(M_PI)) * sum;
-  return -(coeff[0] * 0.5 + alpha / sqrt(M_PI)) * sum * factor;
-}
diff --git a/source/nodejs/prepublish.py b/source/nodejs/prepublish.py
index 2f607a7d07..cb60659f02 100644
--- a/source/nodejs/prepublish.py
+++ b/source/nodejs/prepublish.py
@@ -4,6 +4,7 @@
 The NPM package downloads the C library binary from GitHub releases.
 This script changes the package.json to make it work.
 """
+
 import json
 import shutil
 
diff --git a/source/nodejs/tests/test_deeppot.js b/source/nodejs/tests/test_deeppot.js
index 91ba4fcaf4..f3bd40b47d 100644
--- a/source/nodejs/tests/test_deeppot.js
+++ b/source/nodejs/tests/test_deeppot.js
@@ -2,13 +2,15 @@
 const deepmd = require('deepmd-kit');
 
 deepmd.convert_pbtxt_to_pb(
-    __dirname + '/../../tests/infer/deeppot.pbtxt', 'deeppot.pb');
+    __dirname + '/../../tests/infer/deeppot.pbtxt',
+    'deeppot.pb',
+);
 
 const dp = new deepmd.DeepPot('deeppot.pb');
 
-const coord = [1., 0., 0., 0., 0., 1.5, 1., 0., 3.];
+const coord = [1, 0, 0, 0, 0, 1.5, 1, 0, 3];
 const atype = [1, 0, 1];
-const cell = [10., 0., 0., 0., 10., 0., 0., 0., 10.];
+const cell = [10, 0, 0, 0, 10, 0, 0, 0, 10];
 
 const v_coord = new deepmd.vectord(coord.length);
 const v_atype = new deepmd.vectori(atype.length);
@@ -17,7 +19,7 @@ for (var i = 0; i < coord.length; i++) v_coord.set(i, coord[i]);
 for (var i = 0; i < atype.length; i++) v_atype.set(i, atype[i]);
 for (var i = 0; i < cell.length; i++) v_cell.set(i, cell[i]);
 
-var energy = 0.0
+var energy = 0.0;
 var v_forces = new deepmd.vectord();
 var v_virials = new deepmd.vectord();
 
@@ -25,6 +27,10 @@ energy = dp.compute(energy, v_forces, v_virials, v_coord, v_atype, v_cell);
 
 console.log('energy:', energy);
 console.log(
-    'forces:', [...Array(v_forces.size()).keys()].map(i => v_forces.get(i)));
+    'forces:',
+    [...Array(v_forces.size()).keys()].map((i) => v_forces.get(i)),
+);
 console.log(
-    'virials:', [...Array(v_virials.size()).keys()].map(i => v_virials.get(i)));
+    'virials:',
+    [...Array(v_virials.size()).keys()].map((i) => v_virials.get(i)),
+);
diff --git a/source/nodejs/yarn.lock b/source/nodejs/yarn.lock
index 864cd77297..fb0c093f11 100644
--- a/source/nodejs/yarn.lock
+++ b/source/nodejs/yarn.lock
@@ -307,9 +307,9 @@ inherits@2, inherits@^2.0.3:
   integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==
 
 ip@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/ip/-/ip-2.0.0.tgz#4cf4ab182fee2314c75ede1276f8c80b479936da"
-  integrity sha512-WKa+XuLG1A1R0UWhl2+1XQSi+fZWMsYKffMZTTYsiZaUD8k2yDAj5atimTUD2TZkyCkNEeYE5NhFZmupOGtjYQ==
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/ip/-/ip-2.0.1.tgz#e8f3595d33a3ea66490204234b77636965307105"
+  integrity sha512-lJUL9imLTNi1ZfXT+DU6rBBdbiKGBuay9B6xGSPVjUeQwaH1RIGqef8RZkUtHioLmSNpPR5M4HVKJGm1j8FWVQ==
 
 is-fullwidth-code-point@^3.0.0:
   version "3.0.0"
diff --git a/source/op/pt/CMakeLists.txt b/source/op/pt/CMakeLists.txt
new file mode 100644
index 0000000000..46ea38c193
--- /dev/null
+++ b/source/op/pt/CMakeLists.txt
@@ -0,0 +1,26 @@
+file(GLOB OP_SRC print_summary.cc)
+
+add_library(deepmd_op_pt MODULE ${OP_SRC})
+# link: libdeepmd libtorch
+target_link_libraries(deepmd_op_pt PRIVATE ${TORCH_LIBRARIES} ${LIB_DEEPMD})
+if(APPLE)
+  set_target_properties(deepmd_op_pt PROPERTIES INSTALL_RPATH "@loader_path")
+else()
+  set_target_properties(deepmd_op_pt PROPERTIES INSTALL_RPATH "$ORIGIN")
+endif()
+
+find_package(MPI)
+if(MPI_FOUND)
+  target_link_libraries(deepmd_op_pt INTERFACE MPI::MPI_CXX)
+  target_compile_definitions(deepmd_op_pt PRIVATE USE_MPI)
+endif()
+
+if(CMAKE_TESTING_ENABLED)
+  target_link_libraries(deepmd_op_pt PRIVATE coverage_config)
+endif()
+
+if(BUILD_PY_IF)
+  install(TARGETS deepmd_op_pt DESTINATION deepmd/lib/)
+else(BUILD_PY_IF)
+  install(TARGETS deepmd_op_pt DESTINATION lib/)
+endif(BUILD_PY_IF)
diff --git a/source/op/pt/print_summary.cc b/source/op/pt/print_summary.cc
new file mode 100644
index 0000000000..83209aab31
--- /dev/null
+++ b/source/op/pt/print_summary.cc
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+#include <torch/torch.h>
+
+#include <iostream>
+
+torch::Tensor enable_mpi() {
+#ifdef USE_MPI
+  return torch::ones({1}, torch::kBool);
+#else
+  return torch::zeros({1}, torch::kBool);
+#endif
+}
+
+TORCH_LIBRARY(deepmd, m) { m.def("enable_mpi", enable_mpi); }
diff --git a/source/op/CMakeLists.txt b/source/op/tf/CMakeLists.txt
similarity index 100%
rename from source/op/CMakeLists.txt
rename to source/op/tf/CMakeLists.txt
diff --git a/source/op/add_flt_nvnmd.cc b/source/op/tf/add_flt_nvnmd.cc
similarity index 100%
rename from source/op/add_flt_nvnmd.cc
rename to source/op/tf/add_flt_nvnmd.cc
diff --git a/source/op/copy_flt_nvnmd.cc b/source/op/tf/copy_flt_nvnmd.cc
similarity index 100%
rename from source/op/copy_flt_nvnmd.cc
rename to source/op/tf/copy_flt_nvnmd.cc
diff --git a/source/op/custom_op.cc b/source/op/tf/custom_op.cc
similarity index 100%
rename from source/op/custom_op.cc
rename to source/op/tf/custom_op.cc
diff --git a/source/op/custom_op.h b/source/op/tf/custom_op.h
similarity index 100%
rename from source/op/custom_op.h
rename to source/op/tf/custom_op.h
diff --git a/source/op/descrpt.cc b/source/op/tf/descrpt.cc
similarity index 100%
rename from source/op/descrpt.cc
rename to source/op/tf/descrpt.cc
diff --git a/source/op/descrpt_se_a_ef.cc b/source/op/tf/descrpt_se_a_ef.cc
similarity index 100%
rename from source/op/descrpt_se_a_ef.cc
rename to source/op/tf/descrpt_se_a_ef.cc
diff --git a/source/op/descrpt_se_a_ef_para.cc b/source/op/tf/descrpt_se_a_ef_para.cc
similarity index 100%
rename from source/op/descrpt_se_a_ef_para.cc
rename to source/op/tf/descrpt_se_a_ef_para.cc
diff --git a/source/op/descrpt_se_a_ef_vert.cc b/source/op/tf/descrpt_se_a_ef_vert.cc
similarity index 100%
rename from source/op/descrpt_se_a_ef_vert.cc
rename to source/op/tf/descrpt_se_a_ef_vert.cc
diff --git a/source/op/descrpt_se_a_mask.cc b/source/op/tf/descrpt_se_a_mask.cc
similarity index 100%
rename from source/op/descrpt_se_a_mask.cc
rename to source/op/tf/descrpt_se_a_mask.cc
diff --git a/source/op/dotmul_flt_nvnmd.cc b/source/op/tf/dotmul_flt_nvnmd.cc
similarity index 99%
rename from source/op/dotmul_flt_nvnmd.cc
rename to source/op/tf/dotmul_flt_nvnmd.cc
index fd7c831ef1..d7c2c8d3c3 100644
--- a/source/op/dotmul_flt_nvnmd.cc
+++ b/source/op/tf/dotmul_flt_nvnmd.cc
@@ -159,7 +159,7 @@ class DotmulFltNvnmdOp : public OpKernel {
       ufi3.nint &= FLT_MASK;
       y[ii] = ufi3.nflt;
     }  // loop ii
-  }    // Compute
+  }  // Compute
 
 };  // DotmulFltNvnmdOp
 
diff --git a/source/op/ewald_recp.cc b/source/op/tf/ewald_recp.cc
similarity index 100%
rename from source/op/ewald_recp.cc
rename to source/op/tf/ewald_recp.cc
diff --git a/source/op/flt_nvnmd.cc b/source/op/tf/flt_nvnmd.cc
similarity index 100%
rename from source/op/flt_nvnmd.cc
rename to source/op/tf/flt_nvnmd.cc
diff --git a/source/op/gelu_multi_device.cc b/source/op/tf/gelu_multi_device.cc
similarity index 100%
rename from source/op/gelu_multi_device.cc
rename to source/op/tf/gelu_multi_device.cc
diff --git a/source/op/map_aparam.cc b/source/op/tf/map_aparam.cc
similarity index 100%
rename from source/op/map_aparam.cc
rename to source/op/tf/map_aparam.cc
diff --git a/source/op/map_flt_nvnmd.cc b/source/op/tf/map_flt_nvnmd.cc
similarity index 98%
rename from source/op/map_flt_nvnmd.cc
rename to source/op/tf/map_flt_nvnmd.cc
index b23deac9c8..77b788e537 100644
--- a/source/op/map_flt_nvnmd.cc
+++ b/source/op/tf/map_flt_nvnmd.cc
@@ -141,10 +141,10 @@ class MapFltNvnmdOp : public OpKernel {
           add_flt_nvnmd(ytmp, d, ytmp);
           y[ii * M + jj] = ytmp;
         }  // jj
-      }    // ii
-    }      // ss
-  }        // Compute
-};         // MapFltNvnmdOp
+      }  // ii
+    }  // ss
+  }  // Compute
+};  // MapFltNvnmdOp
 
 #define REGISTER_CPU(T)                                              \
   REGISTER_KERNEL_BUILDER(                                           \
diff --git a/source/op/matmul_fitnet_nvnmd.cc b/source/op/tf/matmul_fitnet_nvnmd.cc
similarity index 99%
rename from source/op/matmul_fitnet_nvnmd.cc
rename to source/op/tf/matmul_fitnet_nvnmd.cc
index b5dc32a642..acc8e4b591 100644
--- a/source/op/matmul_fitnet_nvnmd.cc
+++ b/source/op/tf/matmul_fitnet_nvnmd.cc
@@ -160,7 +160,7 @@ class MatmulFitnetNvnmdOp : public OpKernel {
         s = floor(s * prec * precx) * div_precx;
         y[ii * K + kk] = s;
       }  // loop xx
-    }    // loop kk
+    }  // loop kk
 
   }  // Compute
 
diff --git a/source/op/matmul_flt2fix_nvnmd.cc b/source/op/tf/matmul_flt2fix_nvnmd.cc
similarity index 98%
rename from source/op/matmul_flt2fix_nvnmd.cc
rename to source/op/tf/matmul_flt2fix_nvnmd.cc
index ab823a829d..10cfb3d3ba 100644
--- a/source/op/matmul_flt2fix_nvnmd.cc
+++ b/source/op/tf/matmul_flt2fix_nvnmd.cc
@@ -138,9 +138,9 @@ class MatmulFlt2fixNvnmdOp : public OpKernel {
           ufi.nint &= FLT_MASK;
           y[hh * N * K + ii * K + kk] = ufi.nflt;
         }  // loop jj
-      }    // loop ii
-    }      // loop hh
-  }        // Compute
+      }  // loop ii
+    }  // loop hh
+  }  // Compute
 
  private:
   int nbit;
diff --git a/source/op/matmul_flt_nvnmd.cc b/source/op/tf/matmul_flt_nvnmd.cc
similarity index 98%
rename from source/op/matmul_flt_nvnmd.cc
rename to source/op/tf/matmul_flt_nvnmd.cc
index 92b6375100..22ed23c0a3 100644
--- a/source/op/matmul_flt_nvnmd.cc
+++ b/source/op/tf/matmul_flt_nvnmd.cc
@@ -188,9 +188,9 @@ class MatmulFltNvnmdOp : public OpKernel {
           ufi3.nint &= FLT_MASK;
           y[hh * N * K + ii * K + kk] = ufi3.nflt;
         }  // loop kk
-      }    // loop ii
-    }      // loop hh
-  }        // Compute
+      }  // loop ii
+    }  // loop hh
+  }  // Compute
 
  private:
   int normx;
diff --git a/source/op/mul_flt_nvnmd.cc b/source/op/tf/mul_flt_nvnmd.cc
similarity index 100%
rename from source/op/mul_flt_nvnmd.cc
rename to source/op/tf/mul_flt_nvnmd.cc
diff --git a/source/op/neighbor_stat.cc b/source/op/tf/neighbor_stat.cc
similarity index 100%
rename from source/op/neighbor_stat.cc
rename to source/op/tf/neighbor_stat.cc
diff --git a/source/op/optimizer/parallel.cc b/source/op/tf/optimizer/parallel.cc
similarity index 100%
rename from source/op/optimizer/parallel.cc
rename to source/op/tf/optimizer/parallel.cc
diff --git a/source/op/optimizer/parallel.h b/source/op/tf/optimizer/parallel.h
similarity index 100%
rename from source/op/optimizer/parallel.h
rename to source/op/tf/optimizer/parallel.h
diff --git a/source/op/pair_tab.cc b/source/op/tf/pair_tab.cc
similarity index 100%
rename from source/op/pair_tab.cc
rename to source/op/tf/pair_tab.cc
diff --git a/source/op/pairwise.cc b/source/op/tf/pairwise.cc
similarity index 100%
rename from source/op/pairwise.cc
rename to source/op/tf/pairwise.cc
diff --git a/source/op/prod_env_mat_multi_device.cc b/source/op/tf/prod_env_mat_multi_device.cc
similarity index 100%
rename from source/op/prod_env_mat_multi_device.cc
rename to source/op/tf/prod_env_mat_multi_device.cc
diff --git a/source/op/prod_env_mat_multi_device_nvnmd.cc b/source/op/tf/prod_env_mat_multi_device_nvnmd.cc
similarity index 100%
rename from source/op/prod_env_mat_multi_device_nvnmd.cc
rename to source/op/tf/prod_env_mat_multi_device_nvnmd.cc
diff --git a/source/op/prod_force.cc b/source/op/tf/prod_force.cc
similarity index 100%
rename from source/op/prod_force.cc
rename to source/op/tf/prod_force.cc
diff --git a/source/op/prod_force_grad.cc b/source/op/tf/prod_force_grad.cc
similarity index 100%
rename from source/op/prod_force_grad.cc
rename to source/op/tf/prod_force_grad.cc
diff --git a/source/op/prod_force_grad_multi_device.cc b/source/op/tf/prod_force_grad_multi_device.cc
similarity index 100%
rename from source/op/prod_force_grad_multi_device.cc
rename to source/op/tf/prod_force_grad_multi_device.cc
diff --git a/source/op/prod_force_multi_device.cc b/source/op/tf/prod_force_multi_device.cc
similarity index 100%
rename from source/op/prod_force_multi_device.cc
rename to source/op/tf/prod_force_multi_device.cc
diff --git a/source/op/prod_force_se_a_grad.cc b/source/op/tf/prod_force_se_a_grad.cc
similarity index 100%
rename from source/op/prod_force_se_a_grad.cc
rename to source/op/tf/prod_force_se_a_grad.cc
diff --git a/source/op/prod_force_se_a_mask.cc b/source/op/tf/prod_force_se_a_mask.cc
similarity index 100%
rename from source/op/prod_force_se_a_mask.cc
rename to source/op/tf/prod_force_se_a_mask.cc
diff --git a/source/op/prod_force_se_a_mask_grad.cc b/source/op/tf/prod_force_se_a_mask_grad.cc
similarity index 100%
rename from source/op/prod_force_se_a_mask_grad.cc
rename to source/op/tf/prod_force_se_a_mask_grad.cc
diff --git a/source/op/prod_force_se_r_grad.cc b/source/op/tf/prod_force_se_r_grad.cc
similarity index 100%
rename from source/op/prod_force_se_r_grad.cc
rename to source/op/tf/prod_force_se_r_grad.cc
diff --git a/source/op/prod_virial.cc b/source/op/tf/prod_virial.cc
similarity index 100%
rename from source/op/prod_virial.cc
rename to source/op/tf/prod_virial.cc
diff --git a/source/op/prod_virial_grad.cc b/source/op/tf/prod_virial_grad.cc
similarity index 100%
rename from source/op/prod_virial_grad.cc
rename to source/op/tf/prod_virial_grad.cc
diff --git a/source/op/prod_virial_grad_multi_device.cc b/source/op/tf/prod_virial_grad_multi_device.cc
similarity index 100%
rename from source/op/prod_virial_grad_multi_device.cc
rename to source/op/tf/prod_virial_grad_multi_device.cc
diff --git a/source/op/prod_virial_multi_device.cc b/source/op/tf/prod_virial_multi_device.cc
similarity index 100%
rename from source/op/prod_virial_multi_device.cc
rename to source/op/tf/prod_virial_multi_device.cc
diff --git a/source/op/prod_virial_se_a_grad.cc b/source/op/tf/prod_virial_se_a_grad.cc
similarity index 100%
rename from source/op/prod_virial_se_a_grad.cc
rename to source/op/tf/prod_virial_se_a_grad.cc
diff --git a/source/op/prod_virial_se_r_grad.cc b/source/op/tf/prod_virial_se_r_grad.cc
similarity index 100%
rename from source/op/prod_virial_se_r_grad.cc
rename to source/op/tf/prod_virial_se_r_grad.cc
diff --git a/source/op/quantize_nvnmd.cc b/source/op/tf/quantize_nvnmd.cc
similarity index 100%
rename from source/op/quantize_nvnmd.cc
rename to source/op/tf/quantize_nvnmd.cc
diff --git a/source/op/readme b/source/op/tf/readme
similarity index 100%
rename from source/op/readme
rename to source/op/tf/readme
diff --git a/source/op/soft_min.cc b/source/op/tf/soft_min.cc
similarity index 100%
rename from source/op/soft_min.cc
rename to source/op/tf/soft_min.cc
diff --git a/source/op/soft_min_force.cc b/source/op/tf/soft_min_force.cc
similarity index 100%
rename from source/op/soft_min_force.cc
rename to source/op/tf/soft_min_force.cc
diff --git a/source/op/soft_min_force_grad.cc b/source/op/tf/soft_min_force_grad.cc
similarity index 100%
rename from source/op/soft_min_force_grad.cc
rename to source/op/tf/soft_min_force_grad.cc
diff --git a/source/op/soft_min_virial.cc b/source/op/tf/soft_min_virial.cc
similarity index 100%
rename from source/op/soft_min_virial.cc
rename to source/op/tf/soft_min_virial.cc
diff --git a/source/op/soft_min_virial_grad.cc b/source/op/tf/soft_min_virial_grad.cc
similarity index 100%
rename from source/op/soft_min_virial_grad.cc
rename to source/op/tf/soft_min_virial_grad.cc
diff --git a/source/op/tabulate_multi_device.cc b/source/op/tf/tabulate_multi_device.cc
similarity index 99%
rename from source/op/tabulate_multi_device.cc
rename to source/op/tf/tabulate_multi_device.cc
index 6a70f60a96..50267df556 100644
--- a/source/op/tabulate_multi_device.cc
+++ b/source/op/tf/tabulate_multi_device.cc
@@ -191,7 +191,7 @@ class TabulateFusionSeAOp : public OpKernel {
                 errors::InvalidArgument("Dim of input should be 3"));
     TensorShape descriptor_shape;
     descriptor_shape.AddDim(em_tensor.shape().dim_size(0));
-    descriptor_shape.AddDim(4);  // TODO: be careful here;
+    descriptor_shape.AddDim(4);  // be careful here;
     descriptor_shape.AddDim(last_layer_size);
     int context_output_index = 0;
     Tensor* descriptor_tensor = NULL;
@@ -390,7 +390,7 @@ class TabulateFusionSeAttenOp : public OpKernel {
                 errors::InvalidArgument("Dim of input should be 2"));
     TensorShape descriptor_shape;
     descriptor_shape.AddDim(em_tensor.shape().dim_size(0));
-    descriptor_shape.AddDim(4);  // TODO: be careful here;
+    descriptor_shape.AddDim(4);  // be careful here;
     descriptor_shape.AddDim(last_layer_size);
     int context_output_index = 0;
     Tensor* descriptor_tensor = NULL;
@@ -786,8 +786,7 @@ class TabulateFusionSeROp : public OpKernel {
                 errors::InvalidArgument("Dim of input should be 2"));
     TensorShape descriptor_shape;
     descriptor_shape.AddDim(em_tensor.shape().dim_size(0));
-    descriptor_shape.AddDim(
-        em_tensor.shape().dim_size(1));  // TODO: be careful here;
+    descriptor_shape.AddDim(em_tensor.shape().dim_size(1));  // be careful here;
     descriptor_shape.AddDim(last_layer_size);
     int context_output_index = 0;
     Tensor* descriptor_tensor = NULL;
diff --git a/source/op/tanh4_flt_nvnmd.cc b/source/op/tf/tanh4_flt_nvnmd.cc
similarity index 98%
rename from source/op/tanh4_flt_nvnmd.cc
rename to source/op/tf/tanh4_flt_nvnmd.cc
index 987013a5e6..3351a366e4 100644
--- a/source/op/tanh4_flt_nvnmd.cc
+++ b/source/op/tf/tanh4_flt_nvnmd.cc
@@ -117,8 +117,8 @@ class Tanh4FltNvnmdOp : public OpKernel {
         y = floor(y * prechi) / prechi;
         ys(ii, jj) = (x < 0) ? (-y) : y;
       }  // loop jj
-    }    // loop ii
-  }      // Compute
+    }  // loop ii
+  }  // Compute
 
   //- define the private variable for calculation
 };  // Tanh4FltNvnmd
diff --git a/source/op/unaggregated_grad.cc b/source/op/tf/unaggregated_grad.cc
similarity index 100%
rename from source/op/unaggregated_grad.cc
rename to source/op/tf/unaggregated_grad.cc
diff --git a/source/tests/__init__.py b/source/tests/__init__.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/source/tests/__init__.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/source/tests/common/__init__.py b/source/tests/common/__init__.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/source/tests/common/__init__.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/source/tests/common/common.py b/source/tests/common/common.py
new file mode 100644
index 0000000000..4736042150
--- /dev/null
+++ b/source/tests/common/common.py
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import pathlib
+
+tests_path = pathlib.Path(__file__).parent.absolute()
+infer_path = (tests_path.parent / "infer").absolute()
diff --git a/source/tests/common/dpmodel/README b/source/tests/common/dpmodel/README
new file mode 100644
index 0000000000..de6d061bdd
--- /dev/null
+++ b/source/tests/common/dpmodel/README
@@ -0,0 +1 @@
+test deepmd-kit/source/deepmd/dpmodel
diff --git a/source/tests/common/dpmodel/__init__.py b/source/tests/common/dpmodel/__init__.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/source/tests/common/dpmodel/__init__.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/source/tests/common/dpmodel/case_single_frame_with_nlist.py b/source/tests/common/dpmodel/case_single_frame_with_nlist.py
new file mode 100644
index 0000000000..828e090cad
--- /dev/null
+++ b/source/tests/common/dpmodel/case_single_frame_with_nlist.py
@@ -0,0 +1,124 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import numpy as np
+
+
+class TestCaseSingleFrameWithoutNlist:
+    def setUp(self):
+        # nloc == 3, nall == 4
+        self.nloc = 3
+        self.nf, self.nt = 1, 2
+        self.coord = np.array(
+            [
+                [0, 0, 0],
+                [0, 1, 0],
+                [0, 0, 1],
+            ],
+            dtype=np.float64,
+        ).reshape([1, self.nloc * 3])
+        self.atype = np.array([0, 0, 1], dtype=int).reshape([1, self.nloc])
+        self.cell = 2.0 * np.eye(3).reshape([1, 9])
+        # sel = [5, 2]
+        self.sel = [16, 8]
+        self.rcut = 2.2
+        self.rcut_smth = 0.4
+        self.atol = 1e-12
+
+
+class TestCaseSingleFrameWithNlist:
+    def setUp(self):
+        # nloc == 3, nall == 4
+        self.nloc = 3
+        self.nall = 4
+        self.nf, self.nt = 2, 2
+        self.coord_ext = np.array(
+            [
+                [0, 0, 0],
+                [0, 1, 0],
+                [0, 0, 1],
+                [0, -2, 0],
+            ],
+            dtype=np.float64,
+        ).reshape([1, self.nall, 3])
+        self.coord = self.coord_ext[:, : self.nloc, :]
+        self.atype_ext = np.array([0, 0, 1, 0], dtype=int).reshape([1, self.nall])
+        self.atype = self.atype_ext[:, : self.nloc]
+        # sel = [5, 2]
+        self.sel = [5, 2]
+        self.nlist = np.array(
+            [
+                [1, 3, -1, -1, -1, 2, -1],
+                [0, -1, -1, -1, -1, 2, -1],
+                [0, 1, -1, -1, -1, -1, -1],
+            ],
+            dtype=int,
+        ).reshape([1, self.nloc, sum(self.sel)])
+        self.rcut = 2.2
+        self.rcut_smth = 0.4
+        self.atol = 1e-12
+
+        # permutations
+        self.perm = np.array([2, 0, 1, 3], dtype=np.int32)
+        inv_perm = np.array([1, 2, 0, 3], dtype=np.int32)
+        # permute the coord and atype
+        self.coord_ext = np.concatenate(
+            [self.coord_ext, self.coord_ext[:, self.perm, :]], axis=0
+        ).reshape(self.nf, self.nall * 3)
+        self.atype_ext = np.concatenate(
+            [self.atype_ext, self.atype_ext[:, self.perm]], axis=0
+        )
+        # permute the nlist
+        nlist1 = self.nlist[:, self.perm[: self.nloc], :]
+        mask = nlist1 == -1
+        nlist1 = inv_perm[nlist1]
+        nlist1 = np.where(mask, -1, nlist1)
+        self.nlist = np.concatenate([self.nlist, nlist1], axis=0)
+
+
+class TestCaseSingleFrameWithNlistWithVirtual:
+    def setUp(self):
+        # nloc == 3, nall == 4
+        self.nloc = 4
+        self.nall = 5
+        self.nf, self.nt = 2, 2
+        self.coord_ext = np.array(
+            [
+                [0, 0, 0],
+                [0, 0, 0],
+                [0, 1, 0],
+                [0, 0, 1],
+                [0, -2, 0],
+            ],
+            dtype=np.float64,
+        ).reshape([1, self.nall, 3])
+        self.atype_ext = np.array([0, -1, 0, 1, 0], dtype=int).reshape([1, self.nall])
+        # sel = [5, 2]
+        self.sel = [5, 2]
+        self.nlist = np.array(
+            [
+                [2, 4, -1, -1, -1, 3, -1],
+                [-1, -1, -1, -1, -1, -1, -1],
+                [0, -1, -1, -1, -1, 3, -1],
+                [0, 2, -1, -1, -1, -1, -1],
+            ],
+            dtype=int,
+        ).reshape([1, self.nloc, sum(self.sel)])
+        self.rcut = 2.2
+        self.rcut_smth = 0.4
+        # permutations
+        self.perm = np.array([3, 0, 1, 2, 4], dtype=np.int32)
+        inv_perm = np.argsort(self.perm)
+        # permute the coord and atype
+        self.coord_ext = np.concatenate(
+            [self.coord_ext, self.coord_ext[:, self.perm, :]], axis=0
+        ).reshape(self.nf, self.nall * 3)
+        self.atype_ext = np.concatenate(
+            [self.atype_ext, self.atype_ext[:, self.perm]], axis=0
+        )
+        # permute the nlist
+        nlist1 = self.nlist[:, self.perm[: self.nloc], :]
+        mask = nlist1 == -1
+        nlist1 = inv_perm[nlist1]
+        nlist1 = np.where(mask, -1, nlist1)
+        self.nlist = np.concatenate([self.nlist, nlist1], axis=0)
+        self.get_real_mapping = np.array([[0, 2, 3], [0, 1, 3]], dtype=np.int32)
+        self.atol = 1e-12
diff --git a/source/tests/common/dpmodel/test_descriptor_se_e2_a.py b/source/tests/common/dpmodel/test_descriptor_se_e2_a.py
new file mode 100644
index 0000000000..17c27cf9f1
--- /dev/null
+++ b/source/tests/common/dpmodel/test_descriptor_se_e2_a.py
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+
+from deepmd.dpmodel.descriptor import (
+    DescrptSeA,
+)
+
+from .case_single_frame_with_nlist import (
+    TestCaseSingleFrameWithNlist,
+)
+
+
+class TestDescrptSeA(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+
+    def test_self_consistency(
+        self,
+    ):
+        rng = np.random.default_rng()
+        nf, nloc, nnei = self.nlist.shape
+        davg = rng.normal(size=(self.nt, nnei, 4))
+        dstd = rng.normal(size=(self.nt, nnei, 4))
+        dstd = 0.1 + np.abs(dstd)
+
+        em0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel)
+        em0.davg = davg
+        em0.dstd = dstd
+        em1 = DescrptSeA.deserialize(em0.serialize())
+        mm0 = em0.call(self.coord_ext, self.atype_ext, self.nlist)
+        mm1 = em1.call(self.coord_ext, self.atype_ext, self.nlist)
+        for ii in [0, 1, 4]:
+            np.testing.assert_allclose(mm0[ii], mm1[ii])
diff --git a/source/tests/common/dpmodel/test_dp_atomic_model.py b/source/tests/common/dpmodel/test_dp_atomic_model.py
new file mode 100644
index 0000000000..c69de6161d
--- /dev/null
+++ b/source/tests/common/dpmodel/test_dp_atomic_model.py
@@ -0,0 +1,162 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import itertools
+import unittest
+
+import numpy as np
+
+from deepmd.dpmodel.atomic_model import (
+    DPAtomicModel,
+)
+from deepmd.dpmodel.descriptor import (
+    DescrptSeA,
+)
+from deepmd.dpmodel.fitting import (
+    InvarFitting,
+)
+
+from .case_single_frame_with_nlist import (
+    TestCaseSingleFrameWithNlist,
+    TestCaseSingleFrameWithNlistWithVirtual,
+)
+
+
+class TestDPAtomicModel(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+
+    def test_self_consistency(
+        self,
+    ):
+        ds = DescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        )
+        ft = InvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        )
+        type_map = ["foo", "bar"]
+
+        for atom_excl, pair_excl in itertools.product([[], [1]], [[], [[0, 1]]]):
+            md0 = DPAtomicModel(ds, ft, type_map=type_map)
+            md0.reinit_atom_exclude(atom_excl)
+            md0.reinit_pair_exclude(pair_excl)
+            md1 = DPAtomicModel.deserialize(md0.serialize())
+
+            ret0 = md0.forward_common_atomic(self.coord_ext, self.atype_ext, self.nlist)
+            ret1 = md1.forward_common_atomic(self.coord_ext, self.atype_ext, self.nlist)
+
+            np.testing.assert_allclose(ret0["energy"], ret1["energy"])
+
+    def test_excl_consistency(self):
+        type_map = ["foo", "bar"]
+
+        # test the case of exclusion
+        for atom_excl, pair_excl in itertools.product([[], [1]], [[], [[0, 1]]]):
+            ds = DescrptSeA(
+                self.rcut,
+                self.rcut_smth,
+                self.sel,
+            )
+            ft = InvarFitting(
+                "energy",
+                self.nt,
+                ds.get_dim_out(),
+                1,
+                mixed_types=ds.mixed_types(),
+            )
+            md0 = DPAtomicModel(
+                ds,
+                ft,
+                type_map=type_map,
+            )
+            md1 = DPAtomicModel.deserialize(md0.serialize())
+
+            md0.reinit_atom_exclude(atom_excl)
+            md0.reinit_pair_exclude(pair_excl)
+            # hacking!
+            md1.descriptor.reinit_exclude(pair_excl)
+            md1.fitting.reinit_exclude(atom_excl)
+
+            # check energy consistency
+            args = [self.coord_ext, self.atype_ext, self.nlist]
+            ret0 = md0.forward_common_atomic(*args)
+            ret1 = md1.forward_common_atomic(*args)
+            np.testing.assert_allclose(
+                ret0["energy"],
+                ret1["energy"],
+            )
+
+            # check output def
+            out_names = [vv.name for vv in md0.atomic_output_def().get_data().values()]
+            self.assertEqual(out_names, ["energy", "mask"])
+            if atom_excl != []:
+                for ii in md0.atomic_output_def().get_data().values():
+                    if ii.name == "mask":
+                        self.assertEqual(ii.shape, [1])
+                        self.assertFalse(ii.reduciable)
+                        self.assertFalse(ii.r_differentiable)
+                        self.assertFalse(ii.c_differentiable)
+
+            # check mask
+            if atom_excl == []:
+                pass
+            elif atom_excl == [1]:
+                self.assertIn("mask", ret0.keys())
+                expected = np.array([1, 1, 0], dtype=int)
+                expected = np.concatenate(
+                    [expected, expected[self.perm[: self.nloc]]]
+                ).reshape(2, 3)
+                np.testing.assert_array_equal(ret0["mask"], expected)
+            else:
+                raise ValueError(f"not expected atom_excl {atom_excl}")
+
+
+class TestDPAtomicModelVirtualConsistency(unittest.TestCase):
+    def setUp(self):
+        self.case0 = TestCaseSingleFrameWithNlist()
+        self.case1 = TestCaseSingleFrameWithNlistWithVirtual()
+        self.case0.setUp()
+        self.case1.setUp()
+
+    def test_virtual_consistency(self):
+        nf, _, _ = self.case0.nlist.shape
+        ds = DescrptSeA(
+            self.case0.rcut,
+            self.case0.rcut_smth,
+            self.case0.sel,
+        )
+        ft = InvarFitting(
+            "energy",
+            self.case0.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        )
+        type_map = ["foo", "bar"]
+        md1 = DPAtomicModel(ds, ft, type_map=type_map)
+
+        args0 = [self.case0.coord_ext, self.case0.atype_ext, self.case0.nlist]
+        # args0 = [np.array(ii) for ii in args0]
+        args1 = [self.case1.coord_ext, self.case1.atype_ext, self.case1.nlist]
+        # args1 = [np.array(ii) for ii in args1]
+
+        ret0 = md1.forward_common_atomic(*args0)
+        ret1 = md1.forward_common_atomic(*args1)
+
+        for dd in range(self.case0.nf):
+            np.testing.assert_allclose(
+                ret0["energy"][dd],
+                ret1["energy"][dd, self.case1.get_real_mapping[dd], :],
+            )
+        expected_mask = np.array(
+            [
+                [1, 0, 1, 1],
+                [1, 1, 0, 1],
+            ]
+        )
+        np.testing.assert_equal(ret1["mask"], expected_mask)
diff --git a/source/tests/common/dpmodel/test_dp_model.py b/source/tests/common/dpmodel/test_dp_model.py
new file mode 100644
index 0000000000..9121c7cd07
--- /dev/null
+++ b/source/tests/common/dpmodel/test_dp_model.py
@@ -0,0 +1,151 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+
+from deepmd.dpmodel.descriptor import (
+    DescrptSeA,
+)
+from deepmd.dpmodel.fitting import (
+    InvarFitting,
+)
+from deepmd.dpmodel.model import (
+    DPModel,
+)
+
+from .case_single_frame_with_nlist import (
+    TestCaseSingleFrameWithNlist,
+    TestCaseSingleFrameWithoutNlist,
+)
+
+
+class TestDPModelLower(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+
+    def test_self_consistency(
+        self,
+    ):
+        nf, nloc, nnei = self.nlist.shape
+        ds = DescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        )
+        ft = InvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        )
+        type_map = ["foo", "bar"]
+        md0 = DPModel(ds, ft, type_map=type_map)
+        md1 = DPModel.deserialize(md0.serialize())
+
+        ret0 = md0.call_lower(self.coord_ext, self.atype_ext, self.nlist)
+        ret1 = md1.call_lower(self.coord_ext, self.atype_ext, self.nlist)
+
+        np.testing.assert_allclose(ret0["energy"], ret1["energy"])
+        np.testing.assert_allclose(ret0["energy_redu"], ret1["energy_redu"])
+
+    def test_prec_consistency(self):
+        rng = np.random.default_rng()
+        nf, nloc, nnei = self.nlist.shape
+        ds = DescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        )
+        ft = InvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        )
+        nfp, nap = 2, 3
+        type_map = ["foo", "bar"]
+        # fparam, aparam are converted to coordinate precision by model
+        fparam = rng.normal(size=[self.nf, nfp])
+        aparam = rng.normal(size=[self.nf, nloc, nap])
+
+        md1 = DPModel(ds, ft, type_map=type_map)
+
+        args64 = [self.coord_ext, self.atype_ext, self.nlist]
+        args64[0] = args64[0].astype(np.float64)
+        args32 = [self.coord_ext, self.atype_ext, self.nlist]
+        args32[0] = args32[0].astype(np.float32)
+
+        model_l_ret_64 = md1.call_lower(*args64, fparam=fparam, aparam=aparam)
+        model_l_ret_32 = md1.call_lower(*args32, fparam=fparam, aparam=aparam)
+
+        for ii in model_l_ret_32.keys():
+            if model_l_ret_32[ii] is None:
+                continue
+            if ii[-4:] == "redu":
+                self.assertEqual(model_l_ret_32[ii].dtype, np.float64)
+            else:
+                self.assertEqual(model_l_ret_32[ii].dtype, np.float32)
+            if ii != "mask":
+                self.assertEqual(model_l_ret_64[ii].dtype, np.float64)
+            else:
+                self.assertEqual(model_l_ret_64[ii].dtype, np.int32)
+            np.testing.assert_allclose(
+                model_l_ret_32[ii],
+                model_l_ret_64[ii],
+            )
+
+
+class TestDPModel(unittest.TestCase, TestCaseSingleFrameWithoutNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithoutNlist.setUp(self)
+
+    def test_prec_consistency(self):
+        rng = np.random.default_rng()
+        nf, nloc = self.atype.shape
+        ds = DescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        )
+        ft = InvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        )
+        nfp, nap = 2, 3
+        type_map = ["foo", "bar"]
+        # fparam, aparam are converted to coordinate precision by model
+        fparam = rng.normal(size=[self.nf, nfp])
+        aparam = rng.normal(size=[self.nf, nloc, nap])
+
+        md1 = DPModel(ds, ft, type_map=type_map)
+
+        args64 = [self.coord, self.atype, self.cell]
+        args64[0] = args64[0].astype(np.float64)
+        args64[2] = args64[2].astype(np.float64)
+        args32 = [self.coord, self.atype, self.cell]
+        args32[0] = args32[0].astype(np.float32)
+        args32[2] = args32[2].astype(np.float32)
+
+        model_l_ret_64 = md1.call(*args64, fparam=fparam, aparam=aparam)
+        model_l_ret_32 = md1.call(*args32, fparam=fparam, aparam=aparam)
+
+        for ii in model_l_ret_32.keys():
+            if model_l_ret_32[ii] is None:
+                continue
+            if ii[-4:] == "redu":
+                self.assertEqual(model_l_ret_32[ii].dtype, np.float64)
+            else:
+                self.assertEqual(model_l_ret_32[ii].dtype, np.float32)
+            if ii != "mask":
+                self.assertEqual(model_l_ret_64[ii].dtype, np.float64)
+            else:
+                self.assertEqual(model_l_ret_64[ii].dtype, np.int32)
+            np.testing.assert_allclose(
+                model_l_ret_32[ii],
+                model_l_ret_64[ii],
+            )
diff --git a/source/tests/common/dpmodel/test_env_mat.py b/source/tests/common/dpmodel/test_env_mat.py
new file mode 100644
index 0000000000..7e1ce7cddd
--- /dev/null
+++ b/source/tests/common/dpmodel/test_env_mat.py
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+
+from deepmd.dpmodel.utils import (
+    EnvMat,
+)
+
+from .case_single_frame_with_nlist import (
+    TestCaseSingleFrameWithNlist,
+)
+
+
+class TestEnvMat(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+
+    def test_self_consistency(
+        self,
+    ):
+        rng = np.random.default_rng()
+        nf, nloc, nnei = self.nlist.shape
+        davg = rng.normal(size=(self.nt, nnei, 4))
+        dstd = rng.normal(size=(self.nt, nnei, 4))
+        dstd = 0.1 + np.abs(dstd)
+        em0 = EnvMat(self.rcut, self.rcut_smth)
+        em1 = EnvMat.deserialize(em0.serialize())
+        mm0, ww0 = em0.call(self.coord_ext, self.atype_ext, self.nlist, davg, dstd)
+        mm1, ww1 = em1.call(self.coord_ext, self.atype_ext, self.nlist, davg, dstd)
+        np.testing.assert_allclose(mm0, mm1)
+        np.testing.assert_allclose(ww0, ww1)
diff --git a/source/tests/common/dpmodel/test_exclusion_mask.py b/source/tests/common/dpmodel/test_exclusion_mask.py
new file mode 100644
index 0000000000..a6fdce317a
--- /dev/null
+++ b/source/tests/common/dpmodel/test_exclusion_mask.py
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+
+from deepmd.dpmodel.utils.exclude_mask import (
+    AtomExcludeMask,
+    PairExcludeMask,
+)
+
+from .case_single_frame_with_nlist import (
+    TestCaseSingleFrameWithNlist,
+)
+
+
+class TestAtomExcludeMask(unittest.TestCase):
+    def test_build_type_exclude_mask(self):
+        nf = 2
+        nt = 3
+        exclude_types = [0, 2]
+        atype = np.array(
+            [
+                [0, 2, 1, 2, 0, 1, 0],
+                [1, 2, 0, 0, 2, 2, 1],
+            ],
+            dtype=np.int32,
+        ).reshape([nf, -1])
+        expected_mask = np.array(
+            [
+                [0, 0, 1, 0, 0, 1, 0],
+                [1, 0, 0, 0, 0, 0, 1],
+            ]
+        ).reshape([nf, -1])
+        des = AtomExcludeMask(nt, exclude_types=exclude_types)
+        mask = des.build_type_exclude_mask(atype)
+        np.testing.assert_equal(mask, expected_mask)
+
+
+# to be merged with the tf test case
+class TestPairExcludeMask(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+
+    def test_build_type_exclude_mask(self):
+        exclude_types = [[0, 1]]
+        expected_mask = np.array(
+            [
+                [1, 1, 1, 1, 1, 0, 1],
+                [1, 1, 1, 1, 1, 0, 1],
+                [0, 0, 1, 1, 1, 1, 1],
+                [0, 0, 1, 1, 1, 1, 1],
+                [1, 1, 1, 1, 1, 0, 1],
+                [1, 1, 1, 1, 1, 0, 1],
+            ]
+        ).reshape(self.nf, self.nloc, sum(self.sel))
+        des = PairExcludeMask(self.nt, exclude_types=exclude_types)
+        mask = des.build_type_exclude_mask(
+            self.nlist,
+            self.atype_ext,
+        )
+        np.testing.assert_equal(mask, expected_mask)
diff --git a/source/tests/common/dpmodel/test_fitting_invar_fitting.py b/source/tests/common/dpmodel/test_fitting_invar_fitting.py
new file mode 100644
index 0000000000..87eeb9e06b
--- /dev/null
+++ b/source/tests/common/dpmodel/test_fitting_invar_fitting.py
@@ -0,0 +1,173 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import itertools
+import unittest
+
+import numpy as np
+
+from deepmd.dpmodel.descriptor import (
+    DescrptSeA,
+)
+from deepmd.dpmodel.fitting import (
+    InvarFitting,
+)
+
+from .case_single_frame_with_nlist import (
+    TestCaseSingleFrameWithNlist,
+)
+
+
+class TestInvarFitting(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+
+    def test_self_consistency(
+        self,
+    ):
+        rng = np.random.default_rng()
+        nf, nloc, nnei = self.nlist.shape
+        ds = DescrptSeA(self.rcut, self.rcut_smth, self.sel)
+        dd = ds.call(self.coord_ext, self.atype_ext, self.nlist)
+        atype = self.atype_ext[:, :nloc]
+
+        for (
+            mixed_types,
+            od,
+            nfp,
+            nap,
+            et,
+        ) in itertools.product(
+            [True, False],
+            [1, 2],
+            [0, 3],
+            [0, 4],
+            [[], [0], [1]],
+        ):
+            ifn0 = InvarFitting(
+                "energy",
+                self.nt,
+                ds.dim_out,
+                od,
+                numb_fparam=nfp,
+                numb_aparam=nap,
+                mixed_types=mixed_types,
+                exclude_types=et,
+            )
+            ifn1 = InvarFitting.deserialize(ifn0.serialize())
+            if nfp > 0:
+                ifp = rng.normal(size=(self.nf, nfp))
+            else:
+                ifp = None
+            if nap > 0:
+                iap = rng.normal(size=(self.nf, self.nloc, nap))
+            else:
+                iap = None
+            ret0 = ifn0(dd[0], atype, fparam=ifp, aparam=iap)
+            ret1 = ifn1(dd[0], atype, fparam=ifp, aparam=iap)
+            np.testing.assert_allclose(ret0["energy"], ret1["energy"])
+            sel_set = set(ifn0.get_sel_type())
+            exclude_set = set(et)
+            self.assertEqual(sel_set | exclude_set, set(range(self.nt)))
+            self.assertEqual(sel_set & exclude_set, set())
+
+    def test_mask(self):
+        nf, nloc, nnei = self.nlist.shape
+        ds = DescrptSeA(self.rcut, self.rcut_smth, self.sel)
+        dd = ds.call(self.coord_ext, self.atype_ext, self.nlist)
+        atype = self.atype_ext[:, :nloc]
+        od = 2
+        mixed_types = True
+        # exclude type 1
+        et = [1]
+        ifn0 = InvarFitting(
+            "energy",
+            self.nt,
+            ds.dim_out,
+            od,
+            mixed_types=mixed_types,
+            exclude_types=et,
+        )
+        ret0 = ifn0(dd[0], atype)
+        # atom index 2 is of type 1 that is excluded
+        zero_idx = 2
+        np.testing.assert_allclose(
+            ret0["energy"][0, zero_idx, :],
+            np.zeros_like(ret0["energy"][0, zero_idx, :]),
+        )
+        zero_idx = 0
+        np.testing.assert_allclose(
+            ret0["energy"][1, zero_idx, :],
+            np.zeros_like(ret0["energy"][1, zero_idx, :]),
+        )
+
+    def test_self_exception(
+        self,
+    ):
+        rng = np.random.default_rng()
+        nf, nloc, nnei = self.nlist.shape
+        ds = DescrptSeA(self.rcut, self.rcut_smth, self.sel)
+        dd = ds.call(self.coord_ext, self.atype_ext, self.nlist)
+        atype = self.atype_ext[:, :nloc]
+
+        for (
+            mixed_types,
+            od,
+            nfp,
+            nap,
+        ) in itertools.product(
+            [True, False],
+            [1, 2],
+            [0, 3],
+            [0, 4],
+        ):
+            ifn0 = InvarFitting(
+                "energy",
+                self.nt,
+                ds.dim_out,
+                od,
+                numb_fparam=nfp,
+                numb_aparam=nap,
+                mixed_types=mixed_types,
+            )
+
+            if nfp > 0:
+                ifp = rng.normal(size=(self.nf, nfp))
+            else:
+                ifp = None
+            if nap > 0:
+                iap = rng.normal(size=(self.nf, self.nloc, nap))
+            else:
+                iap = None
+            with self.assertRaises(ValueError) as context:
+                ret0 = ifn0(dd[0][:, :, :-2], atype, fparam=ifp, aparam=iap)
+                self.assertIn("input descriptor", context.exception)
+
+            if nfp > 0:
+                ifp = rng.normal(size=(self.nf, nfp - 1))
+                with self.assertRaises(ValueError) as context:
+                    ret0 = ifn0(dd[0], atype, fparam=ifp, aparam=iap)
+                    self.assertIn("input fparam", context.exception)
+
+            if nap > 0:
+                iap = rng.normal(size=(self.nf, self.nloc, nap - 1))
+                with self.assertRaises(ValueError) as context:
+                    ifn0(dd[0], atype, fparam=ifp, aparam=iap)
+                    self.assertIn("input aparam", context.exception)
+
+    def test_get_set(self):
+        ifn0 = InvarFitting(
+            "energy",
+            self.nt,
+            3,
+            1,
+        )
+        rng = np.random.default_rng()
+        foo = rng.normal([3, 4])
+        for ii in [
+            "bias_atom_e",
+            "fparam_avg",
+            "fparam_inv_std",
+            "aparam_avg",
+            "aparam_inv_std",
+        ]:
+            ifn0[ii] = foo
+            np.testing.assert_allclose(foo, ifn0[ii])
diff --git a/source/tests/common/dpmodel/test_linear_atomic_model.py b/source/tests/common/dpmodel/test_linear_atomic_model.py
new file mode 100644
index 0000000000..832d1de106
--- /dev/null
+++ b/source/tests/common/dpmodel/test_linear_atomic_model.py
@@ -0,0 +1,173 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+from unittest.mock import (
+    patch,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.atomic_model import (
+    DPAtomicModel,
+)
+from deepmd.dpmodel.atomic_model.linear_atomic_model import (
+    DPZBLLinearEnergyAtomicModel,
+)
+from deepmd.dpmodel.atomic_model.pairtab_atomic_model import (
+    PairTabAtomicModel,
+)
+from deepmd.dpmodel.descriptor.se_e2_a import (
+    DescrptSeA,
+)
+from deepmd.dpmodel.fitting.invar_fitting import (
+    InvarFitting,
+)
+
+
+class TestWeightCalculation(unittest.TestCase):
+    @patch("numpy.loadtxt")
+    def test_pairwise(self, mock_loadtxt):
+        file_path = "dummy_path"
+        mock_loadtxt.return_value = np.array(
+            [
+                [0.05, 1.0, 2.0, 3.0],
+                [0.1, 0.8, 1.6, 2.4],
+                [0.15, 0.5, 1.0, 1.5],
+                [0.2, 0.25, 0.4, 0.75],
+                [0.25, 0.0, 0.0, 0.0],
+            ]
+        )
+        extended_atype = np.array([[0, 0]])
+        nlist = np.array([[[1], [-1]]])
+
+        ds = DescrptSeA(
+            rcut_smth=0.3,
+            rcut=0.4,
+            sel=[3],
+        )
+        ft = InvarFitting(
+            "energy",
+            2,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        )
+
+        type_map = ["foo", "bar"]
+        zbl_model = PairTabAtomicModel(
+            tab_file=file_path, rcut=0.3, sel=2, type_map=type_map
+        )
+        dp_model = DPAtomicModel(ds, ft, type_map=type_map)
+
+        wgt_model = DPZBLLinearEnergyAtomicModel(
+            dp_model,
+            zbl_model,
+            sw_rmin=0.1,
+            sw_rmax=0.25,
+            type_map=type_map,
+        )
+        wgt_res = []
+        for dist in np.linspace(0.05, 0.3, 10):
+            extended_coord = np.array(
+                [
+                    [
+                        [0.0, 0.0, 0.0],
+                        [0.0, dist, 0.0],
+                    ],
+                ]
+            )
+
+            wgt_model.forward_atomic(extended_coord, extended_atype, nlist)
+
+            wgt_res.append(wgt_model.zbl_weight)
+        results = np.stack(wgt_res).reshape(10, 2)
+        excepted_res = np.array(
+            [
+                [1.0, 0.0],
+                [1.0, 0.0],
+                [0.9995, 0.0],
+                [0.9236, 0.0],
+                [0.6697, 0.0],
+                [0.3303, 0.0],
+                [0.0764, 0.0],
+                [0.0005, 0.0],
+                [0.0, 0.0],
+                [0.0, 0.0],
+            ],
+        )
+        np.testing.assert_allclose(results, excepted_res, rtol=0.0001, atol=0.0001)
+
+
+class TestIntegration(unittest.TestCase):
+    @patch("numpy.loadtxt")
+    def setUp(self, mock_loadtxt):
+        self.nloc = 3
+        self.nall = 4
+        self.nf, self.nt = 1, 2
+        self.coord_ext = np.array(
+            [
+                [0, 0, 0],
+                [0, 1, 0],
+                [0, 0, 1],
+                [0, -2, 0],
+            ],
+            dtype=np.float64,
+        ).reshape([1, self.nall * 3])
+        self.atype_ext = np.array([0, 0, 1, 0], dtype=int).reshape([1, self.nall])
+        self.sel = [5, 2]
+        self.nlist = np.array(
+            [
+                [1, 3, -1, -1, -1, 2, -1],
+                [0, -1, -1, -1, -1, 2, -1],
+                [0, 1, -1, -1, -1, -1, -1],
+            ],
+            dtype=int,
+        ).reshape([1, self.nloc, sum(self.sel)])
+        self.rcut_smth = 0.4
+        self.rcut = 2.2
+
+        file_path = "dummy_path"
+        mock_loadtxt.return_value = np.array(
+            [
+                [0.005, 1.0, 2.0, 3.0],
+                [0.01, 0.8, 1.6, 2.4],
+                [0.015, 0.5, 1.0, 1.5],
+                [0.02, 0.25, 0.4, 0.75],
+            ]
+        )
+        ds = DescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        )
+        ft = InvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        )
+        type_map = ["foo", "bar"]
+        dp_model = DPAtomicModel(ds, ft, type_map=type_map)
+        zbl_model = PairTabAtomicModel(
+            file_path, self.rcut, sum(self.sel), type_map=type_map
+        )
+        self.md0 = DPZBLLinearEnergyAtomicModel(
+            dp_model,
+            zbl_model,
+            sw_rmin=0.1,
+            sw_rmax=0.25,
+            type_map=type_map,
+        )
+        self.md1 = DPZBLLinearEnergyAtomicModel.deserialize(self.md0.serialize())
+
+    def test_self_consistency(self):
+        ret0 = self.md0.forward_atomic(self.coord_ext, self.atype_ext, self.nlist)
+        ret1 = self.md1.forward_atomic(self.coord_ext, self.atype_ext, self.nlist)
+        np.testing.assert_allclose(
+            ret0["energy"],
+            ret1["energy"],
+        )
+
+
+if __name__ == "__main__":
+    unittest.main(warnings="ignore")
diff --git a/source/tests/common/dpmodel/test_neighbor_stat.py b/source/tests/common/dpmodel/test_neighbor_stat.py
new file mode 100644
index 0000000000..2a9296057b
--- /dev/null
+++ b/source/tests/common/dpmodel/test_neighbor_stat.py
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import shutil
+import unittest
+
+import dpdata
+import numpy as np
+
+from deepmd.entrypoints.neighbor_stat import (
+    neighbor_stat,
+)
+
+
+def gen_sys(nframes):
+    natoms = 1000
+    data = {}
+    X, Y, Z = np.mgrid[0:2:3j, 0:2:3j, 0:2:3j]
+    positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T  # + 0.1
+    data["coords"] = np.repeat(positions[np.newaxis, :, :], nframes, axis=0)
+    data["forces"] = np.random.default_rng().random([nframes, natoms, 3])
+    data["cells"] = np.array([3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 3.0]).reshape(
+        1, 3, 3
+    )
+    data["energies"] = np.random.default_rng().random([nframes, 1])
+    data["atom_names"] = ["TYPE"]
+    data["atom_numbs"] = [27]
+    data["atom_types"] = np.repeat(0, 27)
+    return data
+
+
+class TestNeighborStat(unittest.TestCase):
+    def setUp(self):
+        data0 = gen_sys(1)
+        sys0 = dpdata.LabeledSystem()
+        sys0.data = data0
+        sys0.to_deepmd_npy("system_0", set_size=1)
+
+    def tearDown(self):
+        shutil.rmtree("system_0")
+
+    def test_neighbor_stat(self):
+        for rcut in (0.0, 1.0, 2.0, 4.0):
+            for mixed_type in (True, False):
+                with self.subTest(rcut=rcut, mixed_type=mixed_type):
+                    rcut += 1e-3  # prevent numerical errors
+                    min_nbor_dist, max_nbor_size = neighbor_stat(
+                        system="system_0",
+                        rcut=rcut,
+                        type_map=["TYPE", "NO_THIS_TYPE"],
+                        mixed_type=mixed_type,
+                        backend="numpy",
+                    )
+                    upper = np.ceil(rcut) + 1
+                    X, Y, Z = np.mgrid[-upper:upper, -upper:upper, -upper:upper]
+                    positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T
+                    # distance to (0,0,0)
+                    distance = np.linalg.norm(positions, axis=1)
+                    expected_neighbors = np.count_nonzero(
+                        np.logical_and(distance > 0, distance <= rcut)
+                    )
+                    self.assertAlmostEqual(min_nbor_dist, 1.0, 6)
+                    ret = [expected_neighbors]
+                    if not mixed_type:
+                        ret.append(0)
+                    np.testing.assert_array_equal(max_nbor_size, ret)
diff --git a/source/tests/test_model_format_utils.py b/source/tests/common/dpmodel/test_network.py
similarity index 78%
rename from source/tests/test_model_format_utils.py
rename to source/tests/common/dpmodel/test_network.py
index f588647096..047eee501c 100644
--- a/source/tests/test_model_format_utils.py
+++ b/source/tests/common/dpmodel/test_network.py
@@ -8,10 +8,8 @@
 
 import numpy as np
 
-from deepmd_utils.model_format import (
-    DescrptSeA,
+from deepmd.dpmodel.utils import (
     EmbeddingNet,
-    EnvMat,
     FittingNet,
     NativeLayer,
     NativeNet,
@@ -57,7 +55,7 @@ def test_shape_error(self):
         self.b1 = np.full((3,), 4.0)
         self.idt0 = np.full((2,), 4.0)
         with self.assertRaises(ValueError) as context:
-            network = NativeLayer.deserialize(
+            NativeLayer.deserialize(
                 {
                     "activation_function": "tanh",
                     "resnet": True,
@@ -66,7 +64,7 @@ def test_shape_error(self):
             )
             assert "not equalt to shape of b" in context.exception
         with self.assertRaises(ValueError) as context:
-            network = NativeLayer.deserialize(
+            NativeLayer.deserialize(
                 {
                     "activation_function": "tanh",
                     "resnet": True,
@@ -136,7 +134,7 @@ def test_deserialize(self):
 
     def test_shape_error(self):
         with self.assertRaises(ValueError) as context:
-            network = NativeNet.deserialize(
+            NativeNet.deserialize(
                 {
                     "layers": [
                         {
@@ -265,7 +263,7 @@ def test_zero_dim(self):
         )
 
 
-class TestDPModel(unittest.TestCase):
+class TestSaveLoadDPModel(unittest.TestCase):
     def setUp(self) -> None:
         self.w = np.full((3, 2), 3.0)
         self.b = np.full((3,), 4.0)
@@ -284,10 +282,10 @@ def setUp(self) -> None:
                 },
             ],
         }
-        self.filename = "test_dp_model_format.dp"
+        self.filename = "test_dp_dpmodel.dp"
 
     def test_save_load_model(self):
-        save_dp_model(self.filename, deepcopy(self.model_dict))
+        save_dp_model(self.filename, {"model": deepcopy(self.model_dict)})
         model = load_dp_model(self.filename)
         np.testing.assert_equal(model["model"], self.model_dict)
         assert "software" in model
@@ -296,75 +294,3 @@ def test_save_load_model(self):
     def tearDown(self) -> None:
         if os.path.exists(self.filename):
             os.remove(self.filename)
-
-
-class TestCaseSingleFrameWithNlist:
-    def setUp(self):
-        # nloc == 3, nall == 4
-        self.nloc = 3
-        self.nall = 4
-        self.nf, self.nt = 1, 2
-        self.coord_ext = np.array(
-            [
-                [0, 0, 0],
-                [0, 1, 0],
-                [0, 0, 1],
-                [0, -2, 0],
-            ],
-            dtype=np.float64,
-        ).reshape([1, self.nall * 3])
-        self.atype_ext = np.array([0, 0, 1, 0], dtype=int).reshape([1, self.nall])
-        # sel = [5, 2]
-        self.sel = [5, 2]
-        self.nlist = np.array(
-            [
-                [1, 3, -1, -1, -1, 2, -1],
-                [0, -1, -1, -1, -1, 2, -1],
-                [0, 1, -1, -1, -1, 0, -1],
-            ],
-            dtype=int,
-        ).reshape([1, self.nloc, sum(self.sel)])
-        self.rcut = 0.4
-        self.rcut_smth = 2.2
-
-
-class TestEnvMat(unittest.TestCase, TestCaseSingleFrameWithNlist):
-    def setUp(self):
-        TestCaseSingleFrameWithNlist.setUp(self)
-
-    def test_self_consistency(
-        self,
-    ):
-        rng = np.random.default_rng()
-        nf, nloc, nnei = self.nlist.shape
-        davg = rng.normal(size=(self.nt, nnei, 4))
-        dstd = rng.normal(size=(self.nt, nnei, 4))
-        dstd = 0.1 + np.abs(dstd)
-        em0 = EnvMat(self.rcut, self.rcut_smth)
-        em1 = EnvMat.deserialize(em0.serialize())
-        mm0, ww0 = em0.call(self.coord_ext, self.atype_ext, self.nlist, davg, dstd)
-        mm1, ww1 = em1.call(self.coord_ext, self.atype_ext, self.nlist, davg, dstd)
-        np.testing.assert_allclose(mm0, mm1)
-        np.testing.assert_allclose(ww0, ww1)
-
-
-class TestDescrptSeA(unittest.TestCase, TestCaseSingleFrameWithNlist):
-    def setUp(self):
-        TestCaseSingleFrameWithNlist.setUp(self)
-
-    def test_self_consistency(
-        self,
-    ):
-        rng = np.random.default_rng()
-        nf, nloc, nnei = self.nlist.shape
-        davg = rng.normal(size=(self.nt, nnei, 4))
-        dstd = rng.normal(size=(self.nt, nnei, 4))
-        dstd = 0.1 + np.abs(dstd)
-
-        em0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel)
-        em0.davg = davg
-        em0.dstd = dstd
-        em1 = DescrptSeA.deserialize(em0.serialize())
-        mm0 = em0.call(self.coord_ext, self.atype_ext, self.nlist)
-        mm1 = em1.call(self.coord_ext, self.atype_ext, self.nlist)
-        np.testing.assert_allclose(mm0, mm1)
diff --git a/source/tests/common/dpmodel/test_nlist.py b/source/tests/common/dpmodel/test_nlist.py
new file mode 100644
index 0000000000..ee8a7139e7
--- /dev/null
+++ b/source/tests/common/dpmodel/test_nlist.py
@@ -0,0 +1,302 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+
+from deepmd.dpmodel.descriptor import (
+    DescrptSeA,
+)
+from deepmd.dpmodel.fitting import (
+    InvarFitting,
+)
+from deepmd.dpmodel.model import (
+    DPModel,
+)
+from deepmd.dpmodel.utils import (
+    build_multiple_neighbor_list,
+    build_neighbor_list,
+    extend_coord_with_ghosts,
+    get_multiple_nlist_key,
+    inter2phys,
+)
+
+
+class TestDPModelFormatNlist(unittest.TestCase):
+    def setUp(self):
+        # nloc == 3, nall == 4
+        self.nloc = 3
+        self.nall = 5
+        self.nf, self.nt = 1, 2
+        self.coord_ext = np.array(
+            [
+                [0, 0, 0],
+                [0, 1, 0],
+                [0, 0, 1],
+                [0, -2, 0],
+                [2.3, 0, 0],
+            ],
+            dtype=np.float64,
+        ).reshape([1, self.nall * 3])
+        # sel = [5, 2]
+        self.sel = [5, 2]
+        self.expected_nlist = np.array(
+            [
+                [1, 3, -1, -1, -1, 2, -1],
+                [0, -1, -1, -1, -1, 2, -1],
+                [0, 1, -1, -1, -1, -1, -1],
+            ],
+            dtype=int,
+        ).reshape([1, self.nloc, sum(self.sel)])
+        self.atype_ext = np.array([0, 0, 1, 0, 1], dtype=int).reshape([1, self.nall])
+        self.rcut_smth = 0.4
+        self.rcut = 2.1
+
+        nf, nloc, nnei = self.expected_nlist.shape
+        ds = DescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        )
+        ft = InvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        )
+        type_map = ["foo", "bar"]
+        self.md = DPModel(ds, ft, type_map=type_map)
+
+    def test_nlist_eq(self):
+        # n_nnei == nnei
+        nlist = np.array(
+            [
+                [1, 3, -1, -1, -1, 2, -1],
+                [0, -1, -1, -1, -1, 2, -1],
+                [0, 1, -1, -1, -1, -1, -1],
+            ],
+            dtype=np.int64,
+        ).reshape([1, self.nloc, -1])
+        nlist1 = self.md.format_nlist(
+            self.coord_ext,
+            self.atype_ext,
+            nlist,
+        )
+        np.testing.assert_allclose(self.expected_nlist, nlist1)
+
+    def test_nlist_st(self):
+        # n_nnei < nnei
+        nlist = np.array(
+            [
+                [1, 3, -1, 2],
+                [0, -1, -1, 2],
+                [0, 1, -1, -1],
+            ],
+            dtype=np.int64,
+        ).reshape([1, self.nloc, -1])
+        nlist1 = self.md.format_nlist(
+            self.coord_ext,
+            self.atype_ext,
+            nlist,
+        )
+        np.testing.assert_allclose(self.expected_nlist, nlist1)
+
+    def test_nlist_lt(self):
+        # n_nnei > nnei
+        nlist = np.array(
+            [
+                [1, 3, -1, -1, -1, 2, -1, -1, 4],
+                [0, -1, 4, -1, -1, 2, -1, 3, -1],
+                [0, 1, -1, -1, -1, 4, -1, -1, 3],
+            ],
+            dtype=np.int64,
+        ).reshape([1, self.nloc, -1])
+        nlist1 = self.md.format_nlist(
+            self.coord_ext,
+            self.atype_ext,
+            nlist,
+        )
+        np.testing.assert_allclose(self.expected_nlist, nlist1)
+
+
+dtype = np.float64
+
+
+class TestNeighList(unittest.TestCase):
+    def setUp(self):
+        self.nf = 3
+        self.nloc = 3
+        self.ns = 5 * 5 * 3
+        self.nall = self.ns * self.nloc
+        self.cell = np.array([[1, 0, 0], [0.4, 0.8, 0], [0.1, 0.3, 2.1]], dtype=dtype)
+        self.icoord = np.array([[0, 0, 0], [0, 0, 0], [0.5, 0.5, 0.1]], dtype=dtype)
+        self.atype = np.array([-1, 0, 1], dtype=np.int32)
+        [self.cell, self.icoord, self.atype] = [
+            np.expand_dims(ii, 0) for ii in [self.cell, self.icoord, self.atype]
+        ]
+        self.coord = inter2phys(self.icoord, self.cell).reshape([-1, self.nloc * 3])
+        self.cell = self.cell.reshape([-1, 9])
+        [self.cell, self.coord, self.atype] = [
+            np.tile(ii, [self.nf, 1]) for ii in [self.cell, self.coord, self.atype]
+        ]
+        self.rcut = 1.01
+        self.prec = 1e-10
+        self.nsel = [10, 10]
+        self.ref_nlist = np.array(
+            [
+                [-1] * sum(self.nsel),
+                [1, 1, 1, 1, 1, 1, -1, -1, -1, -1, 2, 2, 2, 2, -1, -1, -1, -1, -1, -1],
+                [1, 1, 1, 1, -1, -1, -1, -1, -1, -1, 2, 2, 2, 2, 2, 2, -1, -1, -1, -1],
+            ]
+        )
+
+    def test_build_notype(self):
+        ecoord, eatype, mapping = extend_coord_with_ghosts(
+            self.coord, self.atype, self.cell, self.rcut
+        )
+        nlist = build_neighbor_list(
+            ecoord,
+            eatype,
+            self.nloc,
+            self.rcut,
+            sum(self.nsel),
+            distinguish_types=False,
+        )
+        np.testing.assert_allclose(nlist[0], nlist[1])
+        nlist_mask = nlist[0] == -1
+        nlist_loc = mapping[0][nlist[0]]
+        nlist_loc[nlist_mask] = -1
+        np.testing.assert_allclose(
+            np.sort(nlist_loc, axis=-1),
+            np.sort(self.ref_nlist, axis=-1),
+        )
+
+    def test_build_type(self):
+        ecoord, eatype, mapping = extend_coord_with_ghosts(
+            self.coord, self.atype, self.cell, self.rcut
+        )
+        nlist = build_neighbor_list(
+            ecoord,
+            eatype,
+            self.nloc,
+            self.rcut,
+            self.nsel,
+            distinguish_types=True,
+        )
+        np.testing.assert_allclose(nlist[0], nlist[1])
+        nlist_mask = nlist[0] == -1
+        nlist_loc = mapping[0][nlist[0]]
+        nlist_loc[nlist_mask] = -1
+        for ii in range(2):
+            np.testing.assert_allclose(
+                np.sort(np.split(nlist_loc, self.nsel, axis=-1)[ii], axis=-1),
+                np.sort(np.split(self.ref_nlist, self.nsel, axis=-1)[ii], axis=-1),
+            )
+
+    def test_build_multiple_nlist(self):
+        rcuts = [1.01, 2.01]
+        nsels = [20, 80]
+        ecoord, eatype, mapping = extend_coord_with_ghosts(
+            self.coord, self.atype, self.cell, max(rcuts)
+        )
+        nlist1 = build_neighbor_list(
+            ecoord,
+            eatype,
+            self.nloc,
+            rcuts[1],
+            nsels[1] - 1,
+            distinguish_types=False,
+        )
+        pad = -1 * np.ones([self.nf, self.nloc, 1], dtype=nlist1.dtype)
+        nlist2 = np.concatenate([nlist1, pad], axis=-1)
+        nlist0 = build_neighbor_list(
+            ecoord,
+            eatype,
+            self.nloc,
+            rcuts[0],
+            nsels[0],
+            distinguish_types=False,
+        )
+        nlists = build_multiple_neighbor_list(ecoord, nlist1, rcuts, nsels)
+        for dd in range(2):
+            self.assertEqual(
+                nlists[get_multiple_nlist_key(rcuts[dd], nsels[dd])].shape[-1],
+                nsels[dd],
+            )
+        np.testing.assert_allclose(
+            nlists[get_multiple_nlist_key(rcuts[0], nsels[0])],
+            nlist0,
+        )
+        np.testing.assert_allclose(
+            nlists[get_multiple_nlist_key(rcuts[1], nsels[1])],
+            nlist2,
+        )
+
+    def test_extend_coord(self):
+        ecoord, eatype, mapping = extend_coord_with_ghosts(
+            self.coord, self.atype, self.cell, self.rcut
+        )
+        # expected ncopy x nloc
+        self.assertEqual(list(ecoord.shape), [self.nf, self.nall * 3])
+        self.assertEqual(list(eatype.shape), [self.nf, self.nall])
+        self.assertEqual(list(mapping.shape), [self.nf, self.nall])
+        # check the nloc part is identical with original coord
+        np.testing.assert_allclose(
+            ecoord[:, : self.nloc * 3], self.coord, rtol=self.prec, atol=self.prec
+        )
+        # check the shift vectors are aligned with grid
+        shift_vec = (
+            ecoord.reshape([-1, self.ns, self.nloc, 3])
+            - self.coord.reshape([-1, self.nloc, 3])[:, None, :, :]
+        )
+        shift_vec = shift_vec.reshape([-1, self.nall, 3])
+        # hack!!! assumes identical cell across frames
+        shift_vec = np.matmul(
+            shift_vec, np.linalg.inv(self.cell.reshape([self.nf, 3, 3])[0])
+        )
+        # nf x nall x 3
+        shift_vec = np.round(shift_vec)
+        # check: identical shift vecs
+        np.testing.assert_allclose(
+            shift_vec[0], shift_vec[1], rtol=self.prec, atol=self.prec
+        )
+        # check: shift idx aligned with grid
+        mm, cc = np.unique(shift_vec[0][:, 0], return_counts=True)
+        np.testing.assert_allclose(
+            mm,
+            np.array([-2, -1, 0, 1, 2], dtype=dtype),
+            rtol=self.prec,
+            atol=self.prec,
+        )
+        np.testing.assert_allclose(
+            cc,
+            np.array([self.ns * self.nloc // 5] * 5, dtype=np.int32),
+            rtol=self.prec,
+            atol=self.prec,
+        )
+        mm, cc = np.unique(shift_vec[1][:, 1], return_counts=True)
+        np.testing.assert_allclose(
+            mm,
+            np.array([-2, -1, 0, 1, 2], dtype=dtype),
+            rtol=self.prec,
+            atol=self.prec,
+        )
+        np.testing.assert_allclose(
+            cc,
+            np.array([self.ns * self.nloc // 5] * 5, dtype=np.int32),
+            rtol=self.prec,
+            atol=self.prec,
+        )
+        mm, cc = np.unique(shift_vec[1][:, 2], return_counts=True)
+        np.testing.assert_allclose(
+            mm,
+            np.array([-1, 0, 1], dtype=dtype),
+            rtol=self.prec,
+            atol=self.prec,
+        )
+        np.testing.assert_allclose(
+            cc,
+            np.array([self.ns * self.nloc // 3] * 3, dtype=np.int32),
+            rtol=self.prec,
+            atol=self.prec,
+        )
diff --git a/source/tests/common/dpmodel/test_output_def.py b/source/tests/common/dpmodel/test_output_def.py
new file mode 100644
index 0000000000..27fa54ea4c
--- /dev/null
+++ b/source/tests/common/dpmodel/test_output_def.py
@@ -0,0 +1,761 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+from typing import (
+    List,
+)
+
+import numpy as np
+
+from deepmd.dpmodel import (
+    FittingOutputDef,
+    ModelOutputDef,
+    NativeOP,
+    OutputVariableDef,
+    fitting_check_output,
+    model_check_output,
+)
+from deepmd.dpmodel.output_def import (
+    OutputVariableCategory,
+    OutputVariableOperation,
+    apply_operation,
+    check_var,
+)
+
+
+class VariableDef:
+    def __init__(
+        self,
+        name: str,
+        shape: List[int],
+        atomic: bool = True,
+    ):
+        self.name = name
+        self.shape = list(shape)
+        self.atomic = atomic
+
+
+class TestDef(unittest.TestCase):
+    def test_model_output_def(self):
+        defs = [
+            OutputVariableDef(
+                "energy",
+                [1],
+                reduciable=True,
+                r_differentiable=True,
+                c_differentiable=True,
+                atomic=True,
+                r_hessian=False,
+            ),
+            OutputVariableDef(
+                "energy2",
+                [1],
+                reduciable=True,
+                r_differentiable=True,
+                c_differentiable=True,
+                atomic=True,
+                r_hessian=True,
+            ),
+            OutputVariableDef(
+                "energy3",
+                [1],
+                reduciable=True,
+                r_differentiable=True,
+                c_differentiable=True,
+                atomic=True,
+                magnetic=True,
+            ),
+            OutputVariableDef(
+                "dos",
+                [10],
+                reduciable=True,
+                r_differentiable=False,
+                c_differentiable=False,
+                atomic=True,
+            ),
+            OutputVariableDef(
+                "foo",
+                [3],
+                reduciable=False,
+                r_differentiable=False,
+                c_differentiable=False,
+                atomic=True,
+            ),
+        ]
+        # fitting definition
+        fd = FittingOutputDef(defs)
+        expected_keys = ["energy", "energy2", "energy3", "dos", "foo"]
+        self.assertEqual(
+            set(expected_keys),
+            set(fd.keys()),
+        )
+        # shape
+        self.assertEqual(fd["energy"].shape, [1])
+        self.assertEqual(fd["energy2"].shape, [1])
+        self.assertEqual(fd["energy3"].shape, [1])
+        self.assertEqual(fd["dos"].shape, [10])
+        self.assertEqual(fd["foo"].shape, [3])
+        # atomic
+        self.assertEqual(fd["energy"].atomic, True)
+        self.assertEqual(fd["energy2"].atomic, True)
+        self.assertEqual(fd["energy3"].atomic, True)
+        self.assertEqual(fd["dos"].atomic, True)
+        self.assertEqual(fd["foo"].atomic, True)
+        # reduce
+        self.assertEqual(fd["energy"].reduciable, True)
+        self.assertEqual(fd["energy2"].reduciable, True)
+        self.assertEqual(fd["energy3"].reduciable, True)
+        self.assertEqual(fd["dos"].reduciable, True)
+        self.assertEqual(fd["foo"].reduciable, False)
+        # derivative
+        self.assertEqual(fd["energy"].r_differentiable, True)
+        self.assertEqual(fd["energy"].c_differentiable, True)
+        self.assertEqual(fd["energy"].r_hessian, False)
+        self.assertEqual(fd["energy2"].r_differentiable, True)
+        self.assertEqual(fd["energy2"].c_differentiable, True)
+        self.assertEqual(fd["energy2"].r_hessian, True)
+        self.assertEqual(fd["energy3"].r_differentiable, True)
+        self.assertEqual(fd["energy3"].c_differentiable, True)
+        self.assertEqual(fd["energy3"].r_hessian, False)
+        self.assertEqual(fd["dos"].r_differentiable, False)
+        self.assertEqual(fd["foo"].r_differentiable, False)
+        self.assertEqual(fd["dos"].c_differentiable, False)
+        self.assertEqual(fd["foo"].c_differentiable, False)
+        # magnetic
+        self.assertEqual(fd["energy"].magnetic, False)
+        self.assertEqual(fd["energy2"].magnetic, False)
+        self.assertEqual(fd["energy3"].magnetic, True)
+        self.assertEqual(fd["dos"].magnetic, False)
+        self.assertEqual(fd["foo"].magnetic, False)
+        # model definition
+        md = ModelOutputDef(fd)
+        expected_keys = [
+            "energy",
+            "energy2",
+            "energy3",
+            "dos",
+            "foo",
+            "energy_redu",
+            "energy_derv_r",
+            "energy_derv_c",
+            "energy_derv_c_redu",
+            "energy2_redu",
+            "energy2_derv_r",
+            "energy2_derv_r_derv_r",
+            "energy2_derv_c",
+            "energy2_derv_c_redu",
+            "energy3_redu",
+            "energy3_derv_r",
+            "energy3_derv_c",
+            "energy3_derv_c_redu",
+            "energy3_derv_r_mag",
+            "energy3_derv_c_mag",
+            "dos_redu",
+            "mask",
+            "mask_mag",
+        ]
+        self.assertEqual(
+            set(expected_keys),
+            set(md.keys()),
+        )
+        for kk in expected_keys:
+            self.assertEqual(md[kk].name, kk)
+        # reduce
+        self.assertEqual(md["energy"].reduciable, True)
+        self.assertEqual(md["energy2"].reduciable, True)
+        self.assertEqual(md["energy3"].reduciable, True)
+        self.assertEqual(md["dos"].reduciable, True)
+        self.assertEqual(md["foo"].reduciable, False)
+        # derivative
+        self.assertEqual(md["energy"].r_differentiable, True)
+        self.assertEqual(md["energy"].c_differentiable, True)
+        self.assertEqual(md["energy"].r_hessian, False)
+        self.assertEqual(md["energy2"].r_differentiable, True)
+        self.assertEqual(md["energy2"].c_differentiable, True)
+        self.assertEqual(md["energy2"].r_hessian, True)
+        self.assertEqual(md["energy3"].r_differentiable, True)
+        self.assertEqual(md["energy3"].c_differentiable, True)
+        self.assertEqual(md["energy3"].r_hessian, False)
+        self.assertEqual(md["dos"].r_differentiable, False)
+        self.assertEqual(md["foo"].r_differentiable, False)
+        self.assertEqual(md["dos"].c_differentiable, False)
+        self.assertEqual(md["foo"].c_differentiable, False)
+        # shape
+        self.assertEqual(md["mask"].shape, [1])
+        self.assertEqual(md["mask_mag"].shape, [1])
+        self.assertEqual(md["energy"].shape, [1])
+        self.assertEqual(md["energy2"].shape, [1])
+        self.assertEqual(md["energy3"].shape, [1])
+        self.assertEqual(md["dos"].shape, [10])
+        self.assertEqual(md["foo"].shape, [3])
+        self.assertEqual(md["energy_redu"].shape, [1])
+        self.assertEqual(md["energy_derv_r"].shape, [1, 3])
+        self.assertEqual(md["energy_derv_c"].shape, [1, 9])
+        self.assertEqual(md["energy_derv_c_redu"].shape, [1, 9])
+        self.assertEqual(md["energy2_redu"].shape, [1])
+        self.assertEqual(md["energy2_derv_r"].shape, [1, 3])
+        self.assertEqual(md["energy2_derv_c"].shape, [1, 9])
+        self.assertEqual(md["energy2_derv_c_redu"].shape, [1, 9])
+        self.assertEqual(md["energy2_derv_r_derv_r"].shape, [1, 3, 3])
+        self.assertEqual(md["energy3_derv_r"].shape, [1, 3])
+        self.assertEqual(md["energy3_derv_c"].shape, [1, 9])
+        self.assertEqual(md["energy3_derv_c_redu"].shape, [1, 9])
+        self.assertEqual(md["energy3_derv_r_mag"].shape, [1, 3])
+        self.assertEqual(md["energy3_derv_c_mag"].shape, [1, 9])
+        # atomic
+        self.assertEqual(md["energy"].atomic, True)
+        self.assertEqual(md["energy2"].atomic, True)
+        self.assertEqual(md["dos"].atomic, True)
+        self.assertEqual(md["foo"].atomic, True)
+        self.assertEqual(md["energy_redu"].atomic, False)
+        self.assertEqual(md["energy_derv_r"].atomic, True)
+        self.assertEqual(md["energy_derv_c"].atomic, True)
+        self.assertEqual(md["energy_derv_c_redu"].atomic, False)
+        self.assertEqual(md["energy2_redu"].atomic, False)
+        self.assertEqual(md["energy2_derv_r"].atomic, True)
+        self.assertEqual(md["energy2_derv_c"].atomic, True)
+        self.assertEqual(md["energy2_derv_c_redu"].atomic, False)
+        self.assertEqual(md["energy2_derv_r_derv_r"].atomic, True)
+        self.assertEqual(md["energy3_redu"].atomic, False)
+        self.assertEqual(md["energy3_derv_r"].atomic, True)
+        self.assertEqual(md["energy3_derv_c"].atomic, True)
+        self.assertEqual(md["energy3_derv_r_mag"].atomic, True)
+        self.assertEqual(md["energy3_derv_c_mag"].atomic, True)
+        self.assertEqual(md["energy3_derv_c_redu"].atomic, False)
+        # category
+        self.assertEqual(md["mask"].category, OutputVariableCategory.OUT)
+        self.assertEqual(md["mask_mag"].category, OutputVariableCategory.OUT)
+        self.assertEqual(md["energy"].category, OutputVariableCategory.OUT)
+        self.assertEqual(md["energy2"].category, OutputVariableCategory.OUT)
+        self.assertEqual(md["energy3"].category, OutputVariableCategory.OUT)
+        self.assertEqual(md["dos"].category, OutputVariableCategory.OUT)
+        self.assertEqual(md["foo"].category, OutputVariableCategory.OUT)
+        self.assertEqual(md["energy_redu"].category, OutputVariableCategory.REDU)
+        self.assertEqual(md["energy_derv_r"].category, OutputVariableCategory.DERV_R)
+        self.assertEqual(md["energy_derv_c"].category, OutputVariableCategory.DERV_C)
+        self.assertEqual(
+            md["energy_derv_c_redu"].category, OutputVariableCategory.DERV_C_REDU
+        )
+        self.assertEqual(md["energy2_redu"].category, OutputVariableCategory.REDU)
+        self.assertEqual(md["energy2_derv_r"].category, OutputVariableCategory.DERV_R)
+        self.assertEqual(md["energy2_derv_c"].category, OutputVariableCategory.DERV_C)
+        self.assertEqual(
+            md["energy2_derv_c_redu"].category, OutputVariableCategory.DERV_C_REDU
+        )
+        self.assertEqual(
+            md["energy2_derv_r_derv_r"].category, OutputVariableCategory.DERV_R_DERV_R
+        )
+        self.assertEqual(md["energy3_redu"].category, OutputVariableCategory.REDU)
+        self.assertEqual(md["energy3_derv_r"].category, OutputVariableCategory.DERV_R)
+        self.assertEqual(md["energy3_derv_c"].category, OutputVariableCategory.DERV_C)
+        self.assertEqual(
+            md["energy3_derv_c_redu"].category, OutputVariableCategory.DERV_C_REDU
+        )
+        self.assertEqual(
+            md["energy3_derv_r_mag"].category, OutputVariableCategory.DERV_R
+        )
+        self.assertEqual(
+            md["energy3_derv_c_mag"].category, OutputVariableCategory.DERV_C
+        )
+        # flag
+        OVO = OutputVariableOperation
+        self.assertEqual(md["energy"].category & OVO.REDU, 0)
+        self.assertEqual(md["energy"].category & OVO.DERV_R, 0)
+        self.assertEqual(md["energy"].category & OVO.DERV_C, 0)
+        self.assertEqual(md["energy2"].category & OVO.REDU, 0)
+        self.assertEqual(md["energy2"].category & OVO.DERV_R, 0)
+        self.assertEqual(md["energy2"].category & OVO.DERV_C, 0)
+        self.assertEqual(md["energy3"].category & OVO.REDU, 0)
+        self.assertEqual(md["energy3"].category & OVO.DERV_R, 0)
+        self.assertEqual(md["energy3"].category & OVO.DERV_C, 0)
+        self.assertEqual(md["dos"].category & OVO.REDU, 0)
+        self.assertEqual(md["dos"].category & OVO.DERV_R, 0)
+        self.assertEqual(md["dos"].category & OVO.DERV_C, 0)
+        self.assertEqual(md["foo"].category & OVO.REDU, 0)
+        self.assertEqual(md["foo"].category & OVO.DERV_R, 0)
+        self.assertEqual(md["foo"].category & OVO.DERV_C, 0)
+        # flag: energy
+        self.assertEqual(
+            md["energy_redu"].category & OVO.REDU,
+            OVO.REDU,
+        )
+        self.assertEqual(md["energy_redu"].category & OVO.DERV_R, 0)
+        self.assertEqual(md["energy_redu"].category & OVO.DERV_C, 0)
+        self.assertEqual(md["energy_derv_r"].category & OVO.REDU, 0)
+        self.assertEqual(
+            md["energy_derv_r"].category & OVO.DERV_R,
+            OVO.DERV_R,
+        )
+        self.assertEqual(md["energy_derv_r"].category & OVO.DERV_C, 0)
+        self.assertEqual(md["energy_derv_c"].category & OVO.REDU, 0)
+        self.assertEqual(md["energy_derv_c"].category & OVO.DERV_R, 0)
+        self.assertEqual(
+            md["energy_derv_c"].category & OVO.DERV_C,
+            OVO.DERV_C,
+        )
+        self.assertEqual(
+            md["energy_derv_c_redu"].category & OVO.REDU,
+            OVO.REDU,
+        )
+        self.assertEqual(md["energy_derv_c_redu"].category & OVO.DERV_R, 0)
+        self.assertEqual(
+            md["energy_derv_c_redu"].category & OVO.DERV_C,
+            OVO.DERV_C,
+        )
+        # flag: energy2
+        kk = "energy2_redu"
+        self.assertEqual(md[kk].category & OVO.REDU, OVO.REDU)
+        self.assertEqual(md[kk].category & OVO.DERV_R, 0)
+        self.assertEqual(md[kk].category & OVO.DERV_C, 0)
+        self.assertEqual(md[kk].category & OVO._SEC_DERV_R, 0)
+        kk = "energy2_derv_r"
+        self.assertEqual(md[kk].category & OVO.REDU, 0)
+        self.assertEqual(md[kk].category & OVO.DERV_R, OVO.DERV_R)
+        self.assertEqual(md[kk].category & OVO.DERV_C, 0)
+        self.assertEqual(md[kk].category & OVO._SEC_DERV_R, 0)
+        kk = "energy2_derv_c"
+        self.assertEqual(md[kk].category & OVO.REDU, 0)
+        self.assertEqual(md[kk].category & OVO.DERV_R, 0)
+        self.assertEqual(md[kk].category & OVO.DERV_C, OVO.DERV_C)
+        self.assertEqual(md[kk].category & OVO._SEC_DERV_R, 0)
+        kk = "energy2_derv_c_redu"
+        self.assertEqual(md[kk].category & OVO.REDU, OVO.REDU)
+        self.assertEqual(md[kk].category & OVO.DERV_R, 0)
+        self.assertEqual(md[kk].category & OVO.DERV_C, OVO.DERV_C)
+        self.assertEqual(md[kk].category & OVO._SEC_DERV_R, 0)
+        kk = "energy2_derv_r_derv_r"
+        self.assertEqual(md[kk].category & OVO.REDU, 0)
+        self.assertEqual(md[kk].category & OVO.DERV_R, OVO.DERV_R)
+        self.assertEqual(md[kk].category & OVO.DERV_C, 0)
+        self.assertEqual(md[kk].category & OVO._SEC_DERV_R, OVO._SEC_DERV_R)
+        # flag: energy3
+        self.assertEqual(
+            md["energy3_redu"].category & OVO.REDU,
+            OVO.REDU,
+        )
+        self.assertEqual(md["energy3_redu"].category & OVO.DERV_R, 0)
+        self.assertEqual(md["energy3_redu"].category & OVO.DERV_C, 0)
+        self.assertEqual(md["energy3_derv_r"].category & OVO.REDU, 0)
+        self.assertEqual(
+            md["energy3_derv_r"].category & OVO.DERV_R,
+            OVO.DERV_R,
+        )
+        self.assertEqual(md["energy3_derv_r"].category & OVO.DERV_C, 0)
+        self.assertEqual(md["energy3_derv_c"].category & OVO.REDU, 0)
+        self.assertEqual(md["energy3_derv_c"].category & OVO.DERV_R, 0)
+        self.assertEqual(
+            md["energy3_derv_c"].category & OVO.DERV_C,
+            OVO.DERV_C,
+        )
+        self.assertEqual(
+            md["energy3_derv_c_redu"].category & OVO.REDU,
+            OVO.REDU,
+        )
+        self.assertEqual(md["energy3_derv_c_redu"].category & OVO.DERV_R, 0)
+        self.assertEqual(
+            md["energy3_derv_c_redu"].category & OVO.DERV_C,
+            OVO.DERV_C,
+        )
+        self.assertEqual(md["energy3_derv_r_mag"].category & OVO.REDU, 0)
+        self.assertEqual(
+            md["energy3_derv_r_mag"].category & OVO.DERV_R,
+            OVO.DERV_R,
+        )
+        self.assertEqual(md["energy3_derv_r_mag"].category & OVO.DERV_C, 0)
+        self.assertEqual(md["energy3_derv_c_mag"].category & OVO.REDU, 0)
+        self.assertEqual(md["energy3_derv_c_mag"].category & OVO.DERV_R, 0)
+        self.assertEqual(
+            md["energy3_derv_c_mag"].category & OVO.DERV_C,
+            OVO.DERV_C,
+        )
+        # apply_operation: energy
+        self.assertEqual(
+            apply_operation(md["energy"], OVO.REDU),
+            md["energy_redu"].category,
+        )
+        self.assertEqual(
+            apply_operation(md["energy"], OVO.DERV_R),
+            md["energy_derv_r"].category,
+        )
+        self.assertEqual(
+            apply_operation(md["energy"], OVO.DERV_C),
+            md["energy_derv_c"].category,
+        )
+        self.assertEqual(
+            apply_operation(md["energy_derv_c"], OVO.REDU),
+            md["energy_derv_c_redu"].category,
+        )
+        # apply_operation: energy2
+        self.assertEqual(
+            apply_operation(md["energy2"], OVO.REDU),
+            md["energy2_redu"].category,
+        )
+        self.assertEqual(
+            apply_operation(md["energy2"], OVO.DERV_R),
+            md["energy2_derv_r"].category,
+        )
+        self.assertEqual(
+            apply_operation(md["energy2"], OVO.DERV_C),
+            md["energy2_derv_c"].category,
+        )
+        self.assertEqual(
+            apply_operation(md["energy2_derv_c"], OVO.REDU),
+            md["energy2_derv_c_redu"].category,
+        )
+        self.assertEqual(
+            apply_operation(md["energy2_derv_r"], OVO.DERV_R),
+            md["energy2_derv_r_derv_r"].category,
+        )
+        # apply_operation: energy3
+        self.assertEqual(
+            apply_operation(md["energy3"], OVO.REDU),
+            md["energy3_redu"].category,
+        )
+        self.assertEqual(
+            apply_operation(md["energy3"], OVO.DERV_R),
+            md["energy3_derv_r"].category,
+        )
+        self.assertEqual(
+            apply_operation(md["energy3"], OVO.DERV_C),
+            md["energy3_derv_c"].category,
+        )
+        self.assertEqual(
+            apply_operation(md["energy3_derv_c"], OVO.REDU),
+            md["energy3_derv_c_redu"].category,
+        )
+        self.assertEqual(
+            apply_operation(md["energy3"], OVO.DERV_R),
+            md["energy3_derv_r_mag"].category,
+        )
+        self.assertEqual(
+            apply_operation(md["energy3"], OVO.DERV_C),
+            md["energy3_derv_c_mag"].category,
+        )
+        # raise ValueError
+        with self.assertRaises(ValueError):
+            apply_operation(md["energy_redu"], OVO.REDU)
+        with self.assertRaises(ValueError):
+            apply_operation(md["energy_derv_c"], OVO.DERV_C)
+        with self.assertRaises(ValueError):
+            apply_operation(md["energy_derv_c_redu"], OVO.REDU)
+        # raise ValueError
+        with self.assertRaises(ValueError):
+            apply_operation(md["energy2_redu"], OVO.REDU)
+        with self.assertRaises(ValueError):
+            apply_operation(md["energy2_derv_c"], OVO.DERV_C)
+        with self.assertRaises(ValueError):
+            apply_operation(md["energy2_derv_c_redu"], OVO.REDU)
+        with self.assertRaises(ValueError):
+            apply_operation(md["energy2_derv_r_derv_r"], OVO.DERV_R)
+        # raise ValueError
+        with self.assertRaises(ValueError):
+            apply_operation(md["energy3_redu"], OVO.REDU)
+        with self.assertRaises(ValueError):
+            apply_operation(md["energy3_derv_c"], OVO.DERV_C)
+        with self.assertRaises(ValueError):
+            apply_operation(md["energy3_derv_c_redu"], OVO.REDU)
+        with self.assertRaises(ValueError):
+            apply_operation(md["energy3_derv_c_mag"], OVO.DERV_C)
+        # hession
+        hession_cat = apply_operation(md["energy_derv_r"], OVO.DERV_R)
+        self.assertEqual(hession_cat & OVO.DERV_R, OVO.DERV_R)
+        self.assertEqual(
+            hession_cat & OVO._SEC_DERV_R,
+            OVO._SEC_DERV_R,
+        )
+        self.assertEqual(hession_cat, OutputVariableCategory.DERV_R_DERV_R)
+        hession_vardef = OutputVariableDef(
+            "energy_derv_r_derv_r", [1], False, False, category=hession_cat
+        )
+        with self.assertRaises(ValueError):
+            apply_operation(hession_vardef, OVO.DERV_R)
+
+    def test_no_raise_no_redu_deriv(self):
+        OutputVariableDef(
+            "energy",
+            [1],
+            reduciable=False,
+            r_differentiable=True,
+            c_differentiable=False,
+        )
+
+    def test_raise_requires_r_deriv(self):
+        with self.assertRaises(ValueError) as context:
+            OutputVariableDef(
+                "energy",
+                [1],
+                reduciable=True,
+                r_differentiable=False,
+                c_differentiable=True,
+            )
+
+    def test_raise_redu_not_atomic(self):
+        with self.assertRaises(ValueError) as context:
+            (OutputVariableDef("energy", [1], reduciable=True, atomic=False),)
+
+    def test_hessian_not_reducible(self):
+        with self.assertRaises(ValueError) as context:
+            (
+                OutputVariableDef(
+                    "energy",
+                    [1],
+                    reduciable=False,
+                    atomic=False,
+                    r_differentiable=True,
+                    r_hessian=True,
+                ),
+            )
+
+    def test_hessian_not_r_differentiable(self):
+        with self.assertRaises(ValueError) as context:
+            (
+                OutputVariableDef(
+                    "energy",
+                    [1],
+                    reduciable=True,
+                    atomic=False,
+                    r_differentiable=False,
+                    r_hessian=True,
+                ),
+            )
+
+    def test_energy_magnetic(self):
+        with self.assertRaises(ValueError) as context:
+            (
+                OutputVariableDef(
+                    "energy",
+                    [1],
+                    reduciable=False,
+                    atomic=False,
+                    r_differentiable=True,
+                    r_hessian=True,
+                    magnetic=True,
+                ),
+            )
+
+    def test_model_decorator(self):
+        nf = 2
+        nloc = 3
+        nall = 4
+
+        @model_check_output
+        class Foo(NativeOP):
+            def output_def(self):
+                defs = [
+                    OutputVariableDef(
+                        "energy",
+                        [1],
+                        reduciable=True,
+                        r_differentiable=True,
+                        c_differentiable=True,
+                    ),
+                ]
+                return ModelOutputDef(FittingOutputDef(defs))
+
+            def call(self):
+                return {
+                    "energy": np.zeros([nf, nloc, 1]),
+                    "energy_redu": np.zeros([nf, 1]),
+                    "energy_derv_r": np.zeros([nf, nall, 1, 3]),
+                    "energy_derv_c": np.zeros([nf, nall, 1, 9]),
+                }
+
+        ff = Foo()
+        ff()
+
+    def test_model_decorator_keyerror(self):
+        nf = 2
+        nloc = 3
+        nall = 4
+
+        @model_check_output
+        class Foo(NativeOP):
+            def __init__(self):
+                super().__init__()
+
+            def output_def(self):
+                defs = [
+                    OutputVariableDef(
+                        "energy",
+                        [1],
+                        reduciable=True,
+                        r_differentiable=True,
+                        c_differentiable=True,
+                    ),
+                ]
+                return ModelOutputDef(FittingOutputDef(defs))
+
+            def call(self):
+                return {
+                    "energy": np.zeros([nf, nloc, 1]),
+                    "energy_redu": np.zeros([nf, 1]),
+                    "energy_derv_c": np.zeros([nf, nall, 1, 9]),
+                }
+
+        ff = Foo()
+        with self.assertRaises(KeyError) as context:
+            ff()
+            self.assertIn("energy_derv_r", context.exception)
+
+    def test_model_decorator_shapeerror(self):
+        nf = 2
+        nloc = 3
+        nall = 4
+
+        @model_check_output
+        class Foo(NativeOP):
+            def __init__(
+                self,
+                shape_rd=[nf, 1],
+                shape_dr=[nf, nall, 1, 3],
+            ):
+                self.shape_rd, self.shape_dr = shape_rd, shape_dr
+
+            def output_def(self):
+                defs = [
+                    OutputVariableDef(
+                        "energy",
+                        [1],
+                        reduciable=True,
+                        r_differentiable=True,
+                        c_differentiable=True,
+                    ),
+                ]
+                return ModelOutputDef(FittingOutputDef(defs))
+
+            def call(self):
+                return {
+                    "energy": np.zeros([nf, nloc, 1]),
+                    "energy_redu": np.zeros(self.shape_rd),
+                    "energy_derv_r": np.zeros(self.shape_dr),
+                    "energy_derv_c": np.zeros([nf, nall, 1, 9]),
+                }
+
+        ff = Foo()
+        ff()
+        # shape of reduced energy
+        with self.assertRaises(ValueError) as context:
+            ff = Foo(shape_rd=[nf, nloc, 1])
+            ff()
+            self.assertIn("not matching", context.exception)
+        with self.assertRaises(ValueError) as context:
+            ff = Foo(shape_rd=[nf, 2])
+            ff()
+            self.assertIn("not matching", context.exception)
+        # shape of dr
+        with self.assertRaises(ValueError) as context:
+            ff = Foo(shape_dr=[nf, nloc, 1])
+            ff()
+            self.assertIn("not matching", context.exception)
+        with self.assertRaises(ValueError) as context:
+            ff = Foo(shape_dr=[nf, nloc, 1, 3, 3])
+            ff()
+            self.assertIn("not matching", context.exception)
+        with self.assertRaises(ValueError) as context:
+            ff = Foo(shape_dr=[nf, nloc, 1, 4])
+            ff()
+            self.assertIn("not matching", context.exception)
+
+    def test_fitting_decorator(self):
+        nf = 2
+        nloc = 3
+        nall = 4
+
+        @fitting_check_output
+        class Foo(NativeOP):
+            def output_def(self):
+                defs = [
+                    OutputVariableDef(
+                        "energy",
+                        [1],
+                        reduciable=True,
+                        r_differentiable=True,
+                        c_differentiable=True,
+                    ),
+                ]
+                return FittingOutputDef(defs)
+
+            def call(self):
+                return {
+                    "energy": np.zeros([nf, nloc, 1]),
+                }
+
+        ff = Foo()
+        ff()
+
+    def test_fitting_decorator_shapeerror(self):
+        nf = 2
+        nloc = 3
+
+        @fitting_check_output
+        class Foo(NativeOP):
+            def __init__(
+                self,
+                shape=[nf, nloc, 1],
+            ):
+                self.shape = shape
+
+            def output_def(self):
+                defs = [
+                    OutputVariableDef(
+                        "energy",
+                        [1],
+                        reduciable=True,
+                        r_differentiable=True,
+                        c_differentiable=True,
+                    ),
+                ]
+                return FittingOutputDef(defs)
+
+            def call(self):
+                return {
+                    "energy": np.zeros(self.shape),
+                }
+
+        ff = Foo()
+        ff()
+        # shape of reduced energy
+        with self.assertRaises(ValueError) as context:
+            ff = Foo(shape=[nf, 1])
+            ff()
+            self.assertIn("not matching", context.exception)
+        with self.assertRaises(ValueError) as context:
+            ff = Foo(shape=[nf, nloc, 2])
+            ff()
+            self.assertIn("not matching", context.exception)
+
+    def test_check_var(self):
+        var_def = VariableDef("foo", [2, 3], atomic=True)
+        with self.assertRaises(ValueError) as context:
+            check_var(np.zeros([2, 3, 4, 5, 6]), var_def)
+            self.assertIn("length not matching", context.exception)
+        with self.assertRaises(ValueError) as context:
+            check_var(np.zeros([2, 3, 4, 5]), var_def)
+            self.assertIn("shape not matching", context.exception)
+        check_var(np.zeros([2, 3, 2, 3]), var_def)
+
+        var_def = VariableDef("foo", [2, 3], atomic=False)
+        with self.assertRaises(ValueError) as context:
+            check_var(np.zeros([2, 3, 4, 5]), var_def)
+            self.assertIn("length not matching", context.exception)
+        with self.assertRaises(ValueError) as context:
+            check_var(np.zeros([2, 3, 4]), var_def)
+            self.assertIn("shape not matching", context.exception)
+        check_var(np.zeros([2, 2, 3]), var_def)
+
+        var_def = VariableDef("foo", [2, -1], atomic=True)
+        with self.assertRaises(ValueError) as context:
+            check_var(np.zeros([2, 3, 4, 5, 6]), var_def)
+            self.assertIn("length not matching", context.exception)
+        with self.assertRaises(ValueError) as context:
+            check_var(np.zeros([2, 3, 4, 5]), var_def)
+            self.assertIn("shape not matching", context.exception)
+        check_var(np.zeros([2, 3, 2, 8]), var_def)
+
+        var_def = VariableDef("foo", [2, -1], atomic=False)
+        with self.assertRaises(ValueError) as context:
+            check_var(np.zeros([2, 3, 4, 5]), var_def)
+            self.assertIn("length not matching", context.exception)
+        with self.assertRaises(ValueError) as context:
+            check_var(np.zeros([2, 3, 4]), var_def)
+            self.assertIn("shape not matching", context.exception)
+        check_var(np.zeros([2, 2, 8]), var_def)
diff --git a/source/tests/common/dpmodel/test_pairtab_atomic_model.py b/source/tests/common/dpmodel/test_pairtab_atomic_model.py
new file mode 100644
index 0000000000..e2866d3766
--- /dev/null
+++ b/source/tests/common/dpmodel/test_pairtab_atomic_model.py
@@ -0,0 +1,208 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+from unittest.mock import (
+    patch,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.atomic_model.pairtab_atomic_model import (
+    PairTabAtomicModel,
+)
+
+
+class TestPairTab(unittest.TestCase):
+    @patch("numpy.loadtxt")
+    def setUp(self, mock_loadtxt) -> None:
+        file_path = "dummy_path"
+        mock_loadtxt.return_value = np.array(
+            [
+                [0.005, 1.0, 2.0, 3.0],
+                [0.01, 0.8, 1.6, 2.4],
+                [0.015, 0.5, 1.0, 1.5],
+                [0.02, 0.25, 0.4, 0.75],
+            ]
+        )
+
+        self.model = PairTabAtomicModel(
+            tab_file=file_path, rcut=0.02, sel=2, type_map=["H", "O"]
+        )
+
+        self.extended_coord = np.array(
+            [
+                [
+                    [0.01, 0.01, 0.01],
+                    [0.01, 0.02, 0.01],
+                    [0.01, 0.01, 0.02],
+                    [0.02, 0.01, 0.01],
+                ],
+                [
+                    [0.01, 0.01, 0.01],
+                    [0.01, 0.02, 0.01],
+                    [0.01, 0.01, 0.02],
+                    [0.05, 0.01, 0.01],
+                ],
+            ]
+        )
+
+        # nframes=2, nall=4
+        self.extended_atype = np.array([[0, 1, 0, 1], [0, 0, 1, 1]])
+
+        # nframes=2, nloc=2, nnei=2
+        self.nlist = np.array([[[1, 2], [0, 2]], [[1, 2], [0, 3]]])
+
+    def test_without_mask(self):
+        result = self.model.forward_atomic(
+            self.extended_coord, self.extended_atype, self.nlist
+        )
+        expected_result = np.array([[[1.2000], [1.3614]], [[1.2000], [0.4000]]])
+
+        np.testing.assert_allclose(result["energy"], expected_result, 0.0001, 0.0001)
+
+    def test_with_mask(self):
+        self.nlist = np.array([[[1, -1], [0, 2]], [[1, 2], [0, 3]]])
+
+        result = self.model.forward_atomic(
+            self.extended_coord, self.extended_atype, self.nlist
+        )
+        expected_result = np.array([[[0.8000], [1.3614]], [[1.2000], [0.4000]]])
+
+        np.testing.assert_allclose(result["energy"], expected_result, 0.0001, 0.0001)
+
+    def test_deserialize(self):
+        model1 = PairTabAtomicModel.deserialize(self.model.serialize())
+        np.testing.assert_allclose(self.model.tab_data, model1.tab_data)
+        np.testing.assert_allclose(self.model.tab_info, model1.tab_info)
+
+        self.nlist = np.array([[[1, -1], [0, 2]], [[1, 2], [0, 3]]])
+        result = model1.forward_atomic(
+            self.extended_coord, self.extended_atype, self.nlist
+        )
+        expected_result = self.model.forward_atomic(
+            self.extended_coord, self.extended_atype, self.nlist
+        )
+
+        np.testing.assert_allclose(
+            result["energy"], expected_result["energy"], 0.0001, 0.0001
+        )
+
+
+class TestPairTabTwoAtoms(unittest.TestCase):
+    @patch("numpy.loadtxt")
+    def test_extrapolation_nonzero_rmax(self, mock_loadtxt) -> None:
+        """Scenarios to test.
+
+        rcut < rmax:
+            rr < rcut: use table values, or interpolate.
+            rr == rcut: use table values, or interpolate.
+            rr > rcut: should be 0
+        rcut == rmax:
+            rr < rcut: use table values, or interpolate.
+            rr == rcut: use table values, or interpolate.
+            rr > rcut: should be 0
+        rcut > rmax:
+            rr < rmax: use table values, or interpolate.
+            rr == rmax: use table values, or interpolate.
+            rmax < rr < rcut: extrapolate
+            rr >= rcut: should be 0
+
+        """
+        file_path = "dummy_path"
+        mock_loadtxt.return_value = np.array(
+            [
+                [0.005, 1.0],
+                [0.01, 0.8],
+                [0.015, 0.5],
+                [0.02, 0.25],
+            ]
+        )
+
+        # nframes=1, nall=2
+        extended_atype = np.array([[0, 0]])
+
+        # nframes=1, nloc=2, nnei=1
+        nlist = np.array([[[1], [-1]]])
+
+        results = []
+
+        for dist, rcut in zip(
+            [
+                0.01,
+                0.015,
+                0.020,
+                0.015,
+                0.02,
+                0.021,
+                0.015,
+                0.02,
+                0.021,
+                0.025,
+                0.026,
+                0.025,
+                0.025,
+                0.0216161,
+            ],
+            [
+                0.015,
+                0.015,
+                0.015,
+                0.02,
+                0.02,
+                0.02,
+                0.022,
+                0.022,
+                0.022,
+                0.025,
+                0.025,
+                0.03,
+                0.035,
+                0.025,
+            ],
+        ):
+            extended_coord = np.array(
+                [
+                    [
+                        [0.0, 0.0, 0.0],
+                        [0.0, dist, 0.0],
+                    ],
+                ]
+            )
+
+            model = PairTabAtomicModel(
+                tab_file=file_path, rcut=rcut, sel=2, type_map=["S"]
+            )
+            results.append(
+                model.forward_atomic(extended_coord, extended_atype, nlist)["energy"]
+            )
+
+        expected_result = np.stack(
+            [
+                np.array(
+                    [
+                        [
+                            [0.4, 0],
+                            [0.0, 0],
+                            [0.0, 0],
+                            [0.25, 0],
+                            [0, 0],
+                            [0, 0],
+                            [0.25, 0],
+                            [0.125, 0],
+                            [0.0922, 0],
+                            [0, 0],
+                            [0, 0],
+                            [0, 0],
+                            [0.0923, 0],
+                            [0.0713, 0],
+                        ]
+                    ]
+                )
+            ]
+        ).reshape(14, 2)
+        results = np.stack(results).reshape(14, 2)
+
+        np.testing.assert_allclose(results, expected_result, 0.0001, 0.0001)
+
+
+if __name__ == "__main__":
+    unittest.main(warnings="ignore")
diff --git a/source/tests/common/dpmodel/test_pairtab_preprocess.py b/source/tests/common/dpmodel/test_pairtab_preprocess.py
new file mode 100644
index 0000000000..da3b9251f7
--- /dev/null
+++ b/source/tests/common/dpmodel/test_pairtab_preprocess.py
@@ -0,0 +1,279 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+from unittest.mock import (
+    patch,
+)
+
+import numpy as np
+
+from deepmd.utils.pair_tab import (
+    PairTab,
+)
+
+
+class TestPairTabPreprocessExtrapolate(unittest.TestCase):
+    @patch("numpy.loadtxt")
+    def setUp(self, mock_loadtxt) -> None:
+        file_path = "dummy_path"
+        mock_loadtxt.return_value = np.array(
+            [
+                [0.005, 1.0, 2.0, 3.0],
+                [0.01, 0.8, 1.6, 2.4],
+                [0.015, 0.5, 1.0, 1.5],
+                [0.02, 0.25, 0.4, 0.75],
+            ]
+        )
+
+        self.tab1 = PairTab(filename=file_path, rcut=0.028)
+        self.tab2 = PairTab(filename=file_path, rcut=0.02)
+        self.tab3 = PairTab(filename=file_path, rcut=0.022)
+        self.tab4 = PairTab(filename=file_path, rcut=0.03)
+        self.tab5 = PairTab(filename=file_path, rcut=0.032)
+
+    def test_deserialize(self):
+        deserialized_tab = PairTab.deserialize(self.tab1.serialize())
+        np.testing.assert_allclose(self.tab1.vdata, deserialized_tab.vdata)
+        np.testing.assert_allclose(self.tab1.rmin, deserialized_tab.rmin)
+        np.testing.assert_allclose(self.tab1.rmax, deserialized_tab.rmax)
+        np.testing.assert_allclose(self.tab1.hh, deserialized_tab.hh)
+        np.testing.assert_allclose(self.tab1.ntypes, deserialized_tab.ntypes)
+        np.testing.assert_allclose(self.tab1.rcut, deserialized_tab.rcut)
+        np.testing.assert_allclose(self.tab1.nspline, deserialized_tab.nspline)
+        np.testing.assert_allclose(self.tab1.tab_info, deserialized_tab.tab_info)
+        np.testing.assert_allclose(self.tab1.tab_data, deserialized_tab.tab_data)
+
+    def test_preprocess(self):
+        np.testing.assert_allclose(
+            self.tab1.vdata,
+            np.array(
+                [
+                    [0.005, 1.0, 2.0, 3.0],
+                    [0.01, 0.8, 1.6, 2.4],
+                    [0.015, 0.5, 1.0, 1.5],
+                    [0.02, 0.25, 0.4, 0.75],
+                    [0.025, 0.0, 0.0, 0.0],
+                ]
+            ),
+            rtol=1e-04,
+            atol=1e-04,
+        )
+        np.testing.assert_allclose(
+            self.tab2.vdata,
+            np.array(
+                [
+                    [0.005, 1.0, 2.0, 3.0],
+                    [0.01, 0.8, 1.6, 2.4],
+                    [0.015, 0.5, 1.0, 1.5],
+                    [0.02, 0.25, 0.4, 0.75],
+                ]
+            ),
+            rtol=1e-04,
+            atol=1e-04,
+        )
+
+        # for this test case, the table does not decay to zero at rcut = 0.22,
+        # in the cubic spline code, we use a fixed size grid, if will be a problem if we introduce variable gird size.
+        # we will do post process to overwrite spline coefficient `a3`,`a2`,`a1`,`a0`, to ensure energy decays to `0`.
+        np.testing.assert_allclose(
+            self.tab3.vdata,
+            np.array(
+                [
+                    [0.005, 1.0, 2.0, 3.0],
+                    [0.01, 0.8, 1.6, 2.4],
+                    [0.015, 0.5, 1.0, 1.5],
+                    [0.02, 0.25, 0.4, 0.75],
+                    [0.025, 0.0, 0.0, 0.0],
+                ]
+            ),
+            rtol=1e-04,
+            atol=1e-04,
+        )
+
+        np.testing.assert_allclose(
+            self.tab4.vdata,
+            np.array(
+                [
+                    [0.005, 1.0, 2.0, 3.0],
+                    [0.01, 0.8, 1.6, 2.4],
+                    [0.015, 0.5, 1.0, 1.5],
+                    [0.02, 0.25, 0.4, 0.75],
+                    [0.025, 0.0, 0.0, 0.0],
+                ]
+            ),
+            rtol=1e-04,
+            atol=1e-04,
+        )
+
+        np.testing.assert_allclose(
+            self.tab5.vdata,
+            np.array(
+                [
+                    [0.005, 1.0, 2.0, 3.0],
+                    [0.01, 0.8, 1.6, 2.4],
+                    [0.015, 0.5, 1.0, 1.5],
+                    [0.02, 0.25, 0.4, 0.75],
+                    [0.025, 0.12468, 0.1992, 0.3741],
+                    [0.03, 0.0, 0.0, 0.0],
+                ]
+            ),
+            rtol=1e-04,
+            atol=1e-04,
+        )
+
+
+class TestPairTabPreprocessZero(unittest.TestCase):
+    @patch("numpy.loadtxt")
+    def setUp(self, mock_loadtxt) -> None:
+        file_path = "dummy_path"
+        mock_loadtxt.return_value = np.array(
+            [
+                [0.01, 0.8, 1.6, 2.4],
+                [0.015, 0.5, 1.0, 1.5],
+                [0.02, 0.25, 0.4, 0.75],
+                [0.025, 0.0, 0.0, 0.0],
+            ]
+        )
+
+        self.tab1 = PairTab(filename=file_path, rcut=0.023)
+        self.tab2 = PairTab(filename=file_path, rcut=0.025)
+        self.tab3 = PairTab(filename=file_path, rcut=0.028)
+        self.tab4 = PairTab(filename=file_path, rcut=0.033)
+
+    def test_preprocess(self):
+        np.testing.assert_allclose(
+            self.tab1.vdata,
+            np.array(
+                [
+                    [0.01, 0.8, 1.6, 2.4],
+                    [0.015, 0.5, 1.0, 1.5],
+                    [0.02, 0.25, 0.4, 0.75],
+                    [0.025, 0.0, 0.0, 0.0],
+                ]
+            ),
+        )
+        np.testing.assert_allclose(
+            self.tab2.vdata,
+            np.array(
+                [
+                    [0.01, 0.8, 1.6, 2.4],
+                    [0.015, 0.5, 1.0, 1.5],
+                    [0.02, 0.25, 0.4, 0.75],
+                    [0.025, 0.0, 0.0, 0.0],
+                ]
+            ),
+        )
+
+        np.testing.assert_allclose(
+            self.tab3.vdata,
+            np.array(
+                [
+                    [0.01, 0.8, 1.6, 2.4],
+                    [0.015, 0.5, 1.0, 1.5],
+                    [0.02, 0.25, 0.4, 0.75],
+                    [0.025, 0.0, 0.0, 0.0],
+                    [0.03, 0.0, 0.0, 0.0],
+                ]
+            ),
+        )
+
+        np.testing.assert_allclose(
+            self.tab4.vdata,
+            np.array(
+                [
+                    [0.01, 0.8, 1.6, 2.4],
+                    [0.015, 0.5, 1.0, 1.5],
+                    [0.02, 0.25, 0.4, 0.75],
+                    [0.025, 0.0, 0.0, 0.0],
+                    [0.03, 0.0, 0.0, 0.0],
+                    [0.035, 0.0, 0.0, 0.0],
+                ]
+            ),
+        )
+
+
+class TestPairTabPreprocessUneven(unittest.TestCase):
+    @patch("numpy.loadtxt")
+    def setUp(self, mock_loadtxt) -> None:
+        file_path = "dummy_path"
+        mock_loadtxt.return_value = np.array(
+            [
+                [0.005, 1.0, 2.0, 3.0],
+                [0.01, 0.8, 1.6, 2.4],
+                [0.015, 0.5, 1.0, 1.5],
+                [0.02, 0.25, 0.4, 0.75],
+                [0.025, 0.0, 0.1, 0.0],
+            ]
+        )
+
+        self.tab1 = PairTab(filename=file_path, rcut=0.025)
+        self.tab2 = PairTab(filename=file_path, rcut=0.028)
+        self.tab3 = PairTab(filename=file_path, rcut=0.03)
+        self.tab4 = PairTab(filename=file_path, rcut=0.037)
+
+    def test_preprocess(self):
+        np.testing.assert_allclose(
+            self.tab1.vdata,
+            np.array(
+                [
+                    [0.005, 1.0, 2.0, 3.0],
+                    [0.01, 0.8, 1.6, 2.4],
+                    [0.015, 0.5, 1.0, 1.5],
+                    [0.02, 0.25, 0.4, 0.75],
+                    [0.025, 0.0, 0.1, 0.0],
+                ]
+            ),
+            rtol=1e-04,
+            atol=1e-04,
+        )
+        np.testing.assert_allclose(
+            self.tab2.vdata,
+            np.array(
+                [
+                    [0.005, 1.0, 2.0, 3.0],
+                    [0.01, 0.8, 1.6, 2.4],
+                    [0.015, 0.5, 1.0, 1.5],
+                    [0.02, 0.25, 0.4, 0.75],
+                    [0.025, 0.0, 0.1, 0.0],
+                    [0.03, 0.0, 0.0, 0.0],
+                ]
+            ),
+            rtol=1e-04,
+            atol=1e-04,
+        )
+
+        np.testing.assert_allclose(
+            self.tab3.vdata,
+            np.array(
+                [
+                    [0.005, 1.0, 2.0, 3.0],
+                    [0.01, 0.8, 1.6, 2.4],
+                    [0.015, 0.5, 1.0, 1.5],
+                    [0.02, 0.25, 0.4, 0.75],
+                    [0.025, 0.0, 0.1, 0.0],
+                    [0.03, 0.0, 0.0, 0.0],
+                ]
+            ),
+            rtol=1e-04,
+            atol=1e-04,
+        )
+
+        np.testing.assert_allclose(
+            self.tab4.vdata,
+            np.array(
+                [
+                    [0.005, 1.0, 2.0, 3.0],
+                    [0.01, 0.8, 1.6, 2.4],
+                    [0.015, 0.5, 1.0, 1.5],
+                    [0.02, 0.25, 0.4, 0.75],
+                    [0.025, 0.0, 0.1, 0.0],
+                    [0.03, 0.0, 0.04963, 0.0],
+                    [0.035, 0.0, 0.0, 0.0],
+                ]
+            ),
+            rtol=1e-03,
+            atol=1e-03,
+        )
+
+
+if __name__ == "__main__":
+    unittest.main(warnings="ignore")
diff --git a/source/tests/common/dpmodel/test_region.py b/source/tests/common/dpmodel/test_region.py
new file mode 100644
index 0000000000..8043c8c985
--- /dev/null
+++ b/source/tests/common/dpmodel/test_region.py
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+
+from deepmd.dpmodel.utils import (
+    inter2phys,
+    to_face_distance,
+)
+
+
+class TestRegion(unittest.TestCase):
+    def setUp(self):
+        self.cell = np.array(
+            [[1, 0, 0], [0.4, 0.8, 0], [0.1, 0.3, 2.1]],
+        )
+        self.cell = np.reshape(self.cell, [1, 1, -1, 3])
+        self.cell = np.tile(self.cell, [4, 5, 1, 1])
+        self.prec = 1e-8
+
+    def test_inter_to_phys(self):
+        rng = np.random.default_rng()
+        inter = rng.normal(size=[4, 5, 3, 3])
+        phys = inter2phys(inter, self.cell)
+        for ii in range(4):
+            for jj in range(5):
+                expected_phys = np.matmul(inter[ii, jj], self.cell[ii, jj])
+                np.testing.assert_allclose(
+                    phys[ii, jj], expected_phys, rtol=self.prec, atol=self.prec
+                )
+
+    def test_to_face_dist(self):
+        cell0 = self.cell[0][0]
+        vol = np.linalg.det(cell0)
+        # area of surfaces xy, xz, yz
+        sxy = np.linalg.norm(np.cross(cell0[0], cell0[1]))
+        sxz = np.linalg.norm(np.cross(cell0[0], cell0[2]))
+        syz = np.linalg.norm(np.cross(cell0[1], cell0[2]))
+        # vol / area gives distance
+        dz = vol / sxy
+        dy = vol / sxz
+        dx = vol / syz
+        expected = np.array([dx, dy, dz])
+        dists = to_face_distance(self.cell)
+        for ii in range(4):
+            for jj in range(5):
+                np.testing.assert_allclose(
+                    dists[ii][jj], expected, rtol=self.prec, atol=self.prec
+                )
diff --git a/source/tests/test_argument_parser.py b/source/tests/common/test_argument_parser.py
similarity index 99%
rename from source/tests/test_argument_parser.py
rename to source/tests/common/test_argument_parser.py
index bb8dd9ed62..0b4f053aed 100644
--- a/source/tests/test_argument_parser.py
+++ b/source/tests/common/test_argument_parser.py
@@ -21,7 +21,7 @@
     Union,
 )
 
-from deepmd.entrypoints.main import (
+from deepmd.main import (
     get_ll,
     parse_args,
 )
diff --git a/source/tests/common/test_econf_embd.py b/source/tests/common/test_econf_embd.py
new file mode 100644
index 0000000000..97ac450c10
--- /dev/null
+++ b/source/tests/common/test_econf_embd.py
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+from deepmd.utils.econf_embd import (
+    electronic_configuration_embedding,
+    make_econf_embedding,
+)
+
+
+class TestEConfEmbd(unittest.TestCase):
+    def test_fe(self):
+        res = make_econf_embedding(["Fe"], flatten=False)["Fe"]
+        expected_res = {
+            (1, "s"): [2],
+            (2, "s"): [2],
+            (2, "p"): [2, 2, 2],
+            (3, "s"): [2],
+            (3, "p"): [2, 2, 2],
+            (3, "d"): [2, 1, 1, 1, 1],
+            (4, "s"): [2],
+            (4, "p"): [0, 0, 0],
+            (4, "d"): [0, 0, 0, 0, 0],
+            (4, "f"): [0, 0, 0, 0, 0, 0, 0],
+            (5, "s"): [0],
+            (5, "p"): [0, 0, 0],
+            (5, "d"): [0, 0, 0, 0, 0],
+            (5, "f"): [0, 0, 0, 0, 0, 0, 0],
+            (6, "s"): [0],
+            (6, "p"): [0, 0, 0],
+            (6, "d"): [0, 0, 0, 0, 0],
+            (7, "s"): [0],
+        }
+        self.assertDictEqual({kk: list(vv) for kk, vv in res.items()}, expected_res)
+
+    def test_fe_flatten(self):
+        res = make_econf_embedding(["Fe"], flatten=True)["Fe"]
+        # fmt: off
+        expected_res = [2,2,2,2,2,2,2,2,2,2,1,1,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+        # fmt: on
+        self.assertEqual(list(res), expected_res)
+
+    def test_dict(self):
+        res = electronic_configuration_embedding["Fe"]
+        # fmt: off
+        expected_res = [2,2,2,2,2,2,2,2,2,2,1,1,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+        # fmt: on
+        self.assertEqual(list(res), expected_res)
diff --git a/source/tests/test_examples.py b/source/tests/common/test_examples.py
similarity index 77%
rename from source/tests/test_examples.py
rename to source/tests/common/test_examples.py
index d50ca5fee1..6d5e34fedf 100644
--- a/source/tests/test_examples.py
+++ b/source/tests/common/test_examples.py
@@ -2,6 +2,7 @@
 """This module ensures input in the examples directory
 could pass the argument checking.
 """
+
 import unittest
 from pathlib import (
     Path,
@@ -14,7 +15,7 @@
     normalize,
 )
 
-p_examples = Path(__file__).parent.parent.parent / "examples"
+p_examples = Path(__file__).parent.parent.parent.parent / "examples"
 
 input_files = (
     p_examples / "water" / "se_e2_a" / "input.json",
@@ -33,15 +34,22 @@
     p_examples / "nopbc" / "train" / "input.json",
     p_examples / "water_tensor" / "dipole" / "dipole_input.json",
     p_examples / "water_tensor" / "polar" / "polar_input.json",
+    p_examples / "water_tensor" / "dipole" / "dipole_input_torch.json",
+    p_examples / "water_tensor" / "polar" / "polar_input_torch.json",
     p_examples / "water_multi_task" / "ener_dipole" / "input.json",
     p_examples / "fparam" / "train" / "input.json",
     p_examples / "fparam" / "train" / "input_aparam.json",
     p_examples / "zinc_protein" / "zinc_se_a_mask.json",
     p_examples / "dos" / "train" / "input.json",
-    p_examples / "spin" / "se_e2_a" / "input.json",
+    p_examples / "dos" / "train" / "input_torch.json",
+    p_examples / "spin" / "se_e2_a" / "input_tf.json",
+    p_examples / "spin" / "se_e2_a" / "input_torch.json",
     p_examples / "dprc" / "normal" / "input.json",
     p_examples / "dprc" / "pairwise" / "input.json",
     p_examples / "dprc" / "generalized_force" / "input.json",
+    p_examples / "water" / "se_e2_a" / "input_torch.json",
+    p_examples / "water" / "se_atten" / "input_torch.json",
+    p_examples / "water" / "dpa2" / "input_torch.json",
 )
 
 
diff --git a/source/tests/test_gui.py b/source/tests/common/test_gui.py
similarity index 100%
rename from source/tests/test_gui.py
rename to source/tests/common/test_gui.py
diff --git a/source/tests/common/test_out_stat.py b/source/tests/common/test_out_stat.py
new file mode 100644
index 0000000000..c0cfc25071
--- /dev/null
+++ b/source/tests/common/test_out_stat.py
@@ -0,0 +1,124 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+
+from deepmd.utils.out_stat import (
+    compute_stats_from_atomic,
+    compute_stats_from_redu,
+)
+
+
+class TestOutStat(unittest.TestCase):
+    def setUp(self) -> None:
+        rng = np.random.default_rng(20240227)
+        ndim = 5
+        nframes = 1000
+        ntypes = 3
+        nloc = 1000
+        self.atype = rng.integers(0, ntypes, size=(nframes, nloc))
+        # compute the number of atoms for each type in each frame
+        self.natoms = np.zeros((nframes, ntypes), dtype=np.int64)
+        for i in range(ntypes):
+            self.natoms[:, i] = (self.atype == i).sum(axis=1)
+        self.mean = rng.random((ntypes, ndim)) * 1e4
+        self.std = rng.random((ntypes, ndim)) * 1e-3
+
+        # generate random output
+        self.output = rng.normal(
+            loc=self.mean[self.atype, :],
+            scale=self.std[self.atype, :],
+            size=(nframes, nloc, ndim),
+        )
+        self.output_redu = self.output.sum(axis=1)
+
+        return super().setUp()
+
+    def test_compute_stats_from_redu(self):
+        bias, std = compute_stats_from_redu(self.output_redu, self.natoms)
+        np.testing.assert_allclose(bias, self.mean, rtol=1e-7)
+        reference_std = np.array(
+            [
+                0.01700638138272794,
+                0.01954897296228177,
+                0.020281857747683162,
+                0.010741237959989648,
+                0.020258211828681347,
+            ]
+        )
+        np.testing.assert_allclose(
+            std,
+            reference_std,
+            rtol=1e-7,
+        )
+        # ensure the sum is close
+        np.testing.assert_allclose(
+            self.output_redu,
+            self.natoms @ bias,
+            rtol=1e-7,
+        )
+
+    def test_compute_stats_from_redu_with_assigned_bias(self):
+        assigned_bias = np.full_like(self.mean, np.nan)
+        assigned_bias[0] = self.mean[0]
+        bias, std = compute_stats_from_redu(
+            self.output_redu,
+            self.natoms,
+            assigned_bias=assigned_bias,
+        )
+        np.testing.assert_allclose(bias, self.mean, rtol=1e-7)
+        np.testing.assert_allclose(bias[0], self.mean[0], rtol=1e-14)
+        reference_std = np.array(
+            [
+                0.017015794087883902,
+                0.019549011723239484,
+                0.020285565914828625,
+                0.01074124012073672,
+                0.020283557003416414,
+            ]
+        )
+        np.testing.assert_allclose(
+            std,
+            reference_std,
+            rtol=1e-7,
+        )
+        # ensure the sum is close
+        np.testing.assert_allclose(
+            self.output_redu,
+            self.natoms @ bias,
+            rtol=1e-7,
+        )
+
+    def test_compute_stats_from_atomic(self):
+        bias, std = compute_stats_from_atomic(self.output, self.atype)
+        np.testing.assert_allclose(bias, self.mean)
+        reference_std = np.array(
+            [
+                [
+                    0.0005452949516910239,
+                    0.000686732800598535,
+                    0.00089423457667224,
+                    7.818017989121455e-05,
+                    0.0004758637035637342,
+                ],
+                [
+                    2.0610161678825724e-05,
+                    0.0007728218734771541,
+                    0.0004754659308165858,
+                    0.0001809007655290948,
+                    0.0008187364708029638,
+                ],
+                [
+                    0.0007935836092665254,
+                    0.00031176505013516624,
+                    0.0005469653430009186,
+                    0.0005652240916389281,
+                    0.0006087722080071852,
+                ],
+            ]
+        )
+        np.testing.assert_allclose(
+            std,
+            reference_std,
+            rtol=1e-7,
+        )
diff --git a/source/tests/common/test_path.py b/source/tests/common/test_path.py
new file mode 100644
index 0000000000..7dcb3a031c
--- /dev/null
+++ b/source/tests/common/test_path.py
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import tempfile
+import unittest
+from pathlib import (
+    Path,
+)
+
+import h5py
+import numpy as np
+
+from deepmd.utils.path import (
+    DPPath,
+)
+
+
+class PathTest:
+    path: DPPath
+
+    def test_numpy(self):
+        numpy_path = self.path / "testcase"
+        arr1 = np.ones(3)
+        self.assertFalse(numpy_path.is_file())
+        numpy_path.save_numpy(arr1)
+        self.assertTrue(numpy_path.is_file())
+        arr2 = numpy_path.load_numpy()
+        np.testing.assert_array_equal(arr1, arr2)
+
+    def test_dir(self):
+        dir_path = self.path / "testcase"
+        self.assertFalse(dir_path.is_dir())
+        dir_path.mkdir()
+        self.assertTrue(dir_path.is_dir())
+
+
+class TestOSPath(PathTest, unittest.TestCase):
+    def setUp(self):
+        self.tempdir = tempfile.TemporaryDirectory()
+        self.path = DPPath(self.tempdir.name, "a")
+
+    def tearDown(self):
+        self.tempdir.cleanup()
+
+
+class TestH5Path(PathTest, unittest.TestCase):
+    def setUp(self):
+        self.tempdir = tempfile.TemporaryDirectory()
+        h5file = str((Path(self.tempdir.name) / "testcase.h5").resolve())
+        with h5py.File(h5file, "w") as f:
+            pass
+        self.path = DPPath(h5file, "a")
+
+    def tearDown(self):
+        self.tempdir.cleanup()
diff --git a/source/tests/test_sel_idx.py b/source/tests/common/test_sel_idx.py
similarity index 100%
rename from source/tests/test_sel_idx.py
rename to source/tests/common/test_sel_idx.py
diff --git a/source/tests/common/test_spin.py b/source/tests/common/test_spin.py
new file mode 100644
index 0000000000..c3bca50b09
--- /dev/null
+++ b/source/tests/common/test_spin.py
@@ -0,0 +1,172 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import os
+import unittest
+
+import numpy as np
+
+from deepmd.utils.spin import (
+    Spin,
+)
+
+CUR_DIR = os.path.dirname(__file__)
+
+
+class SpinTest(unittest.TestCase):
+    def setUp(self):
+        type_map_1 = ["H", "O"]
+        self.use_spin_1 = [False, False]
+        self.virtual_scale_1 = [0.1, 0.1]
+
+        type_map_2 = ["B", "Ni", "O"]
+        self.use_spin_2 = [False, True, False]
+        self.virtual_scale_2 = [0.1, 0.1, 0.1]
+
+        type_map_3 = ["H", "O", "B", "Ni", "O"]
+        self.use_spin_3 = [False, False, False, True, False]
+        self.virtual_scale_3 = [0.1, 0.1, 0.1, 0.1, 0.1]
+
+        self.virtual_scale_float = 0.1
+        self.virtual_scale_nspin = [0.1]
+
+        self.spin_1 = Spin(self.use_spin_1, self.virtual_scale_1)
+        self.spin_2 = Spin(self.use_spin_2, self.virtual_scale_2)
+        self.spin_3 = Spin(self.use_spin_3, self.virtual_scale_3)
+        self.spin_3_float = Spin(self.use_spin_3, self.virtual_scale_float)
+        self.spin_3_nspin = Spin(self.use_spin_3, self.virtual_scale_nspin)
+
+        self.expect_virtual_scale_mask_1 = np.array([0.0, 0.0])
+        self.expect_virtual_scale_mask_2 = np.array([0.0, 0.1, 0.0])
+        self.expect_virtual_scale_mask_3 = np.array([0.0, 0.0, 0.0, 0.1, 0.0])
+
+        self.expect_pair_exclude_types_1 = [
+            [2, 0],
+            [2, 1],
+            [2, 2],
+            [2, 3],
+            [3, 0],
+            [3, 1],
+            [3, 2],
+            [3, 3],
+        ]
+        self.expect_pair_exclude_types_2 = [
+            [3, 0],
+            [3, 1],
+            [3, 2],
+            [3, 3],
+            [3, 4],
+            [3, 5],
+            [5, 0],
+            [5, 1],
+            [5, 2],
+            [5, 3],
+            [5, 4],
+            [5, 5],
+        ]
+        self.expect_pair_exclude_types_3 = [
+            [5, 0],
+            [5, 1],
+            [5, 2],
+            [5, 3],
+            [5, 4],
+            [5, 5],
+            [5, 6],
+            [5, 7],
+            [5, 8],
+            [5, 9],
+            [6, 0],
+            [6, 1],
+            [6, 2],
+            [6, 3],
+            [6, 4],
+            [6, 5],
+            [6, 6],
+            [6, 7],
+            [6, 8],
+            [6, 9],
+            [7, 0],
+            [7, 1],
+            [7, 2],
+            [7, 3],
+            [7, 4],
+            [7, 5],
+            [7, 6],
+            [7, 7],
+            [7, 8],
+            [7, 9],
+            [9, 0],
+            [9, 1],
+            [9, 2],
+            [9, 3],
+            [9, 4],
+            [9, 5],
+            [9, 6],
+            [9, 7],
+            [9, 8],
+            [9, 9],
+        ]
+
+    def test_ntypes(self):
+        self.assertEqual(self.spin_1.get_ntypes_real(), 2)
+        self.assertEqual(self.spin_1.get_ntypes_spin(), 0)
+        self.assertEqual(self.spin_1.get_ntypes_real_and_spin(), 2)
+        self.assertEqual(self.spin_1.get_ntypes_input(), 4)
+
+        self.assertEqual(self.spin_2.get_ntypes_real(), 3)
+        self.assertEqual(self.spin_2.get_ntypes_spin(), 1)
+        self.assertEqual(self.spin_2.get_ntypes_real_and_spin(), 4)
+        self.assertEqual(self.spin_2.get_ntypes_input(), 6)
+
+        self.assertEqual(self.spin_3.get_ntypes_real(), 5)
+        self.assertEqual(self.spin_3.get_ntypes_spin(), 1)
+        self.assertEqual(self.spin_3.get_ntypes_real_and_spin(), 6)
+        self.assertEqual(self.spin_3.get_ntypes_input(), 10)
+
+    def test_use_spin(self):
+        np.testing.assert_allclose(self.spin_1.get_use_spin(), self.use_spin_1)
+        np.testing.assert_allclose(self.spin_2.get_use_spin(), self.use_spin_2)
+        np.testing.assert_allclose(self.spin_3.get_use_spin(), self.use_spin_3)
+
+    def test_mask(self):
+        np.testing.assert_allclose(
+            self.spin_1.get_virtual_scale_mask(), self.expect_virtual_scale_mask_1
+        )
+        np.testing.assert_allclose(
+            self.spin_2.get_virtual_scale_mask(), self.expect_virtual_scale_mask_2
+        )
+        np.testing.assert_allclose(
+            self.spin_3.get_virtual_scale_mask(), self.expect_virtual_scale_mask_3
+        )
+
+    def test_exclude_types(self):
+        self.assertEqual(
+            sorted(self.spin_1.get_pair_exclude_types()),
+            sorted(self.expect_pair_exclude_types_1),
+        )
+        self.assertEqual(
+            sorted(self.spin_2.get_pair_exclude_types()),
+            sorted(self.expect_pair_exclude_types_2),
+        )
+        self.assertEqual(
+            sorted(self.spin_3.get_pair_exclude_types()),
+            sorted(self.expect_pair_exclude_types_3),
+        )
+
+    def test_virtual_scale_consistence(self):
+        np.testing.assert_allclose(
+            self.spin_3.get_virtual_scale(), self.spin_3_float.get_virtual_scale()
+        )
+        np.testing.assert_allclose(
+            self.spin_3.get_virtual_scale_mask(), self.spin_3_nspin.get_virtual_scale()
+        )
+        np.testing.assert_allclose(
+            self.spin_3.get_virtual_scale_mask(),
+            self.spin_3_float.get_virtual_scale_mask(),
+        )
+        np.testing.assert_allclose(
+            self.spin_3.get_virtual_scale_mask(),
+            self.spin_3_nspin.get_virtual_scale_mask(),
+        )
+        self.assertEqual(
+            self.spin_3.get_pair_exclude_types(),
+            self.spin_3_float.get_pair_exclude_types(),
+        )
diff --git a/source/tests/consistent/__init__.py b/source/tests/consistent/__init__.py
new file mode 100644
index 0000000000..50b8b8bdc5
--- /dev/null
+++ b/source/tests/consistent/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Test whether DP native, TF, and PT models are consistent."""
diff --git a/source/tests/consistent/common.py b/source/tests/consistent/common.py
new file mode 100644
index 0000000000..cbcb987c89
--- /dev/null
+++ b/source/tests/consistent/common.py
@@ -0,0 +1,415 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import itertools
+import os
+import sys
+from abc import (
+    ABC,
+    abstractmethod,
+)
+from enum import (
+    Enum,
+)
+from typing import (
+    Any,
+    Callable,
+    ClassVar,
+    List,
+    Optional,
+    Tuple,
+    Union,
+)
+from uuid import (
+    uuid4,
+)
+
+import numpy as np
+from dargs import (
+    Argument,
+)
+
+from deepmd.backend.tensorflow import (
+    Backend,
+)
+
+INSTALLED_TF = Backend.get_backend("tensorflow")().is_available()
+INSTALLED_PT = Backend.get_backend("pytorch")().is_available()
+
+if os.environ.get("CI") and not (INSTALLED_TF and INSTALLED_PT):
+    raise ImportError("TensorFlow or PyTorch should be tested in the CI")
+
+
+if INSTALLED_TF:
+    from deepmd.tf.common import (
+        clear_session,
+    )
+    from deepmd.tf.env import (
+        default_tf_session_config,
+        tf,
+    )
+    from deepmd.tf.utils.sess import (
+        run_sess,
+    )
+
+
+__all__ = [
+    "CommonTest",
+    "INSTALLED_TF",
+    "INSTALLED_PT",
+]
+
+
+class CommonTest(ABC):
+    data: ClassVar[dict]
+    """Arguments data."""
+    addtional_data: ClassVar[dict] = {}
+    """Additional data that will not be checked."""
+    tf_class: ClassVar[Optional[type]]
+    """TensorFlow model class."""
+    dp_class: ClassVar[Optional[type]]
+    """Native DP model class."""
+    pt_class: ClassVar[Optional[type]]
+    """PyTorch model class."""
+    args: ClassVar[Optional[Union[Argument, List[Argument]]]]
+    """Arguments that maps to the `data`."""
+    skip_dp: ClassVar[bool] = False
+    """Whether to skip the native DP model."""
+    skip_tf: ClassVar[bool] = not INSTALLED_TF
+    """Whether to skip the TensorFlow model."""
+    skip_pt: ClassVar[bool] = not INSTALLED_PT
+    """Whether to skip the PyTorch model."""
+    rtol = 1e-10
+    """Relative tolerance for comparing the return value. Override for float32."""
+    atol = 1e-10
+    """Absolute tolerance for comparing the return value. Override for float32."""
+
+    def setUp(self):
+        self.unique_id = uuid4().hex
+
+    def reset_unique_id(self):
+        self.unique_id = uuid4().hex
+
+    def init_backend_cls(self, cls) -> Any:
+        """Initialize a backend model."""
+        assert self.data is not None
+        if self.args is None:
+            data = self.data
+        else:
+            if isinstance(self.args, list):
+                base = Argument("arg", dict, sub_fields=self.args)
+            elif isinstance(self.args, Argument):
+                base = self.args
+            else:
+                raise ValueError("Invalid type for args")
+            data = base.normalize_value(self.data, trim_pattern="_*")
+            base.check_value(data, strict=True)
+        return self.pass_data_to_cls(cls, data)
+
+    def pass_data_to_cls(self, cls, data) -> Any:
+        """Pass data to the class."""
+        return cls(**data, **self.addtional_data)
+
+    @abstractmethod
+    def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]:
+        """Build the TF graph.
+
+        Parameters
+        ----------
+        obj : Any
+            The object of TF
+        suffix : str
+            The suffix of the scope
+
+        Returns
+        -------
+        list of tf.Tensor
+            The list of tensors
+        dict
+            The feed_dict
+        """
+
+    @abstractmethod
+    def eval_dp(self, dp_obj: Any) -> Any:
+        """Evaluate the return value of DP.
+
+        Parameters
+        ----------
+        dp_obj : Any
+            The object of DP
+        """
+
+    @abstractmethod
+    def eval_pt(self, pt_obj: Any) -> Any:
+        """Evaluate the return value of PT.
+
+        Parameters
+        ----------
+        pt_obj : Any
+            The object of PT
+        """
+
+    class RefBackend(Enum):
+        """Reference backend."""
+
+        TF = 1
+        DP = 2
+        PT = 3
+
+    @abstractmethod
+    def extract_ret(self, ret: Any, backend: RefBackend) -> Tuple[np.ndarray, ...]:
+        """Extract the return value when comparing with other backends.
+
+        Parameters
+        ----------
+        ret : Any
+            The return value
+        backend : RefBackend
+            The backend
+
+        Returns
+        -------
+        tuple[np.ndarray, ...]
+            The extracted return value
+        """
+
+    def build_eval_tf(
+        self, sess: "tf.Session", obj: Any, suffix: str
+    ) -> List[np.ndarray]:
+        """Build and evaluate the TF graph."""
+        t_out, feed_dict = self.build_tf(obj, suffix)
+
+        t_out_indentity = [
+            tf.identity(tt, name=f"o_{ii}_{suffix}") for ii, tt in enumerate(t_out)
+        ]
+        run_sess(sess, tf.global_variables_initializer())
+        return run_sess(
+            sess,
+            t_out_indentity,
+            feed_dict=feed_dict,
+        )
+
+    def get_tf_ret_serialization_from_cls(self, obj):
+        with tf.Session(config=default_tf_session_config) as sess:
+            graph = tf.get_default_graph()
+            ret = self.build_eval_tf(sess, obj, suffix=self.unique_id)
+            output_graph_def = tf.graph_util.convert_variables_to_constants(
+                sess,
+                graph.as_graph_def(),
+                [f"o_{ii}_{self.unique_id}" for ii, _ in enumerate(ret)],
+            )
+            with tf.Graph().as_default() as new_graph:
+                tf.import_graph_def(output_graph_def, name="")
+            obj.init_variables(new_graph, output_graph_def, suffix=self.unique_id)
+
+            data = obj.serialize(suffix=self.unique_id)
+        return ret, data
+
+    def get_pt_ret_serialization_from_cls(self, obj):
+        ret = self.eval_pt(obj)
+        data = obj.serialize()
+        return ret, data
+
+    def get_dp_ret_serialization_from_cls(self, obj):
+        ret = self.eval_dp(obj)
+        data = obj.serialize()
+        return ret, data
+
+    def get_reference_backend(self):
+        """Get the reference backend.
+
+        Order of checking for ref: DP, TF, PT.
+        """
+        if not self.skip_dp:
+            return self.RefBackend.DP
+        if not self.skip_tf:
+            return self.RefBackend.TF
+        if not self.skip_pt:
+            return self.RefBackend.PT
+        raise ValueError("No available reference")
+
+    def get_reference_ret_serialization(self, ref: RefBackend):
+        if ref == self.RefBackend.DP:
+            obj = self.init_backend_cls(self.dp_class)
+            return self.get_dp_ret_serialization_from_cls(obj)
+        if ref == self.RefBackend.TF:
+            obj = self.init_backend_cls(self.tf_class)
+            self.reset_unique_id()
+            return self.get_tf_ret_serialization_from_cls(obj)
+        if ref == self.RefBackend.PT:
+            obj = self.init_backend_cls(self.pt_class)
+            return self.get_pt_ret_serialization_from_cls(obj)
+        raise ValueError("No available reference")
+
+    def test_tf_consistent_with_ref(self):
+        """Test whether TF and reference are consistent."""
+        if self.skip_tf:
+            self.skipTest("Unsupported backend")
+        ref_backend = self.get_reference_backend()
+        if ref_backend == self.RefBackend.TF:
+            self.skipTest("Reference is self")
+        ret1, data1 = self.get_reference_ret_serialization(ref_backend)
+        ret1 = self.extract_ret(ret1, ref_backend)
+        self.reset_unique_id()
+        tf_obj = self.tf_class.deserialize(data1, suffix=self.unique_id)
+        ret2, data2 = self.get_tf_ret_serialization_from_cls(tf_obj)
+        ret2 = self.extract_ret(ret2, self.RefBackend.TF)
+        if tf_obj.__class__.__name__.startswith(("Polar", "Dipole", "DOS")):
+            # tf, pt serialization mismatch
+            common_keys = set(data1.keys()) & set(data2.keys())
+            data1 = {k: data1[k] for k in common_keys}
+            data2 = {k: data2[k] for k in common_keys}
+
+        # not comparing version
+        data1.pop("@version")
+        data2.pop("@version")
+
+        np.testing.assert_equal(data1, data2)
+        for rr1, rr2 in zip(ret1, ret2):
+            np.testing.assert_allclose(
+                rr1.ravel(), rr2.ravel(), rtol=self.rtol, atol=self.atol
+            )
+            assert rr1.dtype == rr2.dtype, f"{rr1.dtype} != {rr2.dtype}"
+
+    def test_tf_self_consistent(self):
+        """Test whether TF is self consistent."""
+        if self.skip_tf:
+            self.skipTest("Unsupported backend")
+        obj1 = self.init_backend_cls(self.tf_class)
+        self.reset_unique_id()
+        ret1, data1 = self.get_tf_ret_serialization_from_cls(obj1)
+        self.reset_unique_id()
+        obj2 = self.tf_class.deserialize(data1, suffix=self.unique_id)
+        ret2, data2 = self.get_tf_ret_serialization_from_cls(obj2)
+        np.testing.assert_equal(data1, data2)
+        for rr1, rr2 in zip(ret1, ret2):
+            np.testing.assert_allclose(rr1, rr2, rtol=self.rtol, atol=self.atol)
+            assert rr1.dtype == rr2.dtype, f"{rr1.dtype} != {rr2.dtype}"
+
+    def test_dp_consistent_with_ref(self):
+        """Test whether DP and reference are consistent."""
+        if self.skip_dp:
+            self.skipTest("Unsupported backend")
+        ref_backend = self.get_reference_backend()
+        if ref_backend == self.RefBackend.DP:
+            self.skipTest("Reference is self")
+        ret1, data1 = self.get_reference_ret_serialization(ref_backend)
+        ret1 = self.extract_ret(ret1, ref_backend)
+        dp_obj = self.dp_class.deserialize(data1)
+        ret2 = self.eval_dp(dp_obj)
+        ret2 = self.extract_ret(ret2, self.RefBackend.DP)
+        data2 = dp_obj.serialize()
+        np.testing.assert_equal(data1, data2)
+        for rr1, rr2 in zip(ret1, ret2):
+            np.testing.assert_allclose(rr1, rr2, rtol=self.rtol, atol=self.atol)
+            assert rr1.dtype == rr2.dtype, f"{rr1.dtype} != {rr2.dtype}"
+
+    def test_dp_self_consistent(self):
+        """Test whether DP is self consistent."""
+        if self.skip_dp:
+            self.skipTest("Unsupported backend")
+        obj1 = self.init_backend_cls(self.dp_class)
+        ret1, data1 = self.get_dp_ret_serialization_from_cls(obj1)
+        obj1 = self.dp_class.deserialize(data1)
+        ret2, data2 = self.get_dp_ret_serialization_from_cls(obj1)
+        np.testing.assert_equal(data1, data2)
+        for rr1, rr2 in zip(ret1, ret2):
+            if isinstance(rr1, np.ndarray) and isinstance(rr2, np.ndarray):
+                np.testing.assert_allclose(rr1, rr2, rtol=self.rtol, atol=self.atol)
+                assert rr1.dtype == rr2.dtype, f"{rr1.dtype} != {rr2.dtype}"
+            else:
+                self.assertEqual(rr1, rr2)
+
+    def test_pt_consistent_with_ref(self):
+        """Test whether PT and reference are consistent."""
+        if self.skip_pt:
+            self.skipTest("Unsupported backend")
+        ref_backend = self.get_reference_backend()
+        if ref_backend == self.RefBackend.PT:
+            self.skipTest("Reference is self")
+        ret1, data1 = self.get_reference_ret_serialization(ref_backend)
+        ret1 = self.extract_ret(ret1, ref_backend)
+        obj = self.pt_class.deserialize(data1)
+        ret2 = self.eval_pt(obj)
+        ret2 = self.extract_ret(ret2, self.RefBackend.PT)
+        data2 = obj.serialize()
+        if obj.__class__.__name__.startswith(("Polar", "Dipole", "DOS")):
+            # tf, pt serialization mismatch
+            common_keys = set(data1.keys()) & set(data2.keys())
+            data1 = {k: data1[k] for k in common_keys}
+            data2 = {k: data2[k] for k in common_keys}
+        np.testing.assert_equal(data1, data2)
+        for rr1, rr2 in zip(ret1, ret2):
+            np.testing.assert_allclose(rr1, rr2, rtol=self.rtol, atol=self.atol)
+            assert rr1.dtype == rr2.dtype, f"{rr1.dtype} != {rr2.dtype}"
+
+    def test_pt_self_consistent(self):
+        """Test whether PT is self consistent."""
+        if self.skip_pt:
+            self.skipTest("Unsupported backend")
+        obj1 = self.init_backend_cls(self.pt_class)
+        ret1, data1 = self.get_pt_ret_serialization_from_cls(obj1)
+        obj2 = self.pt_class.deserialize(data1)
+        ret2, data2 = self.get_pt_ret_serialization_from_cls(obj2)
+        np.testing.assert_equal(data1, data2)
+        for rr1, rr2 in zip(ret1, ret2):
+            if isinstance(rr1, np.ndarray) and isinstance(rr2, np.ndarray):
+                np.testing.assert_allclose(rr1, rr2, rtol=self.rtol, atol=self.atol)
+                assert rr1.dtype == rr2.dtype, f"{rr1.dtype} != {rr2.dtype}"
+            else:
+                self.assertEqual(rr1, rr2)
+
+    def tearDown(self) -> None:
+        """Clear the TF session."""
+        if not self.skip_tf:
+            clear_session()
+
+
+def parameterized(*attrs: tuple) -> Callable:
+    """Parameterized test.
+
+    Orginal class will not be actually generated. Avoid inherbiting from it.
+    New classes are generated with the name of the original class and the
+    parameters.
+
+    Parameters
+    ----------
+    *attrs : tuple
+        The attributes to be parameterized.
+
+    Returns
+    -------
+    object
+        The decorator.
+
+    Examples
+    --------
+    >>> @parameterized(
+    ...     (True, False),
+    ...     (True, False),
+    ... )
+    ... class TestSeA(CommonTest, unittest.TestCase):
+    ...     @property
+    ...     def data(self) -> dict:
+    ...         (
+    ...             param1,
+    ...             param2,
+    ...         ) = self.param
+    ...         return {
+    ...             "param1": param1,
+    ...             "param2": param2,
+    ...         }
+    """
+
+    def decorator(base_class: type):
+        class_module = sys.modules[base_class.__module__].__dict__
+        for pp in itertools.product(*attrs):
+
+            class TestClass(base_class):
+                param: ClassVar = pp
+
+            name = f"{base_class.__name__}_{'_'.join(str(x) for x in pp)}"
+
+            class_module[name] = TestClass
+        # make unittest module happy by ignoring the original one
+        return object
+
+    return decorator
diff --git a/source/tests/consistent/descriptor/__init__.py b/source/tests/consistent/descriptor/__init__.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/source/tests/consistent/descriptor/__init__.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/source/tests/consistent/descriptor/common.py b/source/tests/consistent/descriptor/common.py
new file mode 100644
index 0000000000..ef7b39b52e
--- /dev/null
+++ b/source/tests/consistent/descriptor/common.py
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Any,
+)
+
+from deepmd.common import (
+    make_default_mesh,
+)
+from deepmd.dpmodel.utils.nlist import (
+    build_neighbor_list,
+    extend_coord_with_ghosts,
+)
+
+from ..common import (
+    INSTALLED_PT,
+    INSTALLED_TF,
+)
+
+if INSTALLED_PT:
+    import torch
+
+    from deepmd.pt.utils.env import DEVICE as PT_DEVICE
+    from deepmd.pt.utils.nlist import build_neighbor_list as build_neighbor_list_pt
+    from deepmd.pt.utils.nlist import (
+        extend_coord_with_ghosts as extend_coord_with_ghosts_pt,
+    )
+if INSTALLED_TF:
+    from deepmd.tf.env import (
+        GLOBAL_TF_FLOAT_PRECISION,
+        tf,
+    )
+
+
+class DescriptorTest:
+    """Useful utilities for descriptor tests."""
+
+    def build_tf_descriptor(self, obj, natoms, coords, atype, box, suffix):
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, natoms.shape, name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        t_des = obj.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            {},
+            suffix=suffix,
+        )
+        return [t_des], {
+            t_coord: coords,
+            t_type: atype,
+            t_natoms: natoms,
+            t_box: box,
+            t_mesh: make_default_mesh(True, False),
+        }
+
+    def eval_dp_descriptor(self, dp_obj: Any, natoms, coords, atype, box) -> Any:
+        ext_coords, ext_atype, mapping = extend_coord_with_ghosts(
+            coords.reshape(1, -1, 3),
+            atype.reshape(1, -1),
+            box.reshape(1, 3, 3),
+            dp_obj.get_rcut(),
+        )
+        nlist = build_neighbor_list(
+            ext_coords,
+            ext_atype,
+            natoms[0],
+            dp_obj.get_rcut(),
+            dp_obj.get_sel(),
+            distinguish_types=True,
+        )
+        return dp_obj(ext_coords, ext_atype, nlist=nlist)
+
+    def eval_pt_descriptor(self, pt_obj: Any, natoms, coords, atype, box) -> Any:
+        ext_coords, ext_atype, mapping = extend_coord_with_ghosts_pt(
+            torch.from_numpy(coords).to(PT_DEVICE).reshape(1, -1, 3),
+            torch.from_numpy(atype).to(PT_DEVICE).reshape(1, -1),
+            torch.from_numpy(box).to(PT_DEVICE).reshape(1, 3, 3),
+            pt_obj.get_rcut(),
+        )
+        nlist = build_neighbor_list_pt(
+            ext_coords,
+            ext_atype,
+            natoms[0],
+            pt_obj.get_rcut(),
+            pt_obj.get_sel(),
+            distinguish_types=True,
+        )
+        return [
+            x.detach().cpu().numpy() if torch.is_tensor(x) else x
+            for x in pt_obj(ext_coords, ext_atype, nlist=nlist)
+        ]
diff --git a/source/tests/consistent/descriptor/test_hybrid.py b/source/tests/consistent/descriptor/test_hybrid.py
new file mode 100644
index 0000000000..7cfb627d54
--- /dev/null
+++ b/source/tests/consistent/descriptor/test_hybrid.py
@@ -0,0 +1,137 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+from typing import (
+    Any,
+    Tuple,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.descriptor.hybrid import DescrptHybrid as DescrptHybridDP
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+
+from ..common import (
+    INSTALLED_PT,
+    INSTALLED_TF,
+    CommonTest,
+)
+from .common import (
+    DescriptorTest,
+)
+
+if INSTALLED_PT:
+    from deepmd.pt.model.descriptor.hybrid import DescrptHybrid as DescrptHybridPT
+else:
+    DescrptHybridPT = None
+if INSTALLED_TF:
+    from deepmd.tf.descriptor.hybrid import DescrptHybrid as DescrptHybridTF
+else:
+    DescrptHybridTF = None
+from deepmd.utils.argcheck import (
+    descrpt_hybrid_args,
+)
+
+
+class TestHybrid(CommonTest, DescriptorTest, unittest.TestCase):
+    @property
+    def data(self) -> dict:
+        return {
+            "list": [
+                {
+                    "type": "se_e2_r",
+                    # test the case that sel are different!
+                    "sel": [10, 10],
+                    "rcut_smth": 5.80,
+                    "rcut": 6.00,
+                    "neuron": [6, 12, 24],
+                    "resnet_dt": False,
+                    "type_one_side": True,
+                    "precision": "float64",
+                    "seed": 20240229,
+                },
+                {
+                    "type": "se_e2_a",
+                    "sel": [9, 11],
+                    "rcut_smth": 2.80,
+                    "rcut": 3.00,
+                    "neuron": [6, 12, 24],
+                    "axis_neuron": 3,
+                    "resnet_dt": True,
+                    "type_one_side": True,
+                    "precision": "float64",
+                    "seed": 20240229,
+                },
+            ]
+        }
+
+    tf_class = DescrptHybridTF
+    dp_class = DescrptHybridDP
+    pt_class = DescrptHybridPT
+    args = descrpt_hybrid_args()
+
+    def setUp(self):
+        CommonTest.setUp(self)
+
+        self.ntypes = 2
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ],
+            dtype=GLOBAL_NP_FLOAT_PRECISION,
+        )
+        self.atype = np.array([0, 1, 1, 0, 1, 1], dtype=np.int32)
+        self.box = np.array(
+            [13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0],
+            dtype=GLOBAL_NP_FLOAT_PRECISION,
+        )
+        self.natoms = np.array([6, 6, 2, 4], dtype=np.int32)
+
+    def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]:
+        return self.build_tf_descriptor(
+            obj,
+            self.natoms,
+            self.coords,
+            self.atype,
+            self.box,
+            suffix,
+        )
+
+    def eval_dp(self, dp_obj: Any) -> Any:
+        return self.eval_dp_descriptor(
+            dp_obj,
+            self.natoms,
+            self.coords,
+            self.atype,
+            self.box,
+        )
+
+    def eval_pt(self, pt_obj: Any) -> Any:
+        return self.eval_pt_descriptor(
+            pt_obj,
+            self.natoms,
+            self.coords,
+            self.atype,
+            self.box,
+        )
+
+    def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]:
+        return (ret[0],)
diff --git a/source/tests/consistent/descriptor/test_se_e2_a.py b/source/tests/consistent/descriptor/test_se_e2_a.py
new file mode 100644
index 0000000000..1e3e5ae86d
--- /dev/null
+++ b/source/tests/consistent/descriptor/test_se_e2_a.py
@@ -0,0 +1,216 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+from typing import (
+    Any,
+    Tuple,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.descriptor.se_e2_a import DescrptSeA as DescrptSeADP
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+
+from ..common import (
+    INSTALLED_PT,
+    INSTALLED_TF,
+    CommonTest,
+    parameterized,
+)
+from .common import (
+    DescriptorTest,
+)
+
+if INSTALLED_PT:
+    from deepmd.pt.model.descriptor.se_a import DescrptSeA as DescrptSeAPT
+else:
+    DescrptSeAPT = None
+if INSTALLED_TF:
+    from deepmd.tf.descriptor.se_a import DescrptSeA as DescrptSeATF
+else:
+    DescrptSeATF = None
+from deepmd.utils.argcheck import (
+    descrpt_se_a_args,
+)
+
+
+@parameterized(
+    (True, False),  # resnet_dt
+    (True, False),  # type_one_side
+    ([], [[0, 1]]),  # excluded_types
+    ("float32", "float64"),  # precision
+    (0.0, 1e-8, 1e-2),  # env_protection
+)
+class TestSeA(CommonTest, DescriptorTest, unittest.TestCase):
+    @property
+    def data(self) -> dict:
+        (
+            resnet_dt,
+            type_one_side,
+            excluded_types,
+            precision,
+            env_protection,
+        ) = self.param
+        return {
+            "sel": [9, 10],
+            "rcut_smth": 5.80,
+            "rcut": 6.00,
+            "neuron": [6, 12, 24],
+            "axis_neuron": 3,
+            "resnet_dt": resnet_dt,
+            "type_one_side": type_one_side,
+            "exclude_types": excluded_types,
+            "env_protection": env_protection,
+            "precision": precision,
+            "seed": 1145141919810,
+        }
+
+    @property
+    def skip_pt(self) -> bool:
+        (
+            resnet_dt,
+            type_one_side,
+            excluded_types,
+            precision,
+            env_protection,
+        ) = self.param
+        return CommonTest.skip_pt
+
+    @property
+    def skip_dp(self) -> bool:
+        (
+            resnet_dt,
+            type_one_side,
+            excluded_types,
+            precision,
+            env_protection,
+        ) = self.param
+        return CommonTest.skip_dp
+
+    @property
+    def skip_tf(self) -> bool:
+        (
+            resnet_dt,
+            type_one_side,
+            excluded_types,
+            precision,
+            env_protection,
+        ) = self.param
+        return env_protection != 0.0
+
+    tf_class = DescrptSeATF
+    dp_class = DescrptSeADP
+    pt_class = DescrptSeAPT
+    args = descrpt_se_a_args()
+
+    def setUp(self):
+        CommonTest.setUp(self)
+
+        self.ntypes = 2
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ],
+            dtype=GLOBAL_NP_FLOAT_PRECISION,
+        )
+        self.atype = np.array([0, 1, 1, 0, 1, 1], dtype=np.int32)
+        self.box = np.array(
+            [13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0],
+            dtype=GLOBAL_NP_FLOAT_PRECISION,
+        )
+        self.natoms = np.array([6, 6, 2, 4], dtype=np.int32)
+        # TF se_e2_a type_one_side=False requires atype sorted
+        (
+            resnet_dt,
+            type_one_side,
+            excluded_types,
+            precision,
+            env_protection,
+        ) = self.param
+        if not type_one_side:
+            idx = np.argsort(self.atype)
+            self.atype = self.atype[idx]
+            self.coords = self.coords.reshape(-1, 3)[idx].ravel()
+
+    def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]:
+        return self.build_tf_descriptor(
+            obj,
+            self.natoms,
+            self.coords,
+            self.atype,
+            self.box,
+            suffix,
+        )
+
+    def eval_dp(self, dp_obj: Any) -> Any:
+        return self.eval_dp_descriptor(
+            dp_obj,
+            self.natoms,
+            self.coords,
+            self.atype,
+            self.box,
+        )
+
+    def eval_pt(self, pt_obj: Any) -> Any:
+        return self.eval_pt_descriptor(
+            pt_obj,
+            self.natoms,
+            self.coords,
+            self.atype,
+            self.box,
+        )
+
+    def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]:
+        return (ret[0],)
+
+    @property
+    def rtol(self) -> float:
+        """Relative tolerance for comparing the return value."""
+        (
+            resnet_dt,
+            type_one_side,
+            excluded_types,
+            precision,
+            env_protection,
+        ) = self.param
+        if precision == "float64":
+            return 1e-10
+        elif precision == "float32":
+            return 1e-4
+        else:
+            raise ValueError(f"Unknown precision: {precision}")
+
+    @property
+    def atol(self) -> float:
+        """Absolute tolerance for comparing the return value."""
+        (
+            resnet_dt,
+            type_one_side,
+            excluded_types,
+            precision,
+            env_protection,
+        ) = self.param
+        if precision == "float64":
+            return 1e-10
+        elif precision == "float32":
+            return 1e-4
+        else:
+            raise ValueError(f"Unknown precision: {precision}")
diff --git a/source/tests/consistent/descriptor/test_se_r.py b/source/tests/consistent/descriptor/test_se_r.py
new file mode 100644
index 0000000000..8b835f3b5c
--- /dev/null
+++ b/source/tests/consistent/descriptor/test_se_r.py
@@ -0,0 +1,185 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+from typing import (
+    Any,
+    Tuple,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.descriptor.se_r import DescrptSeR as DescrptSeRDP
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+
+from ..common import (
+    INSTALLED_PT,
+    INSTALLED_TF,
+    CommonTest,
+    parameterized,
+)
+from .common import (
+    DescriptorTest,
+)
+
+if INSTALLED_PT:
+    from deepmd.pt.model.descriptor.se_r import DescrptSeR as DescrptSeRPT
+else:
+    DescrptSeAPT = None
+if INSTALLED_TF:
+    from deepmd.tf.descriptor.se_r import DescrptSeR as DescrptSeRTF
+else:
+    DescrptSeATF = None
+from deepmd.utils.argcheck import (
+    descrpt_se_r_args,
+)
+
+
+@parameterized(
+    (True, False),  # resnet_dt
+    (True, False),  # type_one_side
+    ([], [[0, 1]]),  # excluded_types
+    ("float32", "float64"),  # precision
+)
+class TestSeA(CommonTest, DescriptorTest, unittest.TestCase):
+    @property
+    def data(self) -> dict:
+        (
+            resnet_dt,
+            type_one_side,
+            excluded_types,
+            precision,
+        ) = self.param
+        return {
+            "sel": [9, 10],
+            "rcut_smth": 5.80,
+            "rcut": 6.00,
+            "neuron": [6, 12, 24],
+            "resnet_dt": resnet_dt,
+            "type_one_side": type_one_side,
+            "exclude_types": excluded_types,
+            "precision": precision,
+            "seed": 1145141919810,
+        }
+
+    @property
+    def skip_pt(self) -> bool:
+        (
+            resnet_dt,
+            type_one_side,
+            excluded_types,
+            precision,
+        ) = self.param
+        return not type_one_side or CommonTest.skip_pt
+
+    @property
+    def skip_dp(self) -> bool:
+        (
+            resnet_dt,
+            type_one_side,
+            excluded_types,
+            precision,
+        ) = self.param
+        return not type_one_side or CommonTest.skip_dp
+
+    tf_class = DescrptSeRTF
+    dp_class = DescrptSeRDP
+    pt_class = DescrptSeRPT
+    args = descrpt_se_r_args()
+
+    def setUp(self):
+        CommonTest.setUp(self)
+
+        self.ntypes = 2
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ],
+            dtype=GLOBAL_NP_FLOAT_PRECISION,
+        )
+        self.atype = np.array([0, 1, 1, 0, 1, 1], dtype=np.int32)
+        self.box = np.array(
+            [13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0],
+            dtype=GLOBAL_NP_FLOAT_PRECISION,
+        )
+        self.natoms = np.array([6, 6, 2, 4], dtype=np.int32)
+
+    def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]:
+        return self.build_tf_descriptor(
+            obj,
+            self.natoms,
+            self.coords,
+            self.atype,
+            self.box,
+            suffix,
+        )
+
+    def eval_dp(self, dp_obj: Any) -> Any:
+        return self.eval_dp_descriptor(
+            dp_obj,
+            self.natoms,
+            self.coords,
+            self.atype,
+            self.box,
+        )
+
+    def eval_pt(self, pt_obj: Any) -> Any:
+        return self.eval_pt_descriptor(
+            pt_obj,
+            self.natoms,
+            self.coords,
+            self.atype,
+            self.box,
+        )
+
+    def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]:
+        return (ret[0],)
+
+    @property
+    def rtol(self) -> float:
+        """Relative tolerance for comparing the return value."""
+        (
+            resnet_dt,
+            type_one_side,
+            excluded_types,
+            precision,
+        ) = self.param
+        if precision == "float64":
+            return 1e-10
+        elif precision == "float32":
+            return 1e-4
+        else:
+            raise ValueError(f"Unknown precision: {precision}")
+
+    @property
+    def atol(self) -> float:
+        """Absolute tolerance for comparing the return value."""
+        (
+            resnet_dt,
+            type_one_side,
+            excluded_types,
+            precision,
+        ) = self.param
+        if precision == "float64":
+            return 1e-10
+        elif precision == "float32":
+            return 1e-4
+        else:
+            raise ValueError(f"Unknown precision: {precision}")
diff --git a/source/tests/consistent/fitting/__init__.py b/source/tests/consistent/fitting/__init__.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/source/tests/consistent/fitting/__init__.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/source/tests/consistent/fitting/common.py b/source/tests/consistent/fitting/common.py
new file mode 100644
index 0000000000..bdd4b7cf81
--- /dev/null
+++ b/source/tests/consistent/fitting/common.py
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+
+
+from ..common import (
+    INSTALLED_PT,
+    INSTALLED_TF,
+)
+
+if INSTALLED_PT:
+    pass
+if INSTALLED_TF:
+    from deepmd.tf.env import (
+        GLOBAL_TF_FLOAT_PRECISION,
+        tf,
+    )
+
+
+class FittingTest:
+    """Useful utilities for descriptor tests."""
+
+    def build_tf_fitting(self, obj, inputs, natoms, atype, fparam, suffix):
+        t_inputs = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_inputs")
+        t_natoms = tf.placeholder(tf.int32, natoms.shape, name="i_natoms")
+        t_atype = tf.placeholder(tf.int32, [None], name="i_atype")
+        extras = {}
+        feed_dict = {}
+        if fparam is not None:
+            t_fparam = tf.placeholder(
+                GLOBAL_TF_FLOAT_PRECISION, [None], name="i_fparam"
+            )
+            extras["fparam"] = t_fparam
+            feed_dict[t_fparam] = fparam
+        t_out = obj.build(
+            t_inputs,
+            t_natoms,
+            {"atype": t_atype, **extras},
+            suffix=suffix,
+        )
+        return [t_out], {
+            t_inputs: inputs,
+            t_natoms: natoms,
+            t_atype: atype,
+            **feed_dict,
+        }
+
+
+class DipoleFittingTest:
+    """Useful utilities for descriptor tests."""
+
+    def build_tf_fitting(self, obj, inputs, rot_mat, natoms, atype, fparam, suffix):
+        t_inputs = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_inputs")
+        t_rot_mat = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, rot_mat.shape, name="i_rot_mat"
+        )
+        t_natoms = tf.placeholder(tf.int32, natoms.shape, name="i_natoms")
+        t_atype = tf.placeholder(tf.int32, [None], name="i_atype")
+        extras = {}
+        feed_dict = {}
+        if fparam is not None:
+            t_fparam = tf.placeholder(
+                GLOBAL_TF_FLOAT_PRECISION, [None], name="i_fparam"
+            )
+            extras["fparam"] = t_fparam
+            feed_dict[t_fparam] = fparam
+        t_out = obj.build(
+            t_inputs,
+            t_rot_mat,
+            t_natoms,
+            {"atype": t_atype, **extras},
+            suffix=suffix,
+        )
+        return [t_out], {
+            t_inputs: inputs,
+            t_rot_mat: rot_mat,
+            t_natoms: natoms,
+            t_atype: atype,
+            **feed_dict,
+        }
diff --git a/source/tests/consistent/fitting/test_dipole.py b/source/tests/consistent/fitting/test_dipole.py
new file mode 100644
index 0000000000..18a29934ca
--- /dev/null
+++ b/source/tests/consistent/fitting/test_dipole.py
@@ -0,0 +1,182 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+from typing import (
+    Any,
+    Tuple,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.fitting.dipole_fitting import DipoleFitting as DipoleFittingDP
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+
+from ..common import (
+    INSTALLED_PT,
+    INSTALLED_TF,
+    CommonTest,
+    parameterized,
+)
+from .common import (
+    DipoleFittingTest,
+)
+
+if INSTALLED_PT:
+    import torch
+
+    from deepmd.pt.model.task.dipole import DipoleFittingNet as DipoleFittingPT
+    from deepmd.pt.utils.env import DEVICE as PT_DEVICE
+else:
+    DipoleFittingPT = object
+if INSTALLED_TF:
+    from deepmd.tf.fit.dipole import DipoleFittingSeA as DipoleFittingTF
+else:
+    DipoleFittingTF = object
+from deepmd.utils.argcheck import (
+    fitting_dipole,
+)
+
+
+@parameterized(
+    (True, False),  # resnet_dt
+    ("float64", "float32"),  # precision
+    (True, False),  # mixed_types
+)
+class TestDipole(CommonTest, DipoleFittingTest, unittest.TestCase):
+    @property
+    def data(self) -> dict:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+        ) = self.param
+        return {
+            "neuron": [5, 5, 5],
+            "resnet_dt": resnet_dt,
+            "precision": precision,
+            "seed": 20240217,
+        }
+
+    @property
+    def skip_pt(self) -> bool:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+        ) = self.param
+        return CommonTest.skip_pt
+
+    tf_class = DipoleFittingTF
+    dp_class = DipoleFittingDP
+    pt_class = DipoleFittingPT
+    args = fitting_dipole()
+
+    def setUp(self):
+        CommonTest.setUp(self)
+
+        self.ntypes = 2
+        self.natoms = np.array([6, 6, 2, 4], dtype=np.int32)
+        self.inputs = np.ones((1, 6, 20), dtype=GLOBAL_NP_FLOAT_PRECISION)
+        self.gr = np.ones((1, 6, 30, 3), dtype=GLOBAL_NP_FLOAT_PRECISION)
+        self.atype = np.array([0, 1, 1, 0, 1, 1], dtype=np.int32)
+        # inconsistent if not sorted
+        self.atype.sort()
+
+    @property
+    def addtional_data(self) -> dict:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+        ) = self.param
+        return {
+            "ntypes": self.ntypes,
+            "dim_descrpt": self.inputs.shape[-1],
+            "mixed_types": mixed_types,
+            "var_name": "dipole",
+            "embedding_width": 30,
+        }
+
+    def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+        ) = self.param
+        return self.build_tf_fitting(
+            obj,
+            self.inputs.ravel(),
+            self.gr,
+            self.natoms,
+            self.atype,
+            None,
+            suffix,
+        )
+
+    def eval_pt(self, pt_obj: Any) -> Any:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+        ) = self.param
+        return (
+            pt_obj(
+                torch.from_numpy(self.inputs).to(device=PT_DEVICE),
+                torch.from_numpy(self.atype.reshape(1, -1)).to(device=PT_DEVICE),
+                torch.from_numpy(self.gr).to(device=PT_DEVICE),
+                None,
+            )["dipole"]
+            .detach()
+            .cpu()
+            .numpy()
+        )
+
+    def eval_dp(self, dp_obj: Any) -> Any:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+        ) = self.param
+        return dp_obj(
+            self.inputs,
+            self.atype.reshape(1, -1),
+            self.gr,
+            None,
+        )["dipole"]
+
+    def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]:
+        if backend == self.RefBackend.TF:
+            # shape is not same
+            ret = ret[0].reshape(-1, self.natoms[0], 1)
+        return (ret,)
+
+    @property
+    def rtol(self) -> float:
+        """Relative tolerance for comparing the return value."""
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+        ) = self.param
+        if precision == "float64":
+            return 1e-10
+        elif precision == "float32":
+            return 1e-4
+        else:
+            raise ValueError(f"Unknown precision: {precision}")
+
+    @property
+    def atol(self) -> float:
+        """Absolute tolerance for comparing the return value."""
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+        ) = self.param
+        if precision == "float64":
+            return 1e-10
+        elif precision == "float32":
+            return 1e-4
+        else:
+            raise ValueError(f"Unknown precision: {precision}")
diff --git a/source/tests/consistent/fitting/test_dos.py b/source/tests/consistent/fitting/test_dos.py
new file mode 100644
index 0000000000..bfdf76c8ff
--- /dev/null
+++ b/source/tests/consistent/fitting/test_dos.py
@@ -0,0 +1,199 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+from typing import (
+    Any,
+    Tuple,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.fitting.dos_fitting import DOSFittingNet as DOSFittingDP
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+
+from ..common import (
+    INSTALLED_PT,
+    INSTALLED_TF,
+    CommonTest,
+    parameterized,
+)
+from .common import (
+    FittingTest,
+)
+
+if INSTALLED_PT:
+    import torch
+
+    from deepmd.pt.model.task.dos import DOSFittingNet as DOSFittingPT
+    from deepmd.pt.utils.env import DEVICE as PT_DEVICE
+else:
+    DOSFittingPT = object
+if INSTALLED_TF:
+    from deepmd.tf.fit.dos import DOSFitting as DOSFittingTF
+else:
+    DOSFittingTF = object
+from deepmd.utils.argcheck import (
+    fitting_dos,
+)
+
+
+@parameterized(
+    (True, False),  # resnet_dt
+    ("float64", "float32"),  # precision
+    (True, False),  # mixed_types
+    (0, 1),  # numb_fparam
+    (10, 20),  # numb_dos
+)
+class TestDOS(CommonTest, FittingTest, unittest.TestCase):
+    @property
+    def data(self) -> dict:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+            numb_fparam,
+            numb_dos,
+        ) = self.param
+        return {
+            "neuron": [5, 5, 5],
+            "resnet_dt": resnet_dt,
+            "precision": precision,
+            "numb_fparam": numb_fparam,
+            "seed": 20240217,
+            "numb_dos": numb_dos,
+        }
+
+    @property
+    def skip_pt(self) -> bool:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+            numb_fparam,
+            numb_dos,
+        ) = self.param
+        return CommonTest.skip_pt
+
+    tf_class = DOSFittingTF
+    dp_class = DOSFittingDP
+    pt_class = DOSFittingPT
+    args = fitting_dos()
+
+    def setUp(self):
+        CommonTest.setUp(self)
+
+        self.ntypes = 2
+        self.natoms = np.array([6, 6, 2, 4], dtype=np.int32)
+        self.inputs = np.ones((1, 6, 20), dtype=GLOBAL_NP_FLOAT_PRECISION)
+        self.atype = np.array([0, 1, 1, 0, 1, 1], dtype=np.int32)
+        # inconsistent if not sorted
+        self.atype.sort()
+        self.fparam = -np.ones((1,), dtype=GLOBAL_NP_FLOAT_PRECISION)
+
+    @property
+    def addtional_data(self) -> dict:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+            numb_fparam,
+            numb_dos,
+        ) = self.param
+        return {
+            "ntypes": self.ntypes,
+            "dim_descrpt": self.inputs.shape[-1],
+            "mixed_types": mixed_types,
+        }
+
+    def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+            numb_fparam,
+            numb_dos,
+        ) = self.param
+        return self.build_tf_fitting(
+            obj,
+            self.inputs.ravel(),
+            self.natoms,
+            self.atype,
+            self.fparam if numb_fparam else None,
+            suffix,
+        )
+
+    def eval_pt(self, pt_obj: Any) -> Any:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+            numb_fparam,
+            numb_dos,
+        ) = self.param
+        return (
+            pt_obj(
+                torch.from_numpy(self.inputs).to(device=PT_DEVICE),
+                torch.from_numpy(self.atype.reshape(1, -1)).to(device=PT_DEVICE),
+                fparam=torch.from_numpy(self.fparam).to(device=PT_DEVICE)
+                if numb_fparam
+                else None,
+            )["dos"]
+            .detach()
+            .cpu()
+            .numpy()
+        )
+
+    def eval_dp(self, dp_obj: Any) -> Any:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+            numb_fparam,
+            numb_dos,
+        ) = self.param
+        return dp_obj(
+            self.inputs,
+            self.atype.reshape(1, -1),
+            fparam=self.fparam if numb_fparam else None,
+        )["dos"]
+
+    def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]:
+        if backend == self.RefBackend.TF:
+            # shape is not same
+            ret = ret[0].reshape(-1, self.natoms[0], 1)
+        return (ret,)
+
+    @property
+    def rtol(self) -> float:
+        """Relative tolerance for comparing the return value."""
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+            numb_fparam,
+            numb_dos,
+        ) = self.param
+        if precision == "float64":
+            return 1e-10
+        elif precision == "float32":
+            return 1e-4
+        else:
+            raise ValueError(f"Unknown precision: {precision}")
+
+    @property
+    def atol(self) -> float:
+        """Absolute tolerance for comparing the return value."""
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+            numb_fparam,
+            numb_dos,
+        ) = self.param
+        if precision == "float64":
+            return 1e-10
+        elif precision == "float32":
+            return 1e-4
+        else:
+            raise ValueError(f"Unknown precision: {precision}")
diff --git a/source/tests/consistent/fitting/test_ener.py b/source/tests/consistent/fitting/test_ener.py
new file mode 100644
index 0000000000..157b1bab8a
--- /dev/null
+++ b/source/tests/consistent/fitting/test_ener.py
@@ -0,0 +1,203 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+from typing import (
+    Any,
+    Tuple,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.fitting.ener_fitting import EnergyFittingNet as EnerFittingDP
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+
+from ..common import (
+    INSTALLED_PT,
+    INSTALLED_TF,
+    CommonTest,
+    parameterized,
+)
+from .common import (
+    FittingTest,
+)
+
+if INSTALLED_PT:
+    import torch
+
+    from deepmd.pt.model.task.ener import EnergyFittingNet as EnerFittingPT
+    from deepmd.pt.utils.env import DEVICE as PT_DEVICE
+else:
+    EnerFittingPT = object
+if INSTALLED_TF:
+    from deepmd.tf.fit.ener import EnerFitting as EnerFittingTF
+else:
+    EnerFittingTF = object
+from deepmd.utils.argcheck import (
+    fitting_ener,
+)
+
+
+@parameterized(
+    (True, False),  # resnet_dt
+    ("float64", "float32", "bfloat16"),  # precision
+    (True, False),  # mixed_types
+    (0, 1),  # numb_fparam
+    ([], [-12345.6, None]),  # atom_ener
+)
+class TestEner(CommonTest, FittingTest, unittest.TestCase):
+    @property
+    def data(self) -> dict:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+            numb_fparam,
+            atom_ener,
+        ) = self.param
+        return {
+            "neuron": [5, 5, 5],
+            "resnet_dt": resnet_dt,
+            "precision": precision,
+            "numb_fparam": numb_fparam,
+            "seed": 20240217,
+            "atom_ener": atom_ener,
+        }
+
+    @property
+    def skip_pt(self) -> bool:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+            numb_fparam,
+            atom_ener,
+        ) = self.param
+        return CommonTest.skip_pt
+
+    tf_class = EnerFittingTF
+    dp_class = EnerFittingDP
+    pt_class = EnerFittingPT
+    args = fitting_ener()
+
+    def setUp(self):
+        CommonTest.setUp(self)
+
+        self.ntypes = 2
+        self.natoms = np.array([6, 6, 2, 4], dtype=np.int32)
+        self.inputs = np.ones((1, 6, 20), dtype=GLOBAL_NP_FLOAT_PRECISION)
+        self.atype = np.array([0, 1, 1, 0, 1, 1], dtype=np.int32)
+        # inconsistent if not sorted
+        self.atype.sort()
+        self.fparam = -np.ones((1,), dtype=GLOBAL_NP_FLOAT_PRECISION)
+
+    @property
+    def addtional_data(self) -> dict:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+            numb_fparam,
+            atom_ener,
+        ) = self.param
+        return {
+            "ntypes": self.ntypes,
+            "dim_descrpt": self.inputs.shape[-1],
+            "mixed_types": mixed_types,
+        }
+
+    def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+            numb_fparam,
+            atom_ener,
+        ) = self.param
+        return self.build_tf_fitting(
+            obj,
+            self.inputs.ravel(),
+            self.natoms,
+            self.atype,
+            self.fparam if numb_fparam else None,
+            suffix,
+        )
+
+    def eval_pt(self, pt_obj: Any) -> Any:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+            numb_fparam,
+            atom_ener,
+        ) = self.param
+        return (
+            pt_obj(
+                torch.from_numpy(self.inputs).to(device=PT_DEVICE),
+                torch.from_numpy(self.atype.reshape(1, -1)).to(device=PT_DEVICE),
+                fparam=torch.from_numpy(self.fparam).to(device=PT_DEVICE)
+                if numb_fparam
+                else None,
+            )["energy"]
+            .detach()
+            .cpu()
+            .numpy()
+        )
+
+    def eval_dp(self, dp_obj: Any) -> Any:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+            numb_fparam,
+            atom_ener,
+        ) = self.param
+        return dp_obj(
+            self.inputs,
+            self.atype.reshape(1, -1),
+            fparam=self.fparam if numb_fparam else None,
+        )["energy"]
+
+    def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]:
+        if backend == self.RefBackend.TF:
+            # shape is not same
+            ret = ret[0].reshape(-1, self.natoms[0], 1)
+        return (ret,)
+
+    @property
+    def rtol(self) -> float:
+        """Relative tolerance for comparing the return value."""
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+            numb_fparam,
+            atom_ener,
+        ) = self.param
+        if precision == "float64":
+            return 1e-10
+        elif precision == "float32":
+            return 1e-4
+        elif precision == "bfloat16":
+            return 1e-1
+        else:
+            raise ValueError(f"Unknown precision: {precision}")
+
+    @property
+    def atol(self) -> float:
+        """Absolute tolerance for comparing the return value."""
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+            numb_fparam,
+            atom_ener,
+        ) = self.param
+        if precision == "float64":
+            return 1e-10
+        elif precision == "float32":
+            return 1e-4
+        elif precision == "bfloat16":
+            return 1e-1
+        else:
+            raise ValueError(f"Unknown precision: {precision}")
diff --git a/source/tests/consistent/fitting/test_polar.py b/source/tests/consistent/fitting/test_polar.py
new file mode 100644
index 0000000000..5b55c6d333
--- /dev/null
+++ b/source/tests/consistent/fitting/test_polar.py
@@ -0,0 +1,182 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+from typing import (
+    Any,
+    Tuple,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.fitting.polarizability_fitting import PolarFitting as PolarFittingDP
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+
+from ..common import (
+    INSTALLED_PT,
+    INSTALLED_TF,
+    CommonTest,
+    parameterized,
+)
+from .common import (
+    DipoleFittingTest,
+)
+
+if INSTALLED_PT:
+    import torch
+
+    from deepmd.pt.model.task.polarizability import PolarFittingNet as PolarFittingPT
+    from deepmd.pt.utils.env import DEVICE as PT_DEVICE
+else:
+    PolarFittingPT = object
+if INSTALLED_TF:
+    from deepmd.tf.fit.polar import PolarFittingSeA as PolarFittingTF
+else:
+    PolarFittingTF = object
+from deepmd.utils.argcheck import (
+    fitting_polar,
+)
+
+
+@parameterized(
+    (True, False),  # resnet_dt
+    ("float64", "float32"),  # precision
+    (True, False),  # mixed_types
+)
+class TestPolar(CommonTest, DipoleFittingTest, unittest.TestCase):
+    @property
+    def data(self) -> dict:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+        ) = self.param
+        return {
+            "neuron": [5, 5, 5],
+            "resnet_dt": resnet_dt,
+            "precision": precision,
+            "seed": 20240217,
+        }
+
+    @property
+    def skip_pt(self) -> bool:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+        ) = self.param
+        return CommonTest.skip_pt
+
+    tf_class = PolarFittingTF
+    dp_class = PolarFittingDP
+    pt_class = PolarFittingPT
+    args = fitting_polar()
+
+    def setUp(self):
+        CommonTest.setUp(self)
+
+        self.ntypes = 2
+        self.natoms = np.array([6, 6, 2, 4], dtype=np.int32)
+        self.inputs = np.ones((1, 6, 20), dtype=GLOBAL_NP_FLOAT_PRECISION)
+        self.gr = np.ones((1, 6, 30, 3), dtype=GLOBAL_NP_FLOAT_PRECISION)
+        self.atype = np.array([0, 1, 1, 0, 1, 1], dtype=np.int32)
+        # inconsistent if not sorted
+        self.atype.sort()
+
+    @property
+    def addtional_data(self) -> dict:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+        ) = self.param
+        return {
+            "ntypes": self.ntypes,
+            "dim_descrpt": self.inputs.shape[-1],
+            "mixed_types": mixed_types,
+            "var_name": "polar",
+            "embedding_width": 30,
+        }
+
+    def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+        ) = self.param
+        return self.build_tf_fitting(
+            obj,
+            self.inputs.ravel(),
+            self.gr,
+            self.natoms,
+            self.atype,
+            None,
+            suffix,
+        )
+
+    def eval_pt(self, pt_obj: Any) -> Any:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+        ) = self.param
+        return (
+            pt_obj(
+                torch.from_numpy(self.inputs).to(device=PT_DEVICE),
+                torch.from_numpy(self.atype.reshape(1, -1)).to(device=PT_DEVICE),
+                torch.from_numpy(self.gr).to(device=PT_DEVICE),
+                None,
+            )["polar"]
+            .detach()
+            .cpu()
+            .numpy()
+        )
+
+    def eval_dp(self, dp_obj: Any) -> Any:
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+        ) = self.param
+        return dp_obj(
+            self.inputs,
+            self.atype.reshape(1, -1),
+            self.gr,
+            None,
+        )["polar"]
+
+    def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]:
+        if backend == self.RefBackend.TF:
+            # shape is not same
+            ret = ret[0].reshape(-1, self.natoms[0], 1)
+        return (ret,)
+
+    @property
+    def rtol(self) -> float:
+        """Relative tolerance for comparing the return value."""
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+        ) = self.param
+        if precision == "float64":
+            return 1e-10
+        elif precision == "float32":
+            return 1e-3
+        else:
+            raise ValueError(f"Unknown precision: {precision}")
+
+    @property
+    def atol(self) -> float:
+        """Absolute tolerance for comparing the return value."""
+        (
+            resnet_dt,
+            precision,
+            mixed_types,
+        ) = self.param
+        if precision == "float64":
+            return 1e-10
+        elif precision == "float32":
+            return 1e-3
+        else:
+            raise ValueError(f"Unknown precision: {precision}")
diff --git a/source/tests/consistent/io/__init__.py b/source/tests/consistent/io/__init__.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/source/tests/consistent/io/__init__.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/source/tests/consistent/io/test_io.py b/source/tests/consistent/io/test_io.py
new file mode 100644
index 0000000000..71e4002128
--- /dev/null
+++ b/source/tests/consistent/io/test_io.py
@@ -0,0 +1,184 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import unittest
+from pathlib import (
+    Path,
+)
+
+import numpy as np
+
+from deepmd.backend.backend import (
+    Backend,
+)
+from deepmd.dpmodel.model.model import (
+    get_model,
+)
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+from deepmd.infer.deep_eval import (
+    DeepEval,
+)
+
+infer_path = Path(__file__).parent.parent.parent / "infer"
+
+
+class IOTest:
+    data: dict
+
+    def get_data_from_model(self, model_file: str) -> dict:
+        """Get data from a model file.
+
+        Parameters
+        ----------
+        model_file : str
+            The model file.
+
+        Returns
+        -------
+        dict
+            The data from the model file.
+        """
+        inp_backend: Backend = Backend.detect_backend_by_model(model_file)()
+        inp_hook = inp_backend.serialize_hook
+        return inp_hook(model_file)
+
+    def save_data_to_model(self, model_file: str, data: dict) -> None:
+        """Save data to a model file.
+
+        Parameters
+        ----------
+        model_file : str
+            The model file.
+        data : dict
+            The data to save.
+        """
+        out_backend: Backend = Backend.detect_backend_by_model(model_file)()
+        out_hook = out_backend.deserialize_hook
+        out_hook(model_file, data)
+
+    def tearDown(self):
+        prefix = "test_consistent_io_" + self.__class__.__name__.lower()
+        for ii in Path(".").glob(prefix + ".*"):
+            if Path(ii).exists():
+                Path(ii).unlink()
+
+    def test_data_equal(self):
+        prefix = "test_consistent_io_" + self.__class__.__name__.lower()
+        for backend_name in ("tensorflow", "pytorch", "dpmodel"):
+            with self.subTest(backend_name=backend_name):
+                backend = Backend.get_backend(backend_name)()
+                if not backend.is_available:
+                    continue
+                reference_data = copy.deepcopy(self.data)
+                self.save_data_to_model(prefix + backend.suffixes[0], reference_data)
+                data = self.get_data_from_model(prefix + backend.suffixes[0])
+                data = copy.deepcopy(data)
+                reference_data = copy.deepcopy(self.data)
+                # some keys are not expected to be not the same
+                for kk in [
+                    "backend",
+                    "tf_version",
+                    "pt_version",
+                    "@variables",
+                    # dpmodel only
+                    "software",
+                    "version",
+                    "time",
+                ]:
+                    data.pop(kk, None)
+                    reference_data.pop(kk, None)
+                np.testing.assert_equal(data, reference_data)
+
+    def test_deep_eval(self):
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ],
+            dtype=GLOBAL_NP_FLOAT_PRECISION,
+        ).reshape(1, -1, 3)
+        self.atype = np.array([0, 1, 1, 0, 1, 1], dtype=np.int32).reshape(1, -1)
+        self.box = np.array(
+            [13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0],
+            dtype=GLOBAL_NP_FLOAT_PRECISION,
+        ).reshape(1, 9)
+        prefix = "test_consistent_io_" + self.__class__.__name__.lower()
+        rets = []
+        for backend_name in ("tensorflow", "pytorch", "dpmodel"):
+            backend = Backend.get_backend(backend_name)()
+            if not backend.is_available:
+                continue
+            reference_data = copy.deepcopy(self.data)
+            self.save_data_to_model(prefix + backend.suffixes[0], reference_data)
+            deep_eval = DeepEval(prefix + backend.suffixes[0])
+            ret = deep_eval.eval(
+                self.coords,
+                self.box,
+                self.atype,
+            )
+            rets.append(ret)
+        for ret in rets[1:]:
+            for vv1, vv2 in zip(rets[0], ret):
+                if np.isnan(vv2).all():
+                    # expect all nan if not supported
+                    continue
+                np.testing.assert_allclose(vv1, vv2, rtol=1e-12, atol=1e-12)
+
+
+class TestDeepPot(unittest.TestCase, IOTest):
+    def setUp(self):
+        model_def_script = {
+            "type_map": ["O", "H"],
+            "descriptor": {
+                "type": "se_e2_a",
+                "sel": [20, 20],
+                "rcut_smth": 0.50,
+                "rcut": 6.00,
+                "neuron": [
+                    3,
+                    6,
+                ],
+                "resnet_dt": False,
+                "axis_neuron": 2,
+                "precision": "float64",
+                "type_one_side": True,
+                "seed": 1,
+            },
+            "fitting_net": {
+                "type": "ener",
+                "neuron": [
+                    5,
+                    5,
+                ],
+                "resnet_dt": True,
+                "precision": "float64",
+                "atom_ener": [],
+                "seed": 1,
+            },
+        }
+        model = get_model(copy.deepcopy(model_def_script))
+        self.data = {
+            "model": model.serialize(),
+            "backend": "test",
+            "model_def_script": model_def_script,
+        }
+
+    def tearDown(self):
+        IOTest.tearDown(self)
diff --git a/source/tests/consistent/model/__init__.py b/source/tests/consistent/model/__init__.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/source/tests/consistent/model/__init__.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/source/tests/consistent/model/common.py b/source/tests/consistent/model/common.py
new file mode 100644
index 0000000000..294edec1d6
--- /dev/null
+++ b/source/tests/consistent/model/common.py
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Any,
+)
+
+from deepmd.common import (
+    make_default_mesh,
+)
+
+from ..common import (
+    INSTALLED_PT,
+    INSTALLED_TF,
+)
+
+if INSTALLED_PT:
+    from deepmd.pt.utils.utils import to_numpy_array as torch_to_numpy
+    from deepmd.pt.utils.utils import to_torch_tensor as numpy_to_torch
+if INSTALLED_TF:
+    from deepmd.tf.env import (
+        GLOBAL_TF_FLOAT_PRECISION,
+        tf,
+    )
+
+
+class ModelTest:
+    """Useful utilities for model tests."""
+
+    def build_tf_model(self, obj, natoms, coords, atype, box, suffix):
+        t_coord = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None, None, None], name="i_coord"
+        )
+        t_type = tf.placeholder(tf.int32, [None, None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, natoms.shape, name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        ret = obj.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            {},
+            suffix=suffix,
+        )
+        return [ret["energy"], ret["atom_ener"]], {
+            t_coord: coords,
+            t_type: atype,
+            t_natoms: natoms,
+            t_box: box,
+            t_mesh: make_default_mesh(True, False),
+        }
+
+    def eval_dp_model(self, dp_obj: Any, natoms, coords, atype, box) -> Any:
+        return dp_obj(coords, atype, box=box)
+
+    def eval_pt_model(self, pt_obj: Any, natoms, coords, atype, box) -> Any:
+        return {
+            kk: torch_to_numpy(vv)
+            for kk, vv in pt_obj(
+                numpy_to_torch(coords),
+                numpy_to_torch(atype),
+                box=numpy_to_torch(box),
+            ).items()
+        }
diff --git a/source/tests/consistent/model/test_ener.py b/source/tests/consistent/model/test_ener.py
new file mode 100644
index 0000000000..da5033a3b6
--- /dev/null
+++ b/source/tests/consistent/model/test_ener.py
@@ -0,0 +1,180 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+from typing import (
+    Any,
+    Tuple,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.model.dp_model import DPModel as EnergyModelDP
+from deepmd.dpmodel.model.model import get_model as get_model_dp
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+
+from ..common import (
+    INSTALLED_PT,
+    INSTALLED_TF,
+    CommonTest,
+    parameterized,
+)
+from .common import (
+    ModelTest,
+)
+
+if INSTALLED_PT:
+    from deepmd.pt.model.model import get_model as get_model_pt
+    from deepmd.pt.model.model.ener_model import EnergyModel as EnergyModelPT
+
+else:
+    EnergyModelPT = None
+if INSTALLED_TF:
+    from deepmd.tf.model.ener import EnerModel as EnergyModelTF
+else:
+    EnergyModelTF = None
+from deepmd.utils.argcheck import (
+    model_args,
+)
+
+
+@parameterized(
+    (
+        [],
+        [[0, 1]],
+    ),
+    (
+        [],
+        [1],
+    ),
+)
+class TestEner(CommonTest, ModelTest, unittest.TestCase):
+    @property
+    def data(self) -> dict:
+        pair_exclude_types, atom_exclude_types = self.param
+        return {
+            "type_map": ["O", "H"],
+            "pair_exclude_types": pair_exclude_types,
+            "atom_exclude_types": atom_exclude_types,
+            "descriptor": {
+                "type": "se_e2_a",
+                "sel": [20, 20],
+                "rcut_smth": 0.50,
+                "rcut": 6.00,
+                "neuron": [
+                    3,
+                    6,
+                ],
+                "resnet_dt": False,
+                "axis_neuron": 2,
+                "precision": "float64",
+                "type_one_side": True,
+                "seed": 1,
+            },
+            "fitting_net": {
+                "neuron": [
+                    5,
+                    5,
+                ],
+                "resnet_dt": True,
+                "precision": "float64",
+                "seed": 1,
+            },
+        }
+
+    tf_class = EnergyModelTF
+    dp_class = EnergyModelDP
+    pt_class = EnergyModelPT
+    args = model_args()
+
+    def skip_tf(self):
+        return (
+            self.data["pair_exclude_types"] != []
+            or self.data["atom_exclude_types"] != []
+        )
+
+    def pass_data_to_cls(self, cls, data) -> Any:
+        """Pass data to the class."""
+        data = data.copy()
+        if cls is EnergyModelDP:
+            return get_model_dp(data)
+        elif cls is EnergyModelPT:
+            return get_model_pt(data)
+        return cls(**data, **self.addtional_data)
+
+    def setUp(self):
+        CommonTest.setUp(self)
+
+        self.ntypes = 2
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ],
+            dtype=GLOBAL_NP_FLOAT_PRECISION,
+        ).reshape(1, -1, 3)
+        self.atype = np.array([0, 1, 1, 0, 1, 1], dtype=np.int32).reshape(1, -1)
+        self.box = np.array(
+            [13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0],
+            dtype=GLOBAL_NP_FLOAT_PRECISION,
+        ).reshape(1, 9)
+        self.natoms = np.array([6, 6, 2, 4], dtype=np.int32)
+
+        # TF requires the atype to be sort
+        idx_map = np.argsort(self.atype.ravel())
+        self.atype = self.atype[:, idx_map]
+        self.coords = self.coords[:, idx_map]
+
+    def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]:
+        return self.build_tf_model(
+            obj,
+            self.natoms,
+            self.coords,
+            self.atype,
+            self.box,
+            suffix,
+        )
+
+    def eval_dp(self, dp_obj: Any) -> Any:
+        return self.eval_dp_model(
+            dp_obj,
+            self.natoms,
+            self.coords,
+            self.atype,
+            self.box,
+        )
+
+    def eval_pt(self, pt_obj: Any) -> Any:
+        return self.eval_pt_model(
+            pt_obj,
+            self.natoms,
+            self.coords,
+            self.atype,
+            self.box,
+        )
+
+    def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]:
+        # shape not matched. ravel...
+        if backend is self.RefBackend.DP:
+            return (ret["energy_redu"].ravel(), ret["energy"].ravel())
+        elif backend is self.RefBackend.PT:
+            return (ret["energy"].ravel(), ret["atom_energy"].ravel())
+        elif backend is self.RefBackend.TF:
+            return (ret[0].ravel(), ret[1].ravel())
+        raise ValueError(f"Unknown backend: {backend}")
diff --git a/source/tests/consistent/model/test_frozen.py b/source/tests/consistent/model/test_frozen.py
new file mode 100644
index 0000000000..a60a6abb3f
--- /dev/null
+++ b/source/tests/consistent/model/test_frozen.py
@@ -0,0 +1,167 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import os
+import unittest
+from typing import (
+    Any,
+    Tuple,
+)
+
+import numpy as np
+
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+
+from ..common import (
+    INSTALLED_PT,
+    INSTALLED_TF,
+    CommonTest,
+    parameterized,
+)
+from .common import (
+    ModelTest,
+)
+
+if INSTALLED_PT:
+    from deepmd.pt.model.model import BaseModel as FrozenModelPT
+
+else:
+    FrozenModelPT = None
+if INSTALLED_TF:
+    from deepmd.tf.model.model import Model as FrozenModelTF
+else:
+    FrozenModelTF = None
+from pathlib import (
+    Path,
+)
+
+from deepmd.entrypoints.convert_backend import (
+    convert_backend,
+)
+from deepmd.utils.argcheck import (
+    model_args,
+)
+
+original_model = str(Path(__file__).parent.parent.parent / "infer" / "deeppot.dp")
+pt_model = "deeppot_for_consistent_frozen.pth"
+tf_model = "deeppot_for_consistent_frozen.pb"
+dp_model = original_model
+
+
+def setUpModule():
+    convert_backend(
+        INPUT=dp_model,
+        OUTPUT=tf_model,
+    )
+    convert_backend(
+        INPUT=dp_model,
+        OUTPUT=pt_model,
+    )
+
+
+def tearDownModule():
+    for model_file in (pt_model, tf_model):
+        try:
+            os.remove(model_file)
+        except FileNotFoundError:
+            pass
+
+
+@parameterized((pt_model, tf_model, dp_model))
+class TestFrozen(CommonTest, ModelTest, unittest.TestCase):
+    @property
+    def data(self) -> dict:
+        (model_file,) = self.param
+        if not INSTALLED_PT and model_file.endswith(".pth"):
+            raise unittest.SkipTest("PyTorch is not installed")
+        if not INSTALLED_TF and model_file.endswith(".pb"):
+            raise unittest.SkipTest("TensorFlow is not installed")
+        return {
+            "type": "frozen",
+            "model_file": model_file,
+        }
+
+    tf_class = FrozenModelTF
+    dp_class = None
+    pt_class = FrozenModelPT
+    args = model_args()
+
+    def skip_dp(self):
+        return True
+
+    def setUp(self):
+        CommonTest.setUp(self)
+
+        self.ntypes = 2
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ],
+            dtype=GLOBAL_NP_FLOAT_PRECISION,
+        ).reshape(1, -1, 3)
+        self.atype = np.array([0, 1, 1, 0, 1, 1], dtype=np.int32).reshape(1, -1)
+        self.box = np.array(
+            [13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0],
+            dtype=GLOBAL_NP_FLOAT_PRECISION,
+        ).reshape(1, 9)
+        self.natoms = np.array([6, 6, 2, 4], dtype=np.int32)
+
+        # TF requires the atype to be sort
+        idx_map = np.argsort(self.atype.ravel())
+        self.atype = self.atype[:, idx_map]
+        self.coords = self.coords[:, idx_map]
+
+    def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]:
+        return self.build_tf_model(
+            obj,
+            self.natoms,
+            self.coords,
+            self.atype,
+            self.box,
+            suffix,
+        )
+
+    def eval_dp(self, dp_obj: Any) -> Any:
+        return self.eval_dp_model(
+            dp_obj,
+            self.natoms,
+            self.coords,
+            self.atype,
+            self.box,
+        )
+
+    def eval_pt(self, pt_obj: Any) -> Any:
+        return self.eval_pt_model(
+            pt_obj,
+            self.natoms,
+            self.coords,
+            self.atype,
+            self.box,
+        )
+
+    def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]:
+        # shape not matched. ravel...
+        if backend is self.RefBackend.DP:
+            return (ret["energy_redu"].ravel(), ret["energy"].ravel())
+        elif backend is self.RefBackend.PT:
+            return (ret["energy"].ravel(), ret["atom_energy"].ravel())
+        elif backend is self.RefBackend.TF:
+            return (ret[0].ravel(), ret[1].ravel())
+        raise ValueError(f"Unknown backend: {backend}")
diff --git a/source/tests/consistent/test_activation.py b/source/tests/consistent/test_activation.py
new file mode 100644
index 0000000000..9dcac6746e
--- /dev/null
+++ b/source/tests/consistent/test_activation.py
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+
+from deepmd.common import (
+    VALID_ACTIVATION,
+)
+from deepmd.dpmodel.utils.network import get_activation_fn as get_activation_fn_dp
+
+from .common import (
+    INSTALLED_PT,
+    INSTALLED_TF,
+    parameterized,
+)
+
+if INSTALLED_PT:
+    from deepmd.pt.utils.utils import ActivationFn as ActivationFn_pt
+    from deepmd.pt.utils.utils import (
+        to_numpy_array,
+        to_torch_tensor,
+    )
+if INSTALLED_TF:
+    from deepmd.tf.common import get_activation_func as get_activation_fn_tf
+    from deepmd.tf.env import (
+        tf,
+    )
+
+
+@parameterized(
+    tuple([x.capitalize() for x in VALID_ACTIVATION]),
+)
+class TestActivationFunctionConsistent(unittest.TestCase):
+    def setUp(self):
+        (self.activation,) = self.param
+        self.random_input = np.random.default_rng().normal(scale=10, size=(10, 10))
+        self.ref = get_activation_fn_dp(self.activation)(self.random_input)
+
+    @unittest.skipUnless(INSTALLED_TF, "TensorFlow is not installed")
+    def test_tf_consistent_with_ref(self):
+        if INSTALLED_TF:
+            place_holder = tf.placeholder(tf.float64, self.random_input.shape)
+            t_test = get_activation_fn_tf(self.activation)(place_holder)
+            with tf.Session() as sess:
+                test = sess.run(t_test, feed_dict={place_holder: self.random_input})
+            np.testing.assert_allclose(self.ref, test, atol=1e-10)
+
+    @unittest.skipUnless(INSTALLED_PT, "PyTorch is not installed")
+    def test_pt_consistent_with_ref(self):
+        if INSTALLED_PT:
+            test = to_numpy_array(
+                ActivationFn_pt(self.activation)(to_torch_tensor(self.random_input))
+            )
+            np.testing.assert_allclose(self.ref, test, atol=1e-10)
diff --git a/source/tests/consistent/test_type_embedding.py b/source/tests/consistent/test_type_embedding.py
new file mode 100644
index 0000000000..2e20142a66
--- /dev/null
+++ b/source/tests/consistent/test_type_embedding.py
@@ -0,0 +1,132 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+from typing import (
+    Any,
+    Tuple,
+)
+
+import numpy as np
+
+from deepmd.dpmodel.utils.type_embed import TypeEmbedNet as TypeEmbedNetDP
+from deepmd.utils.argcheck import (
+    type_embedding_args,
+)
+
+from .common import (
+    INSTALLED_PT,
+    INSTALLED_TF,
+    CommonTest,
+    parameterized,
+)
+
+if INSTALLED_PT:
+    import torch
+
+    from deepmd.pt.model.network.network import TypeEmbedNetConsistent as TypeEmbedNetPT
+    from deepmd.pt.utils.env import DEVICE as PT_DEVICE
+else:
+    TypeEmbedNetPT = object
+if INSTALLED_TF:
+    from deepmd.tf.utils.type_embed import TypeEmbedNet as TypeEmbedNetTF
+else:
+    TypeEmbedNetTF = object
+
+
+@parameterized(
+    (True, False),  # resnet_dt
+    ("float32", "float64"),  # precision
+    (True, False),  # padding
+)
+class TestTypeEmbedding(CommonTest, unittest.TestCase):
+    """Useful utilities for descriptor tests."""
+
+    @property
+    def data(self) -> dict:
+        (
+            resnet_dt,
+            precision,
+            padding,
+        ) = self.param
+        return {
+            "neuron": [2, 4, 4],
+            "resnet_dt": resnet_dt,
+            "precision": precision,
+            "seed": 20240327,
+        }
+
+    tf_class = TypeEmbedNetTF
+    dp_class = TypeEmbedNetDP
+    pt_class = TypeEmbedNetPT
+    args = type_embedding_args()
+
+    @property
+    def addtional_data(self) -> dict:
+        (
+            resnet_dt,
+            precision,
+            padding,
+        ) = self.param
+        # implict argument not input by users
+        return {
+            "ntypes": self.ntypes,
+            "padding": padding,
+        }
+
+    def setUp(self):
+        CommonTest.setUp(self)
+
+        self.ntypes = 2
+
+    def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]:
+        return [
+            obj.build(
+                obj.ntypes,
+                suffix=suffix,
+            ),
+        ], {}
+
+    def eval_dp(self, dp_obj: Any) -> Any:
+        return (dp_obj(),)
+
+    def eval_pt(self, pt_obj: Any) -> Any:
+        return [
+            x.detach().cpu().numpy() if torch.is_tensor(x) else x
+            for x in (pt_obj(device=PT_DEVICE),)
+        ]
+
+    def extract_ret(self, ret: Any, backend) -> Tuple[np.ndarray, ...]:
+        return (ret[0],)
+
+    @property
+    def rtol(self) -> float:
+        """Relative tolerance for comparing the return value."""
+        (
+            resnet_dt,
+            precision,
+            padding,
+        ) = self.param
+        if precision == "float64":
+            return 1e-10
+        elif precision == "float32":
+            return 1e-4
+        elif precision == "bfloat16":
+            return 1e-1
+        else:
+            raise ValueError(f"Unknown precision: {precision}")
+
+    @property
+    def atol(self) -> float:
+        """Absolute tolerance for comparing the return value."""
+        (
+            resnet_dt,
+            precision,
+            padding,
+        ) = self.param
+        if precision == "float64":
+            return 1e-10
+        elif precision == "float32":
+            return 1e-4
+        elif precision == "bfloat16":
+            return 1e-1
+        else:
+            raise ValueError(f"Unknown precision: {precision}")
diff --git a/source/tests/infer/deeppot.dp b/source/tests/infer/deeppot.dp
new file mode 100644
index 0000000000..2f7d9e3f6f
Binary files /dev/null and b/source/tests/infer/deeppot.dp differ
diff --git a/source/tests/infer/deeppot_sea.pth b/source/tests/infer/deeppot_sea.pth
new file mode 100644
index 0000000000..c830f0df9e
Binary files /dev/null and b/source/tests/infer/deeppot_sea.pth differ
diff --git a/source/tests/infer/fparam_aparam.pbtxt b/source/tests/infer/fparam_aparam.pbtxt
index a89596961e..8c2e884090 100644
--- a/source/tests/infer/fparam_aparam.pbtxt
+++ b/source/tests/infer/fparam_aparam.pbtxt
@@ -35,7 +35,7 @@ node {
         dtype: DT_STRING
         tensor_shape {
         }
-        string_val: "{\"model\":{\"data_stat_nbatch\":1,\"descriptor\":{\"type\":\"se_e2_a\",\"sel\":[60],\"rcut_smth\":1.8,\"rcut\":6.0,\"neuron\":[5,10,20],\"resnet_dt\":false,\"axis_neuron\":8,\"seed\":1,\"activation_function\":\"tanh\",\"type_one_side\":false,\"precision\":\"default\",\"trainable\":true,\"exclude_types\":[],\"set_davg_zero\":false},\"fitting_net\":{\"neuron\":[5,5,5],\"resnet_dt\":true,\"numb_fparam\":1,\"numb_aparam\":1,\"seed\":1,\"type\":\"ener\",\"activation_function\":\"tanh\",\"precision\":\"default\",\"trainable\":true,\"rcond\":0.001,\"atom_ener\":[],\"use_aparam_as_mask\":false},\"data_stat_protect\":0.01,\"data_bias_nsample\":10},\"loss\":{\"start_pref_e\":0.02,\"limit_pref_e\":1,\"start_pref_f\":1000,\"limit_pref_f\":1,\"start_pref_v\":0,\"limit_pref_v\":0,\"type\":\"ener\",\"start_pref_ae\":0.0,\"limit_pref_ae\":0.0,\"start_pref_pf\":0.0,\"limit_pref_pf\":0.0,\"enable_atom_ener_coeff\":false},\"learning_rate\":{\"start_lr\":0.001,\"stop_lr\":3e-08,\"decay_steps\":5000,\"scale_by_worker\":\"linear\",\"type\":\"exp\"},\"training\":{\"training_data\":{\"systems\":[\"../data/e3000_i2000/\",\"../data/e8000_i2000/\"],\"set_prefix\":\"set\",\"batch_size\":1,\"auto_prob\":\"prob_sys_size\",\"sys_probs\":null},\"seed\":1,\"disp_file\":\"lcurve.out\",\"disp_freq\":100,\"save_freq\":1000,\"save_ckpt\":\"model.ckpt\",\"disp_training\":true,\"time_training\":true,\"profiling\":false,\"profiling_file\":\"timeline.json\",\"numb_steps\":1000,\"validation_data\":null,\"enable_profiler\":false,\"tensorboard\":false,\"tensorboard_log_dir\":\"log\",\"tensorboard_freq\":1}}"
+        string_val: "{\"model\":{\"data_stat_nbatch\":1,\"type_map\":[\"O\"],\"descriptor\":{\"type\":\"se_e2_a\",\"sel\":[60],\"rcut_smth\":1.8,\"rcut\":6.0,\"neuron\":[5,10,20],\"resnet_dt\":false,\"axis_neuron\":8,\"seed\":1,\"activation_function\":\"tanh\",\"type_one_side\":false,\"precision\":\"default\",\"trainable\":true,\"exclude_types\":[],\"set_davg_zero\":false},\"fitting_net\":{\"neuron\":[5,5,5],\"resnet_dt\":true,\"numb_fparam\":1,\"numb_aparam\":1,\"seed\":1,\"type\":\"ener\",\"activation_function\":\"tanh\",\"precision\":\"default\",\"trainable\":true,\"rcond\":0.001,\"atom_ener\":[],\"use_aparam_as_mask\":false},\"data_stat_protect\":0.01,\"data_bias_nsample\":10},\"loss\":{\"start_pref_e\":0.02,\"limit_pref_e\":1,\"start_pref_f\":1000,\"limit_pref_f\":1,\"start_pref_v\":0,\"limit_pref_v\":0,\"type\":\"ener\",\"start_pref_ae\":0.0,\"limit_pref_ae\":0.0,\"start_pref_pf\":0.0,\"limit_pref_pf\":0.0,\"enable_atom_ener_coeff\":false},\"learning_rate\":{\"start_lr\":0.001,\"stop_lr\":3e-08,\"decay_steps\":5000,\"scale_by_worker\":\"linear\",\"type\":\"exp\"},\"training\":{\"training_data\":{\"systems\":[\"../data/e3000_i2000/\",\"../data/e8000_i2000/\"],\"set_prefix\":\"set\",\"batch_size\":1,\"auto_prob\":\"prob_sys_size\",\"sys_probs\":null},\"seed\":1,\"disp_file\":\"lcurve.out\",\"disp_freq\":100,\"save_freq\":1000,\"save_ckpt\":\"model.ckpt\",\"disp_training\":true,\"time_training\":true,\"profiling\":false,\"profiling_file\":\"timeline.json\",\"numb_steps\":1000,\"validation_data\":null,\"enable_profiler\":false,\"tensorboard\":false,\"tensorboard_log_dir\":\"log\",\"tensorboard_freq\":1}}"
       }
     }
   }
diff --git a/source/tests/infer/fparam_aparam.pth b/source/tests/infer/fparam_aparam.pth
new file mode 100644
index 0000000000..703f7267be
Binary files /dev/null and b/source/tests/infer/fparam_aparam.pth differ
diff --git a/source/tests/pt/NiO/data/data_0/set.000/box.npy b/source/tests/pt/NiO/data/data_0/set.000/box.npy
new file mode 100644
index 0000000000..1f72eb7185
Binary files /dev/null and b/source/tests/pt/NiO/data/data_0/set.000/box.npy differ
diff --git a/source/tests/pt/NiO/data/data_0/set.000/coord.npy b/source/tests/pt/NiO/data/data_0/set.000/coord.npy
new file mode 100644
index 0000000000..4b60ae0e0b
Binary files /dev/null and b/source/tests/pt/NiO/data/data_0/set.000/coord.npy differ
diff --git a/source/tests/pt/NiO/data/data_0/set.000/energy.npy b/source/tests/pt/NiO/data/data_0/set.000/energy.npy
new file mode 100644
index 0000000000..8754b6dad2
Binary files /dev/null and b/source/tests/pt/NiO/data/data_0/set.000/energy.npy differ
diff --git a/source/tests/pt/NiO/data/data_0/set.000/force.npy b/source/tests/pt/NiO/data/data_0/set.000/force.npy
new file mode 100644
index 0000000000..e95173d561
Binary files /dev/null and b/source/tests/pt/NiO/data/data_0/set.000/force.npy differ
diff --git a/source/tests/pt/NiO/data/data_0/set.000/force_mag.npy b/source/tests/pt/NiO/data/data_0/set.000/force_mag.npy
new file mode 100644
index 0000000000..65bc1ef837
Binary files /dev/null and b/source/tests/pt/NiO/data/data_0/set.000/force_mag.npy differ
diff --git a/source/tests/pt/NiO/data/data_0/set.000/spin.npy b/source/tests/pt/NiO/data/data_0/set.000/spin.npy
new file mode 100644
index 0000000000..c426f1c7f6
Binary files /dev/null and b/source/tests/pt/NiO/data/data_0/set.000/spin.npy differ
diff --git a/source/tests/pt/NiO/data/data_0/type.raw b/source/tests/pt/NiO/data/data_0/type.raw
new file mode 100644
index 0000000000..d9664c7a22
--- /dev/null
+++ b/source/tests/pt/NiO/data/data_0/type.raw
@@ -0,0 +1,32 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
diff --git a/source/tests/pt/NiO/data/data_0/type_map.raw b/source/tests/pt/NiO/data/data_0/type_map.raw
new file mode 100644
index 0000000000..7eca995c31
--- /dev/null
+++ b/source/tests/pt/NiO/data/data_0/type_map.raw
@@ -0,0 +1,2 @@
+Ni
+O
diff --git a/source/tests/pt/NiO/data/single/set.000/box.npy b/source/tests/pt/NiO/data/single/set.000/box.npy
new file mode 100644
index 0000000000..d3ac265aa8
Binary files /dev/null and b/source/tests/pt/NiO/data/single/set.000/box.npy differ
diff --git a/source/tests/pt/NiO/data/single/set.000/coord.npy b/source/tests/pt/NiO/data/single/set.000/coord.npy
new file mode 100644
index 0000000000..4060f0fc53
Binary files /dev/null and b/source/tests/pt/NiO/data/single/set.000/coord.npy differ
diff --git a/source/tests/pt/NiO/data/single/set.000/energy.npy b/source/tests/pt/NiO/data/single/set.000/energy.npy
new file mode 100644
index 0000000000..fd7d1420ee
Binary files /dev/null and b/source/tests/pt/NiO/data/single/set.000/energy.npy differ
diff --git a/source/tests/pt/NiO/data/single/set.000/force.npy b/source/tests/pt/NiO/data/single/set.000/force.npy
new file mode 100644
index 0000000000..c5c238d200
Binary files /dev/null and b/source/tests/pt/NiO/data/single/set.000/force.npy differ
diff --git a/source/tests/pt/NiO/data/single/set.000/force_mag.npy b/source/tests/pt/NiO/data/single/set.000/force_mag.npy
new file mode 100644
index 0000000000..3f0323ad8e
Binary files /dev/null and b/source/tests/pt/NiO/data/single/set.000/force_mag.npy differ
diff --git a/source/tests/pt/NiO/data/single/set.000/spin.npy b/source/tests/pt/NiO/data/single/set.000/spin.npy
new file mode 100644
index 0000000000..88985f5d2c
Binary files /dev/null and b/source/tests/pt/NiO/data/single/set.000/spin.npy differ
diff --git a/source/tests/pt/NiO/data/single/type.raw b/source/tests/pt/NiO/data/single/type.raw
new file mode 100644
index 0000000000..d9664c7a22
--- /dev/null
+++ b/source/tests/pt/NiO/data/single/type.raw
@@ -0,0 +1,32 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
diff --git a/source/tests/pt/NiO/data/single/type_map.raw b/source/tests/pt/NiO/data/single/type_map.raw
new file mode 100644
index 0000000000..7eca995c31
--- /dev/null
+++ b/source/tests/pt/NiO/data/single/type_map.raw
@@ -0,0 +1,2 @@
+Ni
+O
diff --git a/source/tests/pt/__init__.py b/source/tests/pt/__init__.py
new file mode 100644
index 0000000000..1a6de0591a
--- /dev/null
+++ b/source/tests/pt/__init__.py
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import torch
+
+torch.set_num_threads(1)
+torch.set_num_interop_threads(1)
+# testing purposes; device should always be set explicitly
+torch.set_default_device("cuda:9999999")
diff --git a/source/tests/pt/conftest.py b/source/tests/pt/conftest.py
new file mode 100644
index 0000000000..a1dea6da5a
--- /dev/null
+++ b/source/tests/pt/conftest.py
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import pytest
+import torch
+
+
+@pytest.fixture(scope="package", autouse=True)
+def clear_cuda_memory(request):
+    yield
+    torch.cuda.empty_cache()
diff --git a/source/tests/pt/dos/data/atomic_system/set.000/atom_dos.npy b/source/tests/pt/dos/data/atomic_system/set.000/atom_dos.npy
new file mode 100644
index 0000000000..22809c1068
Binary files /dev/null and b/source/tests/pt/dos/data/atomic_system/set.000/atom_dos.npy differ
diff --git a/source/tests/pt/dos/data/atomic_system/set.000/box.npy b/source/tests/pt/dos/data/atomic_system/set.000/box.npy
new file mode 100644
index 0000000000..6265bf150e
Binary files /dev/null and b/source/tests/pt/dos/data/atomic_system/set.000/box.npy differ
diff --git a/source/tests/pt/dos/data/atomic_system/set.000/coord.npy b/source/tests/pt/dos/data/atomic_system/set.000/coord.npy
new file mode 100644
index 0000000000..f33ce430bf
Binary files /dev/null and b/source/tests/pt/dos/data/atomic_system/set.000/coord.npy differ
diff --git a/source/tests/pt/dos/data/atomic_system/type.raw b/source/tests/pt/dos/data/atomic_system/type.raw
new file mode 100644
index 0000000000..de3c26ec4e
--- /dev/null
+++ b/source/tests/pt/dos/data/atomic_system/type.raw
@@ -0,0 +1,32 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
diff --git a/source/tests/pt/dos/data/atomic_system/type_map.raw b/source/tests/pt/dos/data/atomic_system/type_map.raw
new file mode 100644
index 0000000000..a9edc74f38
--- /dev/null
+++ b/source/tests/pt/dos/data/atomic_system/type_map.raw
@@ -0,0 +1 @@
+H
diff --git a/source/tests/pt/dos/data/global_system/set.000/box.npy b/source/tests/pt/dos/data/global_system/set.000/box.npy
new file mode 100644
index 0000000000..6265bf150e
Binary files /dev/null and b/source/tests/pt/dos/data/global_system/set.000/box.npy differ
diff --git a/source/tests/pt/dos/data/global_system/set.000/coord.npy b/source/tests/pt/dos/data/global_system/set.000/coord.npy
new file mode 100644
index 0000000000..f33ce430bf
Binary files /dev/null and b/source/tests/pt/dos/data/global_system/set.000/coord.npy differ
diff --git a/source/tests/pt/dos/data/global_system/set.000/dos.npy b/source/tests/pt/dos/data/global_system/set.000/dos.npy
new file mode 100644
index 0000000000..904b23e709
Binary files /dev/null and b/source/tests/pt/dos/data/global_system/set.000/dos.npy differ
diff --git a/source/tests/pt/dos/data/global_system/type.raw b/source/tests/pt/dos/data/global_system/type.raw
new file mode 100644
index 0000000000..de3c26ec4e
--- /dev/null
+++ b/source/tests/pt/dos/data/global_system/type.raw
@@ -0,0 +1,32 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
diff --git a/source/tests/pt/dos/data/global_system/type_map.raw b/source/tests/pt/dos/data/global_system/type_map.raw
new file mode 100644
index 0000000000..a9edc74f38
--- /dev/null
+++ b/source/tests/pt/dos/data/global_system/type_map.raw
@@ -0,0 +1 @@
+H
diff --git a/source/tests/pt/dos/input.json b/source/tests/pt/dos/input.json
new file mode 100644
index 0000000000..991f5acf70
--- /dev/null
+++ b/source/tests/pt/dos/input.json
@@ -0,0 +1,83 @@
+{
+  "model": {
+    "type_map": [
+      "H"
+    ],
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        90
+      ],
+      "rcut_smth": 1.8,
+      "rcut": 6.0,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 8,
+      "precision": "float64",
+      "seed": 1
+    },
+    "fitting_net": {
+      "type": "dos",
+      "numb_dos": 250,
+      "neuron": [
+        120,
+        120,
+        120
+      ],
+      "resnet_dt": true,
+      "numb_fparam": 0,
+      "precision": "float64",
+      "seed": 1
+    }
+  },
+  "loss": {
+    "type": "dos",
+    "start_pref_dos": 1.0,
+    "limit_pref_dos": 1.0,
+    "start_pref_cdf": 0.0,
+    "limit_pref_cdf": 0.0,
+    "start_pref_ados": 1.0,
+    "limit_pref_ados": 1.0,
+    "start_pref_acdf": 0.0,
+    "limit_pref_acdf": 0.0
+  },
+  "learning_rate": {
+    "type": "exp",
+    "start_lr": 0.001,
+    "decay_steps": 5000,
+    "stop_lr": 1e-08
+  },
+  "training": {
+    "stop_batch": 100000,
+    "seed": 1,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "save_ckpt": "model",
+    "disp_training": true,
+    "time_training": true,
+    "profiling": false,
+    "profiling_file": "timeline.json",
+    "training_data": {
+      "systems": [
+        "pt/dos/data/atomic_system/",
+        "pt/dos/data/global_system/"
+      ],
+      "set_prefix": "set",
+      "batch_size": 1
+    },
+    "validation_data": {
+      "systems": [
+        "pt/dos/data/atomic_system/",
+        "pt/dos/data/global_system/"
+      ],
+      "set_prefix": "set",
+      "batch_size": 1
+    }
+  },
+  "_comment1": "that's all"
+}
diff --git a/source/tests/pt/model/__init__.py b/source/tests/pt/model/__init__.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/source/tests/pt/model/__init__.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/source/tests/pt/model/models/dpa1.json b/source/tests/pt/model/models/dpa1.json
new file mode 100644
index 0000000000..5d2c65c214
--- /dev/null
+++ b/source/tests/pt/model/models/dpa1.json
@@ -0,0 +1,39 @@
+{
+  "type_map": [
+    "O",
+    "H"
+  ],
+  "descriptor": {
+    "type": "se_atten",
+    "sel": 30,
+    "rcut_smth": 2.0,
+    "rcut": 6.0,
+    "neuron": [
+      2,
+      4,
+      8
+    ],
+    "axis_neuron": 4,
+    "attn": 5,
+    "attn_layer": 2,
+    "attn_dotr": true,
+    "attn_mask": false,
+    "post_ln": true,
+    "ffn": false,
+    "ffn_embed_dim": 10,
+    "activation_function": "tanh",
+    "scaling_factor": 1.0,
+    "head_num": 1,
+    "normalize": true,
+    "temperature": 1.0
+  },
+  "fitting_net": {
+    "neuron": [
+      240,
+      240,
+      240
+    ],
+    "resnet_dt": true,
+    "seed": 1
+  }
+}
diff --git a/source/tests/pt/model/models/dpa1.pth b/source/tests/pt/model/models/dpa1.pth
new file mode 100644
index 0000000000..75acf2fa15
Binary files /dev/null and b/source/tests/pt/model/models/dpa1.pth differ
diff --git a/source/tests/pt/model/models/dpa2.json b/source/tests/pt/model/models/dpa2.json
new file mode 100644
index 0000000000..8b9c735851
--- /dev/null
+++ b/source/tests/pt/model/models/dpa2.json
@@ -0,0 +1,48 @@
+{
+  "type_map": [
+    "O",
+    "H"
+  ],
+  "descriptor": {
+    "type": "dpa2",
+    "repinit_rcut": 6.0,
+    "repinit_rcut_smth": 2.0,
+    "repinit_nsel": 30,
+    "repformer_rcut": 4.0,
+    "repformer_rcut_smth": 0.5,
+    "repformer_nsel": 10,
+    "repinit_neuron": [
+      2,
+      4,
+      8
+    ],
+    "repinit_axis_neuron": 4,
+    "repinit_activation": "tanh",
+    "repformer_nlayers": 12,
+    "repformer_g1_dim": 8,
+    "repformer_g2_dim": 5,
+    "repformer_attn2_hidden": 3,
+    "repformer_attn2_nhead": 1,
+    "repformer_attn1_hidden": 5,
+    "repformer_attn1_nhead": 1,
+    "repformer_axis_dim": 4,
+    "repformer_update_h2": false,
+    "repformer_update_g1_has_conv": true,
+    "repformer_update_g1_has_grrg": true,
+    "repformer_update_g1_has_drrd": true,
+    "repformer_update_g1_has_attn": true,
+    "repformer_update_g2_has_g1g1": true,
+    "repformer_update_g2_has_attn": true,
+    "repformer_attn2_has_gate": true,
+    "repformer_add_type_ebd_to_seq": false
+  },
+  "fitting_net": {
+    "neuron": [
+      240,
+      240,
+      240
+    ],
+    "resnet_dt": true,
+    "seed": 1
+  }
+}
diff --git a/source/tests/pt/model/models/dpa2.pth b/source/tests/pt/model/models/dpa2.pth
new file mode 100644
index 0000000000..0559d30c48
Binary files /dev/null and b/source/tests/pt/model/models/dpa2.pth differ
diff --git a/source/tests/pt/model/models/dpa2_hyb.json b/source/tests/pt/model/models/dpa2_hyb.json
new file mode 100644
index 0000000000..ee69ed4d69
--- /dev/null
+++ b/source/tests/pt/model/models/dpa2_hyb.json
@@ -0,0 +1,69 @@
+{
+  "type_map": [
+    "O",
+    "H"
+  ],
+  "descriptor": {
+    "type": "hybrid",
+    "hybrid_mode": "sequential",
+    "list": [
+      {
+        "type": "se_atten",
+        "sel": 30,
+        "rcut_smth": 2.0,
+        "rcut": 6.0,
+        "neuron": [
+          2,
+          4,
+          8
+        ],
+        "axis_neuron": 4,
+        "attn": 5,
+        "attn_layer": 0,
+        "attn_dotr": true,
+        "attn_mask": false,
+        "post_ln": true,
+        "ffn": false,
+        "ffn_embed_dim": 10,
+        "activation_function": "tanh",
+        "scaling_factor": 1.0,
+        "head_num": 1,
+        "normalize": true,
+        "temperature": 1.0
+      },
+      {
+        "type": "se_uni",
+        "sel": 10,
+        "rcut_smth": 0.5,
+        "rcut": 4.0,
+        "nlayers": 12,
+        "g1_dim": 8,
+        "g2_dim": 5,
+        "attn2_hidden": 3,
+        "attn2_nhead": 1,
+        "attn1_hidden": 5,
+        "attn1_nhead": 1,
+        "axis_dim": 4,
+        "update_h2": false,
+        "update_g1_has_conv": true,
+        "update_g1_has_grrg": true,
+        "update_g1_has_drrd": true,
+        "update_g1_has_attn": true,
+        "update_g2_has_g1g1": true,
+        "update_g2_has_attn": true,
+        "attn2_has_gate": true,
+        "add_type_ebd_to_seq": false,
+        "smooth": true
+      }
+    ]
+  },
+  "fitting_net": {
+    "neuron": [
+      240,
+      240,
+      240
+    ],
+    "resnet_dt": true,
+    "seed": 1
+  }
+}
diff --git a/source/tests/pt/model/models/dpa2_tebd.pth b/source/tests/pt/model/models/dpa2_tebd.pth
new file mode 100644
index 0000000000..6a1cc225f1
Binary files /dev/null and b/source/tests/pt/model/models/dpa2_tebd.pth differ
diff --git a/source/tests/pt/model/test_autodiff.py b/source/tests/pt/model/test_autodiff.py
new file mode 100644
index 0000000000..91fc3cabf6
--- /dev/null
+++ b/source/tests/pt/model/test_autodiff.py
@@ -0,0 +1,259 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import unittest
+
+import numpy as np
+import torch
+
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+)
+
+dtype = torch.float64
+
+from .test_permutation import (
+    eval_model,
+    model_dpa1,
+    model_dpa2,
+    model_hybrid,
+    model_se_e2_a,
+    model_spin,
+    model_zbl,
+)
+
+
+# from deepmd-kit repo
+def finite_difference(f, x, delta=1e-6):
+    in_shape = x.shape
+    y0 = f(x)
+    out_shape = y0.shape
+    res = np.empty(out_shape + in_shape)
+    for idx in np.ndindex(*in_shape):
+        diff = np.zeros(in_shape)
+        diff[idx] += delta
+        y1p = f(x + diff)
+        y1n = f(x - diff)
+        res[(Ellipsis, *idx)] = (y1p - y1n) / (2 * delta)
+    return res
+
+
+def stretch_box(old_coord, old_box, new_box):
+    ocoord = old_coord.reshape(-1, 3)
+    obox = old_box.reshape(3, 3)
+    nbox = new_box.reshape(3, 3)
+    ncoord = ocoord @ np.linalg.inv(obox) @ nbox
+    return ncoord.reshape(old_coord.shape)
+
+
+class ForceTest:
+    def test(
+        self,
+    ):
+        places = 8
+        delta = 1e-5
+        natoms = 5
+        cell = torch.rand([3, 3], dtype=dtype, device="cpu")
+        cell = (cell + cell.T) + 5.0 * torch.eye(3, device="cpu")
+        coord = torch.rand([natoms, 3], dtype=dtype, device="cpu")
+        coord = torch.matmul(coord, cell)
+        spin = torch.rand([natoms, 3], dtype=dtype, device="cpu")
+        atype = torch.IntTensor([0, 0, 0, 1, 1])
+        # assumes input to be numpy tensor
+        coord = coord.numpy()
+        spin = spin.numpy()
+        test_spin = getattr(self, "test_spin", False)
+        if not test_spin:
+            test_keys = ["energy", "force", "virial"]
+        else:
+            test_keys = ["energy", "force", "force_mag", "virial"]
+
+        def np_infer_coord(
+            coord,
+        ):
+            result = eval_model(
+                self.model,
+                torch.tensor(coord, device=env.DEVICE).unsqueeze(0),
+                cell.unsqueeze(0),
+                atype,
+                spins=torch.tensor(spin, device=env.DEVICE).unsqueeze(0),
+            )
+            # detach
+            ret = {key: to_numpy_array(result[key].squeeze(0)) for key in test_keys}
+            return ret
+
+        def np_infer_spin(
+            spin,
+        ):
+            result = eval_model(
+                self.model,
+                torch.tensor(coord, device=env.DEVICE).unsqueeze(0),
+                cell.unsqueeze(0),
+                atype,
+                spins=torch.tensor(spin, device=env.DEVICE).unsqueeze(0),
+            )
+            # detach
+            ret = {key: to_numpy_array(result[key].squeeze(0)) for key in test_keys}
+            return ret
+
+        def ff_coord(_coord):
+            return np_infer_coord(_coord)["energy"]
+
+        def ff_spin(_spin):
+            return np_infer_spin(_spin)["energy"]
+
+        if not test_spin:
+            fdf = -finite_difference(ff_coord, coord, delta=delta).squeeze()
+            rff = np_infer_coord(coord)["force"]
+            np.testing.assert_almost_equal(fdf, rff, decimal=places)
+        else:
+            # real force
+            fdf = -finite_difference(ff_coord, coord, delta=delta).squeeze()
+            rff = np_infer_coord(coord)["force"]
+            np.testing.assert_almost_equal(fdf, rff, decimal=places)
+            # magnetic force
+            fdf = -finite_difference(ff_spin, spin, delta=delta).squeeze()
+            rff = np_infer_spin(spin)["force_mag"]
+            np.testing.assert_almost_equal(fdf, rff, decimal=places)
+
+
+class VirialTest:
+    def test(
+        self,
+    ):
+        places = 8
+        delta = 1e-4
+        natoms = 5
+        cell = torch.rand([3, 3], dtype=dtype, device="cpu")
+        cell = (cell) + 5.0 * torch.eye(3, device="cpu")
+        coord = torch.rand([natoms, 3], dtype=dtype, device="cpu")
+        coord = torch.matmul(coord, cell)
+        atype = torch.IntTensor([0, 0, 0, 1, 1])
+        # assumes input to be numpy tensor
+        coord = coord.numpy()
+        cell = cell.numpy()
+        test_keys = ["energy", "force", "virial"]
+
+        def np_infer(
+            new_cell,
+        ):
+            result = eval_model(
+                self.model,
+                torch.tensor(
+                    stretch_box(coord, cell, new_cell), device="cpu"
+                ).unsqueeze(0),
+                torch.tensor(new_cell, device="cpu").unsqueeze(0),
+                atype,
+            )
+            # detach
+            ret = {key: to_numpy_array(result[key].squeeze(0)) for key in test_keys}
+            # detach
+            return ret
+
+        def ff(bb):
+            return np_infer(bb)["energy"]
+
+        fdv = (
+            -(finite_difference(ff, cell, delta=delta).transpose(0, 2, 1) @ cell)
+            .squeeze()
+            .reshape(9)
+        )
+        rfv = np_infer(cell)["virial"]
+        np.testing.assert_almost_equal(fdv, rfv, decimal=places)
+
+
+class TestEnergyModelSeAForce(unittest.TestCase, ForceTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_se_e2_a)
+        self.type_split = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelSeAVirial(unittest.TestCase, VirialTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_se_e2_a)
+        self.type_split = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelDPA1Force(unittest.TestCase, ForceTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_dpa1)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelDPA1Virial(unittest.TestCase, VirialTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_dpa1)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelDPA2Force(unittest.TestCase, ForceTest):
+    def setUp(self):
+        model_params_sample = copy.deepcopy(model_dpa2)
+        model_params_sample["descriptor"]["rcut"] = model_params_sample["descriptor"][
+            "repinit_rcut"
+        ]
+        model_params_sample["descriptor"]["sel"] = model_params_sample["descriptor"][
+            "repinit_nsel"
+        ]
+        model_params = copy.deepcopy(model_dpa2)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelDPAUniVirial(unittest.TestCase, VirialTest):
+    def setUp(self):
+        model_params_sample = copy.deepcopy(model_dpa2)
+        model_params_sample["descriptor"]["rcut"] = model_params_sample["descriptor"][
+            "repinit_rcut"
+        ]
+        model_params_sample["descriptor"]["sel"] = model_params_sample["descriptor"][
+            "repinit_nsel"
+        ]
+        model_params = copy.deepcopy(model_dpa2)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelHybridForce(unittest.TestCase, ForceTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_hybrid)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelHybridVirial(unittest.TestCase, VirialTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_hybrid)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelZBLForce(unittest.TestCase, ForceTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_zbl)
+        self.type_split = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelZBLVirial(unittest.TestCase, VirialTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_zbl)
+        self.type_split = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelSpinSeAForce(unittest.TestCase, ForceTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_spin)
+        self.type_split = False
+        self.test_spin = True
+        self.model = get_model(model_params).to(env.DEVICE)
diff --git a/source/tests/pt/model/test_deeppot.py b/source/tests/pt/model/test_deeppot.py
new file mode 100644
index 0000000000..68b1ff65d5
--- /dev/null
+++ b/source/tests/pt/model/test_deeppot.py
@@ -0,0 +1,154 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import unittest
+from argparse import (
+    Namespace,
+)
+from copy import (
+    deepcopy,
+)
+from pathlib import (
+    Path,
+)
+
+import numpy as np
+import torch
+
+from deepmd.infer.deep_pot import DeepPot as DeepPotUni
+from deepmd.pt.entrypoints.main import (
+    freeze,
+    get_trainer,
+)
+from deepmd.pt.infer.deep_eval import (
+    DeepPot,
+)
+
+from ...tf.test_deeppot_a import (
+    FparamAparamCommonTest,
+)
+
+
+class TestDeepPot(unittest.TestCase):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water/se_atten.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+        self.config["training"]["training_data"]["systems"] = [
+            str(Path(__file__).parent / "water/data/single")
+        ]
+        self.config["training"]["validation_data"]["systems"] = [
+            str(Path(__file__).parent / "water/data/single")
+        ]
+        self.input_json = "test_dp_test.json"
+        with open(self.input_json, "w") as fp:
+            json.dump(self.config, fp, indent=4)
+
+        trainer = get_trainer(deepcopy(self.config))
+        trainer.run()
+
+        with torch.device("cpu"):
+            input_dict, label_dict, _ = trainer.get_data(is_train=False)
+        trainer.wrapper(**input_dict, label=label_dict, cur_lr=1.0)
+        self.model = "model.pt"
+
+    def tearDown(self):
+        for f in os.listdir("."):
+            if f in ["lcurve.out", self.input_json]:
+                os.remove(f)
+
+    def test_dp_test(self):
+        dp = DeepPot(str(self.model))
+        cell = np.array(
+            [
+                5.122106549439247480e00,
+                4.016537340154059388e-01,
+                6.951654033828678081e-01,
+                4.016537340154059388e-01,
+                6.112136112297989143e00,
+                8.178091365465004481e-01,
+                6.951654033828678081e-01,
+                8.178091365465004481e-01,
+                6.159552512682983760e00,
+            ]
+        ).reshape(1, 3, 3)
+        coord = np.array(
+            [
+                2.978060152121375648e00,
+                3.588469695887098077e00,
+                2.792459820604495491e00,
+                3.895592322591093115e00,
+                2.712091020667753760e00,
+                1.366836847133650501e00,
+                9.955616170888935690e-01,
+                4.121324820711413039e00,
+                1.817239061889086571e00,
+                3.553661462345699906e00,
+                5.313046969500791583e00,
+                6.635182659098815883e00,
+                6.088601018589653080e00,
+                6.575011420004332585e00,
+                6.825240650611076099e00,
+            ]
+        ).reshape(1, -1, 3)
+        atype = np.array([0, 0, 0, 1, 1]).reshape(1, -1)
+
+        ret = dp.eval(coord, cell, atype, atomic=True)
+        e, f, v, ae, av = ret[0], ret[1], ret[2], ret[3], ret[4]
+        self.assertEqual(e.shape, (1, 1))
+        self.assertEqual(f.shape, (1, 5, 3))
+        self.assertEqual(v.shape, (1, 9))
+        self.assertEqual(ae.shape, (1, 5, 1))
+        self.assertEqual(av.shape, (1, 5, 9))
+
+        self.assertEqual(dp.get_type_map(), ["O", "H"])
+        self.assertEqual(dp.get_ntypes(), 2)
+        self.assertEqual(dp.get_dim_fparam(), 0)
+        self.assertEqual(dp.get_dim_aparam(), 0)
+        self.assertEqual(dp.deep_eval.model_type, DeepPot)
+
+    def test_uni(self):
+        dp = DeepPotUni("model.pt")
+        self.assertIsInstance(dp, DeepPot)
+        # its methods has been tested in test_dp_test
+
+
+class TestDeepPotFrozen(TestDeepPot):
+    def setUp(self):
+        super().setUp()
+        frozen_model = "frozen_model.pth"
+        ns = Namespace(
+            model=self.model,
+            output=frozen_model,
+            head=None,
+        )
+        freeze(ns)
+        self.model = frozen_model
+
+    # Note: this can not actually disable cuda device to be used
+    # only can be used to test whether devices are mismatched
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    @unittest.mock.patch("deepmd.pt.utils.env.DEVICE", torch.device("cpu"))
+    @unittest.mock.patch("deepmd.pt.infer.deep_eval.DEVICE", torch.device("cpu"))
+    def test_dp_test_cpu(self):
+        self.test_dp_test()
+
+
+class TestFparamAparamPT(FparamAparamCommonTest, unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.dp = DeepPot(
+            str(Path(__file__).parent.parent.parent / "infer/fparam_aparam.pth")
+        )
+
+    def setUp(self):
+        super().setUp()
+        # For unclear reason, the precision is only 1e-7
+        # not sure if it is expected...
+        self.places = 1e-7
+
+    @classmethod
+    def tearDownClass(cls):
+        pass
diff --git a/source/tests/pt/model/test_descriptor.py b/source/tests/pt/model/test_descriptor.py
new file mode 100644
index 0000000000..7d21d1c13d
--- /dev/null
+++ b/source/tests/pt/model/test_descriptor.py
@@ -0,0 +1,194 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import os
+import unittest
+
+import numpy as np
+import tensorflow.compat.v1 as tf
+import torch
+
+tf.disable_eager_execution()
+
+import json
+from pathlib import (
+    Path,
+)
+
+from deepmd.pt.model.descriptor import (
+    prod_env_mat,
+)
+from deepmd.pt.utils import (
+    dp_random,
+    env,
+)
+from deepmd.pt.utils.dataset import (
+    DeepmdDataSetForLoader,
+)
+from deepmd.pt.utils.env import (
+    DEVICE,
+    GLOBAL_NP_FLOAT_PRECISION,
+    GLOBAL_PT_FLOAT_PRECISION,
+)
+from deepmd.pt.utils.nlist import (
+    extend_input_and_build_neighbor_list,
+)
+from deepmd.tf.common import (
+    expand_sys_str,
+)
+from deepmd.tf.env import (
+    op_module,
+)
+
+from ..test_stat import (
+    energy_data_requirement,
+)
+from .test_embedding_net import (
+    get_single_batch,
+)
+
+CUR_DIR = os.path.dirname(__file__)
+
+
+def base_se_a(rcut, rcut_smth, sel, batch, mean, stddev):
+    g = tf.Graph()
+    with g.as_default():
+        coord = tf.placeholder(GLOBAL_NP_FLOAT_PRECISION, [None, None])
+        box = tf.placeholder(GLOBAL_NP_FLOAT_PRECISION, [None, None])
+        atype = tf.placeholder(tf.int32, [None, None])
+        natoms_vec = tf.placeholder(tf.int32, [None])
+        default_mesh = tf.placeholder(tf.int32, [None])
+        stat_descrpt, descrpt_deriv, rij, nlist = op_module.prod_env_mat_a(
+            coord,
+            atype,
+            natoms_vec,
+            box,
+            default_mesh,
+            tf.constant(mean),
+            tf.constant(stddev),
+            rcut_a=-1.0,
+            rcut_r=rcut,
+            rcut_r_smth=rcut_smth,
+            sel_a=sel,
+            sel_r=[0 for i in sel],
+        )
+
+        net_deriv_reshape = tf.ones_like(stat_descrpt)
+        force = op_module.prod_force_se_a(
+            net_deriv_reshape,
+            descrpt_deriv,
+            nlist,
+            natoms_vec,
+            n_a_sel=sum(sel),
+            n_r_sel=0,
+        )
+
+    with tf.Session(graph=g) as sess:
+        y = sess.run(
+            [stat_descrpt, force, nlist],
+            feed_dict={
+                coord: batch["coord"],
+                box: batch["box"],
+                natoms_vec: batch["natoms"],
+                atype: batch["atype"],
+                default_mesh: np.array([0, 0, 0, 2, 2, 2]),
+            },
+        )
+    tf.reset_default_graph()
+    return y
+
+
+class TestSeA(unittest.TestCase):
+    def setUp(self):
+        dp_random.seed(20)
+        with open(str(Path(__file__).parent / "water/se_e2_a.json")) as fin:
+            content = fin.read()
+        config = json.loads(content)
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        config["training"]["training_data"]["systems"] = data_file
+        config["training"]["validation_data"]["systems"] = data_file
+        model_config = config["model"]
+        self.rcut = model_config["descriptor"]["rcut"]
+        self.rcut_smth = model_config["descriptor"]["rcut_smth"]
+        self.sel = model_config["descriptor"]["sel"]
+        self.bsz = config["training"]["training_data"]["batch_size"]
+        self.systems = config["training"]["validation_data"]["systems"]
+        if isinstance(self.systems, str):
+            self.systems = expand_sys_str(self.systems)
+        ds = DeepmdDataSetForLoader(
+            self.systems[0],
+            model_config["type_map"],
+        )
+        ds.add_data_requirement(energy_data_requirement)
+        self.np_batch, self.pt_batch = get_single_batch(ds)
+        self.sec = np.cumsum(self.sel)
+        self.ntypes = len(self.sel)
+        self.nnei = sum(self.sel)
+
+    def test_consistency(self):
+        avg_zero = torch.zeros(
+            [self.ntypes, self.nnei * 4],
+            dtype=GLOBAL_PT_FLOAT_PRECISION,
+            device=env.DEVICE,
+        )
+        std_ones = torch.ones(
+            [self.ntypes, self.nnei * 4],
+            dtype=GLOBAL_PT_FLOAT_PRECISION,
+            device=env.DEVICE,
+        )
+        base_d, base_force, base_nlist = base_se_a(
+            rcut=self.rcut,
+            rcut_smth=self.rcut_smth,
+            sel=self.sel,
+            batch=self.np_batch,
+            mean=avg_zero.detach().cpu(),
+            stddev=std_ones.detach().cpu(),
+        )
+
+        pt_coord = self.pt_batch["coord"].to(env.DEVICE)
+        atype = self.pt_batch["atype"].to(env.DEVICE)
+        pt_coord.requires_grad_(True)
+        (
+            extended_coord,
+            extended_atype,
+            mapping,
+            nlist,
+        ) = extend_input_and_build_neighbor_list(
+            pt_coord,
+            self.pt_batch["atype"].to(env.DEVICE),
+            self.rcut,
+            self.sel,
+            mixed_types=False,
+            box=self.pt_batch["box"].to(env.DEVICE),
+        )
+        my_d, _, _ = prod_env_mat(
+            extended_coord,
+            nlist,
+            atype,
+            avg_zero.reshape([-1, self.nnei, 4]).to(DEVICE),
+            std_ones.reshape([-1, self.nnei, 4]).to(DEVICE),
+            self.rcut,
+            self.rcut_smth,
+        )
+        my_d.sum().backward()
+        bsz = pt_coord.shape[0]
+        my_force = pt_coord.grad.view(bsz, -1, 3).cpu().detach().numpy()
+        base_force = base_force.reshape(bsz, -1, 3)
+        base_d = base_d.reshape(bsz, -1, self.nnei, 4)
+        my_d = my_d.view(bsz, -1, self.nnei, 4).cpu().detach().numpy()
+        base_nlist = base_nlist.reshape(bsz, -1, self.nnei)
+
+        mapping = mapping.cpu()
+        my_nlist = nlist.view(bsz, -1).cpu()
+        mask = my_nlist == -1
+        my_nlist = my_nlist * ~mask
+        my_nlist = torch.gather(mapping, dim=-1, index=my_nlist)
+        my_nlist = my_nlist * ~mask - mask.long()
+        my_nlist = my_nlist.cpu().view(bsz, -1, self.nnei).numpy()
+        self.assertTrue(np.allclose(base_nlist, my_nlist))
+        self.assertTrue(np.allclose(np.mean(base_d, axis=2), np.mean(my_d, axis=2)))
+        self.assertTrue(np.allclose(np.std(base_d, axis=2), np.std(my_d, axis=2)))
+        # descriptors may be different when there are multiple neighbors in the same distance
+        self.assertTrue(np.allclose(base_force, -my_force))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_descriptor_dpa1.py b/source/tests/pt/model/test_descriptor_dpa1.py
new file mode 100644
index 0000000000..6dadcdacb7
--- /dev/null
+++ b/source/tests/pt/model/test_descriptor_dpa1.py
@@ -0,0 +1,372 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import unittest
+from pathlib import (
+    Path,
+)
+
+import torch
+
+from deepmd.pt.model.descriptor import (
+    DescrptBlockSeAtten,
+    DescrptDPA1,
+)
+from deepmd.pt.model.network.network import (
+    TypeEmbedNet,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.nlist import (
+    extend_input_and_build_neighbor_list,
+)
+
+CUR_DIR = os.path.dirname(__file__)
+
+
+class TestDPA1(unittest.TestCase):
+    def setUp(self):
+        cell = [
+            5.122106549439247480e00,
+            4.016537340154059388e-01,
+            6.951654033828678081e-01,
+            4.016537340154059388e-01,
+            6.112136112297989143e00,
+            8.178091365465004481e-01,
+            6.951654033828678081e-01,
+            8.178091365465004481e-01,
+            6.159552512682983760e00,
+        ]
+        self.cell = torch.tensor(
+            cell, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE
+        ).view(1, 3, 3)
+        coord = [
+            2.978060152121375648e00,
+            3.588469695887098077e00,
+            2.792459820604495491e00,
+            3.895592322591093115e00,
+            2.712091020667753760e00,
+            1.366836847133650501e00,
+            9.955616170888935690e-01,
+            4.121324820711413039e00,
+            1.817239061889086571e00,
+            3.553661462345699906e00,
+            5.313046969500791583e00,
+            6.635182659098815883e00,
+            6.088601018589653080e00,
+            6.575011420004332585e00,
+            6.825240650611076099e00,
+        ]
+        self.coord = torch.tensor(
+            coord, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE
+        ).view(1, -1, 3)
+        self.atype = torch.tensor(
+            [0, 0, 0, 1, 1], dtype=torch.int32, device=env.DEVICE
+        ).view(1, -1)
+        self.ref_d = torch.tensor(
+            [
+                8.382518544113587780e-03,
+                -3.390120566088597812e-03,
+                6.145981571114964362e-03,
+                -4.880300873973819273e-03,
+                -3.390120566088597812e-03,
+                1.372540996564941464e-03,
+                -2.484163690574096341e-03,
+                1.972313058658722688e-03,
+                6.145981571114964362e-03,
+                -2.484163690574096341e-03,
+                4.507748738021747671e-03,
+                -3.579717194906019764e-03,
+                -4.880300873973819273e-03,
+                1.972313058658722688e-03,
+                -3.579717194906019764e-03,
+                2.842794615687799838e-03,
+                6.733043802494966066e-04,
+                -2.721540313345096771e-04,
+                4.936158526085561134e-04,
+                -3.919743287822345223e-04,
+                -1.311123004527576900e-02,
+                5.301179352601203924e-03,
+                -9.614612349318877454e-03,
+                7.634884975521277241e-03,
+                8.877088452901006621e-03,
+                -3.590945566653638409e-03,
+                6.508042782015627942e-03,
+                -5.167671664327699171e-03,
+                -2.697241463040870365e-03,
+                1.091350446825975137e-03,
+                -1.976895708961905022e-03,
+                1.569671412121975348e-03,
+                8.645131636261189911e-03,
+                -3.557395265621639355e-03,
+                6.298048561552698106e-03,
+                -4.999272007935521948e-03,
+                -3.557395265621639355e-03,
+                1.467866637220284964e-03,
+                -2.587004431651147504e-03,
+                2.052752235601402672e-03,
+                6.298048561552698106e-03,
+                -2.587004431651147504e-03,
+                4.594085551315935101e-03,
+                -3.647656549789176847e-03,
+                -4.999272007935521948e-03,
+                2.052752235601402672e-03,
+                -3.647656549789176847e-03,
+                2.896359275520481256e-03,
+                6.689620176492027878e-04,
+                -2.753606422414641049e-04,
+                4.864958810186969444e-04,
+                -3.860599754167503119e-04,
+                -1.349238259226558101e-02,
+                5.547478630961994242e-03,
+                -9.835472300819447095e-03,
+                7.808197926069362048e-03,
+                9.220744348752592245e-03,
+                -3.795799103392961601e-03,
+                6.716516319358462918e-03,
+                -5.331265718473574867e-03,
+                -2.783836698392940304e-03,
+                1.147461939123531121e-03,
+                -2.025013030986024063e-03,
+                1.606944814423778541e-03,
+                9.280385723343491378e-03,
+                -3.515852178447095942e-03,
+                7.085282215778941628e-03,
+                -5.675852414643783178e-03,
+                -3.515852178447095942e-03,
+                1.337760635271160884e-03,
+                -2.679428786337713451e-03,
+                2.145400621815936413e-03,
+                7.085282215778941628e-03,
+                -2.679428786337713451e-03,
+                5.414439648102228192e-03,
+                -4.338426468139268931e-03,
+                -5.675852414643783178e-03,
+                2.145400621815936413e-03,
+                -4.338426468139268931e-03,
+                3.476467482674507146e-03,
+                7.166961981167455130e-04,
+                -2.697932188839837972e-04,
+                5.474643906631899504e-04,
+                -4.386556623669893621e-04,
+                -1.480434821331240956e-02,
+                5.604647062899507579e-03,
+                -1.130745349141585449e-02,
+                9.059113563516829268e-03,
+                9.758791063112262978e-03,
+                -3.701477720487638626e-03,
+                7.448215522796466058e-03,
+                -5.966057584545172120e-03,
+                -2.845102393948158344e-03,
+                1.078743584169829543e-03,
+                -2.170093031447992756e-03,
+                1.738010461687942770e-03,
+                9.867599071916231118e-03,
+                -3.811041717688905522e-03,
+                7.121877634386481262e-03,
+                -5.703120290113914553e-03,
+                -3.811041717688905522e-03,
+                1.474046183772771213e-03,
+                -2.747386907428428938e-03,
+                2.199711055637492037e-03,
+                7.121877634386481262e-03,
+                -2.747386907428428938e-03,
+                5.145050639440944609e-03,
+                -4.120642824501622239e-03,
+                -5.703120290113914553e-03,
+                2.199711055637492037e-03,
+                -4.120642824501622239e-03,
+                3.300262321758350853e-03,
+                1.370499995344566383e-03,
+                -5.313041843655797901e-04,
+                9.860110343046961986e-04,
+                -7.892505817954784597e-04,
+                -1.507686316307561489e-02,
+                5.818961290579217904e-03,
+                -1.088774506142304276e-02,
+                8.719460408506790952e-03,
+                9.764630842803939323e-03,
+                -3.770134041110058572e-03,
+                7.049438389985595785e-03,
+                -5.645302934019884485e-03,
+                -3.533582373572779437e-03,
+                1.367148320603491559e-03,
+                -2.546602904764623705e-03,
+                2.038882844528267305e-03,
+                7.448297038731285964e-03,
+                -2.924276815200288742e-03,
+                5.355960540523636154e-03,
+                -4.280386435083473329e-03,
+                -2.924276815200288742e-03,
+                1.150311064893848757e-03,
+                -2.100635980860638373e-03,
+                1.678427895009850001e-03,
+                5.355960540523636154e-03,
+                -2.100635980860638373e-03,
+                3.853607053247790071e-03,
+                -3.080076301871465493e-03,
+                -4.280386435083473329e-03,
+                1.678427895009850001e-03,
+                -3.080076301871465493e-03,
+                2.461876613756722523e-03,
+                9.730712866459405395e-04,
+                -3.821759579990726546e-04,
+                6.994242056622360787e-04,
+                -5.589662297882965055e-04,
+                -1.138916742131982317e-02,
+                4.469391132927387489e-03,
+                -8.192016282448397885e-03,
+                6.547234460517113892e-03,
+                7.460070829043288082e-03,
+                -2.929867802018087421e-03,
+                5.363646855497249989e-03,
+                -4.286347242903034739e-03,
+                -2.643569023340565718e-03,
+                1.038826463247002245e-03,
+                -1.899910089750410976e-03,
+                1.518237240362583541e-03,
+            ],
+            dtype=env.GLOBAL_PT_FLOAT_PRECISION,
+            device=env.DEVICE,
+        )
+        with open(Path(CUR_DIR) / "models" / "dpa1.json") as fp:
+            self.model_json = json.load(fp)
+        self.file_model_param = Path(CUR_DIR) / "models" / "dpa1.pth"
+        self.file_type_embed = Path(CUR_DIR) / "models" / "dpa2_tebd.pth"
+
+    def test_descriptor_block(self):
+        # torch.manual_seed(0)
+        model_dpa1 = self.model_json
+        dparams = model_dpa1["descriptor"]
+        ntypes = len(model_dpa1["type_map"])
+        assert "se_atten" == dparams.pop("type")
+        dparams["ntypes"] = ntypes
+        des = DescrptBlockSeAtten(
+            **dparams,
+        ).to(env.DEVICE)
+        des.load_state_dict(torch.load(self.file_model_param))
+        coord = self.coord
+        atype = self.atype
+        box = self.cell
+        # handel type_embedding
+        type_embedding = TypeEmbedNet(ntypes, 8).to(env.DEVICE)
+        type_embedding.load_state_dict(torch.load(self.file_type_embed))
+
+        ## to save model parameters
+        # torch.save(des.state_dict(), 'model_weights.pth')
+        # torch.save(type_embedding.state_dict(), 'model_weights.pth')
+        (
+            extended_coord,
+            extended_atype,
+            mapping,
+            nlist,
+        ) = extend_input_and_build_neighbor_list(
+            coord,
+            atype,
+            des.get_rcut(),
+            des.get_sel(),
+            mixed_types=des.mixed_types(),
+            box=box,
+        )
+        descriptor, env_mat, diff, rot_mat, sw = des(
+            nlist,
+            extended_coord,
+            extended_atype,
+            type_embedding(extended_atype),
+            mapping=None,
+        )
+        # np.savetxt('tmp.out', descriptor.detach().numpy().reshape(1,-1), delimiter=",")
+        self.assertEqual(descriptor.shape[-1], des.get_dim_out())
+        self.assertAlmostEqual(6.0, des.get_rcut())
+        self.assertEqual(30, des.get_nsel())
+        self.assertEqual(2, des.get_ntypes())
+        torch.testing.assert_close(
+            descriptor.view(-1), self.ref_d, atol=1e-10, rtol=1e-10
+        )
+
+    def test_descriptor(self):
+        with open(Path(CUR_DIR) / "models" / "dpa1.json") as fp:
+            self.model_json = json.load(fp)
+        model_dpa2 = self.model_json
+        ntypes = len(model_dpa2["type_map"])
+        dparams = model_dpa2["descriptor"]
+        dparams["ntypes"] = ntypes
+        assert dparams.pop("type") == "se_atten"
+        dparams["concat_output_tebd"] = False
+        des = DescrptDPA1(
+            **dparams,
+        ).to(env.DEVICE)
+        target_dict = des.state_dict()
+        source_dict = torch.load(self.file_model_param)
+        type_embd_dict = torch.load(self.file_type_embed)
+        target_dict = translate_se_atten_and_type_embd_dicts_to_dpa1(
+            target_dict,
+            source_dict,
+            type_embd_dict,
+        )
+        des.load_state_dict(target_dict)
+
+        coord = self.coord
+        atype = self.atype
+        box = self.cell
+        (
+            extended_coord,
+            extended_atype,
+            mapping,
+            nlist,
+        ) = extend_input_and_build_neighbor_list(
+            coord,
+            atype,
+            des.get_rcut(),
+            des.get_sel(),
+            mixed_types=des.mixed_types(),
+            box=box,
+        )
+        descriptor, env_mat, diff, rot_mat, sw = des(
+            extended_coord,
+            extended_atype,
+            nlist,
+            mapping=mapping,
+        )
+        self.assertEqual(descriptor.shape[-1], des.get_dim_out())
+        self.assertAlmostEqual(6.0, des.get_rcut())
+        self.assertEqual(30, des.get_nsel())
+        self.assertEqual(2, des.get_ntypes())
+        torch.testing.assert_close(
+            descriptor.view(-1), self.ref_d, atol=1e-10, rtol=1e-10
+        )
+
+        dparams["concat_output_tebd"] = True
+        des = DescrptDPA1(
+            **dparams,
+        ).to(env.DEVICE)
+        descriptor, env_mat, diff, rot_mat, sw = des(
+            extended_coord,
+            extended_atype,
+            nlist,
+            mapping=mapping,
+        )
+        self.assertEqual(descriptor.shape[-1], des.get_dim_out())
+
+
+def translate_se_atten_and_type_embd_dicts_to_dpa1(
+    target_dict,
+    source_dict,
+    type_embd_dict,
+):
+    all_keys = list(target_dict.keys())
+    record = [False for ii in all_keys]
+    for kk, vv in source_dict.items():
+        tk = "se_atten." + kk
+        record[all_keys.index(tk)] = True
+        target_dict[tk] = vv
+    assert len(type_embd_dict.keys()) == 2
+    it = iter(type_embd_dict.keys())
+    for _ in range(2):
+        kk = next(it)
+        tk = "type_embedding." + kk
+        record[all_keys.index(tk)] = True
+        target_dict[tk] = type_embd_dict[kk]
+    assert all(record)
+    return target_dict
diff --git a/source/tests/pt/model/test_descriptor_dpa2.py b/source/tests/pt/model/test_descriptor_dpa2.py
new file mode 100644
index 0000000000..662108ee99
--- /dev/null
+++ b/source/tests/pt/model/test_descriptor_dpa2.py
@@ -0,0 +1,274 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import unittest
+from pathlib import (
+    Path,
+)
+
+import torch
+
+from deepmd.pt.model.descriptor import (
+    DescrptBlockHybrid,
+    DescrptDPA2,
+)
+from deepmd.pt.model.network.network import (
+    TypeEmbedNet,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.nlist import (
+    build_multiple_neighbor_list,
+    extend_input_and_build_neighbor_list,
+    get_multiple_nlist_key,
+)
+
+CUR_DIR = os.path.dirname(__file__)
+
+
+class TestDPA2(unittest.TestCase):
+    def setUp(self):
+        cell = [
+            5.122106549439247480e00,
+            4.016537340154059388e-01,
+            6.951654033828678081e-01,
+            4.016537340154059388e-01,
+            6.112136112297989143e00,
+            8.178091365465004481e-01,
+            6.951654033828678081e-01,
+            8.178091365465004481e-01,
+            6.159552512682983760e00,
+        ]
+        self.cell = torch.tensor(
+            cell, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE
+        ).view(1, 3, 3)
+        coord = [
+            2.978060152121375648e00,
+            3.588469695887098077e00,
+            2.792459820604495491e00,
+            3.895592322591093115e00,
+            2.712091020667753760e00,
+            1.366836847133650501e00,
+            9.955616170888935690e-01,
+            4.121324820711413039e00,
+            1.817239061889086571e00,
+            3.553661462345699906e00,
+            5.313046969500791583e00,
+            6.635182659098815883e00,
+            6.088601018589653080e00,
+            6.575011420004332585e00,
+            6.825240650611076099e00,
+        ]
+        self.coord = torch.tensor(
+            coord, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE
+        ).view(1, -1, 3)
+        self.atype = torch.tensor(
+            [0, 0, 0, 1, 1], dtype=torch.int32, device=env.DEVICE
+        ).view(1, -1)
+        self.ref_d = torch.tensor(
+            [
+                8.435412613327306630e-01,
+                -4.717109614540972440e-01,
+                -1.812643456954206256e00,
+                -2.315248767961955167e-01,
+                -7.112973006771171613e-01,
+                -4.162041919507591392e-01,
+                -1.505159810095323181e00,
+                -1.191652416985768403e-01,
+                8.439214937875325617e-01,
+                -4.712976890460106594e-01,
+                -1.812605149396642856e00,
+                -2.307222236291133766e-01,
+                -7.115427800870099961e-01,
+                -4.164729253167227530e-01,
+                -1.505483119125936797e00,
+                -1.191288524278367872e-01,
+                8.286420823261241297e-01,
+                -4.535033763979030574e-01,
+                -1.787877160970498425e00,
+                -1.961763875645104460e-01,
+                -7.475459187804838201e-01,
+                -5.231446874663764346e-01,
+                -1.488399984491664219e00,
+                -3.974117581747104583e-02,
+                8.283793431613817315e-01,
+                -4.551551577556525729e-01,
+                -1.789253136645859943e00,
+                -1.977673627726055372e-01,
+                -7.448826048241211639e-01,
+                -5.161350182531234676e-01,
+                -1.487589463573479209e00,
+                -4.377376017839779143e-02,
+                8.295404560710329944e-01,
+                -4.492219258475603216e-01,
+                -1.784484611185287450e00,
+                -1.901182059718481143e-01,
+                -7.537407667483000395e-01,
+                -5.384371277650709109e-01,
+                -1.490368056268364549e00,
+                -3.073744832541754762e-02,
+            ],
+            dtype=env.GLOBAL_PT_FLOAT_PRECISION,
+            device=env.DEVICE,
+        )
+        with open(Path(CUR_DIR) / "models" / "dpa2_hyb.json") as fp:
+            self.model_json = json.load(fp)
+        self.file_model_param = Path(CUR_DIR) / "models" / "dpa2.pth"
+        self.file_type_embed = Path(CUR_DIR) / "models" / "dpa2_tebd.pth"
+
+    # TODO This test for hybrid descriptor should be removed!
+    def test_descriptor_hyb(self):
+        # torch.manual_seed(0)
+        model_hybrid_dpa2 = self.model_json
+        dparams = model_hybrid_dpa2["descriptor"]
+        ntypes = len(model_hybrid_dpa2["type_map"])
+        dlist = dparams.pop("list")
+        des = DescrptBlockHybrid(
+            dlist,
+            ntypes,
+            hybrid_mode=dparams["hybrid_mode"],
+        ).to(env.DEVICE)
+        model_dict = torch.load(self.file_model_param)
+        # type_embd of repformer is removed
+        model_dict.pop("descriptor_list.1.type_embd.embedding.weight")
+        des.load_state_dict(model_dict)
+        all_rcut = sorted([ii["rcut"] for ii in dlist])
+        all_nsel = sorted([ii["sel"] for ii in dlist])
+        rcut_max = max(all_rcut)
+        sel_max = max(all_nsel)
+        coord = self.coord
+        atype = self.atype
+        box = self.cell
+        # handel type_embedding
+        type_embedding = TypeEmbedNet(ntypes, 8).to(env.DEVICE)
+        type_embedding.load_state_dict(torch.load(self.file_type_embed))
+
+        ## to save model parameters
+        # torch.save(des.state_dict(), 'model_weights.pth')
+        # torch.save(type_embedding.state_dict(), 'model_weights.pth')
+        (
+            extended_coord,
+            extended_atype,
+            mapping,
+            nlist_max,
+        ) = extend_input_and_build_neighbor_list(
+            coord,
+            atype,
+            rcut_max,
+            sel_max,
+            mixed_types=des.mixed_types(),
+            box=box,
+        )
+        nlist_dict = build_multiple_neighbor_list(
+            extended_coord,
+            nlist_max,
+            all_rcut,
+            all_nsel,
+        )
+        nlist_list = []
+        for ii in des.descriptor_list:
+            nlist_list.append(
+                nlist_dict[get_multiple_nlist_key(ii.get_rcut(), ii.get_nsel())]
+            )
+        nlist = torch.cat(nlist_list, -1)
+        descriptor, env_mat, diff, rot_mat, sw = des(
+            nlist,
+            extended_coord,
+            extended_atype,
+            type_embedding(extended_atype),
+            mapping=mapping,
+        )
+        torch.testing.assert_close(
+            descriptor.view(-1), self.ref_d, atol=1e-10, rtol=1e-10
+        )
+
+    def test_descriptor(self):
+        with open(Path(CUR_DIR) / "models" / "dpa2.json") as fp:
+            self.model_json = json.load(fp)
+        model_dpa2 = self.model_json
+        ntypes = len(model_dpa2["type_map"])
+        dparams = model_dpa2["descriptor"]
+        dparams["ntypes"] = ntypes
+        assert dparams.pop("type") == "dpa2"
+        dparams["concat_output_tebd"] = False
+        des = DescrptDPA2(
+            **dparams,
+        ).to(env.DEVICE)
+        target_dict = des.state_dict()
+        source_dict = torch.load(self.file_model_param)
+        # type_embd of repformer is removed
+        source_dict.pop("descriptor_list.1.type_embd.embedding.weight")
+        type_embd_dict = torch.load(self.file_type_embed)
+        target_dict = translate_hybrid_and_type_embd_dicts_to_dpa2(
+            target_dict,
+            source_dict,
+            type_embd_dict,
+        )
+        des.load_state_dict(target_dict)
+
+        coord = self.coord
+        atype = self.atype
+        box = self.cell
+        (
+            extended_coord,
+            extended_atype,
+            mapping,
+            nlist,
+        ) = extend_input_and_build_neighbor_list(
+            coord,
+            atype,
+            des.get_rcut(),
+            des.get_sel(),
+            mixed_types=des.mixed_types(),
+            box=box,
+        )
+        descriptor, env_mat, diff, rot_mat, sw = des(
+            extended_coord,
+            extended_atype,
+            nlist,
+            mapping=mapping,
+        )
+        self.assertEqual(descriptor.shape[-1], des.get_dim_out())
+        self.assertAlmostEqual(6.0, des.get_rcut())
+        self.assertEqual(30, des.get_nsel())
+        self.assertEqual(2, des.get_ntypes())
+        torch.testing.assert_close(
+            descriptor.view(-1), self.ref_d, atol=1e-10, rtol=1e-10
+        )
+
+        dparams["concat_output_tebd"] = True
+        des = DescrptDPA2(
+            **dparams,
+        ).to(env.DEVICE)
+        descriptor, env_mat, diff, rot_mat, sw = des(
+            extended_coord,
+            extended_atype,
+            nlist,
+            mapping=mapping,
+        )
+        self.assertEqual(descriptor.shape[-1], des.get_dim_out())
+
+
+def translate_hybrid_and_type_embd_dicts_to_dpa2(
+    target_dict,
+    source_dict,
+    type_embd_dict,
+):
+    all_keys = list(target_dict.keys())
+    record = [False for ii in all_keys]
+    for kk, vv in source_dict.items():
+        tk = kk.replace("descriptor_list.1", "repformers")
+        tk = tk.replace("descriptor_list.0", "repinit")
+        tk = tk.replace("sequential_transform.0", "g1_shape_tranform")
+        record[all_keys.index(tk)] = True
+        target_dict[tk] = vv
+    assert len(type_embd_dict.keys()) == 2
+    it = iter(type_embd_dict.keys())
+    for _ in range(2):
+        kk = next(it)
+        tk = "type_embedding." + kk
+        record[all_keys.index(tk)] = True
+        target_dict[tk] = type_embd_dict[kk]
+    assert all(record)
+    return target_dict
diff --git a/source/tests/pt/model/test_descriptor_hybrid.py b/source/tests/pt/model/test_descriptor_hybrid.py
new file mode 100644
index 0000000000..6742388bd9
--- /dev/null
+++ b/source/tests/pt/model/test_descriptor_hybrid.py
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+import torch
+
+from deepmd.pt.model.descriptor.dpa1 import (
+    DescrptDPA1,
+)
+from deepmd.pt.model.descriptor.hybrid import (
+    DescrptHybrid,
+)
+from deepmd.pt.model.descriptor.se_a import (
+    DescrptSeA,
+)
+from deepmd.pt.model.descriptor.se_r import (
+    DescrptSeR,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.utils import (
+    to_torch_tensor,
+)
+
+from .test_env_mat import (
+    TestCaseSingleFrameWithNlist,
+)
+
+dtype = env.GLOBAL_PT_FLOAT_PRECISION
+
+
+class TestDescrptHybrid(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+
+    def test_jit(
+        self,
+    ):
+        ddsub0 = DescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+            old_impl=False,
+        )
+        ddsub1 = DescrptSeR(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        )
+        dd0 = DescrptHybrid(list=[ddsub0, ddsub1])
+        dd1 = DescrptHybrid.deserialize(dd0.serialize())
+        dd0 = torch.jit.script(dd0)
+        dd1 = torch.jit.script(dd1)
+
+    def test_hybrid_mixed_and_no_mixed(self):
+        coord_ext = to_torch_tensor(self.coord_ext)
+        atype_ext = to_torch_tensor(self.atype_ext)
+        nlist1 = to_torch_tensor(self.nlist)
+        nlist2 = to_torch_tensor(-np.sort(-self.nlist, axis=-1))
+        ddsub0 = DescrptSeA(
+            rcut=self.rcut,
+            rcut_smth=self.rcut_smth,
+            sel=self.sel,
+        )
+        ddsub1 = DescrptDPA1(
+            rcut=self.rcut,
+            rcut_smth=self.rcut_smth,
+            sel=np.sum(self.sel).item() - 1,
+            ntypes=len(self.sel),
+        )
+        ddsub2 = DescrptSeR(
+            rcut=self.rcut / 2,
+            rcut_smth=self.rcut_smth,
+            sel=[3, 1],
+        )
+        dd = DescrptHybrid(list=[ddsub0, ddsub1, ddsub2])
+        ret = dd(
+            coord_ext,
+            atype_ext,
+            nlist2,
+        )
+        ret0 = ddsub0(
+            coord_ext,
+            atype_ext,
+            nlist1,
+        )
+        ret1 = ddsub1(coord_ext, atype_ext, nlist2[:, :, :-1])
+        ret2 = ddsub2(coord_ext, atype_ext, nlist1[:, :, [0, 1, 2, self.sel[0]]])
+        torch.testing.assert_close(
+            ret[0],
+            torch.cat([ret0[0], ret1[0], ret2[0]], dim=2),
+        )
diff --git a/source/tests/pt/model/test_descriptor_se_r.py b/source/tests/pt/model/test_descriptor_se_r.py
new file mode 100644
index 0000000000..5b8b6c9251
--- /dev/null
+++ b/source/tests/pt/model/test_descriptor_se_r.py
@@ -0,0 +1,185 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import itertools
+import unittest
+
+import numpy as np
+import torch
+
+from deepmd.dpmodel.descriptor import DescrptSeR as DPDescrptSeR
+from deepmd.pt.model.descriptor.se_r import (
+    DescrptSeR,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.env import (
+    PRECISION_DICT,
+)
+from deepmd.pt.utils.env_mat_stat import (
+    EnvMatStatSe,
+)
+
+from .test_env_mat import (
+    TestCaseSingleFrameWithNlist,
+)
+from .test_mlp import (
+    get_tols,
+)
+
+dtype = env.GLOBAL_PT_FLOAT_PRECISION
+
+
+# to be merged with the tf test case
+class TestDescrptSeR(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+
+    def test_consistency(
+        self,
+    ):
+        rng = np.random.default_rng()
+        _, _, nnei = self.nlist.shape
+        davg = rng.normal(size=(self.nt, nnei, 1))
+        dstd = rng.normal(size=(self.nt, nnei, 1))
+        dstd = 0.1 + np.abs(dstd)
+
+        for idt, prec, em in itertools.product(
+            [False, True],
+            ["float64", "float32"],
+            [[], [[0, 1]], [[1, 1]]],
+        ):
+            dtype = PRECISION_DICT[prec]
+            rtol, atol = get_tols(prec)
+            err_msg = f"idt={idt} prec={prec}"
+            # sea new impl
+            dd0 = DescrptSeR(
+                self.rcut,
+                self.rcut_smth,
+                self.sel,
+                precision=prec,
+                resnet_dt=idt,
+                old_impl=False,
+                exclude_mask=em,
+            ).to(env.DEVICE)
+            dd0.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE)
+            dd0.dstd = torch.tensor(dstd, dtype=dtype, device=env.DEVICE)
+
+            rd0, _, _, _, _ = dd0(
+                torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE),
+                torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE),
+                torch.tensor(self.nlist, dtype=int, device=env.DEVICE),
+            )
+            # serialization
+            dd1 = DescrptSeR.deserialize(dd0.serialize())
+            rd1, _, _, _, sw1 = dd1(
+                torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE),
+                torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE),
+                torch.tensor(self.nlist, dtype=int, device=env.DEVICE),
+            )
+            np.testing.assert_allclose(
+                rd0.detach().cpu().numpy(),
+                rd1.detach().cpu().numpy(),
+                rtol=rtol,
+                atol=atol,
+                err_msg=err_msg,
+            )
+            np.testing.assert_allclose(
+                rd0.detach().cpu().numpy()[0][self.perm[: self.nloc]],
+                rd0.detach().cpu().numpy()[1],
+                rtol=rtol,
+                atol=atol,
+                err_msg=err_msg,
+            )
+            # dp impl
+            dd2 = DPDescrptSeR.deserialize(dd0.serialize())
+            rd2, _, _, _, sw2 = dd2.call(
+                self.coord_ext,
+                self.atype_ext,
+                self.nlist,
+            )
+            for aa, bb in zip([rd1, sw1], [rd2, sw2]):
+                np.testing.assert_allclose(
+                    aa.detach().cpu().numpy(),
+                    bb,
+                    rtol=rtol,
+                    atol=atol,
+                    err_msg=err_msg,
+                )
+
+    def test_load_stat(self):
+        rng = np.random.default_rng()
+        _, _, nnei = self.nlist.shape
+        davg = rng.normal(size=(self.nt, nnei, 1))
+        dstd = rng.normal(size=(self.nt, nnei, 1))
+        dstd = 0.1 + np.abs(dstd)
+
+        for idt, prec in itertools.product(
+            [False, True],
+            ["float64", "float32"],
+        ):
+            dtype = PRECISION_DICT[prec]
+
+            # sea new impl
+            dd0 = DescrptSeR(
+                self.rcut,
+                self.rcut_smth,
+                self.sel,
+                precision=prec,
+                resnet_dt=idt,
+                old_impl=False,
+            )
+            dd0.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE)
+            dd0.dstd = torch.tensor(dstd, dtype=dtype, device=env.DEVICE)
+            dd1 = DescrptSeR.deserialize(dd0.serialize())
+            dd1.compute_input_stats(
+                [
+                    {
+                        "r0": None,
+                        "coord": torch.from_numpy(self.coord_ext)
+                        .reshape(-1, self.nall, 3)
+                        .to(env.DEVICE),
+                        "atype": torch.from_numpy(self.atype_ext).to(env.DEVICE),
+                        "box": None,
+                        "natoms": self.nall,
+                    }
+                ]
+            )
+
+            with self.assertRaises(ValueError) as cm:
+                ev = EnvMatStatSe(dd1)
+                ev.last_dim = 3
+                ev.load_or_compute_stats([])
+            self.assertEqual(
+                "last_dim should be 1 for raial-only or 4 for full descriptor.",
+                str(cm.exception),
+            )
+
+    def test_jit(
+        self,
+    ):
+        rng = np.random.default_rng()
+        _, _, nnei = self.nlist.shape
+        davg = rng.normal(size=(self.nt, nnei, 1))
+        dstd = rng.normal(size=(self.nt, nnei, 1))
+        dstd = 0.1 + np.abs(dstd)
+
+        for idt, prec in itertools.product(
+            [False, True],
+            ["float64", "float32"],
+        ):
+            dtype = PRECISION_DICT[prec]
+
+            # sea new impl
+            dd0 = DescrptSeR(
+                self.rcut,
+                self.rcut_smth,
+                self.sel,
+                precision=prec,
+                resnet_dt=idt,
+                old_impl=False,
+            )
+            dd0.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE)
+            dd0.dstd = torch.tensor(dstd, dtype=dtype, device=env.DEVICE)
+            dd1 = DescrptSeR.deserialize(dd0.serialize())
+            torch.jit.script(dd0)
+            torch.jit.script(dd1)
diff --git a/source/tests/pt/model/test_dipole_fitting.py b/source/tests/pt/model/test_dipole_fitting.py
new file mode 100644
index 0000000000..fa4be9171c
--- /dev/null
+++ b/source/tests/pt/model/test_dipole_fitting.py
@@ -0,0 +1,354 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import itertools
+import os
+import unittest
+
+import numpy as np
+import torch
+from scipy.stats import (
+    special_ortho_group,
+)
+
+from deepmd.dpmodel.fitting import DipoleFitting as DPDipoleFitting
+from deepmd.infer.deep_dipole import (
+    DeepDipole,
+)
+from deepmd.pt.model.descriptor.se_a import (
+    DescrptSeA,
+)
+from deepmd.pt.model.model.dipole_model import (
+    DipoleModel,
+)
+from deepmd.pt.model.task.dipole import (
+    DipoleFittingNet,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.nlist import (
+    extend_input_and_build_neighbor_list,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+    to_torch_tensor,
+)
+
+from .test_env_mat import (
+    TestCaseSingleFrameWithNlist,
+)
+
+dtype = env.GLOBAL_PT_FLOAT_PRECISION
+
+
+def finite_difference(f, x, a, delta=1e-6):
+    in_shape = x.shape
+    y0 = f(x, a)
+    out_shape = y0.shape
+    res = np.empty(out_shape + in_shape)
+    for idx in np.ndindex(*in_shape):
+        diff = np.zeros(in_shape)
+        diff[idx] += delta
+        y1p = f(x + diff, a)
+        y1n = f(x - diff, a)
+        res[(Ellipsis, *idx)] = (y1p - y1n) / (2 * delta)
+    return res
+
+
+class TestDipoleFitting(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+        self.rng = np.random.default_rng()
+        self.nf, self.nloc, _ = self.nlist.shape
+        self.dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE)
+
+    def test_consistency(
+        self,
+    ):
+        rd0, gr, _, _, _ = self.dd0(
+            torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE),
+            torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE),
+            torch.tensor(self.nlist, dtype=int, device=env.DEVICE),
+        )
+        atype = torch.tensor(
+            self.atype_ext[:, : self.nloc], dtype=int, device=env.DEVICE
+        )
+
+        for mixed_types, nfp, nap in itertools.product(
+            [True, False],
+            [0, 3],
+            [0, 4],
+        ):
+            ft0 = DipoleFittingNet(
+                self.nt,
+                self.dd0.dim_out,
+                embedding_width=self.dd0.get_dim_emb(),
+                numb_fparam=nfp,
+                numb_aparam=nap,
+                mixed_types=mixed_types,
+            ).to(env.DEVICE)
+            ft1 = DPDipoleFitting.deserialize(ft0.serialize())
+            ft2 = DipoleFittingNet.deserialize(ft1.serialize())
+
+            if nfp > 0:
+                ifp = torch.tensor(
+                    self.rng.normal(size=(self.nf, nfp)), dtype=dtype, device=env.DEVICE
+                )
+            else:
+                ifp = None
+            if nap > 0:
+                iap = torch.tensor(
+                    self.rng.normal(size=(self.nf, self.nloc, nap)),
+                    dtype=dtype,
+                    device=env.DEVICE,
+                )
+            else:
+                iap = None
+
+            ret0 = ft0(rd0, atype, gr, fparam=ifp, aparam=iap)
+            ret1 = ft1(
+                rd0.detach().cpu().numpy(),
+                atype.detach().cpu().numpy(),
+                gr.detach().cpu().numpy(),
+                fparam=to_numpy_array(ifp),
+                aparam=to_numpy_array(iap),
+            )
+            ret2 = ft2(rd0, atype, gr, fparam=ifp, aparam=iap)
+            np.testing.assert_allclose(
+                to_numpy_array(ret0["dipole"]),
+                ret1["dipole"],
+            )
+            np.testing.assert_allclose(
+                to_numpy_array(ret0["dipole"]),
+                to_numpy_array(ret2["dipole"]),
+            )
+
+    def test_jit(
+        self,
+    ):
+        for mixed_types, nfp, nap in itertools.product(
+            [True, False],
+            [0, 3],
+            [0, 4],
+        ):
+            ft0 = DipoleFittingNet(
+                self.nt,
+                self.dd0.dim_out,
+                embedding_width=self.dd0.get_dim_emb(),
+                numb_fparam=nfp,
+                numb_aparam=nap,
+                mixed_types=mixed_types,
+            ).to(env.DEVICE)
+            torch.jit.script(ft0)
+
+
+class TestEquivalence(unittest.TestCase):
+    def setUp(self) -> None:
+        self.natoms = 5
+        self.rcut = 4
+        self.rcut_smth = 0.5
+        self.sel = [46, 92, 4]
+        self.nf = 1
+        self.coord = 2 * torch.rand([self.natoms, 3], dtype=dtype, device=env.DEVICE)
+        self.shift = torch.tensor([4, 4, 4], dtype=dtype, device=env.DEVICE)
+        self.atype = torch.tensor([0, 0, 0, 1, 1], dtype=torch.int32, device=env.DEVICE)
+        self.dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE)
+        self.cell = torch.rand([3, 3], dtype=dtype, device=env.DEVICE)
+        self.cell = (self.cell + self.cell.T) + 5.0 * torch.eye(3, device=env.DEVICE)
+
+    def test_rot(self):
+        atype = self.atype.reshape(1, 5)
+        rmat = torch.tensor(special_ortho_group.rvs(3), dtype=dtype, device=env.DEVICE)
+        coord_rot = torch.matmul(self.coord, rmat)
+        rng = np.random.default_rng()
+        for mixed_types, nfp, nap in itertools.product(
+            [True, False],
+            [0, 3],
+            [0, 4],
+        ):
+            ft0 = DipoleFittingNet(
+                3,  # ntype
+                self.dd0.dim_out,  # dim_descrpt
+                embedding_width=self.dd0.get_dim_emb(),
+                numb_fparam=nfp,
+                numb_aparam=nap,
+                mixed_types=mixed_types,
+            ).to(env.DEVICE)
+            if nfp > 0:
+                ifp = torch.tensor(
+                    rng.normal(size=(self.nf, nfp)), dtype=dtype, device=env.DEVICE
+                )
+            else:
+                ifp = None
+            if nap > 0:
+                iap = torch.tensor(
+                    rng.normal(size=(self.nf, self.natoms, nap)),
+                    dtype=dtype,
+                    device=env.DEVICE,
+                )
+            else:
+                iap = None
+
+            res = []
+            for xyz in [self.coord, coord_rot]:
+                (
+                    extended_coord,
+                    extended_atype,
+                    _,
+                    nlist,
+                ) = extend_input_and_build_neighbor_list(
+                    xyz + self.shift, atype, self.rcut, self.sel, not mixed_types
+                )
+
+                rd0, gr0, _, _, _ = self.dd0(
+                    extended_coord,
+                    extended_atype,
+                    nlist,
+                )
+
+                ret0 = ft0(rd0, extended_atype, gr0, fparam=ifp, aparam=iap)
+                res.append(ret0["dipole"])
+
+            np.testing.assert_allclose(
+                to_numpy_array(res[1]), to_numpy_array(torch.matmul(res[0], rmat))
+            )
+
+    def test_permu(self):
+        coord = torch.matmul(self.coord, self.cell)
+        ft0 = DipoleFittingNet(
+            3,  # ntype
+            self.dd0.dim_out,
+            embedding_width=self.dd0.get_dim_emb(),
+            numb_fparam=0,
+            numb_aparam=0,
+            mixed_types=False,
+        ).to(env.DEVICE)
+        res = []
+        for idx_perm in [[0, 1, 2, 3, 4], [1, 0, 4, 3, 2]]:
+            atype = self.atype[idx_perm].reshape(1, 5)
+            (
+                extended_coord,
+                extended_atype,
+                _,
+                nlist,
+            ) = extend_input_and_build_neighbor_list(
+                coord[idx_perm], atype, self.rcut, self.sel, True
+            )
+
+            rd0, gr0, _, _, _ = self.dd0(
+                extended_coord,
+                extended_atype,
+                nlist,
+            )
+
+            ret0 = ft0(rd0, extended_atype, gr0, fparam=0, aparam=0)
+            res.append(ret0["dipole"])
+
+        np.testing.assert_allclose(
+            to_numpy_array(res[0][:, idx_perm]), to_numpy_array(res[1])
+        )
+
+    def test_trans(self):
+        atype = self.atype.reshape(1, 5)
+        coord_s = torch.matmul(
+            torch.remainder(
+                torch.matmul(self.coord + self.shift, torch.linalg.inv(self.cell)), 1.0
+            ),
+            self.cell,
+        )
+        ft0 = DipoleFittingNet(
+            3,  # ntype
+            self.dd0.dim_out,
+            embedding_width=self.dd0.get_dim_emb(),
+            numb_fparam=0,
+            numb_aparam=0,
+            mixed_types=True,
+        ).to(env.DEVICE)
+        res = []
+        for xyz in [self.coord, coord_s]:
+            (
+                extended_coord,
+                extended_atype,
+                _,
+                nlist,
+            ) = extend_input_and_build_neighbor_list(
+                xyz, atype, self.rcut, self.sel, False
+            )
+
+            rd0, gr0, _, _, _ = self.dd0(
+                extended_coord,
+                extended_atype,
+                nlist,
+            )
+
+            ret0 = ft0(rd0, extended_atype, gr0, fparam=0, aparam=0)
+            res.append(ret0["dipole"])
+
+        np.testing.assert_allclose(to_numpy_array(res[0]), to_numpy_array(res[1]))
+
+
+class TestDipoleModel(unittest.TestCase):
+    def setUp(self):
+        self.natoms = 5
+        self.rcut = 4.0
+        self.nt = 3
+        self.rcut_smth = 0.5
+        self.sel = [46, 92, 4]
+        self.nf = 1
+        self.coord = 2 * torch.rand([self.natoms, 3], dtype=dtype, device=env.DEVICE)
+        cell = torch.rand([3, 3], dtype=dtype, device=env.DEVICE)
+        self.cell = (cell + cell.T) + 5.0 * torch.eye(3, device=env.DEVICE)
+        self.atype = torch.IntTensor([0, 0, 0, 1, 1], device="cpu").to(env.DEVICE)
+        self.dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE)
+        self.ft0 = DipoleFittingNet(
+            self.nt,
+            self.dd0.dim_out,
+            embedding_width=self.dd0.get_dim_emb(),
+            numb_fparam=0,
+            numb_aparam=0,
+            mixed_types=True,
+        ).to(env.DEVICE)
+        self.type_mapping = ["O", "H", "B"]
+        self.model = DipoleModel(self.dd0, self.ft0, self.type_mapping)
+        self.file_path = "model_output.pth"
+
+    def test_auto_diff(self):
+        places = 5
+        delta = 1e-5
+        atype = self.atype.view(self.nf, self.natoms)
+
+        def ff(coord, atype):
+            return (
+                self.model(to_torch_tensor(coord), to_torch_tensor(atype))[
+                    "global_dipole"
+                ]
+                .detach()
+                .cpu()
+                .numpy()
+            )
+
+        fdf = -finite_difference(
+            ff, to_numpy_array(self.coord), to_numpy_array(atype), delta=delta
+        )
+        rff = self.model(self.coord, atype)["force"].detach().cpu().numpy()
+
+        np.testing.assert_almost_equal(fdf, rff.transpose(0, 2, 1, 3), decimal=places)
+
+    def test_deepdipole_infer(self):
+        atype = to_numpy_array(self.atype.view(self.nf, self.natoms))
+        coord = to_numpy_array(self.coord.reshape(1, 5, 3))
+        cell = to_numpy_array(self.cell.reshape(1, 9))
+        jit_md = torch.jit.script(self.model)
+        torch.jit.save(jit_md, self.file_path)
+        load_md = DeepDipole(self.file_path)
+        load_md.eval(coords=coord, atom_types=atype, cells=cell, atomic=True)
+        load_md.eval(coords=coord, atom_types=atype, cells=cell, atomic=False)
+        load_md.eval_full(coords=coord, atom_types=atype, cells=cell, atomic=True)
+        load_md.eval_full(coords=coord, atom_types=atype, cells=cell, atomic=False)
+
+    def tearDown(self) -> None:
+        if os.path.exists(self.file_path):
+            os.remove(self.file_path)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_dp_atomic_model.py b/source/tests/pt/model/test_dp_atomic_model.py
new file mode 100644
index 0000000000..4a35b4676a
--- /dev/null
+++ b/source/tests/pt/model/test_dp_atomic_model.py
@@ -0,0 +1,236 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import itertools
+import unittest
+
+import numpy as np
+import torch
+
+from deepmd.dpmodel.atomic_model import DPAtomicModel as DPDPAtomicModel
+from deepmd.dpmodel.descriptor import DescrptSeA as DPDescrptSeA
+from deepmd.dpmodel.fitting import InvarFitting as DPInvarFitting
+from deepmd.pt.model.atomic_model import (
+    DPAtomicModel,
+)
+from deepmd.pt.model.descriptor.se_a import (
+    DescrptSeA,
+)
+from deepmd.pt.model.task.ener import (
+    InvarFitting,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+    to_torch_tensor,
+)
+
+from .test_env_mat import (
+    TestCaseSingleFrameWithNlist,
+    TestCaseSingleFrameWithNlistWithVirtual,
+)
+
+dtype = env.GLOBAL_PT_FLOAT_PRECISION
+
+
+class TestDPAtomicModel(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+
+    def test_self_consistency(self):
+        nf, nloc, nnei = self.nlist.shape
+        ds = DescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        ).to(env.DEVICE)
+        ft = InvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        ).to(env.DEVICE)
+        type_map = ["foo", "bar"]
+
+        # test the case of exclusion
+        for atom_excl, pair_excl in itertools.product([[], [1]], [[], [[0, 1]]]):
+            md0 = DPAtomicModel(
+                ds,
+                ft,
+                type_map=type_map,
+            ).to(env.DEVICE)
+            md0.reinit_atom_exclude(atom_excl)
+            md0.reinit_pair_exclude(pair_excl)
+            md1 = DPAtomicModel.deserialize(md0.serialize()).to(env.DEVICE)
+            args = [
+                to_torch_tensor(ii)
+                for ii in [self.coord_ext, self.atype_ext, self.nlist]
+            ]
+            ret0 = md0.forward_common_atomic(*args)
+            ret1 = md1.forward_common_atomic(*args)
+            np.testing.assert_allclose(
+                to_numpy_array(ret0["energy"]),
+                to_numpy_array(ret1["energy"]),
+            )
+
+    def test_dp_consistency(self):
+        rng = np.random.default_rng()
+        nf, nloc, nnei = self.nlist.shape
+        ds = DPDescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        )
+        ft = DPInvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        )
+        type_map = ["foo", "bar"]
+        md0 = DPDPAtomicModel(ds, ft, type_map=type_map)
+        md1 = DPAtomicModel.deserialize(md0.serialize()).to(env.DEVICE)
+        args0 = [self.coord_ext, self.atype_ext, self.nlist]
+        args1 = [
+            to_torch_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist]
+        ]
+        ret0 = md0.forward_common_atomic(*args0)
+        ret1 = md1.forward_common_atomic(*args1)
+        np.testing.assert_allclose(
+            ret0["energy"],
+            to_numpy_array(ret1["energy"]),
+        )
+
+    def test_jit(self):
+        nf, nloc, nnei = self.nlist.shape
+        ds = DescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        ).to(env.DEVICE)
+        ft = InvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        ).to(env.DEVICE)
+        type_map = ["foo", "bar"]
+        md0 = DPAtomicModel(ds, ft, type_map=type_map).to(env.DEVICE)
+        md0 = torch.jit.script(md0)
+        self.assertEqual(md0.get_rcut(), self.rcut)
+        self.assertEqual(md0.get_type_map(), type_map)
+
+    def test_excl_consistency(self):
+        type_map = ["foo", "bar"]
+
+        # test the case of exclusion
+        for atom_excl, pair_excl in itertools.product([[], [1]], [[], [[0, 1]]]):
+            ds = DescrptSeA(
+                self.rcut,
+                self.rcut_smth,
+                self.sel,
+            ).to(env.DEVICE)
+            ft = InvarFitting(
+                "energy",
+                self.nt,
+                ds.get_dim_out(),
+                1,
+                mixed_types=ds.mixed_types(),
+            ).to(env.DEVICE)
+            md0 = DPAtomicModel(
+                ds,
+                ft,
+                type_map=type_map,
+            ).to(env.DEVICE)
+            md1 = DPAtomicModel.deserialize(md0.serialize()).to(env.DEVICE)
+
+            md0.reinit_atom_exclude(atom_excl)
+            md0.reinit_pair_exclude(pair_excl)
+            # hacking!
+            md1.descriptor.reinit_exclude(pair_excl)
+            md1.fitting_net.reinit_exclude(atom_excl)
+
+            # check energy consistency
+            args = [
+                to_torch_tensor(ii)
+                for ii in [self.coord_ext, self.atype_ext, self.nlist]
+            ]
+            ret0 = md0.forward_common_atomic(*args)
+            ret1 = md1.forward_common_atomic(*args)
+            np.testing.assert_allclose(
+                to_numpy_array(ret0["energy"]),
+                to_numpy_array(ret1["energy"]),
+            )
+
+            # check output def
+            out_names = [vv.name for vv in md0.atomic_output_def().get_data().values()]
+            self.assertEqual(out_names, ["energy", "mask"])
+            if atom_excl != []:
+                for ii in md0.atomic_output_def().get_data().values():
+                    if ii.name == "mask":
+                        self.assertEqual(ii.shape, [1])
+                        self.assertFalse(ii.reduciable)
+                        self.assertFalse(ii.r_differentiable)
+                        self.assertFalse(ii.c_differentiable)
+
+            # check mask
+            if atom_excl == []:
+                pass
+            elif atom_excl == [1]:
+                self.assertIn("mask", ret0.keys())
+                expected = np.array([1, 1, 0], dtype=int)
+                expected = np.concatenate(
+                    [expected, expected[self.perm[: self.nloc]]]
+                ).reshape(2, 3)
+                np.testing.assert_array_equal(to_numpy_array(ret0["mask"]), expected)
+            else:
+                raise ValueError(f"not expected atom_excl {atom_excl}")
+
+
+class TestDPAtomicModelVirtualConsistency(unittest.TestCase):
+    def setUp(self):
+        self.case0 = TestCaseSingleFrameWithNlist()
+        self.case1 = TestCaseSingleFrameWithNlistWithVirtual()
+        self.case0.setUp()
+        self.case1.setUp()
+
+    def test_virtual_consistency(self):
+        nf, _, _ = self.case0.nlist.shape
+        ds = DescrptSeA(
+            self.case0.rcut,
+            self.case0.rcut_smth,
+            self.case0.sel,
+        )
+        ft = InvarFitting(
+            "energy",
+            self.case0.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        )
+        type_map = ["foo", "bar"]
+        md1 = DPAtomicModel(ds, ft, type_map=type_map).to(env.DEVICE)
+
+        args0 = [self.case0.coord_ext, self.case0.atype_ext, self.case0.nlist]
+        args0 = [to_torch_tensor(ii) for ii in args0]
+        args1 = [self.case1.coord_ext, self.case1.atype_ext, self.case1.nlist]
+        args1 = [to_torch_tensor(ii) for ii in args1]
+
+        ret0 = md1.forward_common_atomic(*args0)
+        ret1 = md1.forward_common_atomic(*args1)
+
+        for dd in range(self.case0.nf):
+            np.testing.assert_allclose(
+                to_numpy_array(ret0["energy"])[dd],
+                to_numpy_array(ret1["energy"])[dd, self.case1.get_real_mapping[dd], :],
+            )
+        expected_mask = np.array(
+            [
+                [1, 0, 1, 1],
+                [1, 1, 0, 1],
+            ]
+        )
+        np.testing.assert_equal(to_numpy_array(ret1["mask"]), expected_mask)
diff --git a/source/tests/pt/model/test_dp_model.py b/source/tests/pt/model/test_dp_model.py
new file mode 100644
index 0000000000..7470cf96d0
--- /dev/null
+++ b/source/tests/pt/model/test_dp_model.py
@@ -0,0 +1,654 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+import torch
+
+from deepmd.dpmodel import DPModel as DPDPModel
+from deepmd.dpmodel.descriptor import DescrptSeA as DPDescrptSeA
+from deepmd.dpmodel.fitting import InvarFitting as DPInvarFitting
+from deepmd.pt.model.descriptor.se_a import (
+    DescrptSeA,
+)
+from deepmd.pt.model.model import (
+    DPModel,
+    EnergyModel,
+)
+from deepmd.pt.model.task.ener import (
+    InvarFitting,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.nlist import (
+    build_neighbor_list,
+    extend_coord_with_ghosts,
+    extend_input_and_build_neighbor_list,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+    to_torch_tensor,
+)
+
+from .test_env_mat import (
+    TestCaseSingleFrameWithNlist,
+    TestCaseSingleFrameWithoutNlist,
+)
+
+dtype = env.GLOBAL_PT_FLOAT_PRECISION
+
+
+class TestDPModel(unittest.TestCase, TestCaseSingleFrameWithoutNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithoutNlist.setUp(self)
+
+    def test_self_consistency(self):
+        nf, nloc = self.atype.shape
+        ds = DescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        ).to(env.DEVICE)
+        ft = InvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        ).to(env.DEVICE)
+        type_map = ["foo", "bar"]
+        md0 = DPModel(ds, ft, type_map=type_map).to(env.DEVICE)
+        md1 = DPModel.deserialize(md0.serialize()).to(env.DEVICE)
+        args = [to_torch_tensor(ii) for ii in [self.coord, self.atype, self.cell]]
+        ret0 = md0.forward_common(*args)
+        ret1 = md1.forward_common(*args)
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["energy"]),
+            to_numpy_array(ret1["energy"]),
+            atol=self.atol,
+        )
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["energy_redu"]),
+            to_numpy_array(ret1["energy_redu"]),
+            atol=self.atol,
+        )
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["energy_derv_r"]),
+            to_numpy_array(ret1["energy_derv_r"]),
+            atol=self.atol,
+        )
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["energy_derv_c_redu"]),
+            to_numpy_array(ret1["energy_derv_c_redu"]),
+            atol=self.atol,
+        )
+        ret0 = md0.forward_common(*args, do_atomic_virial=True)
+        ret1 = md1.forward_common(*args, do_atomic_virial=True)
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["energy_derv_c"]),
+            to_numpy_array(ret1["energy_derv_c"]),
+            atol=self.atol,
+        )
+
+        coord_ext, atype_ext, mapping = extend_coord_with_ghosts(
+            to_torch_tensor(self.coord),
+            to_torch_tensor(self.atype),
+            to_torch_tensor(self.cell),
+            self.rcut,
+        )
+        nlist = build_neighbor_list(
+            coord_ext,
+            atype_ext,
+            self.nloc,
+            self.rcut,
+            self.sel,
+            distinguish_types=(not md0.mixed_types()),
+        )
+        args = [coord_ext, atype_ext, nlist]
+        ret2 = md0.forward_common_lower(*args, do_atomic_virial=True)
+        # check the consistency between the reduced virial from
+        # forward_common and forward_common_lower
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["energy_derv_c_redu"]),
+            to_numpy_array(ret2["energy_derv_c_redu"]),
+            atol=self.atol,
+        )
+
+    def test_dp_consistency(self):
+        nf, nloc = self.atype.shape
+        nfp, nap = 2, 3
+        ds = DPDescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        )
+        ft = DPInvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+            numb_fparam=nfp,
+            numb_aparam=nap,
+        )
+        type_map = ["foo", "bar"]
+        md0 = DPDPModel(ds, ft, type_map=type_map)
+        md1 = DPModel.deserialize(md0.serialize()).to(env.DEVICE)
+
+        rng = np.random.default_rng()
+        fparam = rng.normal(size=[self.nf, nfp])
+        aparam = rng.normal(size=[self.nf, nloc, nap])
+        args0 = [self.coord, self.atype, self.cell]
+        args1 = [to_torch_tensor(ii) for ii in [self.coord, self.atype, self.cell]]
+        kwargs0 = {"fparam": fparam, "aparam": aparam}
+        kwargs1 = {kk: to_torch_tensor(vv) for kk, vv in kwargs0.items()}
+        ret0 = md0.call(*args0, **kwargs0)
+        ret1 = md1.forward_common(*args1, **kwargs1)
+        np.testing.assert_allclose(
+            ret0["energy"],
+            to_numpy_array(ret1["energy"]),
+            atol=self.atol,
+        )
+        np.testing.assert_allclose(
+            ret0["energy_redu"],
+            to_numpy_array(ret1["energy_redu"]),
+            atol=self.atol,
+        )
+
+    def test_dp_consistency_nopbc(self):
+        nf, nloc = self.atype.shape
+        nfp, nap = 2, 3
+        ds = DPDescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        )
+        ft = DPInvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+            numb_fparam=nfp,
+            numb_aparam=nap,
+        )
+        type_map = ["foo", "bar"]
+        md0 = DPDPModel(ds, ft, type_map=type_map)
+        md1 = DPModel.deserialize(md0.serialize()).to(env.DEVICE)
+
+        rng = np.random.default_rng()
+        fparam = rng.normal(size=[self.nf, nfp])
+        aparam = rng.normal(size=[self.nf, self.nloc, nap])
+        args0 = [self.coord, self.atype]
+        args1 = [to_torch_tensor(ii) for ii in args0]
+        kwargs0 = {"fparam": fparam, "aparam": aparam}
+        kwargs1 = {kk: to_torch_tensor(vv) for kk, vv in kwargs0.items()}
+        ret0 = md0.call(*args0, **kwargs0)
+        ret1 = md1.forward_common(*args1, **kwargs1)
+        np.testing.assert_allclose(
+            ret0["energy"],
+            to_numpy_array(ret1["energy"]),
+            atol=self.atol,
+        )
+        np.testing.assert_allclose(
+            ret0["energy_redu"],
+            to_numpy_array(ret1["energy_redu"]),
+            atol=self.atol,
+        )
+
+    def test_prec_consistency(self):
+        rng = np.random.default_rng()
+        nf, nloc = self.atype.shape
+        ds = DPDescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        )
+        ft = DPInvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        )
+        nfp, nap = 2, 3
+        type_map = ["foo", "bar"]
+        fparam = rng.normal(size=[self.nf, nfp])
+        aparam = rng.normal(size=[self.nf, nloc, nap])
+
+        md0 = DPDPModel(ds, ft, type_map=type_map)
+        md1 = DPModel.deserialize(md0.serialize()).to(env.DEVICE)
+
+        args64 = [to_torch_tensor(ii) for ii in [self.coord, self.atype, self.cell]]
+        args64[0] = args64[0].to(torch.float64)
+        args64[2] = args64[2].to(torch.float64)
+        args32 = [to_torch_tensor(ii) for ii in [self.coord, self.atype, self.cell]]
+        args32[0] = args32[0].to(torch.float32)
+        args32[2] = args32[2].to(torch.float32)
+        # fparam, aparam are converted to coordinate precision by model
+        fparam = to_torch_tensor(fparam)
+        aparam = to_torch_tensor(aparam)
+
+        model_l_ret_64 = md1.forward_common(*args64, fparam=fparam, aparam=aparam)
+        model_l_ret_32 = md1.forward_common(*args32, fparam=fparam, aparam=aparam)
+
+        for ii in model_l_ret_32.keys():
+            if ii[-4:] == "redu":
+                self.assertEqual(model_l_ret_32[ii].dtype, torch.float64)
+            else:
+                self.assertEqual(model_l_ret_32[ii].dtype, torch.float32)
+            if ii != "mask":
+                self.assertEqual(model_l_ret_64[ii].dtype, torch.float64)
+            else:
+                self.assertEqual(model_l_ret_64[ii].dtype, torch.int32)
+            np.testing.assert_allclose(
+                to_numpy_array(model_l_ret_32[ii]),
+                to_numpy_array(model_l_ret_64[ii]),
+                atol=self.atol,
+            )
+
+
+class TestDPModelLower(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+
+    def test_self_consistency(self):
+        nf, nloc, nnei = self.nlist.shape
+        ds = DescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        ).to(env.DEVICE)
+        ft = InvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        ).to(env.DEVICE)
+        type_map = ["foo", "bar"]
+        md0 = DPModel(ds, ft, type_map=type_map).to(env.DEVICE)
+        md1 = DPModel.deserialize(md0.serialize()).to(env.DEVICE)
+        args = [
+            to_torch_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist]
+        ]
+        ret0 = md0.forward_common_lower(*args)
+        ret1 = md1.forward_common_lower(*args)
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["energy"]),
+            to_numpy_array(ret1["energy"]),
+            atol=self.atol,
+        )
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["energy_redu"]),
+            to_numpy_array(ret1["energy_redu"]),
+            atol=self.atol,
+        )
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["energy_derv_r"]),
+            to_numpy_array(ret1["energy_derv_r"]),
+            atol=self.atol,
+        )
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["energy_derv_c_redu"]),
+            to_numpy_array(ret1["energy_derv_c_redu"]),
+            atol=self.atol,
+        )
+        ret0 = md0.forward_common_lower(*args, do_atomic_virial=True)
+        ret1 = md1.forward_common_lower(*args, do_atomic_virial=True)
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["energy_derv_c"]),
+            to_numpy_array(ret1["energy_derv_c"]),
+            atol=self.atol,
+        )
+
+    def test_dp_consistency(self):
+        rng = np.random.default_rng()
+        nf, nloc, nnei = self.nlist.shape
+        ds = DPDescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        )
+        ft = DPInvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        )
+        type_map = ["foo", "bar"]
+        md0 = DPDPModel(ds, ft, type_map=type_map)
+        md1 = DPModel.deserialize(md0.serialize()).to(env.DEVICE)
+        args0 = [self.coord_ext, self.atype_ext, self.nlist]
+        args1 = [
+            to_torch_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist]
+        ]
+        ret0 = md0.call_lower(*args0)
+        ret1 = md1.forward_common_lower(*args1)
+        np.testing.assert_allclose(
+            ret0["energy"],
+            to_numpy_array(ret1["energy"]),
+            atol=self.atol,
+        )
+        np.testing.assert_allclose(
+            ret0["energy_redu"],
+            to_numpy_array(ret1["energy_redu"]),
+            atol=self.atol,
+        )
+
+    def test_prec_consistency(self):
+        rng = np.random.default_rng()
+        nf, nloc, nnei = self.nlist.shape
+        ds = DPDescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        )
+        ft = DPInvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        )
+        nfp, nap = 2, 3
+        type_map = ["foo", "bar"]
+        fparam = rng.normal(size=[self.nf, nfp])
+        aparam = rng.normal(size=[self.nf, nloc, nap])
+
+        md0 = DPDPModel(ds, ft, type_map=type_map)
+        md1 = DPModel.deserialize(md0.serialize()).to(env.DEVICE)
+
+        args64 = [
+            to_torch_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist]
+        ]
+        args64[0] = args64[0].to(torch.float64)
+        args32 = [
+            to_torch_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist]
+        ]
+        args32[0] = args32[0].to(torch.float32)
+        # fparam, aparam are converted to coordinate precision by model
+        fparam = to_torch_tensor(fparam)
+        aparam = to_torch_tensor(aparam)
+
+        model_l_ret_64 = md1.forward_common_lower(*args64, fparam=fparam, aparam=aparam)
+        model_l_ret_32 = md1.forward_common_lower(*args32, fparam=fparam, aparam=aparam)
+
+        for ii in model_l_ret_32.keys():
+            if ii[-4:] == "redu":
+                self.assertEqual(model_l_ret_32[ii].dtype, torch.float64)
+            else:
+                self.assertEqual(model_l_ret_32[ii].dtype, torch.float32)
+            if ii != "mask":
+                self.assertEqual(model_l_ret_64[ii].dtype, torch.float64)
+            else:
+                self.assertEqual(model_l_ret_64[ii].dtype, torch.int32)
+            np.testing.assert_allclose(
+                to_numpy_array(model_l_ret_32[ii]),
+                to_numpy_array(model_l_ret_64[ii]),
+                atol=self.atol,
+            )
+
+    def test_jit(self):
+        nf, nloc, nnei = self.nlist.shape
+        ds = DescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        ).to(env.DEVICE)
+        ft = InvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        ).to(env.DEVICE)
+        type_map = ["foo", "bar"]
+        md0 = DPModel(ds, ft, type_map=type_map).to(env.DEVICE)
+        md0 = torch.jit.script(md0)
+        md0.get_rcut()
+        md0.get_type_map()
+
+
+class TestDPModelFormatNlist(unittest.TestCase):
+    def setUp(self):
+        # nloc == 3, nall == 4
+        self.nloc = 3
+        self.nall = 5
+        self.nf, self.nt = 1, 2
+        self.coord_ext = np.array(
+            [
+                [0, 0, 0],
+                [0, 1, 0],
+                [0, 0, 1],
+                [0, -2, 0],
+                [2.3, 0, 0],
+            ],
+            dtype=np.float64,
+        ).reshape([1, self.nall * 3])
+        # sel = [5, 2]
+        self.sel = [5, 2]
+        self.expected_nlist = np.array(
+            [
+                [1, 3, -1, -1, -1, 2, -1],
+                [0, -1, -1, -1, -1, 2, -1],
+                [0, 1, -1, -1, -1, -1, -1],
+            ],
+            dtype=int,
+        ).reshape([1, self.nloc, sum(self.sel)])
+        self.atype_ext = np.array([0, 0, 1, 0, 1], dtype=int).reshape([1, self.nall])
+        self.rcut_smth = 0.4
+        self.rcut = 2.0
+
+        nf, nloc, nnei = self.expected_nlist.shape
+        ds = DescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        ).to(env.DEVICE)
+        ft = InvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        ).to(env.DEVICE)
+        type_map = ["foo", "bar"]
+        self.md = DPModel(ds, ft, type_map=type_map).to(env.DEVICE)
+
+    def test_nlist_eq(self):
+        # n_nnei == nnei
+        nlist = np.array(
+            [
+                [1, 3, -1, -1, -1, 2, -1],
+                [0, -1, -1, -1, -1, 2, -1],
+                [0, 1, -1, -1, -1, -1, -1],
+            ],
+            dtype=np.int64,
+        ).reshape([1, self.nloc, -1])
+        nlist1 = self.md.format_nlist(
+            to_torch_tensor(self.coord_ext),
+            to_torch_tensor(self.atype_ext),
+            to_torch_tensor(nlist),
+        )
+        np.testing.assert_equal(self.expected_nlist, to_numpy_array(nlist1))
+
+    def test_nlist_st(self):
+        # n_nnei < nnei
+        nlist = np.array(
+            [
+                [1, 3, -1, 2],
+                [0, -1, -1, 2],
+                [0, 1, -1, -1],
+            ],
+            dtype=np.int64,
+        ).reshape([1, self.nloc, -1])
+        nlist1 = self.md.format_nlist(
+            to_torch_tensor(self.coord_ext),
+            to_torch_tensor(self.atype_ext),
+            to_torch_tensor(nlist),
+        )
+        np.testing.assert_equal(self.expected_nlist, to_numpy_array(nlist1))
+
+    def test_nlist_lt(self):
+        # n_nnei > nnei
+        nlist = np.array(
+            [
+                [1, 3, -1, -1, -1, 2, -1, -1, 4],
+                [0, -1, 4, -1, -1, 2, -1, 3, -1],
+                [0, 1, -1, -1, -1, 4, -1, -1, 3],
+            ],
+            dtype=np.int64,
+        ).reshape([1, self.nloc, -1])
+        nlist1 = self.md.format_nlist(
+            to_torch_tensor(self.coord_ext),
+            to_torch_tensor(self.atype_ext),
+            to_torch_tensor(nlist),
+        )
+        np.testing.assert_equal(self.expected_nlist, to_numpy_array(nlist1))
+
+
+class TestEnergyModel(unittest.TestCase, TestCaseSingleFrameWithoutNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithoutNlist.setUp(self)
+
+    def test_self_consistency(self):
+        nf, nloc = self.atype.shape
+        ds = DescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        ).to(env.DEVICE)
+        ft = InvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        ).to(env.DEVICE)
+        type_map = ["foo", "bar"]
+        md0 = EnergyModel(ds, ft, type_map=type_map).to(env.DEVICE)
+        md1 = EnergyModel.deserialize(md0.serialize()).to(env.DEVICE)
+        args = [to_torch_tensor(ii) for ii in [self.coord, self.atype, self.cell]]
+        ret0 = md0.forward(*args)
+        ret1 = md1.forward(*args)
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["atom_energy"]),
+            to_numpy_array(ret1["atom_energy"]),
+            atol=self.atol,
+        )
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["energy"]),
+            to_numpy_array(ret1["energy"]),
+            atol=self.atol,
+        )
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["force"]),
+            to_numpy_array(ret1["force"]),
+            atol=self.atol,
+        )
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["virial"]),
+            to_numpy_array(ret1["virial"]),
+            atol=self.atol,
+        )
+        ret0 = md0.forward(*args, do_atomic_virial=True)
+        ret1 = md1.forward(*args, do_atomic_virial=True)
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["atom_virial"]),
+            to_numpy_array(ret1["atom_virial"]),
+            atol=self.atol,
+        )
+        coord_ext, atype_ext, mapping, nlist = extend_input_and_build_neighbor_list(
+            to_torch_tensor(self.coord),
+            to_torch_tensor(self.atype),
+            self.rcut,
+            self.sel,
+            mixed_types=md0.mixed_types(),
+            box=to_torch_tensor(self.cell),
+        )
+        args = [coord_ext, atype_ext, nlist]
+        ret2 = md0.forward_lower(*args, do_atomic_virial=True)
+        # check the consistency between the reduced virial from
+        # forward and forward_lower
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["virial"]),
+            to_numpy_array(ret2["virial"]),
+            atol=self.atol,
+        )
+
+
+class TestEnergyModelLower(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+
+    def test_self_consistency(self):
+        nf, nloc, nnei = self.nlist.shape
+        ds = DescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        ).to(env.DEVICE)
+        ft = InvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        ).to(env.DEVICE)
+        type_map = ["foo", "bar"]
+        md0 = EnergyModel(ds, ft, type_map=type_map).to(env.DEVICE)
+        md1 = EnergyModel.deserialize(md0.serialize()).to(env.DEVICE)
+        args = [
+            to_torch_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist]
+        ]
+        ret0 = md0.forward_lower(*args)
+        ret1 = md1.forward_lower(*args)
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["atom_energy"]),
+            to_numpy_array(ret1["atom_energy"]),
+            atol=self.atol,
+        )
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["energy"]),
+            to_numpy_array(ret1["energy"]),
+            atol=self.atol,
+        )
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["extended_force"]),
+            to_numpy_array(ret1["extended_force"]),
+            atol=self.atol,
+        )
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["virial"]),
+            to_numpy_array(ret1["virial"]),
+            atol=self.atol,
+        )
+        ret0 = md0.forward_lower(*args, do_atomic_virial=True)
+        ret1 = md1.forward_lower(*args, do_atomic_virial=True)
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["extended_virial"]),
+            to_numpy_array(ret1["extended_virial"]),
+            atol=self.atol,
+        )
+
+    def test_jit(self):
+        nf, nloc, nnei = self.nlist.shape
+        ds = DescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        ).to(env.DEVICE)
+        ft = InvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        ).to(env.DEVICE)
+        type_map = ["foo", "bar"]
+        md0 = EnergyModel(ds, ft, type_map=type_map).to(env.DEVICE)
+        md0 = torch.jit.script(md0)
+        self.assertEqual(md0.get_rcut(), self.rcut)
+        self.assertEqual(md0.get_type_map(), type_map)
diff --git a/source/tests/pt/model/test_embedding_net.py b/source/tests/pt/model/test_embedding_net.py
new file mode 100644
index 0000000000..63a3534c74
--- /dev/null
+++ b/source/tests/pt/model/test_embedding_net.py
@@ -0,0 +1,221 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import re
+import unittest
+
+import numpy as np
+import tensorflow.compat.v1 as tf
+import torch
+
+from deepmd.pt.utils import (
+    env,
+)
+
+tf.disable_eager_execution()
+
+from pathlib import (
+    Path,
+)
+
+from deepmd.pt.model.descriptor import (
+    DescrptSeA,
+)
+from deepmd.pt.utils import (
+    dp_random,
+)
+from deepmd.pt.utils.dataset import (
+    DeepmdDataSetForLoader,
+)
+from deepmd.pt.utils.env import (
+    DEVICE,
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+from deepmd.pt.utils.nlist import (
+    extend_input_and_build_neighbor_list,
+)
+from deepmd.tf.common import (
+    expand_sys_str,
+)
+from deepmd.tf.descriptor import DescrptSeA as DescrptSeA_tf
+
+from ..test_stat import (
+    energy_data_requirement,
+)
+
+CUR_DIR = os.path.dirname(__file__)
+
+
+def gen_key(worb, depth, elemid):
+    return (worb, depth, elemid)
+
+
+def get_single_batch(dataset, index=None):
+    if index is None:
+        index = dp_random.choice(np.arange(len(dataset)))
+    np_batch = dataset[index]
+    pt_batch = {}
+
+    for key in [
+        "coord",
+        "box",
+        "force",
+        "force_mag",
+        "energy",
+        "virial",
+        "atype",
+        "natoms",
+    ]:
+        if key in np_batch.keys():
+            np_batch[key] = np.expand_dims(np_batch[key], axis=0)
+            pt_batch[key] = torch.as_tensor(np_batch[key], device=env.DEVICE)
+            if key in ["coord", "force", "force_mag"]:
+                np_batch[key] = np_batch[key].reshape(1, -1)
+    np_batch["natoms"] = np_batch["natoms"][0]
+    return np_batch, pt_batch
+
+
+def base_se_a(descriptor, coord, atype, natoms, box):
+    g = tf.Graph()
+    with g.as_default():
+        name_pfx = "d_sea_"
+        t_coord = tf.placeholder(
+            GLOBAL_NP_FLOAT_PRECISION, [None, None], name=name_pfx + "t_coord"
+        )
+        t_atype = tf.placeholder(tf.int32, [None, None], name=name_pfx + "t_type")
+        t_natoms = tf.placeholder(
+            tf.int32, [descriptor.ntypes + 2], name=name_pfx + "t_natoms"
+        )
+        t_box = tf.placeholder(
+            GLOBAL_NP_FLOAT_PRECISION, [None, None], name=name_pfx + "t_box"
+        )
+        t_default_mesh = tf.placeholder(tf.int32, [None], name=name_pfx + "t_mesh")
+        t_embedding = descriptor.build(
+            t_coord, t_atype, t_natoms, t_box, t_default_mesh, input_dict={}
+        )
+        fake_energy = tf.reduce_sum(t_embedding)
+        t_force = descriptor.prod_force_virial(fake_energy, t_natoms)[0]
+        t_vars = {}
+        for var in tf.global_variables():
+            ms = re.findall(r"([a-z]+)_(\d)_(\d)", var.name)
+            if len(ms) == 1:
+                m = ms[0]
+                key = gen_key(worb=m[0], depth=int(m[1]), elemid=int(m[2]))
+                t_vars[key] = var
+        init_op = tf.global_variables_initializer()
+
+    with tf.Session(graph=g) as sess:
+        sess.run(init_op)
+        embedding, force, values = sess.run(
+            [t_embedding, t_force, t_vars],
+            feed_dict={
+                t_coord: coord,
+                t_atype: atype,
+                t_natoms: natoms,
+                t_box: box,
+                t_default_mesh: np.array([0, 0, 0, 2, 2, 2]),
+            },
+        )
+    tf.reset_default_graph()
+    return embedding, force, values
+
+
+class TestSeA(unittest.TestCase):
+    def setUp(self):
+        dp_random.seed(0)
+        with open(str(Path(__file__).parent / "water/se_e2_a.json")) as fin:
+            content = fin.read()
+        config = json.loads(content)
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        config["training"]["training_data"]["systems"] = data_file
+        config["training"]["validation_data"]["systems"] = data_file
+        model_config = config["model"]
+        self.rcut = model_config["descriptor"]["rcut"]
+        self.rcut_smth = model_config["descriptor"]["rcut_smth"]
+        self.sel = model_config["descriptor"]["sel"]
+        self.bsz = config["training"]["training_data"]["batch_size"]
+        self.systems = config["training"]["validation_data"]["systems"]
+        if isinstance(self.systems, str):
+            self.systems = expand_sys_str(self.systems)
+        ds = DeepmdDataSetForLoader(
+            self.systems[0],
+            model_config["type_map"],
+        )
+        ds.add_data_requirement(energy_data_requirement)
+        self.filter_neuron = model_config["descriptor"]["neuron"]
+        self.axis_neuron = model_config["descriptor"]["axis_neuron"]
+        self.np_batch, self.torch_batch = get_single_batch(ds)
+
+    def test_consistency(self):
+        dp_d = DescrptSeA_tf(
+            rcut=self.rcut,
+            rcut_smth=self.rcut_smth,
+            sel=self.sel,
+            neuron=self.filter_neuron,
+            axis_neuron=self.axis_neuron,
+            seed=1,
+        )
+        dp_embedding, dp_force, dp_vars = base_se_a(
+            descriptor=dp_d,
+            coord=self.np_batch["coord"],
+            atype=self.np_batch["atype"],
+            natoms=self.np_batch["natoms"],
+            box=self.np_batch["box"],
+        )
+
+        # Reproduced
+        old_impl = False
+        descriptor = DescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+            neuron=self.filter_neuron,
+            axis_neuron=self.axis_neuron,
+            old_impl=old_impl,
+        ).to(DEVICE)
+        for name, param in descriptor.named_parameters():
+            if old_impl:
+                ms = re.findall(r"(\d)\.deep_layers\.(\d)\.([a-z]+)", name)
+            else:
+                ms = re.findall(r"(\d)\.layers\.(\d)\.([a-z]+)", name)
+            if len(ms) == 1:
+                m = ms[0]
+                key = gen_key(worb=m[2], depth=int(m[1]) + 1, elemid=int(m[0]))
+                var = dp_vars[key]
+                with torch.no_grad():
+                    # Keep parameter value consistency between 2 implentations
+                    param.data.copy_(torch.from_numpy(var))
+
+        pt_coord = self.torch_batch["coord"].to(env.DEVICE)
+        pt_coord.requires_grad_(True)
+        (
+            extended_coord,
+            extended_atype,
+            mapping,
+            nlist,
+        ) = extend_input_and_build_neighbor_list(
+            pt_coord,
+            self.torch_batch["atype"].to(env.DEVICE),
+            self.rcut,
+            self.sel,
+            mixed_types=False,
+            box=self.torch_batch["box"].to(env.DEVICE),
+        )
+        descriptor_out, _, _, _, _ = descriptor(
+            extended_coord,
+            extended_atype,
+            nlist,
+        )
+        my_embedding = descriptor_out.cpu().detach().numpy()
+        fake_energy = torch.sum(descriptor_out)
+        fake_energy.backward()
+        my_force = -pt_coord.grad.cpu().numpy()
+
+        # Check
+        np.testing.assert_allclose(dp_embedding, my_embedding)
+        dp_force = dp_force.reshape(*my_force.shape)
+        np.testing.assert_allclose(dp_force, my_force)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_ener_fitting.py b/source/tests/pt/model/test_ener_fitting.py
new file mode 100644
index 0000000000..f63e17c2fa
--- /dev/null
+++ b/source/tests/pt/model/test_ener_fitting.py
@@ -0,0 +1,191 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import itertools
+import unittest
+
+import numpy as np
+import torch
+
+from deepmd.dpmodel.fitting import InvarFitting as DPInvarFitting
+from deepmd.pt.model.descriptor.se_a import (
+    DescrptSeA,
+)
+from deepmd.pt.model.task.ener import (
+    EnergyFittingNet,
+    InvarFitting,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+)
+
+from .test_env_mat import (
+    TestCaseSingleFrameWithNlist,
+)
+
+dtype = env.GLOBAL_PT_FLOAT_PRECISION
+
+
+class TestInvarFitting(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+
+    def test_consistency(
+        self,
+    ):
+        rng = np.random.default_rng()
+        nf, nloc, nnei = self.nlist.shape
+        dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE)
+        rd0, _, _, _, _ = dd0(
+            torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE),
+            torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE),
+            torch.tensor(self.nlist, dtype=int, device=env.DEVICE),
+        )
+        atype = torch.tensor(self.atype_ext[:, :nloc], dtype=int, device=env.DEVICE)
+
+        for od, mixed_types, nfp, nap, et, nn in itertools.product(
+            [1, 3],
+            [True, False],
+            [0, 3],
+            [0, 4],
+            [[], [0], [1]],
+            [[4, 4, 4], []],
+        ):
+            ft0 = InvarFitting(
+                "foo",
+                self.nt,
+                dd0.dim_out,
+                od,
+                numb_fparam=nfp,
+                numb_aparam=nap,
+                mixed_types=mixed_types,
+                exclude_types=et,
+                neuron=nn,
+            ).to(env.DEVICE)
+            ft1 = DPInvarFitting.deserialize(ft0.serialize())
+            ft2 = InvarFitting.deserialize(ft0.serialize())
+
+            if nfp > 0:
+                ifp = torch.tensor(
+                    rng.normal(size=(self.nf, nfp)), dtype=dtype, device=env.DEVICE
+                )
+            else:
+                ifp = None
+            if nap > 0:
+                iap = torch.tensor(
+                    rng.normal(size=(self.nf, self.nloc, nap)),
+                    dtype=dtype,
+                    device=env.DEVICE,
+                )
+            else:
+                iap = None
+
+            ret0 = ft0(rd0, atype, fparam=ifp, aparam=iap)
+            ret1 = ft1(
+                rd0.detach().cpu().numpy(),
+                atype.detach().cpu().numpy(),
+                fparam=to_numpy_array(ifp),
+                aparam=to_numpy_array(iap),
+            )
+            ret2 = ft2(rd0, atype, fparam=ifp, aparam=iap)
+            np.testing.assert_allclose(
+                to_numpy_array(ret0["foo"]),
+                ret1["foo"],
+            )
+            np.testing.assert_allclose(
+                to_numpy_array(ret0["foo"]),
+                to_numpy_array(ret2["foo"]),
+            )
+            self.assertEqual(ft0.get_sel_type(), ft1.get_sel_type())
+
+    def test_new_old(
+        self,
+    ):
+        rng = np.random.default_rng()
+        nf, nloc, nnei = self.nlist.shape
+        dd = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE)
+        rd0, _, _, _, _ = dd(
+            torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE),
+            torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE),
+            torch.tensor(self.nlist, dtype=int, device=env.DEVICE),
+        )
+        atype = torch.tensor(self.atype_ext[:, :nloc], dtype=int, device=env.DEVICE)
+
+        od = 1
+        for foo, mixed_types in itertools.product(
+            [True],
+            [True, False],
+        ):
+            ft0 = EnergyFittingNet(
+                self.nt,
+                dd.dim_out,
+                mixed_types=mixed_types,
+            ).to(env.DEVICE)
+            ft1 = EnergyFittingNet(
+                self.nt,
+                dd.dim_out,
+                mixed_types=mixed_types,
+                old_impl=True,
+            ).to(env.DEVICE)
+            dd0 = ft0.state_dict()
+            dd1 = ft1.state_dict()
+            for kk, vv in dd1.items():
+                new_kk = kk
+                new_kk = new_kk.replace("filter_layers_old", "filter_layers.networks")
+                new_kk = new_kk.replace("deep_layers", "layers")
+                new_kk = new_kk.replace("final_layer", "layers.3")
+                dd1[kk] = dd0[new_kk]
+                if kk.split(".")[-1] in ["idt", "bias"]:
+                    dd1[kk] = dd1[kk].unsqueeze(0)
+            dd1["bias_atom_e"] = dd0["bias_atom_e"]
+            ft1.load_state_dict(dd1)
+            ret0 = ft0(rd0, atype)
+            ret1 = ft1(rd0, atype)
+            np.testing.assert_allclose(
+                to_numpy_array(ret0["energy"]),
+                to_numpy_array(ret1["energy"]),
+            )
+
+    def test_jit(
+        self,
+    ):
+        for od, mixed_types, nfp, nap, et in itertools.product(
+            [1, 3],
+            [True, False],
+            [0, 3],
+            [0, 4],
+            [[], [0]],
+        ):
+            ft0 = InvarFitting(
+                "foo",
+                self.nt,
+                9,
+                od,
+                numb_fparam=nfp,
+                numb_aparam=nap,
+                mixed_types=mixed_types,
+                exclude_types=et,
+            ).to(env.DEVICE)
+            torch.jit.script(ft0)
+
+    def test_get_set(self):
+        ifn0 = InvarFitting(
+            "energy",
+            self.nt,
+            3,
+            1,
+        )
+        rng = np.random.default_rng()
+        foo = rng.normal([3, 4])
+        for ii in [
+            "bias_atom_e",
+            "fparam_avg",
+            "fparam_inv_std",
+            "aparam_avg",
+            "aparam_inv_std",
+        ]:
+            ifn0[ii] = torch.tensor(foo, dtype=dtype, device=env.DEVICE)
+            np.testing.assert_allclose(
+                foo, np.reshape(ifn0[ii].detach().cpu().numpy(), foo.shape)
+            )
diff --git a/source/tests/pt/model/test_ener_spin_model.py b/source/tests/pt/model/test_ener_spin_model.py
new file mode 100644
index 0000000000..2bd5c22aaf
--- /dev/null
+++ b/source/tests/pt/model/test_ener_spin_model.py
@@ -0,0 +1,420 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import unittest
+
+import numpy as np
+import torch
+
+from deepmd.dpmodel.model import SpinModel as DPSpinModel
+from deepmd.pt.model.model import (
+    SpinEnergyModel,
+    get_model,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.nlist import (
+    extend_input_and_build_neighbor_list,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+)
+
+from .test_permutation import (
+    model_dpa1,
+    model_dpa2,
+    model_se_e2_a,
+    model_spin,
+)
+
+dtype = torch.float64
+
+
+def reduce_tensor(extended_tensor, mapping, nloc: int):
+    nframes, nall = extended_tensor.shape[:2]
+    ext_dims = extended_tensor.shape[2:]
+    reduced_tensor = torch.zeros(
+        [nframes, nloc, *ext_dims],
+        dtype=extended_tensor.dtype,
+        device=extended_tensor.device,
+    )
+    mldims = list(mapping.shape)
+    mapping = mapping.view(mldims + [1] * len(ext_dims)).expand(
+        [-1] * len(mldims) + list(ext_dims)
+    )
+    # nf x nloc x (*ext_dims)
+    reduced_tensor = torch.scatter_reduce(
+        reduced_tensor,
+        1,
+        index=mapping,
+        src=extended_tensor,
+        reduce="sum",
+    )
+    return reduced_tensor
+
+
+class SpinTest:
+    def setUp(self):
+        self.prec = 1e-10
+        natoms = 5
+        self.ntypes = 3  # ["O", "H", "B"] for test
+        self.cell = 4.0 * torch.eye(3, dtype=dtype, device=env.DEVICE).unsqueeze(0)
+        self.coord = 3.0 * torch.rand(
+            [natoms, 3], dtype=dtype, device=env.DEVICE
+        ).unsqueeze(0)
+        self.spin = 0.5 * torch.rand(
+            [natoms, 3], dtype=dtype, device=env.DEVICE
+        ).unsqueeze(0)
+        self.atype = torch.tensor(
+            [0, 0, 0, 1, 1], dtype=torch.int64, device=env.DEVICE
+        ).unsqueeze(0)
+
+        self.expected_mask = torch.tensor(
+            [
+                [True],
+                [True],
+                [True],
+                [False],
+                [False],
+            ],
+            dtype=torch.bool,
+            device=env.DEVICE,
+        ).unsqueeze(0)
+        self.expected_atype_with_spin = torch.tensor(
+            [0, 0, 0, 1, 1, 3, 3, 3, 4, 4], dtype=torch.int64, device=env.DEVICE
+        ).unsqueeze(0)
+        self.expected_nloc_spin_index = (
+            torch.arange(natoms, natoms * 2, dtype=torch.int64, device=env.DEVICE)
+            .unsqueeze(0)
+            .unsqueeze(-1)
+        )
+
+    def test_output_shape(
+        self,
+    ):
+        result = self.model(
+            self.coord,
+            self.atype,
+            self.spin,
+            self.cell,
+        )
+        # check magnetic mask
+        torch.testing.assert_close(result["mask_mag"], self.expected_mask)
+        # check output shape to assure split
+        nframes, nloc = self.coord.shape[:2]
+        torch.testing.assert_close(result["energy"].shape, [nframes, 1])
+        torch.testing.assert_close(result["atom_energy"].shape, [nframes, nloc, 1])
+        torch.testing.assert_close(result["force"].shape, [nframes, nloc, 3])
+        torch.testing.assert_close(result["force_mag"].shape, [nframes, nloc, 3])
+
+    def test_input_output_process(self):
+        nframes, nloc = self.coord.shape[:2]
+        self.real_ntypes = self.model.spin.get_ntypes_real()
+        # 1. test forward input process
+        coord_updated, atype_updated = self.model.process_spin_input(
+            self.coord, self.atype, self.spin
+        )
+        # compare atypes of real and virtual atoms
+        torch.testing.assert_close(atype_updated, self.expected_atype_with_spin)
+        # compare coords of real and virtual atoms
+        torch.testing.assert_close(coord_updated.shape, [nframes, nloc * 2, 3])
+        torch.testing.assert_close(coord_updated[:, :nloc], self.coord)
+        virtual_scale = torch.tensor(
+            self.model.spin.get_virtual_scale_mask()[self.atype.cpu()],
+            dtype=dtype,
+            device=env.DEVICE,
+        )
+        virtual_coord = self.coord + self.spin * virtual_scale.unsqueeze(-1)
+        torch.testing.assert_close(coord_updated[:, nloc:], virtual_coord)
+
+        # 2. test forward output process
+        model_ret = self.model.backbone_model.forward_common(
+            coord_updated,
+            atype_updated,
+            self.cell,
+            do_atomic_virial=True,
+        )
+        if self.model.do_grad_r("energy"):
+            force_all = model_ret["energy_derv_r"].squeeze(-2)
+            force_real, force_mag, _ = self.model.process_spin_output(
+                self.atype, force_all
+            )
+            torch.testing.assert_close(
+                force_real, force_all[:, :nloc] + force_all[:, nloc:]
+            )
+            torch.testing.assert_close(
+                force_mag, force_all[:, nloc:] * virtual_scale.unsqueeze(-1)
+            )
+
+        # 3. test forward_lower input process
+        (
+            extended_coord,
+            extended_atype,
+            mapping,
+            nlist,
+        ) = extend_input_and_build_neighbor_list(
+            self.coord,
+            self.atype,
+            self.model.get_rcut(),
+            self.model.get_sel(),
+            mixed_types=self.model.mixed_types(),
+            box=self.cell,
+        )
+        nall = extended_coord.shape[1]
+        nnei = nlist.shape[-1]
+        extended_spin = torch.gather(
+            self.spin, index=mapping.unsqueeze(-1).tile((1, 1, 3)), dim=1
+        )
+        (
+            extended_coord_updated,
+            extended_atype_updated,
+            nlist_updated,
+            mapping_updated,
+        ) = self.model.process_spin_input_lower(
+            extended_coord, extended_atype, extended_spin, nlist, mapping=mapping
+        )
+        # compare atypes of real and virtual atoms
+        # Note that the real and virtual atoms corresponding to the local ones are switch to the first nloc * 2 atoms
+        torch.testing.assert_close(extended_atype_updated.shape, [nframes, nall * 2])
+        torch.testing.assert_close(
+            extended_atype_updated[:, :nloc], extended_atype[:, :nloc]
+        )
+        torch.testing.assert_close(
+            extended_atype_updated[:, nloc : nloc + nloc],
+            extended_atype[:, :nloc] + self.real_ntypes,
+        )
+        torch.testing.assert_close(
+            extended_atype_updated[:, nloc + nloc : nloc + nall],
+            extended_atype[:, nloc:nall],
+        )
+        torch.testing.assert_close(
+            extended_atype_updated[:, nloc + nall :],
+            extended_atype[:, nloc:nall] + self.real_ntypes,
+        )
+        virtual_scale = torch.tensor(
+            self.model.spin.get_virtual_scale_mask()[extended_atype.cpu()],
+            dtype=dtype,
+            device=env.DEVICE,
+        )
+        # compare coords of real and virtual atoms
+        virtual_coord = extended_coord + extended_spin * virtual_scale.unsqueeze(-1)
+        torch.testing.assert_close(extended_coord_updated.shape, [nframes, nall * 2, 3])
+        torch.testing.assert_close(
+            extended_coord_updated[:, :nloc], extended_coord[:, :nloc]
+        )
+        torch.testing.assert_close(
+            extended_coord_updated[:, nloc : nloc + nloc], virtual_coord[:, :nloc]
+        )
+        torch.testing.assert_close(
+            extended_coord_updated[:, nloc + nloc : nloc + nall],
+            extended_coord[:, nloc:nall],
+        )
+        torch.testing.assert_close(
+            extended_coord_updated[:, nloc + nall :], virtual_coord[:, nloc:nall]
+        )
+
+        # compare mapping
+        torch.testing.assert_close(mapping_updated.shape, [nframes, nall * 2])
+        torch.testing.assert_close(mapping_updated[:, :nloc], mapping[:, :nloc])
+        torch.testing.assert_close(
+            mapping_updated[:, nloc : nloc + nloc], mapping[:, :nloc] + nloc
+        )
+        torch.testing.assert_close(
+            mapping_updated[:, nloc + nloc : nloc + nall], mapping[:, nloc:nall]
+        )
+        torch.testing.assert_close(
+            mapping_updated[:, nloc + nall :], mapping[:, nloc:nall] + nloc
+        )
+
+        # compare nlist
+        torch.testing.assert_close(
+            nlist_updated.shape, [nframes, nloc * 2, nnei * 2 + 1]
+        )
+        # self spin
+        torch.testing.assert_close(
+            nlist_updated[:, :nloc, :1], self.expected_nloc_spin_index
+        )
+        # real and virtual neighbors
+        loc_atoms_mask = (nlist < nloc) & (nlist != -1)
+        ghost_atoms_mask = nlist >= nloc
+        real_neighbors = nlist.clone()
+        real_neighbors[ghost_atoms_mask] += nloc
+        torch.testing.assert_close(
+            nlist_updated[:, :nloc, 1 : 1 + nnei], real_neighbors
+        )
+        virtual_neighbors = nlist.clone()
+        virtual_neighbors[loc_atoms_mask] += nloc
+        virtual_neighbors[ghost_atoms_mask] += nall
+        torch.testing.assert_close(
+            nlist_updated[:, :nloc, 1 + nnei :], virtual_neighbors
+        )
+
+        # 4. test forward_lower output process
+        model_ret = self.model.backbone_model.forward_common_lower(
+            extended_coord_updated,
+            extended_atype_updated,
+            nlist_updated,
+            mapping=mapping_updated,
+            do_atomic_virial=True,
+        )
+        if self.model.do_grad_r("energy"):
+            force_all = model_ret["energy_derv_r"].squeeze(-2)
+            force_real, force_mag, _ = self.model.process_spin_output_lower(
+                extended_atype, force_all, nloc
+            )
+            force_all_switched = torch.zeros_like(force_all)
+            force_all_switched[:, :nloc] = force_all[:, :nloc]
+            force_all_switched[:, nloc:nall] = force_all[:, nloc + nloc : nloc + nall]
+            force_all_switched[:, nall : nall + nloc] = force_all[:, nloc : nloc + nloc]
+            force_all_switched[:, nall + nloc :] = force_all[:, nloc + nall :]
+            torch.testing.assert_close(
+                force_real, force_all_switched[:, :nall] + force_all_switched[:, nall:]
+            )
+            torch.testing.assert_close(
+                force_mag, force_all_switched[:, nall:] * virtual_scale.unsqueeze(-1)
+            )
+
+    def test_jit(self):
+        model = torch.jit.script(self.model)
+        self.assertEqual(model.get_rcut(), self.rcut)
+        self.assertEqual(model.get_nsel(), self.nsel)
+        self.assertEqual(model.get_type_map(), self.type_map)
+
+    def test_self_consistency(self):
+        if hasattr(self, "serial_test") and not self.serial_test:
+            # not implement serialize and deserialize
+            return
+        model1 = SpinEnergyModel.deserialize(self.model.serialize())
+        result = model1(
+            self.coord,
+            self.atype,
+            self.spin,
+            self.cell,
+        )
+        expected_result = self.model(
+            self.coord,
+            self.atype,
+            self.spin,
+            self.cell,
+        )
+        for key in result:
+            torch.testing.assert_close(
+                result[key], expected_result[key], rtol=self.prec, atol=self.prec
+            )
+        model1 = torch.jit.script(model1)
+
+    def test_dp_consistency(self):
+        if hasattr(self, "serial_test") and not self.serial_test:
+            # not implement serialize and deserialize
+            return
+        dp_model = DPSpinModel.deserialize(self.model.serialize())
+        # test call
+        dp_ret = dp_model.call(
+            to_numpy_array(self.coord),
+            to_numpy_array(self.atype),
+            to_numpy_array(self.spin),
+            to_numpy_array(self.cell),
+        )
+        result = self.model.forward_common(
+            self.coord,
+            self.atype,
+            self.spin,
+            self.cell,
+        )
+        np.testing.assert_allclose(
+            to_numpy_array(result["energy"]),
+            dp_ret["energy"],
+            rtol=self.prec,
+            atol=self.prec,
+        )
+        np.testing.assert_allclose(
+            to_numpy_array(result["energy_redu"]),
+            dp_ret["energy_redu"],
+            rtol=self.prec,
+            atol=self.prec,
+        )
+
+        # test call_lower
+        (
+            extended_coord,
+            extended_atype,
+            mapping,
+            nlist,
+        ) = extend_input_and_build_neighbor_list(
+            self.coord,
+            self.atype,
+            self.model.get_rcut(),
+            self.model.get_sel(),
+            mixed_types=self.model.mixed_types(),
+            box=self.cell,
+        )
+        extended_spin = torch.gather(
+            self.spin, index=mapping.unsqueeze(-1).tile((1, 1, 3)), dim=1
+        )
+        dp_ret_lower = dp_model.call_lower(
+            to_numpy_array(extended_coord),
+            to_numpy_array(extended_atype),
+            to_numpy_array(extended_spin),
+            to_numpy_array(nlist),
+            to_numpy_array(mapping),
+        )
+        result_lower = self.model.forward_common_lower(
+            extended_coord,
+            extended_atype,
+            extended_spin,
+            nlist,
+            mapping,
+        )
+        np.testing.assert_allclose(
+            to_numpy_array(result_lower["energy"]),
+            dp_ret_lower["energy"],
+            rtol=self.prec,
+            atol=self.prec,
+        )
+        np.testing.assert_allclose(
+            to_numpy_array(result_lower["energy_redu"]),
+            dp_ret_lower["energy_redu"],
+            rtol=self.prec,
+            atol=self.prec,
+        )
+
+
+class TestEnergyModelSpinSeA(unittest.TestCase, SpinTest):
+    def setUp(self):
+        SpinTest.setUp(self)
+        model_params = copy.deepcopy(model_spin)
+        model_params["descriptor"] = copy.deepcopy(model_se_e2_a["descriptor"])
+        self.rcut = model_params["descriptor"]["rcut"]
+        self.nsel = sum(model_params["descriptor"]["sel"])
+        self.type_map = model_params["type_map"]
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelSpinDPA1(unittest.TestCase, SpinTest):
+    def setUp(self):
+        SpinTest.setUp(self)
+        model_params = copy.deepcopy(model_spin)
+        model_params["descriptor"] = copy.deepcopy(model_dpa1["descriptor"])
+        self.rcut = model_params["descriptor"]["rcut"]
+        self.nsel = model_params["descriptor"]["sel"]
+        self.type_map = model_params["type_map"]
+        # not implement serialize and deserialize
+        self.serial_test = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelSpinDPA2(unittest.TestCase, SpinTest):
+    def setUp(self):
+        SpinTest.setUp(self)
+        model_params = copy.deepcopy(model_spin)
+        model_params["descriptor"] = copy.deepcopy(model_dpa2["descriptor"])
+        self.rcut = model_params["descriptor"]["repinit_rcut"]
+        self.nsel = model_params["descriptor"]["repinit_nsel"]
+        self.type_map = model_params["type_map"]
+        # not implement serialize and deserialize
+        self.serial_test = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_env_mat.py b/source/tests/pt/model/test_env_mat.py
new file mode 100644
index 0000000000..e18093b2f1
--- /dev/null
+++ b/source/tests/pt/model/test_env_mat.py
@@ -0,0 +1,165 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+import torch
+
+from deepmd.dpmodel.utils import (
+    EnvMat,
+)
+from deepmd.pt.model.descriptor.env_mat import (
+    prod_env_mat,
+)
+from deepmd.pt.utils import (
+    env,
+)
+
+dtype = env.GLOBAL_PT_FLOAT_PRECISION
+
+
+class TestCaseSingleFrameWithNlist:
+    def setUp(self):
+        # nloc == 3, nall == 4
+        self.nloc = 3
+        self.nall = 4
+        self.nf, self.nt = 2, 2
+        self.coord_ext = np.array(
+            [
+                [0, 0, 0],
+                [0, 1, 0],
+                [0, 0, 1],
+                [0, -2, 0],
+            ],
+            dtype=np.float64,
+        ).reshape([1, self.nall, 3])
+        self.atype_ext = np.array([0, 0, 1, 0], dtype=int).reshape([1, self.nall])
+        # sel = [5, 2]
+        self.sel = [5, 2]
+        self.nlist = np.array(
+            [
+                [1, 3, -1, -1, -1, 2, -1],
+                [0, -1, -1, -1, -1, 2, -1],
+                [0, 1, -1, -1, -1, -1, -1],
+            ],
+            dtype=int,
+        ).reshape([1, self.nloc, sum(self.sel)])
+        self.rcut = 2.2
+        self.rcut_smth = 0.4
+        # permutations
+        self.perm = np.array([2, 0, 1, 3], dtype=np.int32)
+        inv_perm = np.array([1, 2, 0, 3], dtype=np.int32)
+        # permute the coord and atype
+        self.coord_ext = np.concatenate(
+            [self.coord_ext, self.coord_ext[:, self.perm, :]], axis=0
+        ).reshape(self.nf, self.nall * 3)
+        self.atype_ext = np.concatenate(
+            [self.atype_ext, self.atype_ext[:, self.perm]], axis=0
+        )
+        # permute the nlist
+        nlist1 = self.nlist[:, self.perm[: self.nloc], :]
+        mask = nlist1 == -1
+        nlist1 = inv_perm[nlist1]
+        nlist1 = np.where(mask, -1, nlist1)
+        self.nlist = np.concatenate([self.nlist, nlist1], axis=0)
+        self.atol = 1e-12
+
+
+class TestCaseSingleFrameWithNlistWithVirtual:
+    def setUp(self):
+        # nloc == 3, nall == 4
+        self.nloc = 4
+        self.nall = 5
+        self.nf, self.nt = 2, 2
+        self.coord_ext = np.array(
+            [
+                [0, 0, 0],
+                [0, 0, 0],
+                [0, 1, 0],
+                [0, 0, 1],
+                [0, -2, 0],
+            ],
+            dtype=np.float64,
+        ).reshape([1, self.nall, 3])
+        self.atype_ext = np.array([0, -1, 0, 1, 0], dtype=int).reshape([1, self.nall])
+        # sel = [5, 2]
+        self.sel = [5, 2]
+        self.nlist = np.array(
+            [
+                [2, 4, -1, -1, -1, 3, -1],
+                [-1, -1, -1, -1, -1, -1, -1],
+                [0, -1, -1, -1, -1, 3, -1],
+                [0, 2, -1, -1, -1, -1, -1],
+            ],
+            dtype=int,
+        ).reshape([1, self.nloc, sum(self.sel)])
+        self.rcut = 2.2
+        self.rcut_smth = 0.4
+        # permutations
+        self.perm = np.array([3, 0, 1, 2, 4], dtype=np.int32)
+        inv_perm = np.argsort(self.perm)
+        # permute the coord and atype
+        self.coord_ext = np.concatenate(
+            [self.coord_ext, self.coord_ext[:, self.perm, :]], axis=0
+        ).reshape(self.nf, self.nall * 3)
+        self.atype_ext = np.concatenate(
+            [self.atype_ext, self.atype_ext[:, self.perm]], axis=0
+        )
+        # permute the nlist
+        nlist1 = self.nlist[:, self.perm[: self.nloc], :]
+        mask = nlist1 == -1
+        nlist1 = inv_perm[nlist1]
+        nlist1 = np.where(mask, -1, nlist1)
+        self.nlist = np.concatenate([self.nlist, nlist1], axis=0)
+        self.get_real_mapping = np.array([[0, 2, 3], [0, 1, 3]], dtype=np.int32)
+        self.atol = 1e-12
+
+
+class TestCaseSingleFrameWithoutNlist:
+    def setUp(self):
+        # nloc == 3, nall == 4
+        self.nloc = 3
+        self.nf, self.nt = 1, 2
+        self.coord = np.array(
+            [
+                [0, 0, 0],
+                [0, 1, 0],
+                [0, 0, 1],
+            ],
+            dtype=np.float64,
+        ).reshape([1, self.nloc * 3])
+        self.atype = np.array([0, 0, 1], dtype=int).reshape([1, self.nloc])
+        self.cell = 2.0 * np.eye(3).reshape([1, 9])
+        # sel = [5, 2]
+        self.sel = [16, 8]
+        self.rcut = 2.2
+        self.rcut_smth = 0.4
+        self.atol = 1e-12
+
+
+# to be merged with the tf test case
+class TestEnvMat(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+
+    def test_consistency(
+        self,
+    ):
+        rng = np.random.default_rng()
+        nf, nloc, nnei = self.nlist.shape
+        davg = rng.normal(size=(self.nt, nnei, 4))
+        dstd = rng.normal(size=(self.nt, nnei, 4))
+        dstd = 0.1 + np.abs(dstd)
+        em0 = EnvMat(self.rcut, self.rcut_smth)
+        mm0, ww0 = em0.call(self.coord_ext, self.atype_ext, self.nlist, davg, dstd)
+        mm1, _, ww1 = prod_env_mat(
+            torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE),
+            torch.tensor(self.nlist, dtype=int, device=env.DEVICE),
+            torch.tensor(self.atype_ext[:, :nloc], dtype=int, device=env.DEVICE),
+            torch.tensor(davg, device=env.DEVICE),
+            torch.tensor(dstd, device=env.DEVICE),
+            self.rcut,
+            self.rcut_smth,
+        )
+        np.testing.assert_allclose(mm0, mm1.detach().cpu().numpy())
+        np.testing.assert_allclose(ww0, ww1.detach().cpu().numpy())
+        np.testing.assert_allclose(mm0[0][self.perm[: self.nloc]], mm0[1])
diff --git a/source/tests/pt/model/test_exclusion_mask.py b/source/tests/pt/model/test_exclusion_mask.py
new file mode 100644
index 0000000000..b50f163eb6
--- /dev/null
+++ b/source/tests/pt/model/test_exclusion_mask.py
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.exclude_mask import (
+    AtomExcludeMask,
+    PairExcludeMask,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+    to_torch_tensor,
+)
+
+from .test_env_mat import (
+    TestCaseSingleFrameWithNlist,
+)
+
+dtype = env.GLOBAL_PT_FLOAT_PRECISION
+
+
+class TestAtomExcludeMask(unittest.TestCase):
+    def test_build_type_exclude_mask(self):
+        nf = 2
+        nt = 3
+        exclude_types = [0, 2]
+        atype = np.array(
+            [
+                [0, 2, 1, 2, 0, 1, 0],
+                [1, 2, 0, 0, 2, 2, 1],
+            ],
+            dtype=np.int32,
+        ).reshape([nf, -1])
+        expected_mask = np.array(
+            [
+                [0, 0, 1, 0, 0, 1, 0],
+                [1, 0, 0, 0, 0, 0, 1],
+            ]
+        ).reshape([nf, -1])
+        des = AtomExcludeMask(nt, exclude_types=exclude_types)
+        mask = des(to_torch_tensor(atype))
+        np.testing.assert_equal(to_numpy_array(mask), expected_mask)
+
+
+# to be merged with the tf test case
+class TestPairExcludeMask(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+
+    def test_build_type_exclude_mask(self):
+        exclude_types = [[0, 1]]
+        expected_mask = np.array(
+            [
+                [1, 1, 1, 1, 1, 0, 1],
+                [1, 1, 1, 1, 1, 0, 1],
+                [0, 0, 1, 1, 1, 1, 1],
+                [0, 0, 1, 1, 1, 1, 1],
+                [1, 1, 1, 1, 1, 0, 1],
+                [1, 1, 1, 1, 1, 0, 1],
+            ]
+        ).reshape(self.nf, self.nloc, sum(self.sel))
+        des = PairExcludeMask(self.nt, exclude_types=exclude_types).to(env.DEVICE)
+        mask = des(
+            to_torch_tensor(self.nlist),
+            to_torch_tensor(self.atype_ext),
+        )
+        np.testing.assert_equal(to_numpy_array(mask), expected_mask)
diff --git a/source/tests/pt/model/test_fitting_net.py b/source/tests/pt/model/test_fitting_net.py
new file mode 100644
index 0000000000..c7e1723799
--- /dev/null
+++ b/source/tests/pt/model/test_fitting_net.py
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import re
+import unittest
+
+import numpy as np
+import tensorflow.compat.v1 as tf
+import torch
+
+tf.disable_eager_execution()
+
+from deepmd.pt.model.task import (
+    EnergyFittingNet,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+from deepmd.tf.fit.ener import (
+    EnerFitting,
+)
+
+
+class FakeDescriptor:
+    def __init__(self, ntypes, embedding_width):
+        self._ntypes = ntypes
+        self._dim_out = embedding_width
+
+    def get_ntypes(self):
+        return self._ntypes
+
+    def get_dim_out(self):
+        return self._dim_out
+
+
+def gen_key(type_id, layer_id, w_or_b):
+    return (type_id, layer_id, w_or_b)
+
+
+def base_fitting_net(dp_fn, embedding, natoms, atype):
+    g = tf.Graph()
+    with g.as_default():
+        t_embedding = tf.placeholder(GLOBAL_NP_FLOAT_PRECISION, [None, None])
+        t_natoms = tf.placeholder(tf.int32, [None])
+        t_atype = tf.placeholder(tf.int32, [None, None])
+        t_energy = dp_fn.build(t_embedding, t_natoms, {"atype": t_atype})
+        init_op = tf.global_variables_initializer()
+        t_vars = {}
+        for var in tf.global_variables():
+            key = None
+            matched = re.match(r"layer_(\d)_type_(\d)/([a-z]+)", var.name)
+            if matched:
+                key = gen_key(
+                    type_id=matched.group(2),
+                    layer_id=matched.group(1),
+                    w_or_b=matched.group(3),
+                )
+            else:
+                matched = re.match(r"final_layer_type_(\d)/([a-z]+)", var.name)
+                if matched:
+                    key = gen_key(
+                        type_id=matched.group(1), layer_id=-1, w_or_b=matched.group(2)
+                    )
+            if key is not None:
+                t_vars[key] = var
+
+    with tf.Session(graph=g) as sess:
+        sess.run(init_op)
+        energy, values = sess.run(
+            [t_energy, t_vars],
+            feed_dict={
+                t_embedding: embedding,
+                t_natoms: natoms,
+                t_atype: atype,
+            },
+        )
+    tf.reset_default_graph()
+    return energy, values
+
+
+class TestFittingNet(unittest.TestCase):
+    def setUp(self):
+        nloc = 7
+        self.embedding_width = 30
+        self.natoms = np.array([nloc, nloc, 2, 5], dtype=np.int32)
+        rng = np.random.default_rng()
+        self.embedding = rng.uniform(size=[4, nloc * self.embedding_width])
+        self.ntypes = self.natoms.size - 2
+        self.n_neuron = [32, 32, 32]
+        self.atype = np.zeros([4, nloc], dtype=np.int32)
+        cnt = 0
+        for i in range(self.ntypes):
+            self.atype[:, cnt : cnt + self.natoms[i + 2]] = i
+            cnt += self.natoms[i + 2]
+
+        fake_d = FakeDescriptor(2, 30)
+        self.dp_fn = EnerFitting(
+            fake_d.get_ntypes(), fake_d.get_dim_out(), self.n_neuron
+        )
+        self.dp_fn.bias_atom_e = rng.uniform(size=[self.ntypes])
+
+    def test_consistency(self):
+        dp_energy, values = base_fitting_net(
+            self.dp_fn, self.embedding, self.natoms, self.atype
+        )
+        my_fn = EnergyFittingNet(
+            self.ntypes,
+            self.embedding_width,
+            neuron=self.n_neuron,
+            bias_atom_e=self.dp_fn.bias_atom_e,
+            mixed_types=False,
+        ).to(env.DEVICE)
+        for name, param in my_fn.named_parameters():
+            matched = re.match(
+                "filter_layers\.networks\.(\d).layers\.(\d)\.([a-z]+)", name
+            )
+            key = None
+            if matched:
+                if int(matched.group(2)) == len(self.n_neuron):
+                    layer_id = -1
+                else:
+                    layer_id = matched.group(2)
+                key = gen_key(
+                    type_id=matched.group(1),
+                    layer_id=layer_id,
+                    w_or_b=matched.group(3),
+                )
+            assert key is not None
+            var = values[key]
+            with torch.no_grad():
+                # Keep parameter value consistency between 2 implentations
+                param.data.copy_(torch.from_numpy(var))
+        embedding = torch.from_numpy(self.embedding)
+        embedding = embedding.view(4, -1, self.embedding_width)
+        atype = torch.from_numpy(self.atype)
+        ret = my_fn(embedding.to(env.DEVICE), atype.to(env.DEVICE))
+        my_energy = ret["energy"]
+        my_energy = my_energy.detach().cpu()
+        np.testing.assert_allclose(dp_energy, my_energy.numpy().reshape([-1]))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_force_grad.py b/source/tests/pt/model/test_force_grad.py
new file mode 100644
index 0000000000..80a72cc176
--- /dev/null
+++ b/source/tests/pt/model/test_force_grad.py
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import json
+import unittest
+from pathlib import (
+    Path,
+)
+from typing import (
+    List,
+    Optional,
+)
+
+import numpy as np
+import torch
+
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.utils.data import (
+    DeepmdData,
+)
+
+
+class CheckSymmetry(DeepmdData):
+    def __init__(
+        self,
+        sys_path: str,
+        type_map: Optional[List[str]] = None,
+    ):
+        super().__init__(sys_path=sys_path, type_map=type_map)
+        self.add("energy", 1, atomic=False, must=False, high_prec=True)
+        self.add("force", 3, atomic=True, must=False, high_prec=False)
+        self.add("virial", 9, atomic=False, must=False, high_prec=False)
+
+    def get_disturb(self, index, atom_index, axis_index, delta):
+        for i in range(
+            0, len(self.dirs) + 1
+        ):  # note: if different sets can be merged, prefix sum is unused to calculate
+            if index < self.prefix_sum[i]:
+                break
+        frames = self._load_set(self.dirs[i - 1])
+        tmp = copy.deepcopy(frames["coord"].reshape(self.nframes, -1, 3))
+        tmp[:, atom_index, axis_index] += delta
+        frames["coord"] = tmp
+        frame = self._get_subdata(frames, index - self.prefix_sum[i - 1])
+        frame = self.reformat_data_torch(frame)
+        return frame
+
+
+def get_data(batch):
+    inputs = {}
+    for key in ["coord", "atype", "box"]:
+        inputs[key] = batch[key].unsqueeze(0).to(env.DEVICE)
+    return inputs
+
+
+class TestForceGrad(unittest.TestCase):
+    def setUp(self):
+        with open(str(Path(__file__).parent / "water/se_e2_a.json")) as fin:
+            self.config = json.load(fin)
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.system_index = 0
+        self.batch_index = 0
+        self.get_dataset(self.system_index, self.batch_index)
+        self.get_model()
+
+    def get_model(self):
+        self.model = get_model(self.config["model"]).to(env.DEVICE)
+
+    def get_dataset(self, system_index=0, batch_index=0):
+        systems = self.config["training"]["training_data"]["systems"]
+        rcut = self.config["model"]["descriptor"]["rcut"]
+        sel = self.config["model"]["descriptor"]["sel"]
+        sec = torch.cumsum(torch.tensor(sel), dim=0)
+        type_map = self.config["model"]["type_map"]
+        self.dpdatasystem = CheckSymmetry(
+            sys_path=systems[system_index], type_map=type_map
+        )
+        self.origin_batch = self.dpdatasystem.get_item_torch(batch_index)
+
+    @unittest.skip("it can be replaced by autodiff")
+    def test_force_grad(self, threshold=1e-2, delta0=1e-6, seed=20):
+        result0 = self.model(**get_data(self.origin_batch))
+        np.random.default_rng(seed)
+        errors = np.zeros((self.dpdatasystem.natoms, 3))
+        for atom_index in range(self.dpdatasystem.natoms):
+            for axis_index in range(3):
+                delta = np.random.default_rng().random() * delta0
+                disturb_batch = self.dpdatasystem.get_disturb(
+                    self.batch_index, atom_index, axis_index, delta
+                )
+                disturb_result = self.model(**get_data(disturb_batch))
+                disturb_force = -(disturb_result["energy"] - result0["energy"]) / delta
+                disturb_error = (
+                    result0["force"][0, atom_index, axis_index] - disturb_force
+                )
+                errors[atom_index, axis_index] = disturb_error.detach().cpu().numpy()
+        self.assertTrue(np.abs(errors).max() < threshold, msg=str(np.abs(errors).max()))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_forward_lower.py b/source/tests/pt/model/test_forward_lower.py
new file mode 100644
index 0000000000..32be3b62ad
--- /dev/null
+++ b/source/tests/pt/model/test_forward_lower.py
@@ -0,0 +1,177 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import unittest
+
+import torch
+
+from deepmd.pt.infer.deep_eval import (
+    eval_model,
+)
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.nlist import (
+    extend_input_and_build_neighbor_list,
+)
+
+from .test_permutation import (  # model_dpau,
+    model_dpa1,
+    model_dpa2,
+    model_se_e2_a,
+    model_spin,
+    model_zbl,
+)
+
+dtype = torch.float64
+
+
+def reduce_tensor(extended_tensor, mapping, nloc: int):
+    nframes, nall = extended_tensor.shape[:2]
+    ext_dims = extended_tensor.shape[2:]
+    reduced_tensor = torch.zeros(
+        [nframes, nloc, *ext_dims],
+        dtype=extended_tensor.dtype,
+        device=extended_tensor.device,
+    )
+    mldims = list(mapping.shape)
+    mapping = mapping.view(mldims + [1] * len(ext_dims)).expand(
+        [-1] * len(mldims) + list(ext_dims)
+    )
+    # nf x nloc x (*ext_dims)
+    reduced_tensor = torch.scatter_reduce(
+        reduced_tensor,
+        1,
+        index=mapping,
+        src=extended_tensor,
+        reduce="sum",
+    )
+    return reduced_tensor
+
+
+class ForwardLowerTest:
+    def test(
+        self,
+    ):
+        prec = self.prec
+        natoms = 5
+        cell = 4.0 * torch.eye(3, dtype=dtype, device=env.DEVICE)
+        coord = 3.0 * torch.rand([natoms, 3], dtype=dtype, device=env.DEVICE)
+        spin = 0.5 * torch.rand([natoms, 3], dtype=dtype, device=env.DEVICE)
+        atype = torch.tensor([0, 0, 0, 1, 1], dtype=torch.int64, device=env.DEVICE)
+        test_spin = getattr(self, "test_spin", False)
+        if not test_spin:
+            test_keys = ["energy", "force", "virial"]
+        else:
+            test_keys = ["energy", "force", "force_mag"]
+
+        result_forward = eval_model(
+            self.model,
+            coord.unsqueeze(0),
+            cell.unsqueeze(0),
+            atype,
+            spins=spin.unsqueeze(0),
+        )
+        (
+            extended_coord,
+            extended_atype,
+            mapping,
+            nlist,
+        ) = extend_input_and_build_neighbor_list(
+            coord.unsqueeze(0),
+            atype.unsqueeze(0),
+            self.model.get_rcut(),
+            self.model.get_sel(),
+            mixed_types=self.model.mixed_types(),
+            box=cell.unsqueeze(0),
+        )
+        extended_spin = torch.gather(
+            spin.unsqueeze(0), index=mapping.unsqueeze(-1).tile((1, 1, 3)), dim=1
+        )
+        input_dict = {
+            "extended_coord": extended_coord,
+            "extended_atype": extended_atype,
+            "nlist": nlist,
+            "mapping": mapping,
+            "do_atomic_virial": False,
+        }
+        if test_spin:
+            input_dict["extended_spin"] = extended_spin
+        result_forward_lower = self.model.forward_lower(**input_dict)
+        for key in test_keys:
+            if key in ["energy"]:
+                torch.testing.assert_close(
+                    result_forward_lower[key], result_forward[key], rtol=prec, atol=prec
+                )
+            elif key in ["force", "force_mag"]:
+                reduced_vv = reduce_tensor(
+                    result_forward_lower[f"extended_{key}"], mapping, natoms
+                )
+                torch.testing.assert_close(
+                    reduced_vv, result_forward[key], rtol=prec, atol=prec
+                )
+            elif key == "virial":
+                if not hasattr(self, "test_virial") or self.test_virial:
+                    torch.testing.assert_close(
+                        result_forward_lower[key],
+                        result_forward[key],
+                        rtol=prec,
+                        atol=prec,
+                    )
+            else:
+                raise RuntimeError(f"Unexpected test key {key}")
+
+
+class TestEnergyModelSeA(unittest.TestCase, ForwardLowerTest):
+    def setUp(self):
+        self.prec = 1e-10
+        model_params = copy.deepcopy(model_se_e2_a)
+        self.type_split = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelDPA1(unittest.TestCase, ForwardLowerTest):
+    def setUp(self):
+        self.prec = 1e-10
+        model_params = copy.deepcopy(model_dpa1)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelDPA2(unittest.TestCase, ForwardLowerTest):
+    def setUp(self):
+        self.prec = 1e-10
+        model_params_sample = copy.deepcopy(model_dpa2)
+        model_params_sample["descriptor"]["rcut"] = model_params_sample["descriptor"][
+            "repinit_rcut"
+        ]
+        model_params_sample["descriptor"]["sel"] = model_params_sample["descriptor"][
+            "repinit_nsel"
+        ]
+        model_params = copy.deepcopy(model_dpa2)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelZBL(unittest.TestCase, ForwardLowerTest):
+    def setUp(self):
+        self.prec = 1e-10
+        model_params = copy.deepcopy(model_zbl)
+        self.type_split = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelSpinSeA(unittest.TestCase, ForwardLowerTest):
+    def setUp(self):
+        # still need to figure out why only 1e-5 rtol and atol
+        self.prec = 1e-5
+        model_params = copy.deepcopy(model_spin)
+        self.type_split = False
+        self.test_spin = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_jit.py b/source/tests/pt/model/test_jit.py
new file mode 100644
index 0000000000..41a5902a5a
--- /dev/null
+++ b/source/tests/pt/model/test_jit.py
@@ -0,0 +1,157 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import shutil
+import unittest
+from copy import (
+    deepcopy,
+)
+from pathlib import (
+    Path,
+)
+
+import torch
+
+from deepmd.pt.entrypoints.main import (
+    get_trainer,
+)
+from deepmd.pt.infer import (
+    inference,
+)
+
+from .test_permutation import (
+    model_dos,
+    model_dpa1,
+    model_dpa2,
+    model_hybrid,
+    model_se_e2_a,
+)
+
+
+class JITTest:
+    def test_jit(self):
+        trainer = get_trainer(deepcopy(self.config))
+        trainer.run()
+        model = torch.jit.script(inference.Tester("./model.pt").model)
+        torch.jit.save(model, "./frozen_model.pth", {})
+
+    def tearDown(self):
+        for f in os.listdir("."):
+            if f.startswith("model") and f.endswith("pt"):
+                os.remove(f)
+            if f in ["lcurve.out", "frozen_model.pth"]:
+                os.remove(f)
+            if f in ["stat_files"]:
+                shutil.rmtree(f)
+            if f in ["checkpoint"]:
+                os.remove(f)
+
+
+class TestEnergyModelSeA(unittest.TestCase, JITTest):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water/se_atten.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.config["model"] = deepcopy(model_se_e2_a)
+        self.config["training"]["numb_steps"] = 10
+        self.config["training"]["save_freq"] = 10
+
+    def tearDown(self):
+        JITTest.tearDown(self)
+
+
+class TestDOSModelSeA(unittest.TestCase, JITTest):
+    def setUp(self):
+        input_json = str(Path(__file__).parent.parent / "dos/input.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        data_file = [str(Path(__file__).parent.parent / "dos/data/global_system")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.config["model"] = deepcopy(model_dos)
+        self.config["training"]["numb_steps"] = 10
+        self.config["training"]["save_freq"] = 10
+
+    def tearDown(self):
+        JITTest.tearDown(self)
+
+
+class TestEnergyModelDPA1(unittest.TestCase, JITTest):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water/se_atten.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.config["model"] = deepcopy(model_dpa1)
+        self.config["training"]["numb_steps"] = 10
+        self.config["training"]["save_freq"] = 10
+
+    def tearDown(self):
+        JITTest.tearDown(self)
+
+
+class TestEnergyModelDPA2(unittest.TestCase, JITTest):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water/se_atten.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.config["model"] = deepcopy(model_dpa2)
+        # self.config["model"]["descriptor"]["rcut"] = self.config["model"]["descriptor"][
+        #     "repinit_rcut"
+        # ]
+        # self.config["model"]["descriptor"]["rcut_smth"] = self.config["model"][
+        #     "descriptor"
+        # ]["repinit_rcut_smth"]
+        # self.config["model"]["descriptor"]["sel"] = self.config["model"]["descriptor"][
+        #     "repinit_nsel"
+        # ]
+        self.config["training"]["numb_steps"] = 10
+        self.config["training"]["save_freq"] = 10
+
+    def tearDown(self):
+        JITTest.tearDown(self)
+
+
+class TestEnergyModelHybrid(unittest.TestCase, JITTest):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water/se_atten.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.config["model"] = deepcopy(model_hybrid)
+        self.config["training"]["numb_steps"] = 10
+        self.config["training"]["save_freq"] = 10
+
+    def tearDown(self):
+        JITTest.tearDown(self)
+
+
+class TestEnergyModelHybrid2(unittest.TestCase, JITTest):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water/se_atten.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.config["model"] = deepcopy(model_hybrid)
+        # self.config["model"]["descriptor"]["hybrid_mode"] = "sequential"
+        self.config["training"]["numb_steps"] = 10
+        self.config["training"]["save_freq"] = 10
+
+    def tearDown(self):
+        JITTest.tearDown(self)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_linear_atomic_model.py b/source/tests/pt/model/test_linear_atomic_model.py
new file mode 100644
index 0000000000..7f24ffdc53
--- /dev/null
+++ b/source/tests/pt/model/test_linear_atomic_model.py
@@ -0,0 +1,209 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+from unittest.mock import (
+    patch,
+)
+
+import numpy as np
+import torch
+
+from deepmd.dpmodel.atomic_model import (
+    DPZBLLinearEnergyAtomicModel as DPDPZBLLinearEnergyAtomicModel,
+)
+from deepmd.pt.model.atomic_model import (
+    DPAtomicModel,
+    DPZBLLinearEnergyAtomicModel,
+    PairTabAtomicModel,
+)
+from deepmd.pt.model.descriptor.se_a import (
+    DescrptSeA,
+)
+from deepmd.pt.model.model import (
+    DPZBLModel,
+)
+from deepmd.pt.model.task.ener import (
+    InvarFitting,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+    to_torch_tensor,
+)
+
+from .test_env_mat import (
+    TestCaseSingleFrameWithNlist,
+)
+
+dtype = env.GLOBAL_PT_FLOAT_PRECISION
+
+
+class TestWeightCalculation(unittest.TestCase):
+    @patch("numpy.loadtxt")
+    def test_pairwise(self, mock_loadtxt):
+        file_path = "dummy_path"
+        mock_loadtxt.return_value = np.array(
+            [
+                [0.05, 1.0, 2.0, 3.0],
+                [0.1, 0.8, 1.6, 2.4],
+                [0.15, 0.5, 1.0, 1.5],
+                [0.2, 0.25, 0.4, 0.75],
+                [0.25, 0.0, 0.0, 0.0],
+            ]
+        )
+        extended_atype = torch.tensor([[0, 0]], device=env.DEVICE)
+        nlist = torch.tensor([[[1], [-1]]], device=env.DEVICE)
+
+        ds = DescrptSeA(
+            rcut_smth=0.3,
+            rcut=0.4,
+            sel=[3],
+        ).to(env.DEVICE)
+        ft = InvarFitting(
+            "energy",
+            2,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        ).to(env.DEVICE)
+
+        type_map = ["foo", "bar"]
+        zbl_model = PairTabAtomicModel(
+            tab_file=file_path, rcut=0.3, sel=2, type_map=type_map[::-1]
+        )
+        dp_model = DPAtomicModel(ds, ft, type_map=type_map).to(env.DEVICE)
+        wgt_model = DPZBLLinearEnergyAtomicModel(
+            dp_model,
+            zbl_model,
+            sw_rmin=0.1,
+            sw_rmax=0.25,
+            type_map=type_map,
+        ).to(env.DEVICE)
+        wgt_res = []
+        for dist in np.linspace(0.05, 0.3, 10):
+            extended_coord = torch.tensor(
+                [
+                    [
+                        [0.0, 0.0, 0.0],
+                        [0.0, dist, 0.0],
+                    ],
+                ],
+                device=env.DEVICE,
+            )
+
+            wgt_model.forward_atomic(extended_coord, extended_atype, nlist)
+
+            wgt_res.append(wgt_model.zbl_weight)
+        results = torch.stack(wgt_res).reshape(10, 2)
+        excepted_res = torch.tensor(
+            [
+                [1.0, 0.0],
+                [1.0, 0.0],
+                [0.9995, 0.0],
+                [0.9236, 0.0],
+                [0.6697, 0.0],
+                [0.3303, 0.0],
+                [0.0764, 0.0],
+                [0.0005, 0.0],
+                [0.0, 0.0],
+                [0.0, 0.0],
+            ],
+            dtype=torch.float64,
+            device=env.DEVICE,
+        )
+        torch.testing.assert_close(results, excepted_res, rtol=0.0001, atol=0.0001)
+
+
+class TestIntegration(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    @patch("numpy.loadtxt")
+    def setUp(self, mock_loadtxt):
+        TestCaseSingleFrameWithNlist.setUp(self)
+        file_path = "dummy_path"
+        mock_loadtxt.return_value = np.array(
+            [
+                [0.005, 1.0, 2.0, 3.0],
+                [0.01, 0.8, 1.6, 2.4],
+                [0.015, 0.5, 1.0, 1.5],
+                [0.02, 0.25, 0.4, 0.75],
+            ]
+        )
+        ds = DescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+        ).to(env.DEVICE)
+        ft = InvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            1,
+            mixed_types=ds.mixed_types(),
+        ).to(env.DEVICE)
+        type_map = ["foo", "bar"]
+        dp_model = DPAtomicModel(ds, ft, type_map=type_map).to(env.DEVICE)
+        zbl_model = PairTabAtomicModel(
+            file_path, self.rcut, sum(self.sel), type_map=type_map
+        )
+        self.md0 = DPZBLLinearEnergyAtomicModel(
+            dp_model,
+            zbl_model,
+            sw_rmin=0.1,
+            sw_rmax=0.25,
+            type_map=type_map,
+        ).to(env.DEVICE)
+        self.md1 = DPZBLLinearEnergyAtomicModel.deserialize(self.md0.serialize()).to(
+            env.DEVICE
+        )
+        self.md2 = DPDPZBLLinearEnergyAtomicModel.deserialize(self.md0.serialize())
+        self.md3 = DPZBLModel(
+            dp_model, zbl_model, sw_rmin=0.1, sw_rmax=0.25, type_map=type_map
+        )
+
+    def test_self_consistency(self):
+        args = [
+            to_torch_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist]
+        ]
+        ret0 = self.md0.forward_atomic(*args)
+        ret1 = self.md1.forward_atomic(*args)
+        ret2 = self.md2.forward_atomic(self.coord_ext, self.atype_ext, self.nlist)
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["energy"]),
+            to_numpy_array(ret1["energy"]),
+        )
+
+        np.testing.assert_allclose(
+            to_numpy_array(ret0["energy"]), ret2["energy"], atol=0.001, rtol=0.001
+        )
+
+    def test_jit(self):
+        md1 = torch.jit.script(self.md1)
+        # atomic model no more export methods
+        # self.assertEqual(md1.get_rcut(), self.rcut)
+        # self.assertEqual(md1.get_type_map(), ["foo", "bar"])
+        md3 = torch.jit.script(self.md3)
+        # atomic model no more export methods
+        # self.assertEqual(md3.get_rcut(), self.rcut)
+        # self.assertEqual(md3.get_type_map(), ["foo", "bar"])
+
+
+class TestRemmapMethod(unittest.TestCase):
+    def test_valid(self):
+        atype = torch.randint(0, 3, (4, 20), device=env.DEVICE)
+        commonl = ["H", "O", "S"]
+        originl = ["Si", "H", "O", "S"]
+        mapping = DPZBLLinearEnergyAtomicModel.remap_atype(originl, commonl)
+        new_atype = mapping[atype]
+
+        def trans(atype, map):
+            idx = atype.flatten().tolist()
+            res = []
+            for i in idx:
+                res.append(map[i])
+            return res
+
+        assert trans(atype, commonl) == trans(new_atype, originl)
+
+
+if __name__ == "__main__":
+    unittest.main(warnings="ignore")
diff --git a/source/tests/pt/model/test_make_hessian_model.py b/source/tests/pt/model/test_make_hessian_model.py
new file mode 100644
index 0000000000..7d9ae2b810
--- /dev/null
+++ b/source/tests/pt/model/test_make_hessian_model.py
@@ -0,0 +1,175 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+import torch
+
+from deepmd.dpmodel.output_def import (
+    OutputVariableCategory,
+)
+from deepmd.pt.model.descriptor.se_a import (
+    DescrptSeA,
+)
+from deepmd.pt.model.model import (
+    make_hessian_model,
+)
+from deepmd.pt.model.model.dp_model import (
+    DPModel,
+)
+from deepmd.pt.model.task.ener import (
+    InvarFitting,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+    to_torch_tensor,
+)
+
+dtype = torch.float64
+
+
+def finite_hessian(f, x, delta=1e-6):
+    in_shape = x.shape
+    assert len(in_shape) == 1
+    y0 = f(x)
+    out_shape = y0.shape
+    res = np.empty(out_shape + in_shape + in_shape)
+    for iidx in np.ndindex(*in_shape):
+        for jidx in np.ndindex(*in_shape):
+            i0 = np.zeros(in_shape)
+            i1 = np.zeros(in_shape)
+            i2 = np.zeros(in_shape)
+            i3 = np.zeros(in_shape)
+            i0[iidx] += delta
+            i2[iidx] += delta
+            i1[iidx] -= delta
+            i3[iidx] -= delta
+            i0[jidx] += delta
+            i1[jidx] += delta
+            i2[jidx] -= delta
+            i3[jidx] -= delta
+            y0 = f(x + i0)
+            y1 = f(x + i1)
+            y2 = f(x + i2)
+            y3 = f(x + i3)
+            res[(Ellipsis, *iidx, *jidx)] = (y0 + y3 - y1 - y2) / (4 * delta**2.0)
+    return res
+
+
+class HessianTest:
+    def test(
+        self,
+    ):
+        # setup test case
+        places = 6
+        delta = 1e-3
+        natoms = self.nloc
+        nf = self.nf
+        nv = self.nv
+        cell0 = torch.rand([3, 3], dtype=dtype, device=env.DEVICE)
+        cell0 = 1.0 * (cell0 + cell0.T) + 5.0 * torch.eye(3, device=env.DEVICE)
+        cell1 = torch.rand([3, 3], dtype=dtype, device=env.DEVICE)
+        cell1 = 1.0 * (cell1 + cell1.T) + 5.0 * torch.eye(3, device=env.DEVICE)
+        cell = torch.stack([cell0, cell1])
+        coord = torch.rand([nf, natoms, 3], dtype=dtype, device=env.DEVICE)
+        coord = torch.matmul(coord, cell)
+        cell = cell.view([nf, 9])
+        coord = coord.view([nf, natoms * 3])
+        atype = (
+            torch.stack(
+                [
+                    torch.IntTensor([0, 0, 1]),
+                    torch.IntTensor([1, 0, 1]),
+                ]
+            )
+            .view([nf, natoms])
+            .to(env.DEVICE)
+        )
+        nfp, nap = 2, 3
+        fparam = torch.rand([nf, nfp], dtype=dtype, device=env.DEVICE)
+        aparam = torch.rand([nf, natoms * nap], dtype=dtype, device=env.DEVICE)
+        # forward hess and valu models
+        ret_dict0 = self.model_hess.forward_common(
+            coord, atype, box=cell, fparam=fparam, aparam=aparam
+        )
+        ret_dict1 = self.model_valu.forward_common(
+            coord, atype, box=cell, fparam=fparam, aparam=aparam
+        )
+        # compare hess and value models
+        torch.testing.assert_close(ret_dict0["energy"], ret_dict1["energy"])
+        ana_hess = ret_dict0["energy_derv_r_derv_r"]
+
+        # compute finite difference
+        fnt_hess = []
+        for ii in range(nf):
+
+            def np_infer(
+                xx,
+            ):
+                ret = self.model_valu.forward_common(
+                    to_torch_tensor(xx).unsqueeze(0),
+                    atype[ii].unsqueeze(0),
+                    box=cell[ii].unsqueeze(0),
+                    fparam=fparam[ii].unsqueeze(0),
+                    aparam=aparam[ii].unsqueeze(0),
+                )
+                # detach
+                ret = {kk: to_numpy_array(ret[kk]) for kk in ret}
+                return ret
+
+            def ff(xx):
+                return np_infer(xx)["energy_redu"]
+
+            xx = to_numpy_array(coord[ii])
+            fnt_hess.append(finite_hessian(ff, xx, delta=delta).squeeze())
+
+        # compare finite difference with autodiff
+        fnt_hess = np.stack(fnt_hess).reshape([nf, nv, natoms * 3, natoms * 3])
+        np.testing.assert_almost_equal(
+            fnt_hess, to_numpy_array(ana_hess), decimal=places
+        )
+
+
+class TestDPModel(unittest.TestCase, HessianTest):
+    def setUp(self):
+        torch.manual_seed(2)
+        self.nf = 2
+        self.nloc = 3
+        self.rcut = 4.0
+        self.rcut_smth = 3.0
+        self.sel = [10, 10]
+        self.nt = 2
+        self.nv = 2
+        ds = DescrptSeA(
+            self.rcut,
+            self.rcut_smth,
+            self.sel,
+            neuron=[2, 4, 8],
+            axis_neuron=2,
+        ).to(env.DEVICE)
+        ft0 = InvarFitting(
+            "energy",
+            self.nt,
+            ds.get_dim_out(),
+            self.nv,
+            mixed_types=ds.mixed_types(),
+            do_hessian=True,
+            neuron=[4, 4, 4],
+        ).to(env.DEVICE)
+        type_map = ["foo", "bar"]
+        self.model_hess = make_hessian_model(DPModel)(ds, ft0, type_map=type_map).to(
+            env.DEVICE
+        )
+        self.model_valu = DPModel.deserialize(self.model_hess.serialize())
+        self.model_hess.requires_hessian("energy")
+
+    def test_output_def(self):
+        self.assertTrue(self.model_hess.atomic_output_def()["energy"].r_hessian)
+        self.assertFalse(self.model_valu.atomic_output_def()["energy"].r_hessian)
+        self.assertTrue(self.model_hess.model_output_def()["energy"].r_hessian)
+        self.assertEqual(
+            self.model_hess.model_output_def()["energy_derv_r_derv_r"].category,
+            OutputVariableCategory.DERV_R_DERV_R,
+        )
diff --git a/source/tests/pt/model/test_mlp.py b/source/tests/pt/model/test_mlp.py
new file mode 100644
index 0000000000..ca2bb6d2cf
--- /dev/null
+++ b/source/tests/pt/model/test_mlp.py
@@ -0,0 +1,279 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import itertools
+import unittest
+
+import numpy as np
+import torch
+
+from deepmd.dpmodel.utils import EmbeddingNet as DPEmbeddingNet
+from deepmd.dpmodel.utils import FittingNet as DPFittingNet
+from deepmd.dpmodel.utils import (
+    NativeLayer,
+    NativeNet,
+)
+from deepmd.pt.model.network.mlp import (
+    MLP,
+    EmbeddingNet,
+    FittingNet,
+    MLPLayer,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.env import (
+    PRECISION_DICT,
+)
+
+
+def get_tols(prec):
+    if prec in ["single", "float32"]:
+        rtol, atol = 0.0, 1e-4
+    elif prec in ["double", "float64"]:
+        rtol, atol = 0.0, 1e-12
+    # elif prec in ["half", "float16"]:
+    #   rtol, atol=1e-2, 0
+    else:
+        raise ValueError(f"unknown prec {prec}")
+    return rtol, atol
+
+
+class TestMLPLayer(unittest.TestCase):
+    def setUp(self):
+        self.test_cases = itertools.product(
+            [(5, 5), (5, 10), (5, 8), (8, 5)],  # inp, out
+            [True, False],  # bias
+            [True, False],  # use time step
+            ["tanh", "none"],  # activation
+            [True, False],  # resnet
+            [None, [4], [3, 2]],  # prefix shapes
+            ["float32", "double"],  # precision
+        )
+
+    def test_match_native_layer(
+        self,
+    ):
+        for (ninp, nout), bias, ut, ac, resnet, ashp, prec in self.test_cases:
+            # input
+            inp_shap = [ninp]
+            if ashp is not None:
+                inp_shap = ashp + inp_shap
+            rtol, atol = get_tols(prec)
+            dtype = PRECISION_DICT[prec]
+            xx = torch.arange(np.prod(inp_shap), dtype=dtype, device=env.DEVICE).view(
+                inp_shap
+            )
+            # def mlp layer
+            ml = MLPLayer(ninp, nout, bias, ut, ac, resnet, precision=prec).to(
+                env.DEVICE
+            )
+            # check consistency
+            nl = NativeLayer.deserialize(ml.serialize())
+            np.testing.assert_allclose(
+                ml.forward(xx).detach().cpu().numpy(),
+                nl.call(xx.detach().cpu().numpy()),
+                rtol=rtol,
+                atol=atol,
+                err_msg=f"(i={ninp}, o={nout}) bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}",
+            )
+            # check self-consistency
+            ml1 = MLPLayer.deserialize(ml.serialize()).to(env.DEVICE)
+            np.testing.assert_allclose(
+                ml.forward(xx).detach().cpu().numpy(),
+                ml1.forward(xx).detach().cpu().numpy(),
+                rtol=rtol,
+                atol=atol,
+                err_msg=f"(i={ninp}, o={nout}) bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}",
+            )
+
+    def test_jit(self):
+        for (ninp, nout), bias, ut, ac, resnet, _, prec in self.test_cases:
+            ml = MLPLayer(ninp, nout, bias, ut, ac, resnet, precision=prec)
+            model = torch.jit.script(ml)
+            ml1 = MLPLayer.deserialize(ml.serialize())
+            model = torch.jit.script(ml1)
+
+
+class TestMLP(unittest.TestCase):
+    def setUp(self):
+        self.test_cases = itertools.product(
+            [[2, 2, 4, 8], [1, 3, 3]],  # inp and hiddens
+            [True, False],  # bias
+            [True, False],  # use time step
+            ["tanh", "none"],  # activation
+            [True, False],  # resnet
+            [None, [4], [3, 2]],  # prefix shapes
+            ["float32", "double"],  # precision
+        )
+
+    def test_match_native_net(
+        self,
+    ):
+        for ndims, bias, ut, ac, resnet, ashp, prec in self.test_cases:
+            # input
+            inp_shap = [ndims[0]]
+            if ashp is not None:
+                inp_shap = ashp + inp_shap
+            rtol, atol = get_tols(prec)
+            dtype = PRECISION_DICT[prec]
+            xx = torch.arange(np.prod(inp_shap), dtype=dtype, device=env.DEVICE).view(
+                inp_shap
+            )
+            # def MLP
+            layers = []
+            for ii in range(1, len(ndims)):
+                layers.append(
+                    MLPLayer(
+                        ndims[ii - 1], ndims[ii], bias, ut, ac, resnet, precision=prec
+                    ).serialize()
+                )
+            ml = MLP(layers).to(env.DEVICE)
+            # check consistency
+            nl = NativeNet.deserialize(ml.serialize())
+            np.testing.assert_allclose(
+                ml.forward(xx).detach().cpu().numpy(),
+                nl.call(xx.detach().cpu().numpy()),
+                rtol=rtol,
+                atol=atol,
+                err_msg=f"net={ndims} bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}",
+            )
+            # check self-consistency
+            ml1 = MLP.deserialize(ml.serialize()).to(env.DEVICE)
+            np.testing.assert_allclose(
+                ml.forward(xx).detach().cpu().numpy(),
+                ml1.forward(xx).detach().cpu().numpy(),
+                rtol=rtol,
+                atol=atol,
+                err_msg=f"net={ndims} bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}",
+            )
+
+    def test_jit(self):
+        for ndims, bias, ut, ac, resnet, _, prec in self.test_cases:
+            layers = []
+            for ii in range(1, len(ndims)):
+                ml = layers.append(
+                    MLPLayer(
+                        ndims[ii - 1], ndims[ii], bias, ut, ac, resnet, precision=prec
+                    ).serialize()
+                )
+            ml = MLP(ml)
+            model = torch.jit.script(ml)
+            ml1 = MLP.deserialize(ml.serialize())
+            model = torch.jit.script(ml1)
+
+
+class TestEmbeddingNet(unittest.TestCase):
+    def setUp(self):
+        self.test_cases = itertools.product(
+            [1, 3],  # inp
+            [[24, 48, 96], [24, 36]],  # and hiddens
+            ["tanh", "none"],  # activation
+            [True, False],  # resnet_dt
+            ["float32", "double"],  # precision
+        )
+
+    def test_match_embedding_net(
+        self,
+    ):
+        for idim, nn, act, idt, prec in self.test_cases:
+            # input
+            rtol, atol = get_tols(prec)
+            dtype = PRECISION_DICT[prec]
+            xx = torch.arange(idim, dtype=dtype, device=env.DEVICE)
+            # def MLP
+            ml = EmbeddingNet(idim, nn, act, idt, prec).to(env.DEVICE)
+            # check consistency
+            nl = DPEmbeddingNet.deserialize(ml.serialize())
+            np.testing.assert_allclose(
+                ml.forward(xx).detach().cpu().numpy(),
+                nl.call(xx.detach().cpu().numpy()),
+                rtol=rtol,
+                atol=atol,
+                err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}",
+            )
+            # check self-consistency
+            ml1 = EmbeddingNet.deserialize(ml.serialize()).to(env.DEVICE)
+            np.testing.assert_allclose(
+                ml.forward(xx).detach().cpu().numpy(),
+                ml1.forward(xx).detach().cpu().numpy(),
+                rtol=rtol,
+                atol=atol,
+                err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}",
+            )
+
+    def test_jit(
+        self,
+    ):
+        for idim, nn, act, idt, prec in self.test_cases:
+            # def MLP
+            ml = EmbeddingNet(idim, nn, act, idt, prec).to(env.DEVICE)
+            ml1 = EmbeddingNet.deserialize(ml.serialize()).to(env.DEVICE)
+            model = torch.jit.script(ml)
+            model = torch.jit.script(ml1)
+
+
+class TestFittingNet(unittest.TestCase):
+    def setUp(self):
+        self.test_cases = itertools.product(
+            [1, 3],  # inp
+            [1, 5],  # out
+            [[24, 48, 96], [24, 36]],  # and hiddens
+            ["tanh", "none"],  # activation
+            [True, False],  # resnet_dt
+            ["float32", "double"],  # precision
+            [True, False],  # bias_out
+        )
+
+    def test_match_fitting_net(
+        self,
+    ):
+        for idim, odim, nn, act, idt, prec, ob in self.test_cases:
+            # input
+            rtol, atol = get_tols(prec)
+            dtype = PRECISION_DICT[prec]
+            xx = torch.arange(idim, dtype=dtype, device=env.DEVICE)
+            # def MLP
+            ml = FittingNet(
+                idim,
+                odim,
+                neuron=nn,
+                activation_function=act,
+                resnet_dt=idt,
+                precision=prec,
+                bias_out=ob,
+            ).to(env.DEVICE)
+            # check consistency
+            nl = DPFittingNet.deserialize(ml.serialize())
+            np.testing.assert_allclose(
+                ml.forward(xx).detach().cpu().numpy(),
+                nl.call(xx.detach().cpu().numpy()),
+                rtol=rtol,
+                atol=atol,
+                err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}",
+            )
+            # check self-consistency
+            ml1 = FittingNet.deserialize(ml.serialize()).to(env.DEVICE)
+            np.testing.assert_allclose(
+                ml.forward(xx).detach().cpu().numpy(),
+                ml1.forward(xx).detach().cpu().numpy(),
+                rtol=rtol,
+                atol=atol,
+                err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}",
+            )
+
+    def test_jit(
+        self,
+    ):
+        for idim, odim, nn, act, idt, prec, ob in self.test_cases:
+            # def MLP
+            ml = FittingNet(
+                idim,
+                odim,
+                neuron=nn,
+                activation_function=act,
+                resnet_dt=idt,
+                precision=prec,
+                bias_out=ob,
+            ).to(env.DEVICE)
+            ml1 = FittingNet.deserialize(ml.serialize()).to(env.DEVICE)
+            model = torch.jit.script(ml)
+            model = torch.jit.script(ml1)
diff --git a/source/tests/pt/model/test_model.py b/source/tests/pt/model/test_model.py
new file mode 100644
index 0000000000..493d6e2cc3
--- /dev/null
+++ b/source/tests/pt/model/test_model.py
@@ -0,0 +1,422 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import collections
+import json
+import unittest
+
+import numpy as np
+import tensorflow.compat.v1 as tf
+import torch
+
+from deepmd.pt.utils import (
+    env,
+)
+
+tf.disable_eager_execution()
+
+from pathlib import (
+    Path,
+)
+
+from deepmd.pt.loss import (
+    EnergyStdLoss,
+)
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.utils.dataloader import (
+    DpLoaderSet,
+)
+from deepmd.pt.utils.env import (
+    DEVICE,
+)
+from deepmd.pt.utils.learning_rate import LearningRateExp as MyLRExp
+from deepmd.tf.common import (
+    data_requirement,
+    expand_sys_str,
+)
+from deepmd.tf.descriptor import DescrptSeA as DescrptSeA_tf
+from deepmd.tf.fit import (
+    EnerFitting,
+)
+from deepmd.tf.loss import (
+    EnerStdLoss,
+)
+from deepmd.tf.model import (
+    EnerModel,
+)
+from deepmd.tf.utils.data_system import (
+    DeepmdDataSystem,
+)
+from deepmd.tf.utils.learning_rate import (
+    LearningRateExp,
+)
+
+from ..test_stat import (
+    energy_data_requirement,
+)
+
+VariableState = collections.namedtuple("VariableState", ["value", "gradient"])
+
+
+def torch2tf(torch_name, last_layer_id=None):
+    fields = torch_name.split(".")
+    offset = int(fields[3] == "networks") + 1
+    element_id = int(fields[2 + offset])
+    if fields[1] == "descriptor":
+        layer_id = int(fields[4 + offset]) + 1
+        weight_type = fields[5 + offset]
+        ret = "filter_type_all/%s_%d_%d:0" % (weight_type, layer_id, element_id)
+    elif fields[1] == "fitting_net":
+        layer_id = int(fields[4 + offset])
+        weight_type = fields[5 + offset]
+        if layer_id != last_layer_id:
+            ret = "layer_%d_type_%d/%s:0" % (layer_id, element_id, weight_type)
+        else:
+            ret = "final_layer_type_%d/%s:0" % (element_id, weight_type)
+    else:
+        raise RuntimeError("Unexpected parameter name: %s" % torch_name)
+    return ret
+
+
+class DpTrainer:
+    def __init__(self):
+        with open(str(Path(__file__).parent / "water/se_e2_a.json")) as fin:
+            content = fin.read()
+        config = json.loads(content)
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        config["training"]["training_data"]["systems"] = data_file
+        config["training"]["validation_data"]["systems"] = data_file
+        model_config = config["model"]
+        self.rcut = model_config["descriptor"]["rcut"]
+        self.rcut_smth = model_config["descriptor"]["rcut_smth"]
+        self.sel = model_config["descriptor"]["sel"]
+        self.systems = config["training"]["validation_data"]["systems"]
+        if isinstance(self.systems, str):
+            self.systems = expand_sys_str(self.systems)
+        self.batch_size = config["training"]["training_data"]["batch_size"]
+        self.type_map = model_config["type_map"]
+        self.filter_neuron = model_config["descriptor"]["neuron"]
+        self.axis_neuron = model_config["descriptor"]["axis_neuron"]
+        self.n_neuron = model_config["fitting_net"]["neuron"]
+        self.data_stat_nbatch = 3
+        self.start_lr = 0.001
+        self.stop_lr = 3.51e-8
+        self.decay_steps = 500
+        self.stop_steps = 1600
+        self.start_pref_e = 1.0
+        self.limit_pref_e = 2.0
+        self.start_pref_f = 2.0
+        self.limit_pref_f = 1.0
+        self.ntypes = len(self.type_map)
+
+    def get_intermediate_state(self, num_steps=1):
+        dp_model = self._get_dp_model()
+        dp_loss = self._get_dp_loss()
+        dp_lr = self._get_dp_lr()
+        dp_ds = self._get_dp_dataset()
+        dp_model.data_stat(dp_ds)
+
+        # Build graph
+        g = tf.Graph()
+        with g.as_default():
+            place_holders = self._get_dp_placeholders(dp_ds)
+            model_pred = dp_model.build(
+                coord_=place_holders["coord"],
+                atype_=place_holders["type"],
+                natoms=place_holders["natoms_vec"],
+                box=place_holders["box"],
+                mesh=place_holders["default_mesh"],
+                input_dict=place_holders,
+            )
+            global_step = tf.train.get_or_create_global_step()
+            learning_rate = dp_lr.build(global_step, self.stop_steps)
+            l2_l, _ = dp_loss.build(
+                learning_rate=learning_rate,
+                natoms=place_holders["natoms_vec"],
+                model_dict=model_pred,
+                label_dict=place_holders,
+                suffix="test",
+            )
+            t_vars = tf.trainable_variables()
+            optimizer = tf.train.AdamOptimizer(learning_rate)
+            t_grad_and_vars = optimizer.compute_gradients(l2_l, t_vars)
+            train_op = optimizer.apply_gradients(t_grad_and_vars, global_step)
+            init_op = tf.global_variables_initializer()
+            t_heads = {
+                "loss": l2_l,
+                "energy": model_pred["energy"],
+                "force": model_pred["force"],
+                "virial": model_pred["virial"],
+                "atom_virial": model_pred["atom_virial"],
+            }
+
+        # Get statistics of each component
+        stat_dict = {
+            "descriptor.mean": dp_model.descrpt.davg,
+            "descriptor.stddev": dp_model.descrpt.dstd,
+            "fitting_net.bias_atom_e": dp_model.fitting.bias_atom_e,
+        }
+
+        # Get variables and their gradients
+        with tf.Session(graph=g) as sess:
+            sess.run(init_op)
+            for _ in range(num_steps):
+                batch = dp_ds.get_batch()
+                feeds = self._get_feed_dict(batch, place_holders)
+                sess.run(train_op, feed_dict=feeds)
+
+            batch = dp_ds.get_batch()
+            feeds = self._get_feed_dict(batch, place_holders)
+            grads_and_vars, head_dict = sess.run(
+                [t_grad_and_vars, t_heads], feed_dict=feeds
+            )
+            vs_dict = {}
+            for idx, one in enumerate(t_vars):
+                grad, var = grads_and_vars[idx]
+                vs_dict[one.name] = VariableState(var, grad)
+
+        tf.reset_default_graph()
+        # Used for reproducing
+        return batch, head_dict, stat_dict, vs_dict
+
+    def _get_dp_dataset(self):
+        data = DeepmdDataSystem(
+            systems=self.systems,
+            batch_size=self.batch_size,
+            test_size=1,
+            rcut=self.rcut,
+            type_map=self.type_map,
+            trn_all_set=True,
+        )
+        data.add_dict(data_requirement)
+        return data
+
+    def _get_dp_model(self):
+        dp_descrpt = DescrptSeA_tf(
+            rcut=self.rcut,
+            rcut_smth=self.rcut_smth,
+            sel=self.sel,
+            neuron=self.filter_neuron,
+            axis_neuron=self.axis_neuron,
+        )
+        dp_fitting = EnerFitting(
+            dp_descrpt.get_ntypes(), dp_descrpt.get_dim_out(), neuron=self.n_neuron
+        )
+        return EnerModel(
+            dp_descrpt,
+            dp_fitting,
+            type_map=self.type_map,
+            data_stat_nbatch=self.data_stat_nbatch,
+        )
+
+    def _get_dp_loss(self):
+        return EnerStdLoss(
+            starter_learning_rate=self.start_lr,
+            start_pref_e=self.start_pref_e,
+            limit_pref_e=self.limit_pref_e,
+            start_pref_f=self.start_pref_f,
+            limit_pref_f=self.limit_pref_f,
+        )
+
+    def _get_dp_lr(self):
+        return LearningRateExp(
+            start_lr=self.start_lr, stop_lr=self.stop_lr, decay_steps=self.decay_steps
+        )
+
+    def _get_dp_placeholders(self, dataset):
+        place_holders = {}
+        data_dict = dataset.get_data_dict()
+        for kk in data_dict.keys():
+            if kk == "type":
+                continue
+            prec = tf.float64
+            place_holders[kk] = tf.placeholder(prec, [None], name="t_" + kk)
+            place_holders["find_" + kk] = tf.placeholder(
+                tf.float32, name="t_find_" + kk
+            )
+        place_holders["type"] = tf.placeholder(tf.int32, [None], name="t_type")
+        place_holders["natoms_vec"] = tf.placeholder(
+            tf.int32, [self.ntypes + 2], name="t_natoms"
+        )
+        place_holders["default_mesh"] = tf.placeholder(tf.int32, [None], name="t_mesh")
+        place_holders["is_training"] = tf.placeholder(tf.bool)
+        return place_holders
+
+    def _get_feed_dict(self, batch, place_holders):
+        feed_dict = {}
+        for kk in batch.keys():
+            if kk == "find_type" or kk == "type":
+                continue
+            if "find_" in kk:
+                feed_dict[place_holders[kk]] = batch[kk]
+            else:
+                feed_dict[place_holders[kk]] = np.reshape(batch[kk], [-1])
+        for ii in ["type"]:
+            feed_dict[place_holders[ii]] = np.reshape(batch[ii], [-1])
+        for ii in ["natoms_vec", "default_mesh"]:
+            feed_dict[place_holders[ii]] = batch[ii]
+        feed_dict[place_holders["is_training"]] = True
+        return feed_dict
+
+
+class TestEnergy(unittest.TestCase):
+    def setUp(self):
+        self.dp_trainer = DpTrainer()
+        self.wanted_step = 0
+        for key in dir(self.dp_trainer):
+            if not key.startswith("_") or key == "get_intermediate_state":
+                value = getattr(self.dp_trainer, key)
+                setattr(self, key, value)
+
+    def test_consistency(self):
+        batch, head_dict, stat_dict, vs_dict = self.dp_trainer.get_intermediate_state(
+            self.wanted_step
+        )
+        # Build DeePMD graph
+        my_ds = DpLoaderSet(self.systems, self.batch_size, self.type_map)
+        my_ds.add_data_requirement(energy_data_requirement)
+        my_model = get_model(
+            model_params={
+                "descriptor": {
+                    "type": "se_e2_a",
+                    "sel": self.sel,
+                    "rcut_smth": self.rcut_smth,
+                    "rcut": self.rcut,
+                    "neuron": self.filter_neuron,
+                    "axis_neuron": self.axis_neuron,
+                },
+                "fitting_net": {"neuron": self.n_neuron, "mixed_types": False},
+                "data_stat_nbatch": self.data_stat_nbatch,
+                "type_map": self.type_map,
+            },
+        )
+        my_model.to(DEVICE)
+        my_lr = MyLRExp(self.start_lr, self.stop_lr, self.decay_steps, self.stop_steps)
+        my_loss = EnergyStdLoss(
+            starter_learning_rate=self.start_lr,
+            start_pref_e=self.start_pref_e,
+            limit_pref_e=self.limit_pref_e,
+            start_pref_f=self.start_pref_f,
+            limit_pref_f=self.limit_pref_f,
+        )
+
+        # Keep statistics consistency between 2 implentations
+        my_em = my_model.get_descriptor()
+        mean = stat_dict["descriptor.mean"].reshape([self.ntypes, my_em.get_nsel(), 4])
+        stddev = stat_dict["descriptor.stddev"].reshape(
+            [self.ntypes, my_em.get_nsel(), 4]
+        )
+        my_em.set_stat_mean_and_stddev(
+            torch.tensor(mean, device=DEVICE),
+            torch.tensor(stddev, device=DEVICE),
+        )
+        my_model.get_fitting_net().bias_atom_e = torch.tensor(
+            stat_dict["fitting_net.bias_atom_e"], device=DEVICE
+        )
+
+        # Keep parameter value consistency between 2 implentations
+        for name, param in my_model.named_parameters():
+            name = name.replace("sea.", "")
+            var_name = torch2tf(name, last_layer_id=len(self.n_neuron))
+            var = vs_dict[var_name].value
+            with torch.no_grad():
+                src = torch.from_numpy(var)
+                dst = param.data
+                # print(name)
+                # print(src.mean(), src.std())
+                # print(dst.mean(), dst.std())
+                dst.copy_(src)
+        # Start forward computing
+        tmp = np.copy(batch["natoms_vec"])
+        batch = my_ds.systems[0]._data_system._get_subdata(batch, 0)
+        batch = my_ds.systems[0]._data_system.reformat_data_torch(batch)
+        for key in ["coord", "atype", "box", "energy", "force"]:
+            batch[key] = torch.as_tensor(batch[key], device=env.DEVICE)
+            batch[key] = batch[key].unsqueeze(0)
+        batch["coord"].requires_grad_(True)
+        batch["natoms_vec"] = tmp
+        batch["natoms"] = torch.tensor(
+            batch["natoms_vec"], device=batch["coord"].device
+        ).unsqueeze(0)
+        model_input = {
+            "coord": batch["coord"].to(env.DEVICE),
+            "atype": batch["atype"].to(env.DEVICE),
+            "box": batch["box"].to(env.DEVICE),
+            "do_atomic_virial": True,
+        }
+        model_input_1 = {
+            "coord": batch["coord"].to(env.DEVICE),
+            "atype": batch["atype"].to(env.DEVICE),
+            "box": batch["box"].to(env.DEVICE),
+            "do_atomic_virial": False,
+        }
+        label = {
+            "energy": batch["energy"].to(env.DEVICE),
+            "find_energy": 1.0,
+            "force": batch["force"].to(env.DEVICE),
+            "find_force": 1.0,
+        }
+        cur_lr = my_lr.value(self.wanted_step)
+        model_predict, loss, _ = my_loss(
+            model_input, my_model, label, int(batch["natoms"][0, 0]), cur_lr
+        )
+        model_predict_1 = my_model(**model_input_1)
+        p_energy, p_force, p_virial, p_atomic_virial = (
+            model_predict["energy"],
+            model_predict["force"],
+            model_predict["virial"],
+            model_predict["atom_virial"],
+        )
+        np.testing.assert_allclose(
+            head_dict["energy"], p_energy.view(-1).cpu().detach().numpy()
+        )
+        np.testing.assert_allclose(
+            head_dict["force"],
+            p_force.view(*head_dict["force"].shape).cpu().detach().numpy(),
+        )
+        rtol = 1e-5
+        atol = 1e-8
+        np.testing.assert_allclose(
+            head_dict["loss"], loss.cpu().detach().numpy(), rtol=rtol, atol=atol
+        )
+        np.testing.assert_allclose(
+            head_dict["virial"],
+            p_virial.view(*head_dict["virial"].shape).cpu().detach().numpy(),
+        )
+        np.testing.assert_allclose(
+            head_dict["virial"],
+            model_predict_1["virial"]
+            .view(*head_dict["virial"].shape)
+            .cpu()
+            .detach()
+            .numpy(),
+        )
+        self.assertIsNone(model_predict_1.get("atom_virial", None))
+        np.testing.assert_allclose(
+            head_dict["atom_virial"],
+            p_atomic_virial.view(*head_dict["atom_virial"].shape)
+            .cpu()
+            .detach()
+            .numpy(),
+        )
+        optimizer = torch.optim.Adam(my_model.parameters(), lr=cur_lr)
+        optimizer.zero_grad()
+
+        def step(step_id):
+            bdata = self.training_data.get_trainning_batch()
+            optimizer.zero_grad()
+
+        # Compare gradient for consistency
+        loss.backward()
+
+        for name, param in my_model.named_parameters():
+            name = name.replace("sea.", "")
+            var_name = torch2tf(name, last_layer_id=len(self.n_neuron))
+            var_grad = vs_dict[var_name].gradient
+            param_grad = param.grad.cpu()
+            var_grad = torch.tensor(var_grad, device="cpu")
+            assert np.allclose(var_grad, param_grad, rtol=rtol, atol=atol)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_nlist.py b/source/tests/pt/model/test_nlist.py
new file mode 100644
index 0000000000..244b3804c8
--- /dev/null
+++ b/source/tests/pt/model/test_nlist.py
@@ -0,0 +1,220 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import torch
+
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.nlist import (
+    build_multiple_neighbor_list,
+    build_neighbor_list,
+    extend_coord_with_ghosts,
+    get_multiple_nlist_key,
+)
+from deepmd.pt.utils.region import (
+    inter2phys,
+)
+
+dtype = torch.float64
+
+
+class TestNeighList(unittest.TestCase):
+    def setUp(self):
+        self.nf = 3
+        self.nloc = 3
+        self.ns = 5 * 5 * 3
+        self.nall = self.ns * self.nloc
+        self.cell = torch.tensor(
+            [[1, 0, 0], [0.4, 0.8, 0], [0.1, 0.3, 2.1]], dtype=dtype, device=env.DEVICE
+        )
+        self.icoord = torch.tensor(
+            [[0, 0, 0], [0, 0, 0], [0.5, 0.5, 0.1]], dtype=dtype, device=env.DEVICE
+        )
+        self.atype = torch.tensor([-1, 0, 1], dtype=torch.int, device=env.DEVICE)
+        [self.cell, self.icoord, self.atype] = [
+            ii.unsqueeze(0) for ii in [self.cell, self.icoord, self.atype]
+        ]
+        self.coord = inter2phys(self.icoord, self.cell).view([-1, self.nloc * 3])
+        self.cell = self.cell.view([-1, 9])
+        [self.cell, self.coord, self.atype] = [
+            torch.tile(ii, [self.nf, 1]) for ii in [self.cell, self.coord, self.atype]
+        ]
+        self.rcut = 1.01
+        self.prec = 1e-10
+        self.nsel = [10, 10]
+        # genrated by preprocess.build_neighbor_list
+        # ref_nlist, _, _ = legacy_build_neighbor_list(
+        #   2, ecoord[0], eatype[0],
+        #   self.rcut,
+        #   torch.tensor([10,20], dtype=torch.long),
+        #   mapping[0], type_split=True, )
+        self.ref_nlist = torch.tensor(
+            [
+                [-1] * sum(self.nsel),
+                [1, 1, 1, 1, 1, 1, -1, -1, -1, -1, 2, 2, 2, 2, -1, -1, -1, -1, -1, -1],
+                [1, 1, 1, 1, -1, -1, -1, -1, -1, -1, 2, 2, 2, 2, 2, 2, -1, -1, -1, -1],
+            ],
+            device=env.DEVICE,
+        )
+
+    def test_build_notype(self):
+        ecoord, eatype, mapping = extend_coord_with_ghosts(
+            self.coord, self.atype, self.cell, self.rcut
+        )
+        nlist = build_neighbor_list(
+            ecoord,
+            eatype,
+            self.nloc,
+            self.rcut,
+            sum(self.nsel),
+            distinguish_types=False,
+        )
+        torch.testing.assert_close(nlist[0], nlist[1])
+        nlist_mask = nlist[0] == -1
+        nlist_loc = mapping[0][nlist[0]]
+        nlist_loc[nlist_mask] = -1
+        torch.testing.assert_close(
+            torch.sort(nlist_loc, dim=-1)[0],
+            torch.sort(self.ref_nlist, dim=-1)[0],
+        )
+
+    def test_build_type(self):
+        ecoord, eatype, mapping = extend_coord_with_ghosts(
+            self.coord, self.atype, self.cell, self.rcut
+        )
+        nlist = build_neighbor_list(
+            ecoord,
+            eatype,
+            self.nloc,
+            self.rcut,
+            self.nsel,
+            distinguish_types=True,
+        )
+        torch.testing.assert_close(nlist[0], nlist[1])
+        nlist_mask = nlist[0] == -1
+        nlist_loc = mapping[0][nlist[0]]
+        nlist_loc[nlist_mask] = -1
+        for ii in range(2):
+            torch.testing.assert_close(
+                torch.sort(torch.split(nlist_loc, self.nsel, dim=-1)[ii], dim=-1)[0],
+                torch.sort(torch.split(self.ref_nlist, self.nsel, dim=-1)[ii], dim=-1)[
+                    0
+                ],
+            )
+
+    def test_build_multiple_nlist(self):
+        rcuts = [1.01, 2.01]
+        nsels = [20, 80]
+        ecoord, eatype, mapping = extend_coord_with_ghosts(
+            self.coord, self.atype, self.cell, max(rcuts)
+        )
+        nlist1 = build_neighbor_list(
+            ecoord,
+            eatype,
+            self.nloc,
+            rcuts[1],
+            nsels[1] - 1,
+            distinguish_types=False,
+        )
+        pad = -1 * torch.ones(
+            [self.nf, self.nloc, 1], dtype=nlist1.dtype, device=nlist1.device
+        )
+        nlist2 = torch.cat([nlist1, pad], dim=-1)
+        nlist0 = build_neighbor_list(
+            ecoord,
+            eatype,
+            self.nloc,
+            rcuts[0],
+            nsels[0],
+            distinguish_types=False,
+        )
+        nlists = build_multiple_neighbor_list(ecoord, nlist1, rcuts, nsels)
+        for dd in range(2):
+            self.assertEqual(
+                nlists[get_multiple_nlist_key(rcuts[dd], nsels[dd])].shape[-1],
+                nsels[dd],
+            )
+        torch.testing.assert_close(
+            nlists[get_multiple_nlist_key(rcuts[0], nsels[0])],
+            nlist0,
+        )
+        torch.testing.assert_close(
+            nlists[get_multiple_nlist_key(rcuts[1], nsels[1])],
+            nlist2,
+        )
+
+    def test_extend_coord(self):
+        ecoord, eatype, mapping = extend_coord_with_ghosts(
+            self.coord, self.atype, self.cell, self.rcut
+        )
+        # expected ncopy x nloc
+        self.assertEqual(list(ecoord.shape), [self.nf, self.nall * 3])
+        self.assertEqual(list(eatype.shape), [self.nf, self.nall])
+        self.assertEqual(list(mapping.shape), [self.nf, self.nall])
+        # check the nloc part is identical with original coord
+        torch.testing.assert_close(
+            ecoord[:, : self.nloc * 3], self.coord, rtol=self.prec, atol=self.prec
+        )
+        # check the shift vectors are aligned with grid
+        shift_vec = (
+            ecoord.view([-1, self.ns, self.nloc, 3])
+            - self.coord.view([-1, self.nloc, 3])[:, None, :, :]
+        )
+        shift_vec = shift_vec.view([-1, self.nall, 3])
+        # hack!!! assumes identical cell across frames
+        shift_vec = torch.matmul(
+            shift_vec, torch.linalg.inv(self.cell.view([self.nf, 3, 3])[0])
+        )
+        # nf x nall x 3
+        shift_vec = torch.round(shift_vec)
+        # check: identical shift vecs
+        torch.testing.assert_close(
+            shift_vec[0], shift_vec[1], rtol=self.prec, atol=self.prec
+        )
+        # check: shift idx aligned with grid
+        mm, cc = torch.unique(shift_vec[0][:, 0], dim=-1, return_counts=True)
+        torch.testing.assert_close(
+            mm,
+            torch.tensor([-2, -1, 0, 1, 2], dtype=dtype, device=env.DEVICE),
+            rtol=self.prec,
+            atol=self.prec,
+        )
+        torch.testing.assert_close(
+            cc,
+            torch.tensor(
+                [self.ns * self.nloc // 5] * 5, dtype=torch.long, device=env.DEVICE
+            ),
+            rtol=self.prec,
+            atol=self.prec,
+        )
+        mm, cc = torch.unique(shift_vec[1][:, 1], dim=-1, return_counts=True)
+        torch.testing.assert_close(
+            mm,
+            torch.tensor([-2, -1, 0, 1, 2], dtype=dtype, device=env.DEVICE),
+            rtol=self.prec,
+            atol=self.prec,
+        )
+        torch.testing.assert_close(
+            cc,
+            torch.tensor(
+                [self.ns * self.nloc // 5] * 5, dtype=torch.long, device=env.DEVICE
+            ),
+            rtol=self.prec,
+            atol=self.prec,
+        )
+        mm, cc = torch.unique(shift_vec[1][:, 2], dim=-1, return_counts=True)
+        torch.testing.assert_close(
+            mm,
+            torch.tensor([-1, 0, 1], dtype=dtype, device=env.DEVICE),
+            rtol=self.prec,
+            atol=self.prec,
+        )
+        torch.testing.assert_close(
+            cc,
+            torch.tensor(
+                [self.ns * self.nloc // 3] * 3, dtype=torch.long, device=env.DEVICE
+            ),
+            rtol=self.prec,
+            atol=self.prec,
+        )
diff --git a/source/tests/pt/model/test_null_input.py b/source/tests/pt/model/test_null_input.py
new file mode 100644
index 0000000000..d5cf2475fb
--- /dev/null
+++ b/source/tests/pt/model/test_null_input.py
@@ -0,0 +1,132 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import unittest
+
+import numpy as np
+import torch
+
+from deepmd.pt.infer.deep_eval import (
+    eval_model,
+)
+from deepmd.pt.model.model import (
+    get_model,
+    get_zbl_model,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+)
+
+from .test_permutation import (
+    model_dpa1,
+    model_dpa2,
+    model_hybrid,
+    model_se_e2_a,
+    model_zbl,
+)
+
+dtype = torch.float64
+
+
+class NullTest:
+    def test_nloc_1(
+        self,
+    ):
+        natoms = 1
+        # torch.manual_seed(1000)
+        cell = torch.rand([3, 3], dtype=dtype, device=env.DEVICE)
+        # large box to exclude images
+        cell = (cell + cell.T) + 100.0 * torch.eye(3, device=env.DEVICE)
+        coord = torch.rand([natoms, 3], dtype=dtype, device=env.DEVICE)
+        atype = torch.tensor([0], dtype=torch.int32, device=env.DEVICE)
+        test_keys = ["energy", "force", "virial"]
+        result = eval_model(self.model, coord.unsqueeze(0), cell.unsqueeze(0), atype)
+        ret0 = {key: result[key].squeeze(0) for key in test_keys}
+        prec = 1e-10
+        expect_e_shape = [1]
+        expect_f = torch.zeros([natoms, 3], dtype=dtype, device=env.DEVICE)
+        expect_v = torch.zeros([9], dtype=dtype, device=env.DEVICE)
+        self.assertEqual(list(ret0["energy"].shape), expect_e_shape)
+        self.assertFalse(np.isnan(to_numpy_array(ret0["energy"])[0]))
+        torch.testing.assert_close(ret0["force"], expect_f, rtol=prec, atol=prec)
+        if not hasattr(self, "test_virial") or self.test_virial:
+            torch.testing.assert_close(ret0["virial"], expect_v, rtol=prec, atol=prec)
+
+    def test_nloc_2_far(
+        self,
+    ):
+        natoms = 2
+        cell = torch.rand([3, 3], dtype=dtype, device=env.DEVICE)
+        # large box to exclude images
+        cell = (cell + cell.T) + 3000.0 * torch.eye(3, device=env.DEVICE)
+        coord = torch.rand([1, 3], dtype=dtype, device=env.DEVICE)
+        # 2 far-away atoms
+        coord = torch.cat([coord, coord + 100.0], dim=0)
+        atype = torch.tensor([0, 2], dtype=torch.int32, device=env.DEVICE)
+        test_keys = ["energy", "force", "virial"]
+        result = eval_model(self.model, coord.unsqueeze(0), cell.unsqueeze(0), atype)
+        ret0 = {key: result[key].squeeze(0) for key in test_keys}
+        prec = 1e-10
+        expect_e_shape = [1]
+        expect_f = torch.zeros([natoms, 3], dtype=dtype, device=env.DEVICE)
+        expect_v = torch.zeros([9], dtype=dtype, device=env.DEVICE)
+        self.assertEqual(list(ret0["energy"].shape), expect_e_shape)
+        self.assertFalse(np.isnan(to_numpy_array(ret0["energy"])[0]))
+        torch.testing.assert_close(ret0["force"], expect_f, rtol=prec, atol=prec)
+        if not hasattr(self, "test_virial") or self.test_virial:
+            torch.testing.assert_close(ret0["virial"], expect_v, rtol=prec, atol=prec)
+
+
+class TestEnergyModelSeA(unittest.TestCase, NullTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_se_e2_a)
+        self.type_split = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelDPA1(unittest.TestCase, NullTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_dpa1)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelDPA2(unittest.TestCase, NullTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_dpa2)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestForceModelDPA2(unittest.TestCase, NullTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_dpa2)
+        model_params["fitting_net"]["type"] = "direct_force_ener"
+        self.type_split = True
+        self.test_virial = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelHybrid(unittest.TestCase, NullTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_hybrid)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestForceModelHybrid(unittest.TestCase, NullTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_hybrid)
+        model_params["fitting_net"]["type"] = "direct_force_ener"
+        self.type_split = True
+        self.test_virial = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelZBL(unittest.TestCase, NullTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_zbl)
+        self.type_split = False
+        self.model = get_zbl_model(model_params).to(env.DEVICE)
diff --git a/source/tests/pt/model/test_pairtab_atomic_model.py b/source/tests/pt/model/test_pairtab_atomic_model.py
new file mode 100644
index 0000000000..165e3dead7
--- /dev/null
+++ b/source/tests/pt/model/test_pairtab_atomic_model.py
@@ -0,0 +1,272 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+from unittest.mock import (
+    patch,
+)
+
+import numpy as np
+import torch
+
+from deepmd.dpmodel.atomic_model import PairTabAtomicModel as DPPairTabAtomicModel
+from deepmd.pt.model.atomic_model import (
+    PairTabAtomicModel,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+)
+
+
+class TestPairTab(unittest.TestCase):
+    @patch("numpy.loadtxt")
+    def setUp(self, mock_loadtxt) -> None:
+        file_path = "dummy_path"
+        mock_loadtxt.return_value = np.array(
+            [
+                [0.005, 1.0, 2.0, 3.0],
+                [0.01, 0.8, 1.6, 2.4],
+                [0.015, 0.5, 1.0, 1.5],
+                [0.02, 0.25, 0.4, 0.75],
+            ]
+        )
+
+        self.model = PairTabAtomicModel(
+            tab_file=file_path, rcut=0.02, sel=2, type_map=["H", "O"]
+        )
+
+        self.extended_coord = torch.tensor(
+            [
+                [
+                    [0.01, 0.01, 0.01],
+                    [0.01, 0.02, 0.01],
+                    [0.01, 0.01, 0.02],
+                    [0.02, 0.01, 0.01],
+                ],
+                [
+                    [0.01, 0.01, 0.01],
+                    [0.01, 0.02, 0.01],
+                    [0.01, 0.01, 0.02],
+                    [0.05, 0.01, 0.01],
+                ],
+            ],
+            device=env.DEVICE,
+        )
+
+        # nframes=2, nall=4
+        self.extended_atype = torch.tensor(
+            [[0, 1, 0, 1], [0, 0, 1, 1]], device=env.DEVICE
+        )
+
+        # nframes=2, nloc=2, nnei=2
+        self.nlist = torch.tensor(
+            [[[1, 2], [0, 2]], [[1, 2], [0, 3]]], device=env.DEVICE
+        )
+
+    def test_without_mask(self):
+        result = self.model.forward_atomic(
+            self.extended_coord, self.extended_atype, self.nlist
+        )
+        expected_result = torch.tensor(
+            [[[1.2000], [1.3614]], [[1.2000], [0.4000]]],
+            dtype=torch.float64,
+            device=env.DEVICE,
+        )
+
+        torch.testing.assert_close(
+            result["energy"], expected_result, rtol=0.0001, atol=0.0001
+        )
+
+    def test_with_mask(self):
+        self.nlist = torch.tensor(
+            [[[1, -1], [0, 2]], [[1, 2], [0, 3]]], device=env.DEVICE
+        )
+
+        result = self.model.forward_atomic(
+            self.extended_coord, self.extended_atype, self.nlist
+        )
+        expected_result = torch.tensor(
+            [[[0.8000], [1.3614]], [[1.2000], [0.4000]]],
+            dtype=torch.float64,
+            device=env.DEVICE,
+        )
+
+        torch.testing.assert_close(
+            result["energy"], expected_result, rtol=0.0001, atol=0.0001
+        )
+
+    def test_jit(self):
+        model = torch.jit.script(self.model)
+        # atomic model no more export methods
+        # self.assertEqual(model.get_rcut(), 0.02)
+        # self.assertEqual(model.get_type_map(), ["H", "O"])
+
+    def test_deserialize(self):
+        model1 = PairTabAtomicModel.deserialize(self.model.serialize())
+        torch.testing.assert_close(self.model.tab_data, model1.tab_data)
+        torch.testing.assert_close(self.model.tab_info, model1.tab_info)
+
+        self.nlist = torch.tensor(
+            [[[1, -1], [0, 2]], [[1, 2], [0, 3]]], device=env.DEVICE
+        )
+        result = model1.forward_atomic(
+            self.extended_coord, self.extended_atype, self.nlist
+        )
+        expected_result = self.model.forward_atomic(
+            self.extended_coord, self.extended_atype, self.nlist
+        )
+
+        torch.testing.assert_close(
+            result["energy"], expected_result["energy"], rtol=0.0001, atol=0.0001
+        )
+
+        model1 = torch.jit.script(model1)
+        # atomic model no more export methods
+        # self.assertEqual(model1.get_rcut(), 0.02)
+        # self.assertEqual(model1.get_type_map(), ["H", "O"])
+
+    def test_cross_deserialize(self):
+        model_dict = self.model.serialize()  # pytorch model to dict
+        model1 = DPPairTabAtomicModel.deserialize(model_dict)  # dict to numpy model
+        np.testing.assert_allclose(self.model.tab_data, model1.tab_data)
+        np.testing.assert_allclose(self.model.tab_info, model1.tab_info)
+
+        self.nlist = np.array([[[1, -1], [0, 2]], [[1, 2], [0, 3]]])
+        result = model1.forward_atomic(
+            self.extended_coord.cpu().numpy(),
+            self.extended_atype.cpu().numpy(),
+            self.nlist,
+        )
+        expected_result = self.model.forward_atomic(
+            self.extended_coord,
+            self.extended_atype,
+            torch.from_numpy(self.nlist).to(device=env.DEVICE),
+        )
+        np.testing.assert_allclose(
+            result["energy"], to_numpy_array(expected_result["energy"]), 0.0001, 0.0001
+        )
+
+
+class TestPairTabTwoAtoms(unittest.TestCase):
+    @patch("numpy.loadtxt")
+    def test_extrapolation_nonzero_rmax(self, mock_loadtxt) -> None:
+        """Scenarios to test.
+
+        rcut < rmax:
+            rr < rcut: use table values, or interpolate.
+            rr == rcut: use table values, or interpolate.
+            rr > rcut: should be 0
+        rcut == rmax:
+            rr < rcut: use table values, or interpolate.
+            rr == rcut: use table values, or interpolate.
+            rr > rcut: should be 0
+        rcut > rmax:
+            rr < rmax: use table values, or interpolate.
+            rr == rmax: use table values, or interpolate.
+            rmax < rr < rcut: extrapolate
+            rr >= rcut: should be 0
+
+        """
+        file_path = "dummy_path"
+        mock_loadtxt.return_value = np.array(
+            [
+                [0.005, 1.0],
+                [0.01, 0.8],
+                [0.015, 0.5],
+                [0.02, 0.25],
+            ]
+        )
+
+        # nframes=1, nall=2
+        extended_atype = torch.tensor([[0, 0]], device=env.DEVICE)
+
+        # nframes=1, nloc=2, nnei=1
+        nlist = torch.tensor([[[1], [-1]]], device=env.DEVICE)
+
+        results = []
+
+        for dist, rcut in zip(
+            [
+                0.01,
+                0.015,
+                0.020,
+                0.015,
+                0.02,
+                0.021,
+                0.015,
+                0.02,
+                0.021,
+                0.025,
+                0.026,
+                0.025,
+                0.025,
+                0.0216161,
+            ],
+            [
+                0.015,
+                0.015,
+                0.015,
+                0.02,
+                0.02,
+                0.02,
+                0.022,
+                0.022,
+                0.022,
+                0.025,
+                0.025,
+                0.03,
+                0.035,
+                0.025,
+            ],
+        ):
+            extended_coord = torch.tensor(
+                [
+                    [
+                        [0.0, 0.0, 0.0],
+                        [0.0, dist, 0.0],
+                    ],
+                ],
+                device=env.DEVICE,
+            )
+
+            model = PairTabAtomicModel(
+                tab_file=file_path, rcut=rcut, sel=2, type_map=["H"]
+            )
+            results.append(
+                model.forward_atomic(extended_coord, extended_atype, nlist)["energy"]
+            )
+
+        expected_result = torch.stack(
+            [
+                torch.tensor(
+                    [
+                        [
+                            [0.4, 0],
+                            [0.0, 0],
+                            [0.0, 0],
+                            [0.25, 0],
+                            [0, 0],
+                            [0, 0],
+                            [0.25, 0],
+                            [0.125, 0],
+                            [0.0922, 0],
+                            [0, 0],
+                            [0, 0],
+                            [0, 0],
+                            [0.0923, 0],
+                            [0.0713, 0],
+                        ]
+                    ],
+                    dtype=torch.float64,
+                    device=env.DEVICE,
+                )
+            ]
+        ).reshape(14, 2)
+        results = torch.stack(results).reshape(14, 2)
+
+        torch.testing.assert_close(results, expected_result, rtol=0.0001, atol=0.0001)
+
+
+if __name__ == "__main__":
+    unittest.main(warnings="ignore")
diff --git a/source/tests/pt/model/test_permutation.py b/source/tests/pt/model/test_permutation.py
new file mode 100644
index 0000000000..5e395eb8c0
--- /dev/null
+++ b/source/tests/pt/model/test_permutation.py
@@ -0,0 +1,394 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import unittest
+
+import torch
+
+from deepmd.pt.infer.deep_eval import (
+    eval_model,
+)
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.utils import (
+    env,
+)
+
+dtype = torch.float64
+
+model_se_e2_a = {
+    "type_map": ["O", "H", "B"],
+    "descriptor": {
+        "type": "se_e2_a",
+        "sel": [46, 92, 4],
+        "rcut_smth": 0.50,
+        "rcut": 4.00,
+        "neuron": [25, 50, 100],
+        "resnet_dt": False,
+        "axis_neuron": 16,
+        "seed": 1,
+    },
+    "fitting_net": {
+        "neuron": [24, 24, 24],
+        "resnet_dt": True,
+        "seed": 1,
+    },
+    "data_stat_nbatch": 20,
+}
+
+model_dos = {
+    "type_map": ["O", "H", "B"],
+    "descriptor": {
+        "type": "se_e2_a",
+        "sel": [46, 92, 4],
+        "rcut_smth": 0.50,
+        "rcut": 4.00,
+        "neuron": [25, 50, 100],
+        "resnet_dt": False,
+        "axis_neuron": 16,
+        "seed": 1,
+    },
+    "fitting_net": {
+        "neuron": [24, 24, 24],
+        "resnet_dt": True,
+        "seed": 1,
+        "type": "dos",
+        "numb_dos": 250,
+    },
+    "data_stat_nbatch": 20,
+}
+
+model_zbl = {
+    "type_map": ["O", "H", "B"],
+    "use_srtab": "source/tests/pt/model/water/data/zbl_tab_potential/H2O_tab_potential.txt",
+    "smin_alpha": 0.1,
+    "sw_rmin": 0.2,
+    "sw_rmax": 1.0,
+    "descriptor": {
+        "type": "se_e2_a",
+        "sel": [46, 92, 4],
+        "rcut_smth": 0.50,
+        "rcut": 6.00,
+        "neuron": [25, 50, 100],
+        "resnet_dt": False,
+        "axis_neuron": 16,
+        "seed": 1,
+    },
+    "fitting_net": {
+        "neuron": [24, 24, 24],
+        "resnet_dt": True,
+        "seed": 1,
+    },
+    "data_stat_nbatch": 20,
+}
+
+model_spin = {
+    "type_map": ["O", "H", "B"],
+    "descriptor": {
+        "type": "se_e2_a",
+        "sel": [46, 92, 4],
+        "rcut_smth": 0.50,
+        "rcut": 4.00,
+        "neuron": [25, 50, 100],
+        "resnet_dt": False,
+        "axis_neuron": 16,
+        "seed": 1,
+    },
+    "fitting_net": {
+        "neuron": [24, 24, 24],
+        "resnet_dt": True,
+        "seed": 1,
+    },
+    "data_stat_nbatch": 20,
+    "spin": {
+        "use_spin": [True, False, False],
+        "virtual_scale": [0.3140],
+        "_comment": " that's all",
+    },
+}
+
+model_dpa2 = {
+    "type_map": ["O", "H", "B"],
+    "descriptor": {
+        "type": "dpa2",
+        "repinit_rcut": 6.0,
+        "repinit_rcut_smth": 2.0,
+        "repinit_nsel": 30,
+        "repformer_rcut": 4.0,
+        "repformer_rcut_smth": 0.5,
+        "repformer_nsel": 20,
+        "repinit_neuron": [2, 4, 8],
+        "repinit_axis_neuron": 4,
+        "repinit_activation": "tanh",
+        "repformer_nlayers": 12,
+        "repformer_g1_dim": 8,
+        "repformer_g2_dim": 5,
+        "repformer_attn2_hidden": 3,
+        "repformer_attn2_nhead": 1,
+        "repformer_attn1_hidden": 5,
+        "repformer_attn1_nhead": 1,
+        "repformer_axis_dim": 4,
+        "repformer_update_h2": False,
+        "repformer_update_g1_has_conv": True,
+        "repformer_update_g1_has_grrg": True,
+        "repformer_update_g1_has_drrd": True,
+        "repformer_update_g1_has_attn": True,
+        "repformer_update_g2_has_g1g1": True,
+        "repformer_update_g2_has_attn": True,
+        "repformer_attn2_has_gate": True,
+        "repformer_add_type_ebd_to_seq": False,
+    },
+    "fitting_net": {
+        "neuron": [24, 24],
+        "resnet_dt": True,
+        "seed": 1,
+    },
+}
+
+model_dpa1 = {
+    "type_map": ["O", "H", "B"],
+    "descriptor": {
+        "type": "se_atten",
+        "sel": 40,
+        "rcut_smth": 0.5,
+        "rcut": 4.0,
+        "neuron": [25, 50, 100],
+        "axis_neuron": 16,
+        "attn": 64,
+        "attn_layer": 2,
+        "attn_dotr": True,
+        "attn_mask": False,
+        "post_ln": True,
+        "ffn": False,
+        "ffn_embed_dim": 512,
+        "activation_function": "tanh",
+        "scaling_factor": 1.0,
+        "head_num": 1,
+        "normalize": False,
+        "temperature": 1.0,
+        "set_davg_zero": True,
+        "type_one_side": True,
+    },
+    "fitting_net": {
+        "neuron": [24, 24, 24],
+        "resnet_dt": True,
+        "seed": 1,
+    },
+}
+
+
+model_hybrid = {
+    "type_map": ["O", "H", "B"],
+    "descriptor": {
+        "type": "hybrid",
+        "list": [
+            {
+                "type": "se_atten",
+                "sel": 120,
+                "rcut_smth": 0.5,
+                "rcut": 6.0,
+                "neuron": [25, 50, 100],
+                "axis_neuron": 16,
+                "attn": 128,
+                "attn_layer": 0,
+                "attn_dotr": True,
+                "attn_mask": False,
+                "post_ln": True,
+                "ffn": False,
+                "ffn_embed_dim": 1024,
+                "activation_function": "tanh",
+                "scaling_factor": 1.0,
+                "head_num": 1,
+                "normalize": True,
+                "temperature": 1.0,
+            },
+            {
+                "type": "dpa2",
+                "repinit_rcut": 6.0,
+                "repinit_rcut_smth": 2.0,
+                "repinit_nsel": 30,
+                "repformer_rcut": 4.0,
+                "repformer_rcut_smth": 0.5,
+                "repformer_nsel": 10,
+                "repinit_neuron": [2, 4, 8],
+                "repinit_axis_neuron": 4,
+                "repinit_activation": "tanh",
+                "repformer_nlayers": 12,
+                "repformer_g1_dim": 8,
+                "repformer_g2_dim": 5,
+                "repformer_attn2_hidden": 3,
+                "repformer_attn2_nhead": 1,
+                "repformer_attn1_hidden": 5,
+                "repformer_attn1_nhead": 1,
+                "repformer_axis_dim": 4,
+                "repformer_update_h2": False,
+                "repformer_update_g1_has_conv": True,
+                "repformer_update_g1_has_grrg": True,
+                "repformer_update_g1_has_drrd": True,
+                "repformer_update_g1_has_attn": True,
+                "repformer_update_g2_has_g1g1": True,
+                "repformer_update_g2_has_attn": True,
+                "repformer_attn2_has_gate": True,
+                "repformer_add_type_ebd_to_seq": False,
+            },
+        ],
+    },
+    "fitting_net": {
+        "neuron": [240, 240, 240],
+        "resnet_dt": True,
+        "seed": 1,
+        "_comment": " that's all",
+    },
+    "_comment": " that's all",
+}
+
+
+class PermutationTest:
+    def test(
+        self,
+    ):
+        natoms = 5
+        cell = torch.rand([3, 3], dtype=dtype, device=env.DEVICE)
+        cell = (cell + cell.T) + 5.0 * torch.eye(3, device=env.DEVICE)
+        coord = torch.rand([natoms, 3], dtype=dtype, device=env.DEVICE)
+        spin = torch.rand([natoms, 3], dtype=dtype, device=env.DEVICE)
+        coord = torch.matmul(coord, cell)
+        atype = torch.tensor([0, 0, 0, 1, 1], dtype=torch.int32, device=env.DEVICE)
+        idx_perm = [1, 0, 4, 3, 2]
+        test_spin = getattr(self, "test_spin", False)
+        if not test_spin:
+            test_keys = ["energy", "force", "virial"]
+        else:
+            test_keys = ["energy", "force", "force_mag", "virial"]
+        result_0 = eval_model(
+            self.model,
+            coord.unsqueeze(0),
+            cell.unsqueeze(0),
+            atype,
+            spins=spin.unsqueeze(0),
+        )
+        ret0 = {key: result_0[key].squeeze(0) for key in test_keys}
+        result_1 = eval_model(
+            self.model,
+            coord[idx_perm].unsqueeze(0),
+            cell.unsqueeze(0),
+            atype[idx_perm],
+            spins=spin[idx_perm].unsqueeze(0),
+        )
+        ret1 = {key: result_1[key].squeeze(0) for key in test_keys}
+        prec = 1e-10
+        for key in test_keys:
+            if key in ["energy"]:
+                torch.testing.assert_close(ret0[key], ret1[key], rtol=prec, atol=prec)
+            elif key in ["force", "force_mag"]:
+                torch.testing.assert_close(
+                    ret0[key][idx_perm], ret1[key], rtol=prec, atol=prec
+                )
+            elif key == "virial":
+                if not hasattr(self, "test_virial") or self.test_virial:
+                    torch.testing.assert_close(
+                        ret0[key], ret1[key], rtol=prec, atol=prec
+                    )
+            else:
+                raise RuntimeError(f"Unexpected test key {key}")
+
+
+class TestEnergyModelSeA(unittest.TestCase, PermutationTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_se_e2_a)
+        self.type_split = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestDOSModelSeA(unittest.TestCase, PermutationTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_dos)
+        self.type_split = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelDPA1(unittest.TestCase, PermutationTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_dpa1)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelDPA2(unittest.TestCase, PermutationTest):
+    def setUp(self):
+        model_params_sample = copy.deepcopy(model_dpa2)
+        model_params_sample["descriptor"]["rcut"] = model_params_sample["descriptor"][
+            "repinit_rcut"
+        ]
+        model_params_sample["descriptor"]["sel"] = model_params_sample["descriptor"][
+            "repinit_nsel"
+        ]
+        model_params = copy.deepcopy(model_dpa2)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestForceModelDPA2(unittest.TestCase, PermutationTest):
+    def setUp(self):
+        model_params_sample = copy.deepcopy(model_dpa2)
+        model_params_sample["descriptor"]["rcut"] = model_params_sample["descriptor"][
+            "repinit_rcut"
+        ]
+        model_params_sample["descriptor"]["sel"] = model_params_sample["descriptor"][
+            "repinit_nsel"
+        ]
+        model_params = copy.deepcopy(model_dpa2)
+        model_params["fitting_net"]["type"] = "direct_force_ener"
+        self.type_split = True
+        self.test_virial = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelHybrid(unittest.TestCase, PermutationTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_hybrid)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestForceModelHybrid(unittest.TestCase, PermutationTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_hybrid)
+        model_params["fitting_net"]["type"] = "direct_force_ener"
+        self.type_split = True
+        self.test_virial = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelZBL(unittest.TestCase, PermutationTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_zbl)
+        self.type_split = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelSpinSeA(unittest.TestCase, PermutationTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_spin)
+        self.type_split = False
+        self.test_spin = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+# class TestEnergyFoo(unittest.TestCase):
+#   def test(self):
+#     model_params = model_dpau
+#     self.model = EnergyModelDPAUni(model_params).to(env.DEVICE)
+
+#     natoms = 5
+#     cell = torch.rand([3, 3], dtype=dtype)
+#     cell = (cell + cell.T) + 5. * torch.eye(3)
+#     coord = torch.rand([natoms, 3], dtype=dtype)
+#     coord = torch.matmul(coord, cell)
+#     atype = torch.IntTensor([0, 0, 0, 1, 1])
+#     idx_perm = [1, 0, 4, 3, 2]
+#     ret0 = infer_model(self.model, coord, cell, atype, type_split=True)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_permutation_denoise.py b/source/tests/pt/model/test_permutation_denoise.py
new file mode 100644
index 0000000000..3b6be0c495
--- /dev/null
+++ b/source/tests/pt/model/test_permutation_denoise.py
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import unittest
+
+import torch
+
+from deepmd.pt.infer.deep_eval import (
+    eval_model,
+)
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.utils import (
+    env,
+)
+
+from .test_permutation import (  # model_dpau,
+    model_dpa1,
+    model_dpa2,
+    model_hybrid,
+)
+
+dtype = torch.float64
+
+model_dpa1 = copy.deepcopy(model_dpa1)
+model_dpa2 = copy.deepcopy(model_dpa2)
+model_hybrid = copy.deepcopy(model_hybrid)
+model_dpa1["type_map"] = ["O", "H", "B", "MASKED_TOKEN"]
+model_dpa1.pop("fitting_net")
+model_dpa2["type_map"] = ["O", "H", "B", "MASKED_TOKEN"]
+model_dpa2.pop("fitting_net")
+model_hybrid["type_map"] = ["O", "H", "B", "MASKED_TOKEN"]
+model_hybrid.pop("fitting_net")
+
+
+class PermutationDenoiseTest:
+    def test(
+        self,
+    ):
+        natoms = 5
+        cell = torch.rand([3, 3], dtype=dtype).to(env.DEVICE)
+        cell = (cell + cell.T) + 5.0 * torch.eye(3).to(env.DEVICE)
+        coord = torch.rand([natoms, 3], dtype=dtype).to(env.DEVICE)
+        coord = torch.matmul(coord, cell)
+        atype = torch.IntTensor([0, 0, 0, 1, 1]).to(env.DEVICE)
+        idx_perm = [1, 0, 4, 3, 2]
+        updated_c0, logits0 = eval_model(
+            self.model, coord.unsqueeze(0), cell.unsqueeze(0), atype, denoise=True
+        )
+        ret0 = {"updated_coord": updated_c0.squeeze(0), "logits": logits0.squeeze(0)}
+        updated_c1, logits1 = eval_model(
+            self.model,
+            coord[idx_perm].unsqueeze(0),
+            cell.unsqueeze(0),
+            atype[idx_perm],
+            denoise=True,
+        )
+        ret1 = {"updated_coord": updated_c1.squeeze(0), "logits": logits1.squeeze(0)}
+        prec = 1e-10
+        torch.testing.assert_close(
+            ret0["updated_coord"][idx_perm], ret1["updated_coord"], rtol=prec, atol=prec
+        )
+        torch.testing.assert_close(
+            ret0["logits"][idx_perm], ret1["logits"], rtol=prec, atol=prec
+        )
+
+
+@unittest.skip("support of the denoise is temporally disabled")
+class TestDenoiseModelDPA1(unittest.TestCase, PermutationDenoiseTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_dpa1)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+@unittest.skip("support of the denoise is temporally disabled")
+class TestDenoiseModelDPA2(unittest.TestCase, PermutationDenoiseTest):
+    def setUp(self):
+        model_params_sample = copy.deepcopy(model_dpa2)
+        model_params_sample["descriptor"]["rcut"] = model_params_sample["descriptor"][
+            "repinit_rcut"
+        ]
+        model_params_sample["descriptor"]["sel"] = model_params_sample["descriptor"][
+            "repinit_nsel"
+        ]
+        model_params = copy.deepcopy(model_dpa2)
+        self.type_split = True
+        self.model = get_model(
+            model_params,
+        ).to(env.DEVICE)
+
+
+# @unittest.skip("hybrid not supported at the moment")
+# class TestDenoiseModelHybrid(unittest.TestCase, TestPermutationDenoise):
+#     def setUp(self):
+#         model_params = copy.deepcopy(model_hybrid_denoise)
+#         self.type_split = True
+#         self.model = get_model(model_params).to(env.DEVICE)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_polar_stat.py b/source/tests/pt/model/test_polar_stat.py
new file mode 100644
index 0000000000..3d72c6e8fa
--- /dev/null
+++ b/source/tests/pt/model/test_polar_stat.py
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+import torch
+
+from deepmd.pt.model.task.polarizability import (
+    PolarFittingNet,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+)
+from deepmd.tf.fit.polar import (
+    PolarFittingSeA,
+)
+
+
+class TestConsistency(unittest.TestCase):
+    def setUp(self) -> None:
+        types = torch.randint(0, 4, (1, 5), device=env.DEVICE)
+        types = torch.cat((types, types, types), dim=0)
+        types[:, -1] = 3
+        ntypes = 4
+        atomic_polarizability = torch.rand((3, 5, 9), device=env.DEVICE)
+        polarizability = torch.rand((3, 9), device=env.DEVICE)
+        find_polarizability = torch.rand(1, device=env.DEVICE)
+        find_atomic_polarizability = torch.rand(1, device=env.DEVICE)
+        self.sampled = [
+            {
+                "atype": types,
+                "find_atomic_polarizability": find_atomic_polarizability,
+                "atomic_polarizability": atomic_polarizability,
+                "polarizability": polarizability,
+                "find_polarizability": find_polarizability,
+            }
+        ]
+        self.all_stat = {
+            k: [v.numpy(force=True)] for d in self.sampled for k, v in d.items()
+        }
+        self.all_stat["type"] = self.all_stat.pop("atype")
+        self.tfpolar = PolarFittingSeA(
+            ntypes=ntypes,
+            dim_descrpt=1,
+            embedding_width=1,
+            sel_type=list(range(ntypes)),
+        )
+        self.ptpolar = PolarFittingNet(
+            ntypes=ntypes,
+            dim_descrpt=1,
+            embedding_width=1,
+        )
+
+    def test_atomic_consistency(self):
+        self.tfpolar.compute_output_stats(self.all_stat)
+        tfbias = self.tfpolar.constant_matrix
+        self.ptpolar.compute_output_stats(self.sampled)
+        ptbias = self.ptpolar.constant_matrix
+        np.testing.assert_allclose(tfbias, to_numpy_array(ptbias))
+
+    def test_global_consistency(self):
+        self.sampled[0]["find_atomic_polarizability"] = -1
+        self.sampled[0]["polarizability"] = self.sampled[0][
+            "atomic_polarizability"
+        ].sum(dim=1)
+        self.all_stat["find_atomic_polarizability"] = [-1]
+        self.all_stat["polarizability"] = [
+            self.all_stat["atomic_polarizability"][0].sum(axis=1)
+        ]
+        self.tfpolar.compute_output_stats(self.all_stat)
+        tfbias = self.tfpolar.constant_matrix
+        self.ptpolar.compute_output_stats(self.sampled)
+        ptbias = self.ptpolar.constant_matrix
+        np.testing.assert_allclose(tfbias, to_numpy_array(ptbias), rtol=1e-5, atol=1e-5)
diff --git a/source/tests/pt/model/test_polarizability_fitting.py b/source/tests/pt/model/test_polarizability_fitting.py
new file mode 100644
index 0000000000..b1a5e3f730
--- /dev/null
+++ b/source/tests/pt/model/test_polarizability_fitting.py
@@ -0,0 +1,353 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import itertools
+import os
+import unittest
+
+import numpy as np
+import torch
+from scipy.stats import (
+    special_ortho_group,
+)
+
+from deepmd.dpmodel.fitting import PolarFitting as DPPolarFitting
+from deepmd.infer.deep_polar import (
+    DeepPolar,
+)
+from deepmd.pt.model.descriptor.se_a import (
+    DescrptSeA,
+)
+from deepmd.pt.model.model.polar_model import (
+    PolarModel,
+)
+from deepmd.pt.model.task.polarizability import (
+    PolarFittingNet,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.nlist import (
+    extend_input_and_build_neighbor_list,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+)
+
+from .test_env_mat import (
+    TestCaseSingleFrameWithNlist,
+)
+
+dtype = env.GLOBAL_PT_FLOAT_PRECISION
+
+
+class TestDipoleFitting(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+        self.rng = np.random.default_rng()
+        self.nf, self.nloc, _ = self.nlist.shape
+        self.dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE)
+        self.scale = self.rng.uniform(0, 1, self.nt).tolist()
+
+    def test_consistency(
+        self,
+    ):
+        rd0, gr, _, _, _ = self.dd0(
+            torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE),
+            torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE),
+            torch.tensor(self.nlist, dtype=int, device=env.DEVICE),
+        )
+        atype = torch.tensor(
+            self.atype_ext[:, : self.nloc], dtype=int, device=env.DEVICE
+        )
+
+        for mixed_types, nfp, nap, fit_diag, scale in itertools.product(
+            [True, False],
+            [0, 3],
+            [0, 4],
+            [True, False],
+            [None, self.scale],
+        ):
+            ft0 = PolarFittingNet(
+                self.nt,
+                self.dd0.dim_out,
+                embedding_width=self.dd0.get_dim_emb(),
+                numb_fparam=nfp,
+                numb_aparam=nap,
+                mixed_types=mixed_types,
+                fit_diag=fit_diag,
+                scale=scale,
+            ).to(env.DEVICE)
+            ft1 = DPPolarFitting.deserialize(ft0.serialize())
+            ft2 = PolarFittingNet.deserialize(ft0.serialize())
+            ft3 = DPPolarFitting.deserialize(ft1.serialize())
+
+            if nfp > 0:
+                ifp = torch.tensor(
+                    self.rng.normal(size=(self.nf, nfp)), dtype=dtype, device=env.DEVICE
+                )
+            else:
+                ifp = None
+            if nap > 0:
+                iap = torch.tensor(
+                    self.rng.normal(size=(self.nf, self.nloc, nap)),
+                    dtype=dtype,
+                    device=env.DEVICE,
+                )
+            else:
+                iap = None
+
+            ret0 = ft0(rd0, atype, gr, fparam=ifp, aparam=iap)
+            ret1 = ft1(
+                rd0.detach().cpu().numpy(),
+                atype.detach().cpu().numpy(),
+                gr.detach().cpu().numpy(),
+                fparam=to_numpy_array(ifp),
+                aparam=to_numpy_array(iap),
+            )
+            ret2 = ft2(rd0, atype, gr, fparam=ifp, aparam=iap)
+            ret3 = ft3(
+                rd0.detach().cpu().numpy(),
+                atype.detach().cpu().numpy(),
+                gr.detach().cpu().numpy(),
+                fparam=to_numpy_array(ifp),
+                aparam=to_numpy_array(iap),
+            )
+            np.testing.assert_allclose(
+                to_numpy_array(ret0["polar"]),
+                ret1["polar"],
+            )
+            np.testing.assert_allclose(
+                to_numpy_array(ret0["polar"]),
+                to_numpy_array(ret2["polar"]),
+            )
+            np.testing.assert_allclose(
+                to_numpy_array(ret0["polar"]),
+                ret3["polar"],
+            )
+
+    def test_jit(
+        self,
+    ):
+        for mixed_types, nfp, nap, fit_diag in itertools.product(
+            [True, False],
+            [0, 3],
+            [0, 4],
+            [True, False],
+        ):
+            ft0 = PolarFittingNet(
+                self.nt,
+                self.dd0.dim_out,
+                embedding_width=self.dd0.get_dim_emb(),
+                numb_fparam=nfp,
+                numb_aparam=nap,
+                mixed_types=mixed_types,
+                fit_diag=fit_diag,
+            ).to(env.DEVICE)
+            torch.jit.script(ft0)
+
+
+class TestEquivalence(unittest.TestCase):
+    def setUp(self) -> None:
+        self.natoms = 5
+        self.rcut = 4
+        self.rcut_smth = 0.5
+        self.sel = [46, 92, 4]
+        self.nf = 1
+        self.nt = 3
+        self.rng = np.random.default_rng()
+        self.coord = 2 * torch.rand([self.natoms, 3], dtype=dtype, device=env.DEVICE)
+        self.shift = torch.tensor([4, 4, 4], dtype=dtype, device=env.DEVICE)
+        self.atype = torch.tensor([0, 0, 0, 1, 1], dtype=torch.int32, device=env.DEVICE)
+        self.dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE)
+        self.cell = torch.rand([3, 3], dtype=dtype, device=env.DEVICE)
+        self.cell = (self.cell + self.cell.T) + 5.0 * torch.eye(3, device=env.DEVICE)
+        self.scale = self.rng.uniform(0, 1, self.nt).tolist()
+
+    def test_rot(self):
+        atype = self.atype.reshape(1, 5)
+        rmat = torch.tensor(special_ortho_group.rvs(3), dtype=dtype, device=env.DEVICE)
+        coord_rot = torch.matmul(self.coord, rmat)
+
+        for mixed_types, nfp, nap, fit_diag, scale in itertools.product(
+            [True, False],
+            [0, 3],
+            [0, 4],
+            [True, False],
+            [None, self.scale],
+        ):
+            ft0 = PolarFittingNet(
+                self.nt,
+                self.dd0.dim_out,  # dim_descrpt
+                embedding_width=self.dd0.get_dim_emb(),
+                numb_fparam=nfp,
+                numb_aparam=nap,
+                mixed_types=True,
+                fit_diag=fit_diag,
+                scale=scale,
+            ).to(env.DEVICE)
+            if nfp > 0:
+                ifp = torch.tensor(
+                    self.rng.normal(size=(self.nf, nfp)), dtype=dtype, device=env.DEVICE
+                )
+            else:
+                ifp = None
+            if nap > 0:
+                iap = torch.tensor(
+                    self.rng.normal(size=(self.nf, self.natoms, nap)),
+                    dtype=dtype,
+                    device=env.DEVICE,
+                )
+            else:
+                iap = None
+
+            res = []
+            for xyz in [self.coord, coord_rot]:
+                (
+                    extended_coord,
+                    extended_atype,
+                    _,
+                    nlist,
+                ) = extend_input_and_build_neighbor_list(
+                    xyz + self.shift, atype, self.rcut, self.sel, mixed_types
+                )
+
+                rd0, gr0, _, _, _ = self.dd0(
+                    extended_coord,
+                    extended_atype,
+                    nlist,
+                )
+
+                ret0 = ft0(rd0, extended_atype, gr0, fparam=ifp, aparam=iap)
+                res.append(ret0["polar"])
+            np.testing.assert_allclose(
+                to_numpy_array(res[1]),
+                to_numpy_array(
+                    torch.matmul(
+                        rmat.T,
+                        torch.matmul(res[0], rmat),
+                    )
+                ),
+            )
+
+    def test_permu(self):
+        coord = torch.matmul(self.coord, self.cell)
+        for fit_diag, scale in itertools.product([True, False], [None, self.scale]):
+            ft0 = PolarFittingNet(
+                self.nt,
+                self.dd0.dim_out,
+                embedding_width=self.dd0.get_dim_emb(),
+                numb_fparam=0,
+                numb_aparam=0,
+                mixed_types=True,
+                fit_diag=fit_diag,
+                scale=scale,
+            ).to(env.DEVICE)
+            res = []
+            for idx_perm in [[0, 1, 2, 3, 4], [1, 0, 4, 3, 2]]:
+                atype = self.atype[idx_perm].reshape(1, 5)
+                (
+                    extended_coord,
+                    extended_atype,
+                    _,
+                    nlist,
+                ) = extend_input_and_build_neighbor_list(
+                    coord[idx_perm], atype, self.rcut, self.sel, False
+                )
+
+                rd0, gr0, _, _, _ = self.dd0(
+                    extended_coord,
+                    extended_atype,
+                    nlist,
+                )
+
+                ret0 = ft0(rd0, extended_atype, gr0, fparam=None, aparam=None)
+                res.append(ret0["polar"])
+
+            np.testing.assert_allclose(
+                to_numpy_array(res[0][:, idx_perm]),
+                to_numpy_array(res[1]),
+            )
+
+    def test_trans(self):
+        atype = self.atype.reshape(1, 5)
+        coord_s = torch.matmul(
+            torch.remainder(
+                torch.matmul(self.coord + self.shift, torch.linalg.inv(self.cell)), 1.0
+            ),
+            self.cell,
+        )
+        for fit_diag, scale in itertools.product([True, False], [None, self.scale]):
+            ft0 = PolarFittingNet(
+                self.nt,
+                self.dd0.dim_out,
+                embedding_width=self.dd0.get_dim_emb(),
+                numb_fparam=0,
+                numb_aparam=0,
+                mixed_types=True,
+                fit_diag=fit_diag,
+                scale=scale,
+            ).to(env.DEVICE)
+            res = []
+            for xyz in [self.coord, coord_s]:
+                (
+                    extended_coord,
+                    extended_atype,
+                    _,
+                    nlist,
+                ) = extend_input_and_build_neighbor_list(
+                    xyz, atype, self.rcut, self.sel, False
+                )
+
+                rd0, gr0, _, _, _ = self.dd0(
+                    extended_coord,
+                    extended_atype,
+                    nlist,
+                )
+
+                ret0 = ft0(rd0, extended_atype, gr0, fparam=0, aparam=0)
+                res.append(ret0["polar"])
+
+            np.testing.assert_allclose(to_numpy_array(res[0]), to_numpy_array(res[1]))
+
+
+class TestDipoleModel(unittest.TestCase):
+    def setUp(self):
+        self.natoms = 5
+        self.rcut = 4.0
+        self.nt = 3
+        self.rcut_smth = 0.5
+        self.sel = [46, 92, 4]
+        self.nf = 1
+        self.coord = 2 * torch.rand([self.natoms, 3], dtype=dtype, device="cpu")
+        cell = torch.rand([3, 3], dtype=dtype, device="cpu")
+        self.cell = (cell + cell.T) + 5.0 * torch.eye(3, device="cpu")
+        self.atype = torch.IntTensor([0, 0, 0, 1, 1], device="cpu")
+        self.dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE)
+        self.ft0 = PolarFittingNet(
+            self.nt,
+            self.dd0.dim_out,
+            embedding_width=self.dd0.get_dim_emb(),
+            numb_fparam=0,
+            numb_aparam=0,
+            mixed_types=True,
+        ).to(env.DEVICE)
+        self.type_mapping = ["O", "H", "B"]
+        self.model = PolarModel(self.dd0, self.ft0, self.type_mapping)
+        self.file_path = "model_output.pth"
+
+    def test_deepdipole_infer(self):
+        atype = self.atype.view(self.nf, self.natoms)
+        coord = self.coord.reshape(1, 5, 3)
+        cell = self.cell.reshape(1, 9)
+        jit_md = torch.jit.script(self.model)
+        torch.jit.save(jit_md, self.file_path)
+        load_md = DeepPolar(self.file_path)
+        load_md.eval(coords=coord, atom_types=atype, cells=cell, atomic=True)
+        load_md.eval(coords=coord, atom_types=atype, cells=cell, atomic=False)
+
+    def tearDown(self) -> None:
+        if os.path.exists(self.file_path):
+            os.remove(self.file_path)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_region.py b/source/tests/pt/model/test_region.py
new file mode 100644
index 0000000000..b06f4221fd
--- /dev/null
+++ b/source/tests/pt/model/test_region.py
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+import torch
+
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.preprocess import (
+    Region3D,
+)
+from deepmd.pt.utils.region import (
+    inter2phys,
+    to_face_distance,
+)
+
+dtype = torch.float64
+
+
+class TestRegion(unittest.TestCase):
+    def setUp(self):
+        self.cell = torch.tensor(
+            [[1, 0, 0], [0.4, 0.8, 0], [0.1, 0.3, 2.1]], dtype=dtype, device="cpu"
+        )
+        self.cell = self.cell.unsqueeze(0).unsqueeze(0)
+        self.cell = torch.tile(self.cell, [4, 5, 1, 1])
+        self.prec = 1e-8
+
+    def test_inter_to_phys(self):
+        inter = torch.rand([4, 5, 3, 3], dtype=dtype, device="cpu")
+        phys = inter2phys(inter, self.cell)
+        for ii in range(4):
+            for jj in range(5):
+                expected_phys = torch.matmul(inter[ii, jj], self.cell[ii, jj])
+                torch.testing.assert_close(
+                    phys[ii, jj], expected_phys, rtol=self.prec, atol=self.prec
+                )
+
+    def test_to_face_dist(self):
+        cell0 = self.cell[0][0].numpy()
+        vol = np.linalg.det(cell0)
+        # area of surfaces xy, xz, yz
+        sxy = np.linalg.norm(np.cross(cell0[0], cell0[1]))
+        sxz = np.linalg.norm(np.cross(cell0[0], cell0[2]))
+        syz = np.linalg.norm(np.cross(cell0[1], cell0[2]))
+        # vol / area gives distance
+        dz = vol / sxy
+        dy = vol / sxz
+        dx = vol / syz
+        expected = torch.tensor([dx, dy, dz], device="cpu")
+        dists = to_face_distance(self.cell)
+        for ii in range(4):
+            for jj in range(5):
+                torch.testing.assert_close(
+                    dists[ii][jj], expected, rtol=self.prec, atol=self.prec
+                )
+
+
+class TestLegacyRegion(unittest.TestCase):
+    def setUp(self):
+        self.cell = torch.tensor(
+            [[1, 0, 0], [0.4, 0.8, 0], [0.1, 0.3, 2.1]], dtype=dtype, device=env.DEVICE
+        )
+        self.prec = 1e-6
+
+    def test_inter_to_phys(self):
+        inter = torch.rand([3, 3], dtype=dtype, device=env.DEVICE)
+        reg = Region3D(self.cell)
+        phys = reg.inter2phys(inter)
+        expected_phys = torch.matmul(inter, self.cell)
+        torch.testing.assert_close(phys, expected_phys, rtol=self.prec, atol=self.prec)
+
+    def test_inter_to_inter(self):
+        inter = torch.rand([3, 3], dtype=dtype, device=env.DEVICE)
+        reg = Region3D(self.cell)
+        new_inter = reg.phys2inter(reg.inter2phys(inter))
+        torch.testing.assert_close(inter, new_inter, rtol=self.prec, atol=self.prec)
+
+    def test_to_face_dist(self):
+        pass
diff --git a/source/tests/pt/model/test_rot.py b/source/tests/pt/model/test_rot.py
new file mode 100644
index 0000000000..cbf09ecf40
--- /dev/null
+++ b/source/tests/pt/model/test_rot.py
@@ -0,0 +1,219 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import unittest
+
+import torch
+
+from deepmd.pt.infer.deep_eval import (
+    eval_model,
+)
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.utils import (
+    env,
+)
+
+from .test_permutation import (  # model_dpau,
+    model_dos,
+    model_dpa1,
+    model_dpa2,
+    model_hybrid,
+    model_se_e2_a,
+    model_spin,
+    model_zbl,
+)
+
+dtype = torch.float64
+
+
+class RotTest:
+    def test(
+        self,
+    ):
+        prec = 1e-10
+        natoms = 5
+        cell = 10.0 * torch.eye(3, dtype=dtype, device=env.DEVICE)
+        coord = 2 * torch.rand([natoms, 3], dtype=dtype, device=env.DEVICE)
+        spin = 2 * torch.rand([natoms, 3], dtype=dtype, device=env.DEVICE)
+        shift = torch.tensor([4, 4, 4], dtype=dtype, device=env.DEVICE)
+        atype = torch.tensor([0, 0, 0, 1, 1], dtype=torch.int32, device=env.DEVICE)
+        from scipy.stats import (
+            special_ortho_group,
+        )
+
+        test_spin = getattr(self, "test_spin", False)
+        if not test_spin:
+            test_keys = ["energy", "force", "virial"]
+        else:
+            test_keys = ["energy", "force", "force_mag"]
+        rmat = torch.tensor(special_ortho_group.rvs(3), dtype=dtype, device=env.DEVICE)
+
+        # rotate only coord and shift to the center of cell
+        coord_rot = torch.matmul(coord, rmat)
+        spin_rot = torch.matmul(spin, rmat)
+        result_0 = eval_model(
+            self.model,
+            (coord + shift).unsqueeze(0),
+            cell.unsqueeze(0),
+            atype,
+            spins=spin.unsqueeze(0),
+        )
+        ret0 = {key: result_0[key].squeeze(0) for key in test_keys}
+        result_1 = eval_model(
+            self.model,
+            (coord_rot + shift).unsqueeze(0),
+            cell.unsqueeze(0),
+            atype,
+            spins=spin_rot.unsqueeze(0),
+        )
+        ret1 = {key: result_1[key].squeeze(0) for key in test_keys}
+        for key in test_keys:
+            if key in ["energy"]:
+                torch.testing.assert_close(ret0[key], ret1[key], rtol=prec, atol=prec)
+            elif key in ["force", "force_mag"]:
+                torch.testing.assert_close(
+                    torch.matmul(ret0[key], rmat), ret1[key], rtol=prec, atol=prec
+                )
+            elif key == "virial":
+                if not hasattr(self, "test_virial") or self.test_virial:
+                    torch.testing.assert_close(
+                        torch.matmul(
+                            rmat.T, torch.matmul(ret0[key].view([3, 3]), rmat)
+                        ),
+                        ret1[key].view([3, 3]),
+                        rtol=prec,
+                        atol=prec,
+                    )
+            else:
+                raise RuntimeError(f"Unexpected test key {key}")
+        # rotate coord and cell
+        torch.manual_seed(0)
+        cell = torch.rand([3, 3], dtype=dtype, device=env.DEVICE)
+        cell = (cell + cell.T) + 5.0 * torch.eye(3, device=env.DEVICE)
+        coord = torch.rand([natoms, 3], dtype=dtype, device=env.DEVICE)
+        coord = torch.matmul(coord, cell)
+        spin = torch.rand([natoms, 3], dtype=dtype, device=env.DEVICE)
+        atype = torch.tensor([0, 0, 0, 1, 1], dtype=torch.int32, device=env.DEVICE)
+        coord_rot = torch.matmul(coord, rmat)
+        spin_rot = torch.matmul(spin, rmat)
+        cell_rot = torch.matmul(cell, rmat)
+        result_0 = eval_model(
+            self.model,
+            coord.unsqueeze(0),
+            cell.unsqueeze(0),
+            atype,
+            spins=spin.unsqueeze(0),
+        )
+        ret0 = {key: result_0[key].squeeze(0) for key in test_keys}
+        result_1 = eval_model(
+            self.model,
+            coord_rot.unsqueeze(0),
+            cell_rot.unsqueeze(0),
+            atype,
+            spins=spin_rot.unsqueeze(0),
+        )
+        ret1 = {key: result_1[key].squeeze(0) for key in test_keys}
+        for key in test_keys:
+            if key in ["energy"]:
+                torch.testing.assert_close(ret0[key], ret1[key], rtol=prec, atol=prec)
+            elif key in ["force", "force_mag"]:
+                torch.testing.assert_close(
+                    torch.matmul(ret0[key], rmat), ret1[key], rtol=prec, atol=prec
+                )
+            elif key == "virial":
+                if not hasattr(self, "test_virial") or self.test_virial:
+                    torch.testing.assert_close(
+                        torch.matmul(
+                            rmat.T, torch.matmul(ret0[key].view([3, 3]), rmat)
+                        ),
+                        ret1[key].view([3, 3]),
+                        rtol=prec,
+                        atol=prec,
+                    )
+
+
+class TestEnergyModelSeA(unittest.TestCase, RotTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_se_e2_a)
+        self.type_split = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestDOSModelSeA(unittest.TestCase, RotTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_dos)
+        self.type_split = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelDPA1(unittest.TestCase, RotTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_dpa1)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelDPA2(unittest.TestCase, RotTest):
+    def setUp(self):
+        model_params_sample = copy.deepcopy(model_dpa2)
+        model_params_sample["descriptor"]["rcut"] = model_params_sample["descriptor"][
+            "repinit_rcut"
+        ]
+        model_params_sample["descriptor"]["sel"] = model_params_sample["descriptor"][
+            "repinit_nsel"
+        ]
+        model_params = copy.deepcopy(model_dpa2)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestForceModelDPA2(unittest.TestCase, RotTest):
+    def setUp(self):
+        model_params_sample = copy.deepcopy(model_dpa2)
+        model_params_sample["descriptor"]["rcut"] = model_params_sample["descriptor"][
+            "repinit_rcut"
+        ]
+        model_params_sample["descriptor"]["sel"] = model_params_sample["descriptor"][
+            "repinit_nsel"
+        ]
+        model_params = copy.deepcopy(model_dpa2)
+        model_params["fitting_net"]["type"] = "direct_force_ener"
+        self.type_split = True
+        self.test_virial = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelHybrid(unittest.TestCase, RotTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_hybrid)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestForceModelHybrid(unittest.TestCase, RotTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_hybrid)
+        model_params["fitting_net"]["type"] = "direct_force_ener"
+        self.type_split = True
+        self.test_virial = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelZBL(unittest.TestCase, RotTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_zbl)
+        self.type_split = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelSpinSeA(unittest.TestCase, RotTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_spin)
+        self.type_split = False
+        self.test_spin = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_rot_denoise.py b/source/tests/pt/model/test_rot_denoise.py
new file mode 100644
index 0000000000..e4ae02f630
--- /dev/null
+++ b/source/tests/pt/model/test_rot_denoise.py
@@ -0,0 +1,131 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import unittest
+
+import torch
+
+from deepmd.pt.infer.deep_eval import (
+    eval_model,
+)
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.utils import (
+    env,
+)
+
+from .test_permutation_denoise import (
+    model_dpa1,
+    model_dpa2,
+)
+
+dtype = torch.float64
+
+
+class RotDenoiseTest:
+    def test(
+        self,
+    ):
+        prec = 1e-10
+        natoms = 5
+        cell = 10.0 * torch.eye(3, dtype=dtype).to(env.DEVICE)
+        coord = 2 * torch.rand([natoms, 3], dtype=dtype).to(env.DEVICE)
+        shift = torch.tensor([4, 4, 4], dtype=dtype).to(env.DEVICE)
+        atype = torch.IntTensor([0, 0, 0, 1, 1]).to(env.DEVICE)
+        from scipy.stats import (
+            special_ortho_group,
+        )
+
+        rmat = torch.tensor(special_ortho_group.rvs(3), dtype=dtype).to(env.DEVICE)
+
+        # rotate only coord and shift to the center of cell
+        coord_rot = torch.matmul(coord, rmat)
+        update_c0, logits0 = eval_model(
+            self.model,
+            (coord + shift).unsqueeze(0),
+            cell.unsqueeze(0),
+            atype,
+            denoise=True,
+        )
+        update_c0 = update_c0 - (coord + shift).unsqueeze(0)
+        ret0 = {"updated_coord": update_c0.squeeze(0), "logits": logits0.squeeze(0)}
+        update_c1, logits1 = eval_model(
+            self.model,
+            (coord_rot + shift).unsqueeze(0),
+            cell.unsqueeze(0),
+            atype,
+            denoise=True,
+        )
+        update_c1 = update_c1 - (coord_rot + shift).unsqueeze(0)
+        ret1 = {"updated_coord": update_c1.squeeze(0), "logits": logits1.squeeze(0)}
+        torch.testing.assert_close(
+            torch.matmul(ret0["updated_coord"], rmat),
+            ret1["updated_coord"],
+            rtol=prec,
+            atol=prec,
+        )
+        torch.testing.assert_close(ret0["logits"], ret1["logits"], rtol=prec, atol=prec)
+
+        # rotate coord and cell
+        torch.manual_seed(0)
+        cell = torch.rand([3, 3], dtype=dtype).to(env.DEVICE)
+        cell = (cell + cell.T) + 5.0 * torch.eye(3).to(env.DEVICE)
+        coord = torch.rand([natoms, 3], dtype=dtype).to(env.DEVICE)
+        coord = torch.matmul(coord, cell)
+        atype = torch.IntTensor([0, 0, 0, 1, 1]).to(env.DEVICE)
+        coord_rot = torch.matmul(coord, rmat)
+        cell_rot = torch.matmul(cell, rmat)
+        update_c0, logits0 = eval_model(
+            self.model, coord.unsqueeze(0), cell.unsqueeze(0), atype, denoise=True
+        )
+        ret0 = {"updated_coord": update_c0.squeeze(0), "logits": logits0.squeeze(0)}
+        update_c1, logits1 = eval_model(
+            self.model,
+            coord_rot.unsqueeze(0),
+            cell_rot.unsqueeze(0),
+            atype,
+            denoise=True,
+        )
+        ret1 = {"updated_coord": update_c1.squeeze(0), "logits": logits1.squeeze(0)}
+        torch.testing.assert_close(ret0["logits"], ret1["logits"], rtol=prec, atol=prec)
+        torch.testing.assert_close(
+            torch.matmul(ret0["updated_coord"], rmat),
+            ret1["updated_coord"],
+            rtol=prec,
+            atol=prec,
+        )
+
+
+@unittest.skip("support of the denoise is temporally disabled")
+class TestDenoiseModelDPA1(unittest.TestCase, RotDenoiseTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_dpa1)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+@unittest.skip("support of the denoise is temporally disabled")
+class TestDenoiseModelDPA2(unittest.TestCase, RotDenoiseTest):
+    def setUp(self):
+        model_params_sample = copy.deepcopy(model_dpa2)
+        model_params_sample["descriptor"]["rcut"] = model_params_sample["descriptor"][
+            "repinit_rcut"
+        ]
+        model_params_sample["descriptor"]["sel"] = model_params_sample["descriptor"][
+            "repinit_nsel"
+        ]
+        model_params = copy.deepcopy(model_dpa2)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+# @unittest.skip("hybrid not supported at the moment")
+# class TestEnergyModelHybrid(unittest.TestCase, TestRotDenoise):
+#     def setUp(self):
+#         model_params = copy.deepcopy(model_hybrid_denoise)
+#         self.type_split = True
+#         self.model = get_model(model_params).to(env.DEVICE)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_rotation.py b/source/tests/pt/model/test_rotation.py
new file mode 100644
index 0000000000..caa6385c80
--- /dev/null
+++ b/source/tests/pt/model/test_rotation.py
@@ -0,0 +1,111 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import unittest
+from pathlib import (
+    Path,
+)
+from typing import (
+    List,
+    Optional,
+)
+
+import numpy as np
+import torch
+from scipy.stats import (
+    special_ortho_group,
+)
+
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.utils.data import (
+    DeepmdData,
+)
+
+
+class CheckSymmetry(DeepmdData):
+    def __init__(
+        self,
+        sys_path: str,
+        type_map: Optional[List[str]] = None,
+    ):
+        super().__init__(sys_path=sys_path, type_map=type_map)
+        self.add("energy", 1, atomic=False, must=False, high_prec=True)
+        self.add("force", 3, atomic=True, must=False, high_prec=False)
+        self.add("virial", 9, atomic=False, must=False, high_prec=False)
+
+    def get_rotation(self, index, rotation_matrix):
+        for i in range(
+            0, len(self.dirs) + 1
+        ):  # note: if different sets can be merged, prefix sum is unused to calculate
+            if index < self.prefix_sum[i]:
+                break
+        frames = self._load_set(self.dirs[i - 1])
+        frames["coord"] = np.dot(
+            rotation_matrix, frames["coord"].reshape(-1, 3).T
+        ).T.reshape(self.nframes, -1)
+        frames["box"] = np.dot(
+            rotation_matrix, frames["box"].reshape(-1, 3).T
+        ).T.reshape(self.nframes, -1)
+        frames["force"] = np.dot(
+            rotation_matrix, frames["force"].reshape(-1, 3).T
+        ).T.reshape(self.nframes, -1)
+        frame = self._get_subdata(frames, index - self.prefix_sum[i - 1])
+        frame = self.reformat_data_torch(frame)
+        return frame
+
+
+def get_data(batch):
+    inputs = {}
+    for key in ["coord", "atype", "box"]:
+        inputs[key] = torch.as_tensor(batch[key], device=env.DEVICE)
+        inputs[key] = inputs[key].unsqueeze(0).to(env.DEVICE)
+    return inputs
+
+
+class TestRotation(unittest.TestCase):
+    def setUp(self):
+        with open(str(Path(__file__).parent / "water/se_e2_a.json")) as fin:
+            self.config = json.load(fin)
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.rotation = special_ortho_group.rvs(3)
+        with torch.device("cpu"):
+            self.get_dataset(0)
+        self.get_model()
+
+    def get_model(self):
+        self.model = get_model(self.config["model"]).to(env.DEVICE)
+
+    def get_dataset(self, system_index=0, batch_index=0):
+        systems = self.config["training"]["training_data"]["systems"]
+        type_map = self.config["model"]["type_map"]
+        dpdatasystem = CheckSymmetry(sys_path=systems[system_index], type_map=type_map)
+        self.origin_batch = dpdatasystem.get_item_torch(batch_index)
+        self.rotated_batch = dpdatasystem.get_rotation(batch_index, self.rotation)
+
+    def test_rotation(self):
+        result1 = self.model(**get_data(self.origin_batch))
+        result2 = self.model(**get_data(self.rotated_batch))
+        rotation = torch.from_numpy(self.rotation).to(env.DEVICE)
+        torch.testing.assert_close(result1["energy"], result2["energy"])
+        if "force" in result1:
+            torch.testing.assert_close(
+                result2["force"][0], torch.matmul(rotation, result1["force"][0].T).T
+            )
+        if "virial" in result1:
+            torch.testing.assert_close(
+                result2["virial"][0].view([3, 3]),
+                torch.matmul(
+                    torch.matmul(rotation, result1["virial"][0].view([3, 3]).T),
+                    rotation.T,
+                ),
+            )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_saveload_dpa1.py b/source/tests/pt/model/test_saveload_dpa1.py
new file mode 100644
index 0000000000..712b44485e
--- /dev/null
+++ b/source/tests/pt/model/test_saveload_dpa1.py
@@ -0,0 +1,140 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import json
+import os
+import unittest
+from pathlib import (
+    Path,
+)
+
+import torch
+from torch.utils.data import (
+    DataLoader,
+)
+
+from deepmd.pt.loss import (
+    EnergyStdLoss,
+)
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.train.wrapper import (
+    ModelWrapper,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.dataloader import (
+    BufferedIterator,
+    DpLoaderSet,
+)
+from deepmd.pt.utils.stat import (
+    make_stat_input,
+)
+from deepmd.tf.common import (
+    expand_sys_str,
+)
+
+
+def get_dataset(config):
+    model_config = config["model"]
+    rcut = model_config["descriptor"]["rcut"]
+    sel = model_config["descriptor"]["sel"]
+    systems = config["training"]["validation_data"]["systems"]
+    if isinstance(systems, str):
+        systems = expand_sys_str(systems)
+    batch_size = config["training"]["training_data"]["batch_size"]
+    type_map = model_config["type_map"]
+
+    dataset = DpLoaderSet(systems, batch_size, type_map)
+    data_stat_nbatch = model_config.get("data_stat_nbatch", 10)
+    sampled = make_stat_input(dataset.systems, dataset.dataloaders, data_stat_nbatch)
+    return dataset, sampled
+
+
+class TestSaveLoadDPA1(unittest.TestCase):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water/se_atten.json")
+        with open(input_json) as fin:
+            self.config = json.load(fin)
+        self.config["loss"]["starter_learning_rate"] = self.config["learning_rate"][
+            "start_lr"
+        ]
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.dataset, self.sampled = get_dataset(self.config)
+        self.training_dataloader = DataLoader(
+            self.dataset,
+            sampler=torch.utils.data.RandomSampler(self.dataset),
+            batch_size=None,
+            num_workers=0,  # setting to 0 diverges the behavior of its iterator; should be >=1
+            drop_last=False,
+            pin_memory=True,
+        )
+        with torch.device("cpu"):
+            self.training_data = BufferedIterator(iter(self.training_dataloader))
+        self.loss = EnergyStdLoss(**self.config["loss"])
+        self.cur_lr = 1
+        self.task_key = "Default"
+        self.input_dict, self.label_dict = self.get_data()
+        self.start_lr = self.config["learning_rate"]["start_lr"]
+
+    def get_model_result(self, read=False, model_file="tmp_model.pt"):
+        wrapper = self.create_wrapper(read)
+        optimizer = torch.optim.Adam(wrapper.parameters(), lr=self.start_lr)
+        optimizer.zero_grad()
+        if read:
+            wrapper.load_state_dict(torch.load(model_file, map_location=env.DEVICE))
+            os.remove(model_file)
+        else:
+            torch.save(wrapper.state_dict(), model_file)
+        result = wrapper(
+            **self.input_dict,
+            cur_lr=self.cur_lr,
+            label=self.label_dict,
+            task_key=self.task_key,
+        )[0]
+        return result
+
+    def create_wrapper(self, read: bool):
+        model_config = copy.deepcopy(self.config["model"])
+        model_config["resuming"] = read
+        model_config["stat_file_dir"] = "stat_files"
+        model_config["stat_file"] = "stat.npz"
+        model_config["stat_file_path"] = os.path.join(
+            model_config["stat_file_dir"], model_config["stat_file"]
+        )
+        model = get_model(model_config).to(env.DEVICE)
+        return ModelWrapper(model, self.loss)
+
+    def get_data(self):
+        try:
+            batch_data = next(iter(self.training_data))
+        except StopIteration:
+            # Refresh the status of the dataloader to start from a new epoch
+            self.training_data = BufferedIterator(iter(self.training_dataloader))
+            batch_data = next(iter(self.training_data))
+        input_dict = {}
+        for item in ["coord", "atype", "box"]:
+            if item in batch_data:
+                input_dict[item] = batch_data[item].to(env.DEVICE)
+            else:
+                input_dict[item] = None
+        label_dict = {}
+        for item in ["energy", "force", "virial"]:
+            if item in batch_data:
+                label_dict[item] = batch_data[item].to(env.DEVICE)
+        return input_dict, label_dict
+
+    def test_saveload(self):
+        result1 = self.get_model_result()
+        result2 = self.get_model_result(read=True)
+        final_result = all(
+            torch.allclose(result1[item], result2[item]) for item in result1
+        )
+        self.assertTrue(final_result)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_saveload_se_e2_a.py b/source/tests/pt/model/test_saveload_se_e2_a.py
new file mode 100644
index 0000000000..56ea3283d9
--- /dev/null
+++ b/source/tests/pt/model/test_saveload_se_e2_a.py
@@ -0,0 +1,134 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import json
+import os
+import unittest
+from pathlib import (
+    Path,
+)
+
+import torch
+from torch.utils.data import (
+    DataLoader,
+)
+
+from deepmd.pt.loss import (
+    EnergyStdLoss,
+)
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.train.wrapper import (
+    ModelWrapper,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.dataloader import (
+    BufferedIterator,
+    DpLoaderSet,
+)
+from deepmd.pt.utils.stat import (
+    make_stat_input,
+)
+from deepmd.tf.common import (
+    expand_sys_str,
+)
+
+
+def get_dataset(config):
+    model_config = config["model"]
+    rcut = model_config["descriptor"]["rcut"]
+    sel = model_config["descriptor"]["sel"]
+    systems = config["training"]["validation_data"]["systems"]
+    if isinstance(systems, str):
+        systems = expand_sys_str(systems)
+    batch_size = config["training"]["training_data"]["batch_size"]
+    type_map = model_config["type_map"]
+
+    dataset = DpLoaderSet(systems, batch_size, type_map)
+    data_stat_nbatch = model_config.get("data_stat_nbatch", 10)
+    sampled = make_stat_input(dataset.systems, dataset.dataloaders, data_stat_nbatch)
+    return dataset, sampled
+
+
+class TestSaveLoadSeA(unittest.TestCase):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water/se_e2_a.json")
+        with open(input_json) as fin:
+            self.config = json.load(fin)
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.config["loss"]["starter_learning_rate"] = self.config["learning_rate"][
+            "start_lr"
+        ]
+        self.dataset, self.sampled = get_dataset(self.config)
+        self.training_dataloader = DataLoader(
+            self.dataset,
+            sampler=torch.utils.data.RandomSampler(self.dataset),
+            batch_size=None,
+            num_workers=0,  # setting to 0 diverges the behavior of its iterator; should be >=1
+            drop_last=False,
+            pin_memory=True,
+        )
+        with torch.device("cpu"):
+            self.training_data = BufferedIterator(iter(self.training_dataloader))
+        self.loss = EnergyStdLoss(**self.config["loss"])
+        self.cur_lr = 1
+        self.task_key = "Default"
+        self.input_dict, self.label_dict = self.get_data()
+        self.start_lr = self.config["learning_rate"]["start_lr"]
+
+    def get_model_result(self, read=False, model_file="tmp_model.pt"):
+        wrapper = self.create_wrapper()
+        optimizer = torch.optim.Adam(wrapper.parameters(), lr=self.start_lr)
+        optimizer.zero_grad()
+        if read:
+            wrapper.load_state_dict(torch.load(model_file, map_location=env.DEVICE))
+            os.remove(model_file)
+        else:
+            torch.save(wrapper.state_dict(), model_file)
+        result = wrapper(
+            **self.input_dict,
+            cur_lr=self.cur_lr,
+            label=self.label_dict,
+            task_key=self.task_key,
+        )[0]
+        return result
+
+    def create_wrapper(self):
+        model_config = copy.deepcopy(self.config["model"])
+        model = get_model(model_config).to(env.DEVICE)
+        return ModelWrapper(model, self.loss)
+
+    def get_data(self):
+        try:
+            batch_data = next(iter(self.training_data))
+        except StopIteration:
+            # Refresh the status of the dataloader to start from a new epoch
+            self.training_data = BufferedIterator(iter(self.training_dataloader))
+            batch_data = next(iter(self.training_data))
+        input_dict = {}
+        for item in ["coord", "atype", "box"]:
+            if item in batch_data:
+                input_dict[item] = batch_data[item].to(env.DEVICE)
+            else:
+                input_dict[item] = None
+        label_dict = {}
+        for item in ["energy", "force", "virial"]:
+            if item in batch_data:
+                label_dict[item] = batch_data[item].to(env.DEVICE)
+        return input_dict, label_dict
+
+    def test_saveload(self):
+        result1 = self.get_model_result()
+        result2 = self.get_model_result(read=True)
+        final_result = all(
+            torch.allclose(result1[item], result2[item]) for item in result1
+        )
+        self.assertTrue(final_result)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_se_e2_a.py b/source/tests/pt/model/test_se_e2_a.py
new file mode 100644
index 0000000000..214fdeb00f
--- /dev/null
+++ b/source/tests/pt/model/test_se_e2_a.py
@@ -0,0 +1,173 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import itertools
+import unittest
+
+import numpy as np
+import torch
+
+from deepmd.dpmodel.descriptor import DescrptSeA as DPDescrptSeA
+from deepmd.pt.model.descriptor.se_a import (
+    DescrptSeA,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.env import (
+    PRECISION_DICT,
+)
+
+from .test_env_mat import (
+    TestCaseSingleFrameWithNlist,
+)
+from .test_mlp import (
+    get_tols,
+)
+
+dtype = env.GLOBAL_PT_FLOAT_PRECISION
+
+
+# to be merged with the tf test case
+class TestDescrptSeA(unittest.TestCase, TestCaseSingleFrameWithNlist):
+    def setUp(self):
+        TestCaseSingleFrameWithNlist.setUp(self)
+
+    def test_consistency(
+        self,
+    ):
+        rng = np.random.default_rng()
+        nf, nloc, nnei = self.nlist.shape
+        davg = rng.normal(size=(self.nt, nnei, 4))
+        dstd = rng.normal(size=(self.nt, nnei, 4))
+        dstd = 0.1 + np.abs(dstd)
+
+        for idt, prec, em in itertools.product(
+            [False, True],
+            ["float64", "float32"],
+            [[], [[0, 1]], [[1, 1]]],
+        ):
+            dtype = PRECISION_DICT[prec]
+            rtol, atol = get_tols(prec)
+            err_msg = f"idt={idt} prec={prec}"
+            # sea new impl
+            dd0 = DescrptSeA(
+                self.rcut,
+                self.rcut_smth,
+                self.sel,
+                precision=prec,
+                resnet_dt=idt,
+                old_impl=False,
+                exclude_mask=em,
+            ).to(env.DEVICE)
+            dd0.sea.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE)
+            dd0.sea.dstd = torch.tensor(dstd, dtype=dtype, device=env.DEVICE)
+            rd0, _, _, _, _ = dd0(
+                torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE),
+                torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE),
+                torch.tensor(self.nlist, dtype=int, device=env.DEVICE),
+            )
+            # serialization
+            dd1 = DescrptSeA.deserialize(dd0.serialize())
+            rd1, gr1, _, _, sw1 = dd1(
+                torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE),
+                torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE),
+                torch.tensor(self.nlist, dtype=int, device=env.DEVICE),
+            )
+            np.testing.assert_allclose(
+                rd0.detach().cpu().numpy(),
+                rd1.detach().cpu().numpy(),
+                rtol=rtol,
+                atol=atol,
+                err_msg=err_msg,
+            )
+            np.testing.assert_allclose(
+                rd0.detach().cpu().numpy()[0][self.perm[: self.nloc]],
+                rd0.detach().cpu().numpy()[1],
+                rtol=rtol,
+                atol=atol,
+                err_msg=err_msg,
+            )
+            # dp impl
+            dd2 = DPDescrptSeA.deserialize(dd0.serialize())
+            rd2, gr2, _, _, sw2 = dd2.call(
+                self.coord_ext,
+                self.atype_ext,
+                self.nlist,
+            )
+            for aa, bb in zip([rd1, gr1, sw1], [rd2, gr2, sw2]):
+                np.testing.assert_allclose(
+                    aa.detach().cpu().numpy(),
+                    bb,
+                    rtol=rtol,
+                    atol=atol,
+                    err_msg=err_msg,
+                )
+            # old impl
+            if idt is False and prec == "float64":
+                dd3 = DescrptSeA(
+                    self.rcut,
+                    self.rcut_smth,
+                    self.sel,
+                    precision=prec,
+                    resnet_dt=idt,
+                    old_impl=True,
+                ).to(env.DEVICE)
+                dd0_state_dict = dd0.sea.state_dict()
+                dd3_state_dict = dd3.sea.state_dict()
+                for i in dd3_state_dict:
+                    dd3_state_dict[i] = (
+                        dd0_state_dict[
+                            i.replace(".deep_layers.", ".layers.").replace(
+                                "filter_layers_old.", "filter_layers.networks."
+                            )
+                        ]
+                        .detach()
+                        .clone()
+                    )
+                    if ".bias" in i:
+                        dd3_state_dict[i] = dd3_state_dict[i].unsqueeze(0)
+                dd3.sea.load_state_dict(dd3_state_dict)
+
+                rd3, gr3, _, _, sw3 = dd3(
+                    torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE),
+                    torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE),
+                    torch.tensor(self.nlist, dtype=int, device=env.DEVICE),
+                )
+                for aa, bb in zip([rd1, gr1, sw1], [rd3, gr3, sw3]):
+                    np.testing.assert_allclose(
+                        aa.detach().cpu().numpy(),
+                        bb.detach().cpu().numpy(),
+                        rtol=rtol,
+                        atol=atol,
+                        err_msg=err_msg,
+                    )
+
+    def test_jit(
+        self,
+    ):
+        rng = np.random.default_rng()
+        nf, nloc, nnei = self.nlist.shape
+        davg = rng.normal(size=(self.nt, nnei, 4))
+        dstd = rng.normal(size=(self.nt, nnei, 4))
+        dstd = 0.1 + np.abs(dstd)
+
+        for idt, prec in itertools.product(
+            [False, True],
+            ["float64", "float32"],
+        ):
+            dtype = PRECISION_DICT[prec]
+            rtol, atol = get_tols(prec)
+            err_msg = f"idt={idt} prec={prec}"
+            # sea new impl
+            dd0 = DescrptSeA(
+                self.rcut,
+                self.rcut_smth,
+                self.sel,
+                precision=prec,
+                resnet_dt=idt,
+                old_impl=False,
+            )
+            dd0.sea.mean = torch.tensor(davg, dtype=dtype, device=env.DEVICE)
+            dd0.sea.dstd = torch.tensor(dstd, dtype=dtype, device=env.DEVICE)
+            dd1 = DescrptSeA.deserialize(dd0.serialize())
+            model = torch.jit.script(dd0)
+            model = torch.jit.script(dd1)
diff --git a/source/tests/pt/model/test_smooth.py b/source/tests/pt/model/test_smooth.py
new file mode 100644
index 0000000000..4f5be912cf
--- /dev/null
+++ b/source/tests/pt/model/test_smooth.py
@@ -0,0 +1,259 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import unittest
+
+import torch
+
+from deepmd.pt.infer.deep_eval import (
+    eval_model,
+)
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.utils import (
+    env,
+)
+
+from .test_permutation import (  # model_dpau,
+    model_dos,
+    model_dpa1,
+    model_dpa2,
+    model_hybrid,
+    model_se_e2_a,
+    model_spin,
+    model_zbl,
+)
+
+dtype = torch.float64
+
+
+class SmoothTest:
+    def test(
+        self,
+    ):
+        # displacement of atoms
+        epsilon = 1e-5 if self.epsilon is None else self.epsilon
+        # required prec. relative prec is not checked.
+        rprec = 0
+        aprec = 1e-5 if self.aprec is None else self.aprec
+
+        natoms = 10
+        cell = 8.6 * torch.eye(3, dtype=dtype, device=env.DEVICE)
+        atype = torch.randint(0, 3, [natoms], device=env.DEVICE)
+        coord0 = torch.tensor(
+            [
+                0.0,
+                0.0,
+                0.0,
+                4.0 - 0.5 * epsilon,
+                0.0,
+                0.0,
+                0.0,
+                4.0 - 0.5 * epsilon,
+                0.0,
+            ],
+            dtype=dtype,
+            device=env.DEVICE,
+        ).view([-1, 3])
+        coord1 = torch.rand(
+            [natoms - coord0.shape[0], 3], dtype=dtype, device=env.DEVICE
+        )
+        coord1 = torch.matmul(coord1, cell)
+        coord = torch.concat([coord0, coord1], dim=0)
+        spin = torch.rand([natoms, 3], dtype=dtype, device=env.DEVICE)
+        coord0 = torch.clone(coord)
+        coord1 = torch.clone(coord)
+        coord1[1][0] += epsilon
+        coord2 = torch.clone(coord)
+        coord2[2][1] += epsilon
+        coord3 = torch.clone(coord)
+        coord3[1][0] += epsilon
+        coord3[2][1] += epsilon
+        test_spin = getattr(self, "test_spin", False)
+        if not test_spin:
+            test_keys = ["energy", "force", "virial"]
+        else:
+            test_keys = ["energy", "force", "force_mag", "virial"]
+
+        result_0 = eval_model(
+            self.model,
+            coord0.unsqueeze(0),
+            cell.unsqueeze(0),
+            atype,
+            spins=spin.unsqueeze(0),
+        )
+        ret0 = {key: result_0[key].squeeze(0) for key in test_keys}
+        result_1 = eval_model(
+            self.model,
+            coord1.unsqueeze(0),
+            cell.unsqueeze(0),
+            atype,
+            spins=spin.unsqueeze(0),
+        )
+        ret1 = {key: result_1[key].squeeze(0) for key in test_keys}
+        result_2 = eval_model(
+            self.model,
+            coord2.unsqueeze(0),
+            cell.unsqueeze(0),
+            atype,
+            spins=spin.unsqueeze(0),
+        )
+        ret2 = {key: result_2[key].squeeze(0) for key in test_keys}
+        result_3 = eval_model(
+            self.model,
+            coord3.unsqueeze(0),
+            cell.unsqueeze(0),
+            atype,
+            spins=spin.unsqueeze(0),
+        )
+        ret3 = {key: result_3[key].squeeze(0) for key in test_keys}
+
+        def compare(ret0, ret1):
+            for key in test_keys:
+                if key in ["energy"]:
+                    torch.testing.assert_close(
+                        ret0[key], ret1[key], rtol=rprec, atol=aprec
+                    )
+                elif key in ["force", "force_mag"]:
+                    # plus 1. to avoid the divided-by-zero issue
+                    torch.testing.assert_close(
+                        1.0 + ret0[key], 1.0 + ret1[key], rtol=rprec, atol=aprec
+                    )
+                elif key == "virial":
+                    if not hasattr(self, "test_virial") or self.test_virial:
+                        torch.testing.assert_close(
+                            1.0 + ret0[key], 1.0 + ret1[key], rtol=rprec, atol=aprec
+                        )
+                else:
+                    raise RuntimeError(f"Unexpected test key {key}")
+
+        compare(ret0, ret1)
+        compare(ret1, ret2)
+        compare(ret0, ret3)
+
+
+class TestEnergyModelSeA(unittest.TestCase, SmoothTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_se_e2_a)
+        self.type_split = False
+        self.model = get_model(model_params).to(env.DEVICE)
+        self.epsilon, self.aprec = None, None
+
+
+class TestDOSModelSeA(unittest.TestCase, SmoothTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_dos)
+        self.type_split = False
+        self.model = get_model(model_params).to(env.DEVICE)
+        self.epsilon, self.aprec = None, None
+
+
+# @unittest.skip("dpa-1 not smooth at the moment")
+class TestEnergyModelDPA1(unittest.TestCase, SmoothTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_dpa1)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+        # less degree of smoothness,
+        # error can be systematically removed by reducing epsilon
+        self.epsilon = 1e-5
+        self.aprec = 1e-5
+
+
+class TestEnergyModelDPA2(unittest.TestCase, SmoothTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_dpa2)
+        model_params["descriptor"]["repinit_rcut"] = 8
+        model_params["descriptor"]["repinit_rcut_smth"] = 3.5
+        model_params_sample = copy.deepcopy(model_params)
+        #######################################################
+        # dirty hack here! the interface of dataload should be
+        # redesigned to support specifying rcut and sel
+        #######################################################
+        model_params_sample["descriptor"]["rcut"] = model_params_sample["descriptor"][
+            "repinit_rcut"
+        ]
+        model_params_sample["descriptor"]["sel"] = model_params_sample["descriptor"][
+            "repinit_nsel"
+        ]
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+        self.epsilon, self.aprec = 1e-5, 1e-4
+
+
+class TestEnergyModelDPA2_1(unittest.TestCase, SmoothTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_dpa2)
+        model_params["fitting_net"]["type"] = "ener"
+        model_params_sample = copy.deepcopy(model_params)
+        model_params_sample["descriptor"]["rcut"] = model_params_sample["descriptor"][
+            "repinit_rcut"
+        ]
+        model_params_sample["descriptor"]["sel"] = model_params_sample["descriptor"][
+            "repinit_nsel"
+        ]
+        self.type_split = True
+        self.test_virial = False
+        self.model = get_model(model_params).to(env.DEVICE)
+        self.epsilon, self.aprec = None, None
+
+
+class TestEnergyModelDPA2_2(unittest.TestCase, SmoothTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_dpa2)
+        model_params["fitting_net"]["type"] = "ener"
+        model_params_sample = copy.deepcopy(model_params)
+        model_params_sample["descriptor"]["rcut"] = model_params_sample["descriptor"][
+            "repinit_rcut"
+        ]
+        model_params_sample["descriptor"]["sel"] = model_params_sample["descriptor"][
+            "repinit_nsel"
+        ]
+        self.type_split = True
+        self.test_virial = False
+        self.model = get_model(model_params).to(env.DEVICE)
+        self.epsilon, self.aprec = None, None
+
+
+class TestEnergyModelHybrid(unittest.TestCase, SmoothTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_hybrid)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+        self.epsilon, self.aprec = None, None
+
+
+class TestEnergyModelZBL(unittest.TestCase, SmoothTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_zbl)
+        self.type_split = False
+        self.model = get_model(model_params).to(env.DEVICE)
+        self.epsilon, self.aprec = 1e-10, None
+
+
+class TestEnergyModelSpinSeA(unittest.TestCase, SmoothTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_spin)
+        self.type_split = False
+        self.test_spin = True
+        self.model = get_model(model_params).to(env.DEVICE)
+        self.epsilon, self.aprec = None, None
+
+
+# class TestEnergyFoo(unittest.TestCase):
+#   def test(self):
+#     model_params = model_dpau
+#     self.model = EnergyModelDPAUni(model_params).to(env.DEVICE)
+
+#     natoms = 5
+#     cell = torch.rand([3, 3], dtype=dtype)
+#     cell = (cell + cell.T) + 5. * torch.eye(3)
+#     coord = torch.rand([natoms, 3], dtype=dtype)
+#     coord = torch.matmul(coord, cell)
+#     atype = torch.IntTensor([0, 0, 0, 1, 1])
+#     idx_perm = [1, 0, 4, 3, 2]
+#     ret0 = infer_model(self.model, coord, cell, atype, type_split=True)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_smooth_denoise.py b/source/tests/pt/model/test_smooth_denoise.py
new file mode 100644
index 0000000000..777d288f3c
--- /dev/null
+++ b/source/tests/pt/model/test_smooth_denoise.py
@@ -0,0 +1,149 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import unittest
+
+import torch
+
+from deepmd.pt.infer.deep_eval import (
+    eval_model,
+)
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.utils import (
+    env,
+)
+
+from .test_permutation_denoise import (
+    model_dpa2,
+)
+
+dtype = torch.float64
+
+
+class SmoothDenoiseTest:
+    def test(
+        self,
+    ):
+        # displacement of atoms
+        epsilon = 1e-5 if self.epsilon is None else self.epsilon
+        # required prec. relative prec is not checked.
+        rprec = 0
+        aprec = 1e-5 if self.aprec is None else self.aprec
+
+        natoms = 10
+        cell = 8.6 * torch.eye(3, dtype=dtype).to(env.DEVICE)
+        atype = torch.randint(0, 3, [natoms])
+        coord0 = (
+            torch.tensor(
+                [
+                    0.0,
+                    0.0,
+                    0.0,
+                    4.0 - 0.5 * epsilon,
+                    0.0,
+                    0.0,
+                    0.0,
+                    4.0 - 0.5 * epsilon,
+                    0.0,
+                ],
+                dtype=dtype,
+            )
+            .view([-1, 3])
+            .to(env.DEVICE)
+        )
+        coord1 = torch.rand([natoms - coord0.shape[0], 3], dtype=dtype).to(env.DEVICE)
+        coord1 = torch.matmul(coord1, cell)
+        coord = torch.concat([coord0, coord1], dim=0)
+
+        coord0 = torch.clone(coord)
+        coord1 = torch.clone(coord)
+        coord1[1][0] += epsilon
+        coord2 = torch.clone(coord)
+        coord2[2][1] += epsilon
+        coord3 = torch.clone(coord)
+        coord3[1][0] += epsilon
+        coord3[2][1] += epsilon
+
+        update_c0, logits0 = eval_model(
+            self.model, coord0.unsqueeze(0), cell.unsqueeze(0), atype, denoise=True
+        )
+        ret0 = {"updated_coord": update_c0.squeeze(0), "logits": logits0.squeeze(0)}
+        update_c1, logits1 = eval_model(
+            self.model, coord1.unsqueeze(0), cell.unsqueeze(0), atype, denoise=True
+        )
+        ret1 = {"updated_coord": update_c1.squeeze(0), "logits": logits1.squeeze(0)}
+        update_c2, logits2 = eval_model(
+            self.model, coord2.unsqueeze(0), cell.unsqueeze(0), atype, denoise=True
+        )
+        ret2 = {"updated_coord": update_c2.squeeze(0), "logits": logits2.squeeze(0)}
+        update_c3, logits3 = eval_model(
+            self.model, coord3.unsqueeze(0), cell.unsqueeze(0), atype, denoise=True
+        )
+        ret3 = {"updated_coord": update_c3.squeeze(0), "logits": logits3.squeeze(0)}
+
+        def compare(ret0, ret1):
+            torch.testing.assert_close(
+                ret0["updated_coord"], ret1["updated_coord"], rtol=rprec, atol=aprec
+            )
+            torch.testing.assert_close(
+                ret0["logits"], ret1["logits"], rtol=rprec, atol=aprec
+            )
+
+        compare(ret0, ret1)
+        compare(ret1, ret2)
+        compare(ret0, ret3)
+
+
+@unittest.skip("support of the denoise is temporally disabled")
+class TestDenoiseModelDPA2(unittest.TestCase, SmoothDenoiseTest):
+    def setUp(self):
+        model_params_sample = copy.deepcopy(model_dpa2)
+        model_params_sample["descriptor"]["rcut"] = model_params_sample["descriptor"][
+            "repinit_rcut"
+        ]
+        model_params_sample["descriptor"]["sel"] = model_params_sample["descriptor"][
+            "repinit_nsel"
+        ]
+        model_params = copy.deepcopy(model_dpa2)
+        model_params["descriptor"]["sel"] = 8
+        model_params["descriptor"]["rcut_smth"] = 3.5
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+        self.epsilon, self.aprec = None, None
+        self.epsilon = 1e-7
+        self.aprec = 1e-5
+
+
+@unittest.skip("support of the denoise is temporally disabled")
+class TestDenoiseModelDPA2_1(unittest.TestCase, SmoothDenoiseTest):
+    def setUp(self):
+        model_params_sample = copy.deepcopy(model_dpa2)
+        model_params_sample["descriptor"]["rcut"] = model_params_sample["descriptor"][
+            "repinit_rcut"
+        ]
+        model_params_sample["descriptor"]["sel"] = model_params_sample["descriptor"][
+            "repinit_nsel"
+        ]
+        model_params = copy.deepcopy(model_dpa2)
+        # model_params["descriptor"]["combine_grrg"] = True
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+        self.epsilon, self.aprec = None, None
+        self.epsilon = 1e-7
+        self.aprec = 1e-5
+
+
+# @unittest.skip("hybrid not supported at the moment")
+# class TestDenoiseModelHybrid(unittest.TestCase, TestSmoothDenoise):
+#     def setUp(self):
+#         model_params = copy.deepcopy(model_hybrid_denoise)
+#         self.type_split = True
+#         self.model = get_model(model_params).to(env.DEVICE)
+#         self.epsilon, self.aprec = None, None
+#         self.epsilon = 1e-7
+#         self.aprec = 1e-5
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_trans.py b/source/tests/pt/model/test_trans.py
new file mode 100644
index 0000000000..a0aeefd6b3
--- /dev/null
+++ b/source/tests/pt/model/test_trans.py
@@ -0,0 +1,163 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import unittest
+
+import torch
+
+from deepmd.pt.infer.deep_eval import (
+    eval_model,
+)
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.utils import (
+    env,
+)
+
+from .test_permutation import (  # model_dpau,
+    model_dos,
+    model_dpa1,
+    model_dpa2,
+    model_hybrid,
+    model_se_e2_a,
+    model_spin,
+    model_zbl,
+)
+
+dtype = torch.float64
+
+
+class TransTest:
+    def test(
+        self,
+    ):
+        natoms = 5
+        cell = torch.rand([3, 3], dtype=dtype, device=env.DEVICE)
+        cell = (cell + cell.T) + 5.0 * torch.eye(3, device=env.DEVICE)
+        coord = torch.rand([natoms, 3], dtype=dtype, device=env.DEVICE)
+        coord = torch.matmul(coord, cell)
+        spin = torch.rand([natoms, 3], dtype=dtype, device=env.DEVICE)
+        atype = torch.tensor([0, 0, 0, 1, 1], dtype=torch.int32, device=env.DEVICE)
+        shift = (torch.rand([3], dtype=dtype, device=env.DEVICE) - 0.5) * 2.0
+        coord_s = torch.matmul(
+            torch.remainder(torch.matmul(coord + shift, torch.linalg.inv(cell)), 1.0),
+            cell,
+        )
+        test_spin = getattr(self, "test_spin", False)
+        if not test_spin:
+            test_keys = ["energy", "force", "virial"]
+        else:
+            test_keys = ["energy", "force", "force_mag", "virial"]
+        result_0 = eval_model(
+            self.model,
+            coord.unsqueeze(0),
+            cell.unsqueeze(0),
+            atype,
+            spins=spin.unsqueeze(0),
+        )
+        ret0 = {key: result_0[key].squeeze(0) for key in test_keys}
+        result_1 = eval_model(
+            self.model,
+            coord_s.unsqueeze(0),
+            cell.unsqueeze(0),
+            atype,
+            spins=spin.unsqueeze(0),
+        )
+        ret1 = {key: result_1[key].squeeze(0) for key in test_keys}
+        prec = 1e-10
+        for key in test_keys:
+            if key in ["energy", "force", "force_mag"]:
+                torch.testing.assert_close(ret0[key], ret1[key], rtol=prec, atol=prec)
+            elif key == "virial":
+                if not hasattr(self, "test_virial") or self.test_virial:
+                    torch.testing.assert_close(
+                        ret0[key], ret1[key], rtol=prec, atol=prec
+                    )
+            else:
+                raise RuntimeError(f"Unexpected test key {key}")
+
+
+class TestEnergyModelSeA(unittest.TestCase, TransTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_se_e2_a)
+        self.type_split = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestDOSModelSeA(unittest.TestCase, TransTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_dos)
+        self.type_split = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelDPA1(unittest.TestCase, TransTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_dpa1)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelDPA2(unittest.TestCase, TransTest):
+    def setUp(self):
+        model_params_sample = copy.deepcopy(model_dpa2)
+        model_params_sample["descriptor"]["rcut"] = model_params_sample["descriptor"][
+            "repinit_rcut"
+        ]
+        model_params_sample["descriptor"]["sel"] = model_params_sample["descriptor"][
+            "repinit_nsel"
+        ]
+        model_params = copy.deepcopy(model_dpa2)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestForceModelDPA2(unittest.TestCase, TransTest):
+    def setUp(self):
+        model_params_sample = copy.deepcopy(model_dpa2)
+        model_params_sample["descriptor"]["rcut"] = model_params_sample["descriptor"][
+            "repinit_rcut"
+        ]
+        model_params_sample["descriptor"]["sel"] = model_params_sample["descriptor"][
+            "repinit_nsel"
+        ]
+        model_params = copy.deepcopy(model_dpa2)
+        model_params["fitting_net"]["type"] = "direct_force_ener"
+        self.type_split = True
+        self.test_virial = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelHybrid(unittest.TestCase, TransTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_hybrid)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestForceModelHybrid(unittest.TestCase, TransTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_hybrid)
+        model_params["fitting_net"]["type"] = "direct_force_ener"
+        self.type_split = True
+        self.test_virial = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelZBL(unittest.TestCase, TransTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_zbl)
+        self.type_split = False
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+class TestEnergyModelSpinSeA(unittest.TestCase, TransTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_spin)
+        self.type_split = False
+        self.test_spin = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_trans_denoise.py b/source/tests/pt/model/test_trans_denoise.py
new file mode 100644
index 0000000000..9ba93a244a
--- /dev/null
+++ b/source/tests/pt/model/test_trans_denoise.py
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import unittest
+
+import torch
+
+from deepmd.pt.infer.deep_eval import (
+    eval_model,
+)
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.utils import (
+    env,
+)
+
+from .test_permutation_denoise import (
+    model_dpa1,
+    model_dpa2,
+    model_hybrid,
+)
+
+dtype = torch.float64
+
+
+class TransDenoiseTest:
+    def test(
+        self,
+    ):
+        natoms = 5
+        cell = torch.rand([3, 3], dtype=dtype).to(env.DEVICE)
+        cell = (cell + cell.T) + 5.0 * torch.eye(3).to(env.DEVICE)
+        coord = torch.rand([natoms, 3], dtype=dtype).to(env.DEVICE)
+        coord = torch.matmul(coord, cell)
+        atype = torch.IntTensor([0, 0, 0, 1, 1]).to(env.DEVICE)
+        shift = (torch.rand([3], dtype=dtype) - 0.5).to(env.DEVICE) * 2.0
+        coord_s = torch.matmul(
+            torch.remainder(torch.matmul(coord + shift, torch.linalg.inv(cell)), 1.0),
+            cell,
+        )
+        updated_c0, logits0 = eval_model(
+            self.model, coord.unsqueeze(0), cell.unsqueeze(0), atype, denoise=True
+        )
+        updated_c0 = updated_c0 - coord.unsqueeze(0)
+        ret0 = {"updated_coord": updated_c0.squeeze(0), "logits": logits0.squeeze(0)}
+        updated_c1, logits1 = eval_model(
+            self.model, coord_s.unsqueeze(0), cell.unsqueeze(0), atype, denoise=True
+        )
+        updated_c1 = updated_c1 - coord_s.unsqueeze(0)
+        ret1 = {"updated_coord": updated_c1.squeeze(0), "logits": logits1.squeeze(0)}
+        prec = 1e-10
+        torch.testing.assert_close(
+            ret0["updated_coord"], ret1["updated_coord"], rtol=prec, atol=prec
+        )
+        torch.testing.assert_close(ret0["logits"], ret1["logits"], rtol=prec, atol=prec)
+
+
+@unittest.skip("support of the denoise is temporally disabled")
+class TestDenoiseModelDPA1(unittest.TestCase, TransDenoiseTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_dpa1)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+@unittest.skip("support of the denoise is temporally disabled")
+class TestDenoiseModelDPA2(unittest.TestCase, TransDenoiseTest):
+    def setUp(self):
+        model_params_sample = copy.deepcopy(model_dpa2)
+        model_params_sample["descriptor"]["rcut"] = model_params_sample["descriptor"][
+            "repinit_rcut"
+        ]
+        model_params_sample["descriptor"]["sel"] = model_params_sample["descriptor"][
+            "repinit_nsel"
+        ]
+        model_params = copy.deepcopy(model_dpa2)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+@unittest.skip("hybrid not supported at the moment")
+class TestDenoiseModelHybrid(unittest.TestCase, TransDenoiseTest):
+    def setUp(self):
+        model_params = copy.deepcopy(model_hybrid)
+        self.type_split = True
+        self.model = get_model(model_params).to(env.DEVICE)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/test_unused_params.py b/source/tests/pt/model/test_unused_params.py
new file mode 100644
index 0000000000..a3c93cbe68
--- /dev/null
+++ b/source/tests/pt/model/test_unused_params.py
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+import unittest
+
+import torch
+
+from deepmd.pt.infer.deep_eval import (
+    eval_model,
+)
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.utils import (
+    env,
+)
+
+from .test_permutation import (
+    model_dpa2,
+)
+
+dtype = torch.float64
+
+
+class TestUnusedParamsDPA2(unittest.TestCase):
+    def test_unused(self):
+        import itertools
+
+        for conv, drrd, grrg, attn1, g1g1, attn2, h2 in itertools.product(
+            [True],
+            [True],
+            [True],
+            [True],
+            [True],
+            [True],
+            [True],
+        ):
+            if (not drrd) and (not grrg) and h2:
+                # skip the case h2 is not envolved
+                continue
+            if (not grrg) and (not conv):
+                # skip the case g2 is not envolved
+                continue
+            model = copy.deepcopy(model_dpa2)
+            model["descriptor"]["rcut"] = model["descriptor"]["repinit_rcut"]
+            model["descriptor"]["sel"] = model["descriptor"]["repinit_nsel"]
+            model["descriptor"]["repformer_nlayers"] = 2
+            # model["descriptor"]["combine_grrg"] = cmbg2
+            model["descriptor"]["repformer_update_g1_has_conv"] = conv
+            model["descriptor"]["repformer_update_g1_has_drrd"] = drrd
+            model["descriptor"]["repformer_update_g1_has_grrg"] = grrg
+            model["descriptor"]["repformer_update_g1_has_attn"] = attn1
+            model["descriptor"]["repformer_update_g2_has_g1g1"] = g1g1
+            model["descriptor"]["repformer_update_g2_has_attn"] = attn2
+            model["descriptor"]["repformer_update_h2"] = h2
+            model["fitting_net"]["neuron"] = [12, 12, 12]
+            self._test_unused(model)
+
+    def _test_unused(self, model_params):
+        self.model = get_model(model_params).to(env.DEVICE)
+        natoms = 5
+        cell = torch.rand([3, 3], dtype=dtype, device=env.DEVICE)
+        cell = (cell + cell.T) + 5.0 * torch.eye(3, device=env.DEVICE)
+        coord = torch.rand([natoms, 3], dtype=dtype, device=env.DEVICE)
+        coord = torch.matmul(coord, cell)
+        atype = torch.IntTensor([0, 0, 0, 1, 1]).to(env.DEVICE)
+        idx_perm = [1, 0, 4, 3, 2]
+        result_0 = eval_model(self.model, coord.unsqueeze(0), cell.unsqueeze(0), atype)
+        test_keys = ["energy", "force", "virial"]
+        ret0 = {key: result_0[key].squeeze(0) for key in test_keys}
+
+        # use computation graph to find all contributing tensors
+        def get_contributing_params(y, top_level=True):
+            nf = y.grad_fn.next_functions if top_level else y.next_functions
+            for f, _ in nf:
+                try:
+                    yield f.variable
+                except AttributeError:
+                    pass  # node has no tensor
+                if f is not None:
+                    yield from get_contributing_params(f, top_level=False)
+
+        contributing_parameters = set(get_contributing_params(ret0["energy"]))
+        all_parameters = set(self.model.parameters())
+        non_contributing = all_parameters - contributing_parameters
+        self.assertEqual(len(non_contributing), 0)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/model/water/data/data_0/set.000/box.npy b/source/tests/pt/model/water/data/data_0/set.000/box.npy
new file mode 100644
index 0000000000..6ad2de625b
Binary files /dev/null and b/source/tests/pt/model/water/data/data_0/set.000/box.npy differ
diff --git a/source/tests/pt/model/water/data/data_0/set.000/coord.npy b/source/tests/pt/model/water/data/data_0/set.000/coord.npy
new file mode 100644
index 0000000000..8bd448b125
Binary files /dev/null and b/source/tests/pt/model/water/data/data_0/set.000/coord.npy differ
diff --git a/source/tests/pt/model/water/data/data_0/set.000/energy.npy b/source/tests/pt/model/water/data/data_0/set.000/energy.npy
new file mode 100644
index 0000000000..d03db103f5
Binary files /dev/null and b/source/tests/pt/model/water/data/data_0/set.000/energy.npy differ
diff --git a/source/tests/pt/model/water/data/data_0/set.000/force.npy b/source/tests/pt/model/water/data/data_0/set.000/force.npy
new file mode 100644
index 0000000000..10b2ab83a2
Binary files /dev/null and b/source/tests/pt/model/water/data/data_0/set.000/force.npy differ
diff --git a/source/tests/finetune/data/type.raw b/source/tests/pt/model/water/data/data_0/type.raw
similarity index 100%
rename from source/tests/finetune/data/type.raw
rename to source/tests/pt/model/water/data/data_0/type.raw
diff --git a/source/tests/finetune/data/type_map.raw b/source/tests/pt/model/water/data/data_0/type_map.raw
similarity index 100%
rename from source/tests/finetune/data/type_map.raw
rename to source/tests/pt/model/water/data/data_0/type_map.raw
diff --git a/source/tests/pt/model/water/data/single/set.000/box.npy b/source/tests/pt/model/water/data/single/set.000/box.npy
new file mode 100644
index 0000000000..65897e0f9c
Binary files /dev/null and b/source/tests/pt/model/water/data/single/set.000/box.npy differ
diff --git a/source/tests/pt/model/water/data/single/set.000/coord.npy b/source/tests/pt/model/water/data/single/set.000/coord.npy
new file mode 100644
index 0000000000..6e0594a803
Binary files /dev/null and b/source/tests/pt/model/water/data/single/set.000/coord.npy differ
diff --git a/source/tests/pt/model/water/data/single/set.000/energy.npy b/source/tests/pt/model/water/data/single/set.000/energy.npy
new file mode 100644
index 0000000000..a0a88fb78a
Binary files /dev/null and b/source/tests/pt/model/water/data/single/set.000/energy.npy differ
diff --git a/source/tests/pt/model/water/data/single/set.000/force.npy b/source/tests/pt/model/water/data/single/set.000/force.npy
new file mode 100644
index 0000000000..d5b847a86e
Binary files /dev/null and b/source/tests/pt/model/water/data/single/set.000/force.npy differ
diff --git a/source/tests/pt/model/water/data/single/type.raw b/source/tests/pt/model/water/data/single/type.raw
new file mode 100644
index 0000000000..97e8fdfcf8
--- /dev/null
+++ b/source/tests/pt/model/water/data/single/type.raw
@@ -0,0 +1,192 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
diff --git a/source/tests/finetune/data_mixed_type/type_map.raw b/source/tests/pt/model/water/data/single/type_map.raw
similarity index 100%
rename from source/tests/finetune/data_mixed_type/type_map.raw
rename to source/tests/pt/model/water/data/single/type_map.raw
diff --git a/source/tests/pt/model/water/data/zbl_tab_potential/H2O_tab_potential.txt b/source/tests/pt/model/water/data/zbl_tab_potential/H2O_tab_potential.txt
new file mode 100644
index 0000000000..66fcb8e946
--- /dev/null
+++ b/source/tests/pt/model/water/data/zbl_tab_potential/H2O_tab_potential.txt
@@ -0,0 +1,1000 @@
+0.0010    913709.625838    114389.26607   14320.660836      25838    114389.26607   14320.660836
+0.0020    453190.075792    56822.165078    7124.559066      75792    56822.165078    7124.559066
+0.0030    299716.609389    37635.860646    4726.059712      09389    37635.860646    4726.059712
+0.0040    223004.208152    28044.724786    3526.959232      08152    28044.724786    3526.959232
+0.0050    176995.875921    22291.632310    2807.616935      75921    22291.632310    2807.616935
+0.0060    146339.286793    18457.541826    2328.152606      86793    18457.541826    2328.152606
+0.0070    124454.877677    15720.007305    1985.760451      77677    15720.007305    1985.760451
+0.0080    108052.871443    13667.805976    1729.037583      71443    13667.805976    1729.037583
+0.0090    95305.6179694    12072.480958    1529.426853      79694    12072.480958    1529.426853
+0.0100    85116.5305655    10796.958308    1369.793979      05655    10796.958308    1369.793979
+0.0110    76787.7843454    9754.0093240    1239.235334      43454    9754.0093240    1239.235334
+0.0120    69854.1654175    8885.4816862    1130.481842      54175    8885.4816862    1130.481842
+0.0130    63993.6050636    8151.1162355    1038.501071      50636    8151.1162355    1038.501071
+0.0140    58976.0564146    7522.1565542    959.6984312      64146    7522.1565542    959.6984312
+0.0150    54632.8204564    6977.5147177    891.4378965      04564    6977.5147177    891.4378965
+0.0160    50837.3747846    6501.3748881    831.7424519      47846    6501.3748881    831.7424519
+0.0170    47492.9686820    6081.6426991    779.1002669      86820    6081.6426991    779.1002669
+0.0180    44524.3531708    5708.9115119    732.3354774      31708    5708.9115119    732.3354774
+0.0190    41872.1226283    5375.7551174    690.5197734      26283    5375.7551174    690.5197734
+0.0200    39488.7539185    5076.2326272    652.9105109      39185    5076.2326272    652.9105109
+0.0210    37335.7772003    4805.5348243    618.9065049      72003    4805.5348243    618.9065049
+0.0220    35381.7183353    4559.7269643    588.0158802      83353    4559.7269643    588.0158802
+0.0230    33600.5780582    4335.5586712    559.8323083      80582    4335.5586712    559.8323083
+0.0240    31970.6913556    4130.3213594    534.0171847      13556    4130.3213594    534.0171847
+0.0250    30473.8605947    3941.7398750    510.2860845      05947    3941.7398750    510.2860845
+0.0260    29094.6886984    3767.8891424    488.3983430      86984    3767.8891424    488.3983430
+0.0270    27820.0605059    3607.1293341    468.1489521      05059    3607.1293341    468.1489521
+0.0280    26638.7352692    3458.0549325    449.3621928      52692    3458.0549325    449.3621928
+0.0290    25541.0234620    3319.4543312    431.8865855      34620    3319.4543312    431.8865855
+0.0300    24518.5282265    3190.2775152    415.5908499      82265    3190.2775152    415.5908499
+0.0310    23563.9368637    3069.6099976    400.3606472      68637    3069.6099976    400.3606472
+0.0320    22670.8514191    2956.6516420    386.0959329      14191    2956.6516420    386.0959329
+0.0330    21833.6500715    2850.6993366    372.7087910      00715    2850.6993366    372.7087910
+0.0340    21047.3729830    2751.1327248    360.1216502      29830    2751.1327248    360.1216502
+0.0350    20307.6277175    2657.4023825    348.2658062      77175    2657.4023825    348.2658062
+0.0360    19610.5104235    2569.0199661    337.0801904      04235    2569.0199661    337.0801904
+0.0370    18952.5397978    2485.5499575    326.5103374      97978    2485.5499575    326.5103374
+0.0380    18330.6014769    2406.6027127    316.5075168      14769    2406.6027127    316.5075168
+0.0390    17741.9009829    2331.8285805    307.0279975      09829    2331.8285805    307.0279975
+0.0400    17183.9237284    2260.9129024    298.0324229      37284    2260.9129024    298.0324229
+0.0410    16654.4008745    2193.5717452    289.4852776      08745    2193.5717452    289.4852776
+0.0420    16151.2800661    2129.5482423    281.3544296      00661    2129.5482423    281.3544296
+0.0430    15672.7002509    2068.6094464    273.6107373      02509    2068.6094464    273.6107373
+0.0440    15216.9699328    2010.5436107    266.2277097      99328    2010.5436107    266.2277097
+0.0450    14782.5483242    1955.1578329    259.1812115      83242    1955.1578329    259.1812115
+0.0460    14368.0289580    1902.2760069    252.4492073      89580    1902.2760069    252.4492073
+0.0470    13972.1253902    1851.7370350    246.0115383      53902    1851.7370350    246.0115383
+0.0480    13593.6586918    1803.3932646    239.8497262      86918    1803.3932646    239.8497262
+0.0490    13231.5464708    1757.1091159    233.9468027      64708    1757.1091159    233.9468027
+0.0500    12884.7932124    1712.7598740    228.2871576      32124    1712.7598740    228.2871576
+0.0510    12552.4817558    1670.2306236    222.8564060      17558    1670.2306236    222.8564060
+0.0520    12233.7657548    1629.4153064    217.6412705      57548    1629.4153064    217.6412705
+0.0530    11927.8629910    1590.2158855    212.6294767      29910    1590.2158855    212.6294767
+0.0540    11634.0494314    1552.5416017    207.8096602      94314    1552.5416017    207.8096602
+0.0550    11351.6539336    1516.3083123    203.1712838      39336    1516.3083123    203.1712838
+0.0560    11080.0535186    1481.4378999    198.7045641      35186    1481.4378999    198.7045641
+0.0570    10818.6691413    1447.8577434    194.4004048      91413    1447.8577434    194.4004048
+0.0580    10566.9618984    1415.5002442    190.2503376      18984    1415.5002442    190.2503376
+0.0590    10324.4296227    1384.3024001    186.2464693      96227    1384.3024001    186.2464693
+0.0600    10090.6038173    1354.2054222    182.3814335      38173    1354.2054222    182.3814335
+0.0610    9865.0468917    1325.1543893    178.6483475       68917    1325.1543893    178.6483475
+0.0620    9647.3496659    1297.0979357    175.0407734       96659    1297.0979357    175.0407734
+0.0630    9437.1291115    1269.9879689    171.5526826       91115    1269.9879689    171.5526826
+0.0640    9234.0263053    1243.7794136    168.1784240       63053    1243.7794136    168.1784240
+0.0650    9037.7045714    1218.4299795    164.9126949       45714    1218.4299795    164.9126949
+0.0660    8847.8477928    1193.8999501    161.7505145       77928    1193.8999501    161.7505145
+0.0670    8664.1588738    1170.1519903    158.6871999       88738    1170.1519903    158.6871999
+0.0680    8486.3583383    1147.1509714    155.7183444       83383    1147.1509714    155.7183444
+0.0690    8314.1830501    1124.8638108    152.8397972       30501    1124.8638108    152.8397972
+0.0700    8147.3850427    1103.2593259    150.0476452       50427    1103.2593259    150.0476452
+0.0710    7985.7304489    1082.3080999    147.3381963       04489    1082.3080999    147.3381963
+0.0720    7828.9985183    1061.9823592    144.7079640       85183    1061.9823592    144.7079640
+0.0730    7676.9807165    1042.2558606    142.1536534       07165    1042.2558606    142.1536534
+0.0740    7529.4798977    1023.1037878    139.6721482       98977    1023.1037878    139.6721482
+0.0750    7386.3095424    1004.5026562    137.2604986       95424    1004.5026562    137.2604986
+0.0760    7247.2930565    986.4302250    134.9159107        930565    986.4302250    134.9159107
+0.0770    7112.2631243    968.8654164    132.6357361        631243    968.8654164    132.6357361
+0.0780    6981.0611116    951.7882409    130.4174626        611116    951.7882409    130.4174626
+0.0790    6853.5365143    935.1797284    128.2587056        365143    935.1797284    128.2587056
+0.0800    6729.5464483    919.0218641    126.1572004        464483    919.0218641    126.1572004
+0.0810    6608.9551768    903.2975297    124.1107942        551768    903.2975297    124.1107942
+0.0820    6491.6336731    887.9904484    122.1174397        336731    887.9904484    122.1174397
+0.0830    6377.4592142    873.0851342    120.1751889        592142    873.0851342    120.1751889
+0.0840    6266.3150042    858.5668449    118.2821865        150042    858.5668449    118.2821865
+0.0850    6158.0898234    844.4215378    116.4366651        898234    844.4215378    116.4366651
+0.0860    6052.6777030    830.6358295    114.6369400        777030    830.6358295    114.6369400
+0.0870    5949.9776216    817.1969572    112.8814040        776216    817.1969572    112.8814040
+0.0880    5849.8932223    804.0927442    111.1685235        932223    804.0927442    111.1685235
+0.0890    5752.3325494    791.3115660    109.4968341        325494    791.3115660    109.4968341
+0.0900    5657.2078026    778.8423203    107.8649368        078026    778.8423203    107.8649368
+0.0910    5564.4351069    766.6743978    106.2714943        351069    766.6743978    106.2714943
+0.0920    5473.9342981    754.7976551    104.7152279        342981    754.7976551    104.7152279
+0.0930    5385.6287222    743.2023904    103.1949141        287222    743.2023904    103.1949141
+0.0940    5299.4450471    731.8793190    101.7093819        450471    731.8793190    101.7093819
+0.0950    5215.3130867    720.8195518    100.2575097        130867    720.8195518    100.2575097
+0.0960    5133.1656359    710.0145745    98.8382229     1656359    710.0145745    98.8382229
+0.0970    5052.9383157    699.4562281    97.4504918     9383157    699.4562281    97.4504918
+0.0980    4974.5694279    689.1366911    96.0933285     5694279    689.1366911    96.0933285
+0.0990    4897.9998188    679.0484617    94.7657857     9998188    679.0484617    94.7657857
+0.1000    4823.1727507    669.1843423    93.4669540     1727507    669.1843423    93.4669540
+0.1010    4750.0337815    659.5374244    92.1959604     0337815    659.5374244    92.1959604
+0.1020    4678.5306510    650.1010737    90.9519663     5306510    650.1010737    90.9519663
+0.1030    4608.6131741    640.8689177    89.7341659     6131741    640.8689177    89.7341659
+0.1040    4540.2331402    631.8348323    88.5417846     2331402    631.8348323    88.5417846
+0.1050    4473.3442182    622.9929301    87.3740776     3442182    622.9929301    87.3740776
+0.1060    4407.9018671    614.3375495    86.2303284     9018671    614.3375495    86.2303284
+0.1070    4343.8632512    605.8632435    85.1098475     8632512    605.8632435    85.1098475
+0.1080    4281.1871608    597.5647703    84.0119712     1871608    597.5647703    84.0119712
+0.1090    4219.8339365    589.4370834    82.9360602     8339365    589.4370834    82.9360602
+0.1100    4159.7653981    581.4753230    81.8814988     7653981    581.4753230    81.8814988
+0.1110    4100.9447770    573.6748074    80.8476936     9447770    573.6748074    80.8476936
+0.1120    4043.3366528    566.0310249    79.8340726     3366528    566.0310249    79.8340726
+0.1130    3986.9068928    558.5396262    78.8400844     9068928    558.5396262    78.8400844
+0.1140    3931.6225949    551.1964175    77.8651968     6225949    551.1964175    77.8651968
+0.1150    3877.4520333    543.9973537    76.9088966     4520333    543.9973537    76.9088966
+0.1160    3824.3646071    536.9385314    75.9706883     3646071    536.9385314    75.9706883
+0.1170    3772.3307921    530.0161836    75.0500935     3307921    530.0161836    75.0500935
+0.1180    3721.3220939    523.2266731    74.1466504     3220939    523.2266731    74.1466504
+0.1190    3671.3110047    516.5664876    73.2599126     3110047    516.5664876    73.2599126
+0.1200    3622.2709610    510.0322343    72.3894489     2709610    510.0322343    72.3894489
+0.1210    3574.1763045    503.6206346    71.5348425     1763045    503.6206346    71.5348425
+0.1220    3527.0022444    497.3285198    70.6956904     0022444    497.3285198    70.6956904
+0.1230    3480.7248214    491.1528264    69.8716028     7248214    491.1528264    69.8716028
+0.1240    3435.3208739    485.0905919    69.0622028     3208739    485.0905919    69.0622028
+0.1250    3390.7680053    479.1389505    68.2671256     7680053    479.1389505    68.2671256
+0.1260    3347.0445536    473.2951298    67.4860180     0445536    473.2951298    67.4860180
+0.1270    3304.1295618    467.5564462    66.7185382     1295618    467.5564462    66.7185382
+0.1280    3262.0027498    461.9203022    65.9643553     0027498    461.9203022    65.9643553
+0.1290    3220.6444879    456.3841826    65.2231486     6444879    456.3841826    65.2231486
+0.1300    3180.0357713    450.9456517    64.4946075     0357713    450.9456517    64.4946075
+0.1310    3140.1581958    445.6023495    63.7784310     1581958    445.6023495    63.7784310
+0.1320    3100.9939349    440.3519898    63.0743273     9939349    440.3519898    63.0743273
+0.1330    3062.5257173    435.1923563    62.3820136     5257173    435.1923563    62.3820136
+0.1340    3024.7368060    430.1213011    61.7012156     7368060    430.1213011    61.7012156
+0.1350    2987.6109783    425.1367411    61.0316673     6109783    425.1367411    61.0316673
+0.1360    2951.1325064    420.2366563    60.3731105     1325064    420.2366563    60.3731105
+0.1370    2915.2861387    415.4190873    59.7252948     2861387    415.4190873    59.7252948
+0.1380    2880.0570829    410.6821328    59.0879771     0570829    410.6821328    59.0879771
+0.1390    2845.4309885    406.0239478    58.4609214     4309885    406.0239478    58.4609214
+0.1400    2811.3939311    401.4427415    57.8438986     3939311    401.4427415    57.8438986
+0.1410    2777.9323966    396.9367752    57.2366861     9323966    396.9367752    57.2366861
+0.1420    2745.0332671    392.5043608    56.6390678     0332671    392.5043608    56.6390678
+0.1430    2712.6838060    388.1438584    56.0508336     6838060    388.1438584    56.0508336
+0.1440    2680.8716450    383.8536753    55.4717796     8716450    383.8536753    55.4717796
+0.1450    2649.5847710    379.6322639    54.9017073     5847710    379.6322639    54.9017073
+0.1460    2618.8115134    375.4781203    54.3404239     8115134    375.4781203    54.3404239
+0.1470    2588.5405327    371.3897825    53.7877420     5405327    371.3897825    53.7877420
+0.1480    2558.7608086    367.3658297    53.2434792     7608086    367.3658297    53.2434792
+0.1490    2529.4616294    363.4048800    52.7074581     4616294    363.4048800    52.7074581
+0.1500    2500.6325811    359.5055896    52.1795062     6325811    359.5055896    52.1795062
+0.1510    2472.2635377    355.6666516    51.6594557     2635377    355.6666516    51.6594557
+0.1520    2444.3446512    351.8867943    51.1471432     3446512    351.8867943    51.1471432
+0.1530    2416.8663423    348.1647806    50.6424097     8663423    348.1647806    50.6424097
+0.1540    2389.8192919    344.4994064    50.1451003     8192919    344.4994064    50.1451003
+0.1550    2363.1944319    340.8894997    49.6550644     1944319    340.8894997    49.6550644
+0.1560    2336.9829372    337.3339196    49.1721551     9829372    337.3339196    49.1721551
+0.1570    2311.1762181    333.8315552    48.6962295     1762181    333.8315552    48.6962295
+0.1580    2285.7659120    330.3813249    48.2271484     7659120    330.3813249    48.2271484
+0.1590    2260.7438767    326.9821749    47.7647759     7438767    326.9821749    47.7647759
+0.1600    2236.1021827    323.6330788    47.3089799     1021827    323.6330788    47.3089799
+0.1610    2211.8331072    320.3330368    46.8596315     8331072    320.3330368    46.8596315
+0.1620    2187.9291268    317.0810744    46.4166050     9291268    317.0810744    46.4166050
+0.1630    2164.3829117    313.8762419    45.9797781     3829117    313.8762419    45.9797781
+0.1640    2141.1873194    310.7176137    45.5490312     1873194    310.7176137    45.5490312
+0.1650    2118.3353890    307.6042874    45.1242479     3353890    307.6042874    45.1242479
+0.1660    2095.8203354    304.5353832    44.7053147     8203354    304.5353832    44.7053147
+0.1670    2073.6355442    301.5100431    44.2921206     6355442    301.5100431    44.2921206
+0.1680    2051.7745660    298.5274302    43.8845577     7745660    298.5274302    43.8845577
+0.1690    2030.2311119    295.5867284    43.4825203     2311119    295.5867284    43.4825203
+0.1700    2008.9990482    292.6871414    43.0859057     9990482    292.6871414    43.0859057
+0.1710    1988.0723922    289.8278921    42.6946132     0723922    289.8278921    42.6946132
+0.1720    1967.4453070    287.0082225    42.3085448     4453070    287.0082225    42.3085448
+0.1730    1947.1120979    284.2273926    41.9276048     1120979    284.2273926    41.9276048
+0.1740    1927.0672078    281.4846800    41.5516997     0672078    281.4846800    41.5516997
+0.1750    1907.3052129    278.7793797    41.1807380     3052129    278.7793797    41.1807380
+0.1760    1887.8208195    276.1108033    40.8146308     8208195    276.1108033    40.8146308
+0.1770    1868.6088596    273.4782784    40.4532907     6088596    273.4782784    40.4532907
+0.1780    1849.6642873    270.8811486    40.0966328     6642873    270.8811486    40.0966328
+0.1790    1830.9821758    268.3187725    39.7445739     9821758    268.3187725    39.7445739
+0.1800    1812.5577133    265.7905239    39.3970327     5577133    265.7905239    39.3970327
+0.1810    1794.3862002    263.2957906    39.0539298     3862002    263.2957906    39.0539298
+0.1820    1776.4630458    260.8339748    38.7151877     4630458    260.8339748    38.7151877
+0.1830    1758.7837651    258.4044920    38.3807303     7837651    258.4044920    38.3807303
+0.1840    1741.3439759    256.0067712    38.0504836     3439759    256.0067712    38.0504836
+0.1850    1724.1393960    253.6402542    37.7243750     1393960    253.6402542    37.7243750
+0.1860    1707.1658404    251.3043952    37.4023336     1658404    251.3043952    37.4023336
+0.1870    1690.4192185    248.9986608    37.0842900     4192185    248.9986608    37.0842900
+0.1880    1673.8955316    246.7225293    36.7701764     8955316    246.7225293    36.7701764
+0.1890    1657.5908704    244.4754905    36.4599264     5908704    244.4754905    36.4599264
+0.1900    1641.5014126    242.2570456    36.1534752     5014126    242.2570456    36.1534752
+0.1910    1625.6234204    240.0667066    35.8507590     6234204    240.0667066    35.8507590
+0.1920    1609.9532382    237.9039960    35.5517159     9532382    237.9039960    35.5517159
+0.1930    1594.4872906    235.7684470    35.2562850     4872906    235.7684470    35.2562850
+0.1940    1579.2220803    233.6596024    34.9644066     2220803    233.6596024    34.9644066
+0.1950    1564.1541856    231.5770153    34.6760226     1541856    231.5770153    34.6760226
+0.1960    1549.2802589    229.5202480    34.3910759     2802589    229.5202480    34.3910759
+0.1970    1534.5970244    227.4888722    34.1095106     5970244    227.4888722    34.1095106
+0.1980    1520.1012763    225.4824686    33.8312720     1012763    225.4824686    33.8312720
+0.1990    1505.7898772    223.5006269    33.5563066     7898772    223.5006269    33.5563066
+0.2000    1491.6597561    221.5429453    33.2845619     6597561    221.5429453    33.2845619
+0.2010    1477.7079067    219.6090303    33.0159865     7079067    219.6090303    33.0159865
+0.2020    1463.9313857    217.6984967    32.7505301     9313857    217.6984967    32.7505301
+0.2030    1450.3273114    215.8109671    32.4881435     3273114    215.8109671    32.4881435
+0.2040    1436.8928620    213.9460720    32.2287782     8928620    213.9460720    32.2287782
+0.2050    1423.6252739    212.1034495    31.9723870     6252739    212.1034495    31.9723870
+0.2060    1410.5218407    210.2827449    31.7189234     5218407    210.2827449    31.7189234
+0.2070    1397.5799113    208.4836109    31.4683421     5799113    208.4836109    31.4683421
+0.2080    1384.7968886    206.7057069    31.2205985     7968886    206.7057069    31.2205985
+0.2090    1372.1702286    204.9486995    30.9756490     1702286    204.9486995    30.9756490
+0.2100    1359.6974383    203.2122618    30.7334506     6974383    203.2122618    30.7334506
+0.2110    1347.3760753    201.4960735    30.4939616     3760753    201.4960735    30.4939616
+0.2120    1335.2037458    199.7998204    30.2571406     2037458    199.7998204    30.2571406
+0.2130    1323.1781040    198.1231947    30.0229474     1781040    198.1231947    30.0229474
+0.2140    1311.2968506    196.4658948    29.7913425     2968506    196.4658948    29.7913425
+0.2150    1299.5577318    194.8276247    29.5622869     5577318    194.8276247    29.5622869
+0.2160    1287.9585380    193.2080943    29.3357428     9585380    193.2080943    29.3357428
+0.2170    1276.4971033    191.6070192    29.1116726     4971033    191.6070192    29.1116726
+0.2180    1265.1713036    190.0241204    28.8900399     1713036    190.0241204    28.8900399
+0.2190    1253.9790564    188.4591242    28.6708087     9790564    188.4591242    28.6708087
+0.2200    1242.9183194    186.9117623    28.4539438     9183194    186.9117623    28.4539438
+0.2210    1231.9870896    185.3817715    28.2394106     9870896    185.3817715    28.2394106
+0.2220    1221.1834024    183.8688934    28.0271751     1834024    183.8688934    28.0271751
+0.2230    1210.5053309    182.3728746    27.8172040     5053309    182.3728746    27.8172040
+0.2240    1199.9509845    180.8934666    27.6094647     9509845    180.8934666    27.6094647
+0.2250    1189.5185088    179.4304255    27.4039251     5185088    179.4304255    27.4039251
+0.2260    1179.2060841    177.9835117    27.2005537     2060841    177.9835117    27.2005537
+0.2270    1169.0119251    176.5524904    26.9993196     0119251    176.5524904    26.9993196
+0.2280    1158.9342796    175.1371309    26.8001924     9342796    175.1371309    26.8001924
+0.2290    1148.9714282    173.7372069    26.6031423     9714282    173.7372069    26.6031423
+0.2300    1139.1216834    172.3524963    26.4081402     1216834    172.3524963    26.4081402
+0.2310    1129.3833889    170.9827808    26.2151571     3833889    170.9827808    26.2151571
+0.2320    1119.7549187    169.6278463    26.0241650     7549187    169.6278463    26.0241650
+0.2330    1110.2346768    168.2874825    25.8351362     2346768    168.2874825    25.8351362
+0.2340    1100.8210961    166.9614829    25.6480433     8210961    166.9614829    25.6480433
+0.2350    1091.5126382    165.6496448    25.4628597     5126382    165.6496448    25.4628597
+0.2360    1082.3077924    164.3517691    25.2795591     3077924    164.3517691    25.2795591
+0.2370    1073.2050753    163.0676601    25.0981157     2050753    163.0676601    25.0981157
+0.2380    1064.2030300    161.7971257    24.9185042     2030300    161.7971257    24.9185042
+0.2390    1055.3002259    160.5399772    24.7406996     3002259    160.5399772    24.7406996
+0.2400    1046.4952577    159.2960293    24.5646775     4952577    159.2960293    24.5646775
+0.2410    1037.7867450    158.0650998    24.3904138     7867450    158.0650998    24.3904138
+0.2420    1029.1733319    156.8470097    24.2178849     1733319    156.8470097    24.2178849
+0.2430    1020.6536864    155.6415833    24.0470676     6536864    155.6415833    24.0470676
+0.2440    1012.2264998    154.4486478    23.8779390     2264998    154.4486478    23.8779390
+0.2450    1003.8904862    153.2680334    23.7104767     8904862    153.2680334    23.7104767
+0.2460    995.6443822    152.0995732    23.5446586      6443822    152.0995732    23.5446586
+0.2470    987.4869462    150.9431032    23.3804631      4869462    150.9431032    23.3804631
+0.2480    979.4169580    149.7984622    23.2178688      4169580    149.7984622    23.2178688
+0.2490    971.4332186    148.6654918    23.0568547      4332186    148.6654918    23.0568547
+0.2500    963.5345494    147.5440362    22.8974003      5345494    147.5440362    22.8974003
+0.2510    955.7197919    146.4339423    22.7394853      7197919    146.4339423    22.7394853
+0.2520    947.9878073    145.3350596    22.5830897      9878073    145.3350596    22.5830897
+0.2530    940.3374762    144.2472399    22.4281939      3374762    144.2472399    22.4281939
+0.2540    932.7676981    143.1703377    22.2747787      7676981    143.1703377    22.2747787
+0.2550    925.2773908    142.1042099    22.1228251      2773908    142.1042099    22.1228251
+0.2560    917.8654906    141.0487157    21.9723144      8654906    141.0487157    21.9723144
+0.2570    910.5309511    140.0037168    21.8232283      5309511    140.0037168    21.8232283
+0.2580    903.2727438    138.9690768    21.6755488      2727438    138.9690768    21.6755488
+0.2590    896.0898568    137.9446619    21.5292580      0898568    137.9446619    21.5292580
+0.2600    888.9812952    136.9303404    21.3843385      9812952    136.9303404    21.3843385
+0.2610    881.9460805    135.9259827    21.2407731      9460805    135.9259827    21.2407731
+0.2620    874.9832499    134.9314613    21.0985449      9832499    134.9314613    21.0985449
+0.2630    868.0918568    133.9466506    20.9576372      0918568    133.9466506    20.9576372
+0.2640    861.2709696    132.9714274    20.8180337      2709696    132.9714274    20.8180337
+0.2650    854.5196721    132.0056702    20.6797182      5196721    132.0056702    20.6797182
+0.2660    847.8370627    131.0492595    20.5426748      8370627    131.0492595    20.5426748
+0.2670    841.2222545    130.1020778    20.4068880      2222545    130.1020778    20.4068880
+0.2680    834.6743747    129.1640092    20.2723424      6743747    129.1640092    20.2723424
+0.2690    828.1925646    128.2349399    20.1390228      1925646    128.2349399    20.1390228
+0.2700    821.7759790    127.3147578    20.0069143      7759790    127.3147578    20.0069143
+0.2710    815.4237863    126.4033526    19.8760022      4237863    126.4033526    19.8760022
+0.2720    809.1351680    125.5006157    19.7462722      1351680    125.5006157    19.7462722
+0.2730    802.9093184    124.6064402    19.6177099      9093184    124.6064402    19.6177099
+0.2740    796.7454448    123.7207207    19.4903015      7454448    123.7207207    19.4903015
+0.2750    790.6427665    122.8433537    19.3640330      6427665    122.8433537    19.3640330
+0.2760    784.6005152    121.9742372    19.2388909      6005152    121.9742372    19.2388909
+0.2770    778.6179347    121.1132707    19.1148619      6179347    121.1132707    19.1148619
+0.2780    772.6942802    120.2603553    18.9919327      6942802    120.2603553    18.9919327
+0.2790    766.8288187    119.4153936    18.8700905      8288187    119.4153936    18.8700905
+0.2800    761.0208283    118.5782897    18.7493224      0208283    118.5782897    18.7493224
+0.2810    755.2695984    117.7489490    18.6296158      2695984    117.7489490    18.6296158
+0.2820    749.5744291    116.9272787    18.5109583      5744291    116.9272787    18.5109583
+0.2830    743.9346311    116.1131870    18.3933378      9346311    116.1131870    18.3933378
+0.2840    738.3495259    115.3065837    18.2767421      3495259    115.3065837    18.2767421
+0.2850    732.8184450    114.5073800    18.1611595      8184450    114.5073800    18.1611595
+0.2860    727.3407300    113.7154881    18.0465783      3407300    113.7154881    18.0465783
+0.2870    721.9157327    112.9308219    17.9329869      9157327    112.9308219    17.9329869
+0.2880    716.5428142    112.1532963    17.8203740      5428142    112.1532963    17.8203740
+0.2890    711.2213456    111.3828276    17.7087284      2213456    111.3828276    17.7087284
+0.2900    705.9507071    110.6193334    17.5980392      9507071    110.6193334    17.5980392
+0.2910    700.7302882    109.8627322    17.4882954      7302882    109.8627322    17.4882954
+0.2920    695.5594874    109.1129441    17.3794864      5594874    109.1129441    17.3794864
+0.2930    690.4377121    108.3698900    17.2716016      4377121    108.3698900    17.2716016
+0.2940    685.3643785    107.6334922    17.1646306      3643785    107.6334922    17.1646306
+0.2950    680.3389114    106.9036741    17.0585633      3389114    106.9036741    17.0585633
+0.2960    675.3607438    106.1803600    16.9533894      3607438    106.1803600    16.9533894
+0.2970    670.4293171    105.4634755    16.8490991      4293171    105.4634755    16.8490991
+0.2980    665.5440809    104.7529472    16.7456826      5440809    104.7529472    16.7456826
+0.2990    660.7044927    104.0487027    16.6431300      7044927    104.0487027    16.6431300
+0.3000    655.9100179    103.3506708    16.5414320      9100179    103.3506708    16.5414320
+0.3010    651.1601295    102.6587812    16.4405792      1601295    102.6587812    16.4405792
+0.3020    646.4543081    101.9729644    16.3405621      4543081    101.9729644    16.3405621
+0.3030    641.7920419    101.2931523    16.2413718      7920419    101.2931523    16.2413718
+0.3040    637.1728262    100.6192774    16.1429992      1728262    100.6192774    16.1429992
+0.3050    632.5961636    99.9512734    16.0454353       .5961636    99.9512734    16.0454353
+0.3060    628.0615636    99.2890746    15.9486715       .0615636    99.2890746    15.9486715
+0.3070    623.5685430    98.6326167    15.8526991       .5685430    98.6326167    15.8526991
+0.3080    619.1166250    97.9818358    15.7575095       .1166250    97.9818358    15.7575095
+0.3090    614.7053397    97.3366692    15.6630943       .7053397    97.3366692    15.6630943
+0.3100    610.3342239    96.6970550    15.5694453       .3342239    96.6970550    15.5694453
+0.3110    606.0028208    96.0629321    15.4765543       .0028208    96.0629321    15.4765543
+0.3120    601.7106798    95.4342402    15.3844132       .7106798    95.4342402    15.3844132
+0.3130    597.4573568    94.8109200    15.2930139       .4573568    94.8109200    15.2930139
+0.3140    593.2424137    94.1929129    15.2023488       .2424137    94.1929129    15.2023488
+0.3150    589.0654185    93.5801610    15.1124099       .0654185    93.5801610    15.1124099
+0.3160    584.9259453    92.9726074    15.0231897       .9259453    92.9726074    15.0231897
+0.3170    580.8235739    92.3701958    14.9346807       .8235739    92.3701958    14.9346807
+0.3180    576.7578898    91.7728708    14.8468753       .7578898    91.7728708    14.8468753
+0.3190    572.7284843    91.1805775    14.7597662       .7284843    91.1805775    14.7597662
+0.3200    568.7349543    90.5932620    14.6733463       .7349543    90.5932620    14.6733463
+0.3210    564.7769020    90.0108711    14.5876082       .7769020    90.0108711    14.5876082
+0.3220    560.8539351    89.4333521    14.5025451       .8539351    89.4333521    14.5025451
+0.3230    556.9656667    88.8606532    14.4181498       .9656667    88.8606532    14.4181498
+0.3240    553.1117150    88.2927232    14.3344156       .1117150    88.2927232    14.3344156
+0.3250    549.2917033    87.7295116    14.2513356       .2917033    87.7295116    14.2513356
+0.3260    545.5052600    87.1709686    14.1689032       .5052600    87.1709686    14.1689032
+0.3270    541.7520185    86.6170450    14.0871117       .7520185    86.6170450    14.0871117
+0.3280    538.0316170    86.0676922    14.0059546       .0316170    86.0676922    14.0059546
+0.3290    534.3436986    85.5228624    13.9254255       .3436986    85.5228624    13.9254255
+0.3300    530.6879111    84.9825082    13.8455180       .6879111    84.9825082    13.8455180
+0.3310    527.0639069    84.4465831    13.7662258       .0639069    84.4465831    13.7662258
+0.3320    523.4713431    83.9150409    13.6875428       .4713431    83.9150409    13.6875428
+0.3330    519.9098812    83.3878361    13.6094628       .9098812    83.3878361    13.6094628
+0.3340    516.3791872    82.8649240    13.5319798       .3791872    82.8649240    13.5319798
+0.3350    512.8789315    82.3462602    13.4550878       .8789315    82.3462602    13.4550878
+0.3360    509.4087887    81.8318010    13.3787809       .4087887    81.8318010    13.3787809
+0.3370    505.9684377    81.3215032    13.3030534       .9684377    81.3215032    13.3030534
+0.3380    502.5575616    80.8153242    13.2278994       .5575616    80.8153242    13.2278994
+0.3390    499.1758475    80.3132218    13.1533134       .1758475    80.3132218    13.1533134
+0.3400    495.8229866    79.8151546    13.0792897       .8229866    79.8151546    13.0792897
+0.3410    492.4986741    79.3210816    13.0058227       .4986741    79.3210816    13.0058227
+0.3420    489.2026091    78.8309622    12.9329071       .2026091    78.8309622    12.9329071
+0.3430    485.9344945    78.3447564    12.8605374       .9344945    78.3447564    12.8605374
+0.3440    482.6940372    77.8624247    12.7887084       .6940372    77.8624247    12.7887084
+0.3450    479.4809474    77.3839282    12.7174147       .4809474    77.3839282    12.7174147
+0.3460    476.2949395    76.9092283    12.6466511       .2949395    76.9092283    12.6466511
+0.3470    473.1357312    76.4382870    12.5764126       .1357312    76.4382870    12.5764126
+0.3480    470.0030438    75.9710667    12.5066941       .0030438    75.9710667    12.5066941
+0.3490    466.8966023    75.5075304    12.4374905       .8966023    75.5075304    12.4374905
+0.3500    463.8161349    75.0476413    12.3687969       .8161349    75.0476413    12.3687969
+0.3510    460.7613734    74.5913633    12.3006084       .7613734    74.5913633    12.3006084
+0.3520    457.7320529    74.1386607    12.2329203       .7320529    74.1386607    12.2329203
+0.3530    454.7279118    73.6894981    12.1657276       .7279118    73.6894981    12.1657276
+0.3540    451.7486918    73.2438407    12.0990258       .7486918    73.2438407    12.0990258
+0.3550    448.7941378    72.8016540    12.0328101       .7941378    72.8016540    12.0328101
+0.3560    445.8639978    72.3629040    11.9670761       .8639978    72.3629040    11.9670761
+0.3570    442.9580230    71.9275570    11.9018190       .9580230    71.9275570    11.9018190
+0.3580    440.0759676    71.4955799    11.8370344       .0759676    71.4955799    11.8370344
+0.3590    437.2175888    71.0669398    11.7727180       .2175888    71.0669398    11.7727180
+0.3600    434.3826470    70.6416043    11.7088653       .3826470    70.6416043    11.7088653
+0.3610    431.5709052    70.2195415    11.6454719       .5709052    70.2195415    11.6454719
+0.3620    428.7821296    69.8007195    11.5825337       .7821296    69.8007195    11.5825337
+0.3630    426.0160891    69.3851072    11.5200463       .0160891    69.3851072    11.5200463
+0.3640    423.2725553    68.9726737    11.4580056       .2725553    68.9726737    11.4580056
+0.3650    420.5513029    68.5633884    11.3964074       .5513029    68.5633884    11.3964074
+0.3660    417.8521090    68.1572211    11.3352478       .8521090    68.1572211    11.3352478
+0.3670    415.1747536    67.7541421    11.2745225       .1747536    67.7541421    11.2745225
+0.3680    412.5190192    67.3541219    11.2142277       .5190192    67.3541219    11.2142277
+0.3690    409.8846910    66.9571314    11.1543594       .8846910    66.9571314    11.1543594
+0.3700    407.2715568    66.5631417    11.0949137       .2715568    66.5631417    11.0949137
+0.3710    404.6794069    66.1721246    11.0358867       .6794069    66.1721246    11.0358867
+0.3720    402.1080341    65.7840517    10.9772747       .1080341    65.7840517    10.9772747
+0.3730    399.5572337    65.3988954    10.9190739       .5572337    65.3988954    10.9190739
+0.3740    397.0268033    65.0166283    10.8612804       .0268033    65.0166283    10.8612804
+0.3750    394.5165432    64.6372231    10.8038908       .5165432    64.6372231    10.8038908
+0.3760    392.0262556    64.2606530    10.7469013       .0262556    64.2606530    10.7469013
+0.3770    389.5557456    63.8868916    10.6903083       .5557456    63.8868916    10.6903083
+0.3780    387.1048200    63.5159126    10.6341083       .1048200    63.5159126    10.6341083
+0.3790    384.6732884    63.1476901    10.5782978       .6732884    63.1476901    10.5782978
+0.3800    382.2609623    62.7821985    10.5228732       .2609623    62.7821985    10.5228732
+0.3810    379.8676555    62.4194124    10.4678312       .8676555    62.4194124    10.4678312
+0.3820    377.4931840    62.0593069    10.4131683       .4931840    62.0593069    10.4131683
+0.3830    375.1373659    61.7018572    10.3588812       .1373659    61.7018572    10.3588812
+0.3840    372.8000214    61.3470387    10.3049666       .8000214    61.3470387    10.3049666
+0.3850    370.4809729    60.9948274    10.2514211       .4809729    60.9948274    10.2514211
+0.3860    368.1800447    60.6451993    10.1982415       .1800447    60.6451993    10.1982415
+0.3870    365.8970633    60.2981307    10.1454247       .8970633    60.2981307    10.1454247
+0.3880    363.6318570    59.9535983    10.0929674       .6318570    59.9535983    10.0929674
+0.3890    361.3842561    59.6115790    10.0408664       .3842561    59.6115790    10.0408664
+0.3900    359.1540931    59.2720498    9.9891187        9.1540931    59.2720498    9.9891187
+0.3910    356.9412021    58.9349881    9.9377213        6.9412021    58.9349881    9.9377213
+0.3920    354.7454193    58.6003717    9.8866709        4.7454193    58.6003717    9.8866709
+0.3930    352.5665826    58.2681784    9.8359647        2.5665826    58.2681784    9.8359647
+0.3940    350.4045319    57.9383862    9.7855997        0.4045319    57.9383862    9.7855997
+0.3950    348.2591089    57.6109737    9.7355729        8.2591089    57.6109737    9.7355729
+0.3960    346.1301569    57.2859194    9.6858814        6.1301569    57.2859194    9.6858814
+0.3970    344.0175213    56.9632022    9.6365223        4.0175213    56.9632022    9.6365223
+0.3980    341.9210489    56.6428011    9.5874928        1.9210489    56.6428011    9.5874928
+0.3990    339.8405885    56.3246954    9.5387901        9.8405885    56.3246954    9.5387901
+0.4000    337.7759903    56.0088648    9.4904113        7.7759903    56.0088648    9.4904113
+0.4010    335.7271066    55.6952889    9.4423537        5.7271066    55.6952889    9.4423537
+0.4020    333.6937909    55.3839477    9.3946146        3.6937909    55.3839477    9.3946146
+0.4030    331.6758987    55.0748214    9.3471913        1.6758987    55.0748214    9.3471913
+0.4040    329.6732868    54.7678904    9.3000811        9.6732868    54.7678904    9.3000811
+0.4050    327.6858138    54.4631355    9.2532814        7.6858138    54.4631355    9.2532814
+0.4060    325.7133399    54.1605373    9.2067894        5.7133399    54.1605373    9.2067894
+0.4070    323.7557266    53.8600769    9.1606028        3.7557266    53.8600769    9.1606028
+0.4080    321.8128372    53.5617355    9.1147188        1.8128372    53.5617355    9.1147188
+0.4090    319.8845364    53.2654947    9.0691350        9.8845364    53.2654947    9.0691350
+0.4100    317.9706903    52.9713360    9.0238489        7.9706903    52.9713360    9.0238489
+0.4110    316.0711666    52.6792413    8.9788579        6.0711666    52.6792413    8.9788579
+0.4120    314.1858343    52.3891926    8.9341596        4.1858343    52.3891926    8.9341596
+0.4130    312.3145641    52.1011721    8.8897516        2.3145641    52.1011721    8.8897516
+0.4140    310.4572279    51.8151622    8.8456315        0.4572279    51.8151622    8.8456315
+0.4150    308.6136989    51.5311456    8.8017969        8.6136989    51.5311456    8.8017969
+0.4160    306.7838520    51.2491049    8.7582454        6.7838520    51.2491049    8.7582454
+0.4170    304.9675631    50.9690232    8.7149747        4.9675631    50.9690232    8.7149747
+0.4180    303.1647097    50.6908836    8.6719826        3.1647097    50.6908836    8.6719826
+0.4190    301.3751706    50.4146694    8.6292666        1.3751706    50.4146694    8.6292666
+0.4200    299.5988257    50.1403641    8.5868246        9.5988257    50.1403641    8.5868246
+0.4210    297.8355565    49.8679514    8.5446543        7.8355565    49.8679514    8.5446543
+0.4220    296.0852455    49.5974150    8.5027536        6.0852455    49.5974150    8.5027536
+0.4230    294.3477765    49.3287390    8.4611201        4.3477765    49.3287390    8.4611201
+0.4240    292.6230348    49.0619075    8.4197518        2.6230348    49.0619075    8.4197518
+0.4250    290.9109067    48.7969049    8.3786464        0.9109067    48.7969049    8.3786464
+0.4260    289.2112796    48.5337157    8.3378020        9.2112796    48.5337157    8.3378020
+0.4270    287.5240424    48.2723245    8.2972163        7.5240424    48.2723245    8.2972163
+0.4280    285.8490849    48.0127161    8.2568873        5.8490849    48.0127161    8.2568873
+0.4290    284.1862984    47.7548755    8.2168129        4.1862984    47.7548755    8.2168129
+0.4300    282.5355749    47.4987878    8.1769910        2.5355749    47.4987878    8.1769910
+0.4310    280.8968079    47.2444382    8.1374197        0.8968079    47.2444382    8.1374197
+0.4320    279.2698919    46.9918123    8.0980970        9.2698919    46.9918123    8.0980970
+0.4330    277.6547226    46.7408954    8.0590208        7.6547226    46.7408954    8.0590208
+0.4340    276.0511966    46.4916735    8.0201893        6.0511966    46.4916735    8.0201893
+0.4350    274.4592117    46.2441323    7.9816004        4.4592117    46.2441323    7.9816004
+0.4360    272.8786668    45.9982578    7.9432522        2.8786668    45.9982578    7.9432522
+0.4370    271.3094618    45.7540362    7.9051429        1.3094618    45.7540362    7.9051429
+0.4380    269.7514977    45.5114537    7.8672705        9.7514977    45.5114537    7.8672705
+0.4390    268.2046764    45.2704969    7.8296332        8.2046764    45.2704969    7.8296332
+0.4400    266.6689010    45.0311521    7.7922291        6.6689010    45.0311521    7.7922291
+0.4410    265.1440755    44.7934062    7.7550565        5.1440755    44.7934062    7.7550565
+0.4420    263.6301049    44.5572460    7.7181134        3.6301049    44.5572460    7.7181134
+0.4430    262.1268953    44.3226583    7.6813981        2.1268953    44.3226583    7.6813981
+0.4440    260.6343534    44.0896304    7.6449088        0.6343534    44.0896304    7.6449088
+0.4450    259.1523873    43.8581493    7.6086438        9.1523873    43.8581493    7.6086438
+0.4460    257.6809058    43.6282025    7.5726013        7.6809058    43.6282025    7.5726013
+0.4470    256.2198188    43.3997775    7.5367796        6.2198188    43.3997775    7.5367796
+0.4480    254.7690369    43.1728617    7.5011769        4.7690369    43.1728617    7.5011769
+0.4490    253.3284718    42.9474430    7.4657916        3.3284718    42.9474430    7.4657916
+0.4500    251.8980359    42.7235091    7.4306220        1.8980359    42.7235091    7.4306220
+0.4510    250.4776428    42.5010480    7.3956665        0.4776428    42.5010480    7.3956665
+0.4520    249.0672067    42.2800478    7.3609233        9.0672067    42.2800478    7.3609233
+0.4530    247.6666428    42.0604967    7.3263909        7.6666428    42.0604967    7.3263909
+0.4540    246.2758672    41.8423830    7.2920677        6.2758672    41.8423830    7.2920677
+0.4550    244.8947966    41.6256950    7.2579520        4.8947966    41.6256950    7.2579520
+0.4560    243.5233489    41.4104214    7.2240422        3.5233489    41.4104214    7.2240422
+0.4570    242.1614425    41.1965508    7.1903369        2.1614425    41.1965508    7.1903369
+0.4580    240.8089969    40.9840718    7.1568344        0.8089969    40.9840718    7.1568344
+0.4590    239.4659322    40.7729735    7.1235332        9.4659322    40.7729735    7.1235332
+0.4600    238.1321693    40.5632447    7.0904317        8.1321693    40.5632447    7.0904317
+0.4610    236.8076301    40.3548745    7.0575286        6.8076301    40.3548745    7.0575286
+0.4620    235.4922372    40.1478521    7.0248222        5.4922372    40.1478521    7.0248222
+0.4630    234.1859137    39.9421668    6.9923110        4.1859137    39.9421668    6.9923110
+0.4640    232.8885838    39.7378079    6.9599937        2.8885838    39.7378079    6.9599937
+0.4650    231.6001723    39.5347651    6.9278688        1.6001723    39.5347651    6.9278688
+0.4660    230.3206049    39.3330277    6.8959348        0.3206049    39.3330277    6.8959348
+0.4670    229.0498078    39.1325857    6.8641902        9.0498078    39.1325857    6.8641902
+0.4680    227.7877080    38.9334286    6.8326338        7.7877080    38.9334286    6.8326338
+0.4690    226.5342334    38.7355464    6.8012640        6.5342334    38.7355464    6.8012640
+0.4700    225.2893125    38.5389292    6.7700794        5.2893125    38.5389292    6.7700794
+0.4710    224.0528743    38.3435668    6.7390788        4.0528743    38.3435668    6.7390788
+0.4720    222.8248488    38.1494496    6.7082608        2.8248488    38.1494496    6.7082608
+0.4730    221.6051665    37.9565678    6.6776239        1.6051665    37.9565678    6.6776239
+0.4740    220.3937588    37.7649118    6.6471669        0.3937588    37.7649118    6.6471669
+0.4750    219.1905574    37.5744719    6.6168884        9.1905574    37.5744719    6.6168884
+0.4760    217.9954950    37.3852387    6.5867871        7.9954950    37.3852387    6.5867871
+0.4770    216.8085049    37.1972029    6.5568617        6.8085049    37.1972029    6.5568617
+0.4780    215.6295208    37.0103551    6.5271109        5.6295208    37.0103551    6.5271109
+0.4790    214.4584774    36.8246861    6.4975335        4.4584774    36.8246861    6.4975335
+0.4800    213.2953097    36.6401869    6.4681281        3.2953097    36.6401869    6.4681281
+0.4810    212.1399536    36.4568483    6.4388935        2.1399536    36.4568483    6.4388935
+0.4820    210.9923455    36.2746615    6.4098286        0.9923455    36.2746615    6.4098286
+0.4830    209.8524225    36.0936176    6.3809319        9.8524225    36.0936176    6.3809319
+0.4840    208.7201220    35.9137077    6.3522023        8.7201220    35.9137077    6.3522023
+0.4850    207.5953824    35.7349232    6.3236387        7.5953824    35.7349232    6.3236387
+0.4860    206.4781425    35.5572555    6.2952397        6.4781425    35.5572555    6.2952397
+0.4870    205.3683417    35.3806959    6.2670043        5.3683417    35.3806959    6.2670043
+0.4880    204.2659200    35.2052361    6.2389312        4.2659200    35.2052361    6.2389312
+0.4890    203.1708179    35.0308677    6.2110192        3.1708179    35.0308677    6.2110192
+0.4900    202.0829765    34.8575823    6.1832672        2.0829765    34.8575823    6.1832672
+0.4910    201.0023376    34.6853717    6.1556741        1.0023376    34.6853717    6.1556741
+0.4920    199.9288434    34.5142278    6.1282387        9.9288434    34.5142278    6.1282387
+0.4930    198.8624366    34.3441424    6.1009599        8.8624366    34.3441424    6.1009599
+0.4940    197.8030607    34.1751076    6.0738365        7.8030607    34.1751076    6.0738365
+0.4950    196.7506594    34.0071154    6.0468675        6.7506594    34.0071154    6.0468675
+0.4960    195.7051773    33.8401579    6.0200517        5.7051773    33.8401579    6.0200517
+0.4970    194.6665591    33.6742273    5.9933881        4.6665591    33.6742273    5.9933881
+0.4980    193.6347503    33.5093160    5.9668756        3.6347503    33.5093160    5.9668756
+0.4990    192.6096969    33.3454163    5.9405131        2.6096969    33.3454163    5.9405131
+0.5000    191.5913454    33.1825205    5.9142996        1.5913454    33.1825205    5.9142996
+0.5010    190.5796426    33.0206211    5.8882340        0.5796426    33.0206211    5.8882340
+0.5020    189.5745362    32.8597108    5.8623152        9.5745362    32.8597108    5.8623152
+0.5030    188.5759739    32.6997821    5.8365423        8.5759739    32.6997821    5.8365423
+0.5040    187.5839043    32.5408276    5.8109141        7.5839043    32.5408276    5.8109141
+0.5050    186.5982763    32.3828402    5.7854298        6.5982763    32.3828402    5.7854298
+0.5060    185.6190392    32.2258127    5.7600882        5.6190392    32.2258127    5.7600882
+0.5070    184.6461430    32.0697379    5.7348884        4.6461430    32.0697379    5.7348884
+0.5080    183.6795379    31.9146087    5.7098294        3.6795379    31.9146087    5.7098294
+0.5090    182.7191747    31.7604183    5.6849101        2.7191747    31.7604183    5.6849101
+0.5100    181.7650046    31.6071595    5.6601298        1.7650046    31.6071595    5.6601298
+0.5110    180.8169795    31.4548256    5.6354873        0.8169795    31.4548256    5.6354873
+0.5120    179.8750513    31.3034097    5.6109817        9.8750513    31.3034097    5.6109817
+0.5130    178.9391727    31.1529051    5.5866120        8.9391727    31.1529051    5.5866120
+0.5140    178.0092967    31.0033051    5.5623774        8.0092967    31.0033051    5.5623774
+0.5150    177.0853767    30.8546031    5.5382769        7.0853767    30.8546031    5.5382769
+0.5160    176.1673666    30.7067924    5.5143096        6.1673666    30.7067924    5.5143096
+0.5170    175.2552207    30.5598666    5.4904745        5.2552207    30.5598666    5.4904745
+0.5180    174.3488937    30.4138191    5.4667707        4.3488937    30.4138191    5.4667707
+0.5190    173.4483407    30.2686436    5.4431974        3.4483407    30.2686436    5.4431974
+0.5200    172.5535172    30.1243337    5.4197536        2.5535172    30.1243337    5.4197536
+0.5210    171.6643793    29.9808832    5.3964385        1.6643793    29.9808832    5.3964385
+0.5220    170.7808831    29.8382858    5.3732511        0.7808831    29.8382858    5.3732511
+0.5230    169.9029855    29.6965352    5.3501907        9.9029855    29.6965352    5.3501907
+0.5240    169.0306436    29.5556254    5.3272563        9.0306436    29.5556254    5.3272563
+0.5250    168.1638148    29.4155503    5.3044471        8.1638148    29.4155503    5.3044471
+0.5260    167.3024571    29.2763038    5.2817622        7.3024571    29.2763038    5.2817622
+0.5270    166.4465288    29.1378801    5.2592009        6.4465288    29.1378801    5.2592009
+0.5280    165.5959885    29.0002730    5.2367621        5.5959885    29.0002730    5.2367621
+0.5290    164.7507952    28.8634769    5.2144453        4.7507952    28.8634769    5.2144453
+0.5300    163.9109083    28.7274858    5.1922494        3.9109083    28.7274858    5.1922494
+0.5310    163.0762876    28.5922939    5.1701737        3.0762876    28.5922939    5.1701737
+0.5320    162.2468931    28.4578957    5.1482173        2.2468931    28.4578957    5.1482173
+0.5330    161.4226854    28.3242853    5.1263796        1.4226854    28.3242853    5.1263796
+0.5340    160.6036253    28.1914571    5.1046596        0.6036253    28.1914571    5.1046596
+0.5350    159.7896739    28.0594056    5.0830567        9.7896739    28.0594056    5.0830567
+0.5360    158.9807927    27.9281253    5.0615699        8.9807927    27.9281253    5.0615699
+0.5370    158.1769437    27.7976106    5.0401986        8.1769437    27.7976106    5.0401986
+0.5380    157.3780890    27.6678562    5.0189420        7.3780890    27.6678562    5.0189420
+0.5390    156.5841911    27.5388565    4.9977992        6.5841911    27.5388565    4.9977992
+0.5400    155.7952129    27.4106063    4.9767696        5.7952129    27.4106063    4.9767696
+0.5410    155.0111177    27.2831003    4.9558524        5.0111177    27.2831003    4.9558524
+0.5420    154.2318688    27.1563333    4.9350468        4.2318688    27.1563333    4.9350468
+0.5430    153.4574302    27.0302999    4.9143522        3.4574302    27.0302999    4.9143522
+0.5440    152.6877660    26.9049950    4.8937677        2.6877660    26.9049950    4.8937677
+0.5450    151.9228407    26.7804136    4.8732926        1.9228407    26.7804136    4.8732926
+0.5460    151.1626191    26.6565505    4.8529263        1.1626191    26.6565505    4.8529263
+0.5470    150.4070662    26.5334007    4.8326680        0.4070662    26.5334007    4.8326680
+0.5480    149.6561475    26.4109592    4.8125170        9.6561475    26.4109592    4.8125170
+0.5490    148.9098286    26.2892210    4.7924726        8.9098286    26.2892210    4.7924726
+0.5500    148.1680756    26.1681812    4.7725341        8.1680756    26.1681812    4.7725341
+0.5510    147.4308547    26.0478349    4.7527008        7.4308547    26.0478349    4.7527008
+0.5520    146.6981325    25.9281774    4.7329721        6.6981325    25.9281774    4.7329721
+0.5530    145.9698760    25.8092038    4.7133471        5.9698760    25.8092038    4.7133471
+0.5540    145.2460523    25.6909093    4.6938253        5.2460523    25.6909093    4.6938253
+0.5550    144.5266287    25.5732893    4.6744060        4.5266287    25.5732893    4.6744060
+0.5560    143.8115732    25.4563391    4.6550885        3.8115732    25.4563391    4.6550885
+0.5570    143.1008536    25.3400540    4.6358722        3.1008536    25.3400540    4.6358722
+0.5580    142.3944382    25.2244294    4.6167564        2.3944382    25.2244294    4.6167564
+0.5590    141.6922957    25.1094608    4.5977404        1.6922957    25.1094608    4.5977404
+0.5600    140.9943949    24.9951437    4.5788237        0.9943949    24.9951437    4.5788237
+0.5610    140.3007048    24.8814735    4.5600055        0.3007048    24.8814735    4.5600055
+0.5620    139.6111948    24.7684458    4.5412852        9.6111948    24.7684458    4.5412852
+0.5630    138.9258345    24.6560562    4.5226623        8.9258345    24.6560562    4.5226623
+0.5640    138.2445939    24.5443004    4.5041360        8.2445939    24.5443004    4.5041360
+0.5650    137.5674430    24.4331739    4.4857058        7.5674430    24.4331739    4.4857058
+0.5660    136.8943523    24.3226725    4.4673710        6.8943523    24.3226725    4.4673710
+0.5670    136.2252924    24.2127919    4.4491311        6.2252924    24.2127919    4.4491311
+0.5680    135.5602343    24.1035279    4.4309854        5.5602343    24.1035279    4.4309854
+0.5690    134.8991490    23.9948762    4.4129333        4.8991490    23.9948762    4.4129333
+0.5700    134.2420080    23.8868327    4.3949742        4.2420080    23.8868327    4.3949742
+0.5710    133.5887829    23.7793933    4.3771076        3.5887829    23.7793933    4.3771076
+0.5720    132.9394456    23.6725538    4.3593328        2.9394456    23.6725538    4.3593328
+0.5730    132.2939681    23.5663102    4.3416493        2.2939681    23.5663102    4.3416493
+0.5740    131.6523229    23.4606585    4.3240564        1.6523229    23.4606585    4.3240564
+0.5750    131.0144825    23.3555946    4.3065537        1.0144825    23.3555946    4.3065537
+0.5760    130.3804198    23.2511146    4.2891405        0.3804198    23.2511146    4.2891405
+0.5770    129.7501078    23.1472146    4.2718163        9.7501078    23.1472146    4.2718163
+0.5780    129.1235197    23.0438906    4.2545805        9.1235197    23.0438906    4.2545805
+0.5790    128.5006290    22.9411387    4.2374326        8.5006290    22.9411387    4.2374326
+0.5800    127.8814095    22.8389551    4.2203720        7.8814095    22.8389551    4.2203720
+0.5810    127.2658351    22.7373361    4.2033981        7.2658351    22.7373361    4.2033981
+0.5820    126.6538800    22.6362777    4.1865105        6.6538800    22.6362777    4.1865105
+0.5830    126.0455185    22.5357763    4.1697085        6.0455185    22.5357763    4.1697085
+0.5840    125.4407252    22.4358282    4.1529917        5.4407252    22.4358282    4.1529917
+0.5850    124.8394749    22.3364296    4.1363594        4.8394749    22.3364296    4.1363594
+0.5860    124.2417426    22.2375768    4.1198113        4.2417426    22.2375768    4.1198113
+0.5870    123.6475035    22.1392663    4.1033467        3.6475035    22.1392663    4.1033467
+0.5880    123.0567330    22.0414944    4.0869651        3.0567330    22.0414944    4.0869651
+0.5890    122.4694068    21.9442576    4.0706661        2.4694068    21.9442576    4.0706661
+0.5900    121.8855007    21.8475522    4.0544491        1.8855007    21.8475522    4.0544491
+0.5910    121.3049907    21.7513748    4.0383135        1.3049907    21.7513748    4.0383135
+0.5920    120.7278530    21.6557219    4.0222590        0.7278530    21.6557219    4.0222590
+0.5930    120.1540640    21.5605900    4.0062849        0.1540640    21.5605900    4.0062849
+0.5940    119.5836004    21.4659757    3.9903909        9.5836004    21.4659757    3.9903909
+0.5950    119.0164390    21.3718756    3.9745764        9.0164390    21.3718756    3.9745764
+0.5960    118.4525566    21.2782862    3.9588408        8.4525566    21.2782862    3.9588408
+0.5970    117.8919307    21.1852042    3.9431838        7.8919307    21.1852042    3.9431838
+0.5980    117.3345384    21.0926262    3.9276049        7.3345384    21.0926262    3.9276049
+0.5990    116.7803573    21.0005491    3.9121035        6.7803573    21.0005491    3.9121035
+0.6000    116.2293653    20.9089694    3.8966792        6.2293653    20.9089694    3.8966792
+0.6010    115.6815402    20.8178839    3.8813315        5.6815402    20.8178839    3.8813315
+0.6020    115.1368600    20.7272894    3.8660600        5.1368600    20.7272894    3.8660600
+0.6030    114.5953032    20.6371827    3.8508642        4.5953032    20.6371827    3.8508642
+0.6040    114.0568481    20.5475606    3.8357436        4.0568481    20.5475606    3.8357436
+0.6050    113.5214734    20.4584200    3.8206977        3.5214734    20.4584200    3.8206977
+0.6060    112.9891578    20.3697576    3.8057262        2.9891578    20.3697576    3.8057262
+0.6070    112.4598804    20.2815705    3.7908285        2.4598804    20.2815705    3.7908285
+0.6080    111.9336202    20.1938554    3.7760043        1.9336202    20.1938554    3.7760043
+0.6090    111.4103567    20.1066094    3.7612530        1.4103567    20.1066094    3.7612530
+0.6100    110.8900692    20.0198294    3.7465743        0.8900692    20.0198294    3.7465743
+0.6110    110.3727375    19.9335124    3.7319676        0.3727375    19.9335124    3.7319676
+0.6120    109.8583413    19.8476555    3.7174326        9.8583413    19.8476555    3.7174326
+0.6130    109.3468606    19.7622555    3.7029688        9.3468606    19.7622555    3.7029688
+0.6140    108.8382755    19.6773096    3.6885758        8.8382755    19.6773096    3.6885758
+0.6150    108.3325663    19.5928150    3.6742532        8.3325663    19.5928150    3.6742532
+0.6160    107.8297135    19.5087685    3.6600006        7.8297135    19.5087685    3.6600006
+0.6170    107.3296977    19.4251675    3.6458174        7.3296977    19.4251675    3.6458174
+0.6180    106.8324997    19.3420090    3.6317034        6.8324997    19.3420090    3.6317034
+0.6190    106.3381002    19.2592901    3.6176581        6.3381002    19.2592901    3.6176581
+0.6200    105.8464805    19.1770082    3.6036811        5.8464805    19.1770082    3.6036811
+0.6210    105.3576217    19.0951603    3.5897719        5.3576217    19.0951603    3.5897719
+0.6220    104.8715052    19.0137437    3.5759302        4.8715052    19.0137437    3.5759302
+0.6230    104.3881125    18.9327557    3.5621556        4.3881125    18.9327557    3.5621556
+0.6240    103.9074253    18.8521936    3.5484477        3.9074253    18.8521936    3.5484477
+0.6250    103.4294254    18.7720545    3.5348060        3.4294254    18.7720545    3.5348060
+0.6260    102.9540947    18.6923359    3.5212303        2.9540947    18.6923359    3.5212303
+0.6270    102.4814152    18.6130350    3.5077200        2.4814152    18.6130350    3.5077200
+0.6280    102.0113694    18.5341493    3.4942748        2.0113694    18.5341493    3.4942748
+0.6290    101.5439394    18.4556760    3.4808944        1.5439394    18.4556760    3.4808944
+0.6300    101.0791079    18.3776126    3.4675783        1.0791079    18.3776126    3.4675783
+0.6310    100.6168575    18.2999565    3.4543261        0.6168575    18.2999565    3.4543261
+0.6320    100.1571709    18.2227051    3.4411376        0.1571709    18.2227051    3.4411376
+0.6330    99.7000311    18.1458558    3.4280123     9.7000311    18.1458558    3.4280123
+0.6340    99.2454212    18.0694062    3.4149498     9.2454212    18.0694062    3.4149498
+0.6350    98.7933242    17.9933537    3.4019497     8.7933242    17.9933537    3.4019497
+0.6360    98.3437237    17.9176958    3.3890118     8.3437237    17.9176958    3.3890118
+0.6370    97.8966029    17.8424301    3.3761357     7.8966029    17.8424301    3.3761357
+0.6380    97.4519455    17.7675540    3.3633209     7.4519455    17.7675540    3.3633209
+0.6390    97.0097351    17.6930652    3.3505671     7.0097351    17.6930652    3.3505671
+0.6400    96.5699557    17.6189612    3.3378740     6.5699557    17.6189612    3.3378740
+0.6410    96.1325912    17.5452397    3.3252412     6.1325912    17.5452397    3.3252412
+0.6420    95.6976256    17.4718981    3.3126684     5.6976256    17.4718981    3.3126684
+0.6430    95.2650431    17.3989342    3.3001551     5.2650431    17.3989342    3.3001551
+0.6440    94.8348282    17.3263457    3.2877012     4.8348282    17.3263457    3.2877012
+0.6450    94.4069651    17.2541301    3.2753062     4.4069651    17.2541301    3.2753062
+0.6460    93.9814386    17.1822851    3.2629697     3.9814386    17.1822851    3.2629697
+0.6470    93.5582333    17.1108086    3.2506915     3.5582333    17.1108086    3.2506915
+0.6480    93.1373339    17.0396981    3.2384713     3.1373339    17.0396981    3.2384713
+0.6490    92.7187255    16.9689514    3.2263085     2.7187255    16.9689514    3.2263085
+0.6500    92.3023930    16.8985663    3.2142031     2.3023930    16.8985663    3.2142031
+0.6510    91.8883217    16.8285405    3.2021545     1.8883217    16.8285405    3.2021545
+0.6520    91.4764967    16.7588718    3.1901625     1.4764967    16.7588718    3.1901625
+0.6530    91.0669035    16.6895580    3.1782268     1.0669035    16.6895580    3.1782268
+0.6540    90.6595276    16.6205970    3.1663470     0.6595276    16.6205970    3.1663470
+0.6550    90.2543545    16.5519865    3.1545229     0.2543545    16.5519865    3.1545229
+0.6560    89.8513700    16.4837244    3.1427540     9.8513700    16.4837244    3.1427540
+0.6570    89.4505599    16.4158086    3.1310401     9.4505599    16.4158086    3.1310401
+0.6580    89.0519101    16.3482369    3.1193809     9.0519101    16.3482369    3.1193809
+0.6590    88.6554066    16.2810073    3.1077761     8.6554066    16.2810073    3.1077761
+0.6600    88.2610357    16.2141176    3.0962252     8.2610357    16.2141176    3.0962252
+0.6610    87.8687835    16.1475658    3.0847282     7.8687835    16.1475658    3.0847282
+0.6620    87.4786365    16.0813498    3.0732845     7.4786365    16.0813498    3.0732845
+0.6630    87.0905810    16.0154675    3.0618940     7.0905810    16.0154675    3.0618940
+0.6640    86.7046036    15.9499170    3.0505563     6.7046036    15.9499170    3.0505563
+0.6650    86.3206910    15.8846962    3.0392712     6.3206910    15.8846962    3.0392712
+0.6660    85.9388300    15.8198031    3.0280382     5.9388300    15.8198031    3.0280382
+0.6670    85.5590074    15.7552357    3.0168572     5.5590074    15.7552357    3.0168572
+0.6680    85.1812101    15.6909920    3.0057279     5.1812101    15.6909920    3.0057279
+0.6690    84.8054253    15.6270702    2.9946499     4.8054253    15.6270702    2.9946499
+0.6700    84.4316400    15.5634682    2.9836230     4.4316400    15.5634682    2.9836230
+0.6710    84.0598416    15.5001840    2.9726468     4.0598416    15.5001840    2.9726468
+0.6720    83.6900173    15.4372159    2.9617211     3.6900173    15.4372159    2.9617211
+0.6730    83.3221547    15.3745619    2.9508456     3.3221547    15.3745619    2.9508456
+0.6740    82.9562412    15.3122200    2.9400201     2.9562412    15.3122200    2.9400201
+0.6750    82.5922645    15.2501885    2.9292442     2.5922645    15.2501885    2.9292442
+0.6760    82.2302123    15.1884654    2.9185176     2.2302123    15.1884654    2.9185176
+0.6770    81.8700724    15.1270489    2.9078402     1.8700724    15.1270489    2.9078402
+0.6780    81.5118328    15.0659372    2.8972116     1.5118328    15.0659372    2.8972116
+0.6790    81.1554813    15.0051284    2.8866315     1.1554813    15.0051284    2.8866315
+0.6800    80.8010062    14.9446207    2.8760997     0.8010062    14.9446207    2.8760997
+0.6810    80.4483955    14.8844124    2.8656158     0.4483955    14.8844124    2.8656158
+0.6820    80.0976376    14.8245015    2.8551798     0.0976376    14.8245015    2.8551798
+0.6830    79.7487207    14.7648865    2.8447912     9.7487207    14.7648865    2.8447912
+0.6840    79.4016333    14.7055654    2.8344498     9.4016333    14.7055654    2.8344498
+0.6850    79.0563639    14.6465366    2.8241553     9.0563639    14.6465366    2.8241553
+0.6860    78.7129012    14.5877983    2.8139075     8.7129012    14.5877983    2.8139075
+0.6870    78.3712338    14.5293488    2.8037062     8.3712338    14.5293488    2.8037062
+0.6880    78.0313504    14.4711864    2.7935510     8.0313504    14.4711864    2.7935510
+0.6890    77.6932400    14.4133093    2.7834418     7.6932400    14.4133093    2.7834418
+0.6900    77.3568914    14.3557160    2.7733782     7.3568914    14.3557160    2.7733782
+0.6910    77.0222938    14.2984046    2.7633600     7.0222938    14.2984046    2.7633600
+0.6920    76.6894360    14.2413736    2.7533870     6.6894360    14.2413736    2.7533870
+0.6930    76.3583075    14.1846212    2.7434589     6.3583075    14.1846212    2.7434589
+0.6940    76.0288973    14.1281459    2.7335755     6.0288973    14.1281459    2.7335755
+0.6950    75.7011949    14.0719461    2.7237365     5.7011949    14.0719461    2.7237365
+0.6960    75.3751896    14.0160200    2.7139416     5.3751896    14.0160200    2.7139416
+0.6970    75.0508709    13.9603661    2.7041907     5.0508709    13.9603661    2.7041907
+0.6980    74.7282285    13.9049827    2.6944835     4.7282285    13.9049827    2.6944835
+0.6990    74.4072519    13.8498684    2.6848198     4.4072519    13.8498684    2.6848198
+0.7000    74.0879308    13.7950215    2.6751993     4.0879308    13.7950215    2.6751993
+0.7010    73.7702551    13.7404405    2.6656217     3.7702551    13.7404405    2.6656217
+0.7020    73.4542145    13.6861238    2.6560870     3.4542145    13.6861238    2.6560870
+0.7030    73.1397992    13.6320698    2.6465947     3.1397992    13.6320698    2.6465947
+0.7040    72.8269989    13.5782771    2.6371447     2.8269989    13.5782771    2.6371447
+0.7050    72.5158039    13.5247441    2.6277368     2.5158039    13.5247441    2.6277368
+0.7060    72.2062043    13.4714693    2.6183707     2.2062043    13.4714693    2.6183707
+0.7070    71.8981904    13.4184512    2.6090463     1.8981904    13.4184512    2.6090463
+0.7080    71.5917523    13.3656882    2.5997632     1.5917523    13.3656882    2.5997632
+0.7090    71.2868805    13.3131791    2.5905212     1.2868805    13.3131791    2.5905212
+0.7100    70.9835654    13.2609221    2.5813202     0.9835654    13.2609221    2.5813202
+0.7110    70.6817975    13.2089160    2.5721599     0.6817975    13.2089160    2.5721599
+0.7120    70.3815674    13.1571592    2.5630401     0.3815674    13.1571592    2.5630401
+0.7130    70.0828658    13.1056503    2.5539606     0.0828658    13.1056503    2.5539606
+0.7140    69.7856832    13.0543879    2.5449211     9.7856832    13.0543879    2.5449211
+0.7150    69.4900106    13.0033705    2.5359215     9.4900106    13.0033705    2.5359215
+0.7160    69.1958387    12.9525968    2.5269615     9.1958387    12.9525968    2.5269615
+0.7170    68.9031585    12.9020653    2.5180409     8.9031585    12.9020653    2.5180409
+0.7180    68.6119608    12.8517746    2.5091596     8.6119608    12.8517746    2.5091596
+0.7190    68.3222368    12.8017234    2.5003172     8.3222368    12.8017234    2.5003172
+0.7200    68.0339775    12.7519103    2.4915136     8.0339775    12.7519103    2.4915136
+0.7210    67.7471742    12.7023340    2.4827486     7.7471742    12.7023340    2.4827486
+0.7220    67.4618179    12.6529930    2.4740220     7.4618179    12.6529930    2.4740220
+0.7230    67.1779001    12.6038860    2.4653335     7.1779001    12.6038860    2.4653335
+0.7240    66.8954120    12.5550117    2.4566830     6.8954120    12.5550117    2.4566830
+0.7250    66.6143450    12.5063688    2.4480703     6.6143450    12.5063688    2.4480703
+0.7260    66.3346907    12.4579559    2.4394952     6.3346907    12.4579559    2.4394952
+0.7270    66.0564406    12.4097717    2.4309574     6.0564406    12.4097717    2.4309574
+0.7280    65.7795861    12.3618149    2.4224568     5.7795861    12.3618149    2.4224568
+0.7290    65.5041191    12.3140843    2.4139931     5.5041191    12.3140843    2.4139931
+0.7300    65.2300311    12.2665786    2.4055663     5.2300311    12.2665786    2.4055663
+0.7310    64.9573141    12.2192964    2.3971760     4.9573141    12.2192964    2.3971760
+0.7320    64.6859596    12.1722365    2.3888221     4.6859596    12.1722365    2.3888221
+0.7330    64.4159598    12.1253977    2.3805044     4.4159598    12.1253977    2.3805044
+0.7340    64.1473064    12.0787787    2.3722227     4.1473064    12.0787787    2.3722227
+0.7350    63.8799915    12.0323782    2.3639768     3.8799915    12.0323782    2.3639768
+0.7360    63.6140072    11.9861951    2.3557666     3.6140072    11.9861951    2.3557666
+0.7370    63.3493455    11.9402280    2.3475917     3.3493455    11.9402280    2.3475917
+0.7380    63.0859986    11.8944759    2.3394522     3.0859986    11.8944759    2.3394522
+0.7390    62.8239587    11.8489374    2.3313477     2.8239587    11.8489374    2.3313477
+0.7400    62.5632181    11.8036114    2.3232781     2.5632181    11.8036114    2.3232781
+0.7410    62.3037690    11.7584966    2.3152432     2.3037690    11.7584966    2.3152432
+0.7420    62.0456040    11.7135920    2.3072429     2.0456040    11.7135920    2.3072429
+0.7430    61.7887153    11.6688963    2.2992769     1.7887153    11.6688963    2.2992769
+0.7440    61.5330955    11.6244083    2.2913450     1.5330955    11.6244083    2.2913450
+0.7450    61.2787372    11.5801269    2.2834471     1.2787372    11.5801269    2.2834471
+0.7460    61.0256328    11.5360509    2.2755831     1.0256328    11.5360509    2.2755831
+0.7470    60.7737751    11.4921792    2.2677526     0.7737751    11.4921792    2.2677526
+0.7480    60.5231566    11.4485107    2.2599557     0.5231566    11.4485107    2.2599557
+0.7490    60.2737703    11.4050442    2.2521920     0.2737703    11.4050442    2.2521920
+0.7500    60.0256087    11.3617785    2.2444614     0.0256087    11.3617785    2.2444614
+0.7510    59.7786649    11.3187127    2.2367638     9.7786649    11.3187127    2.2367638
+0.7520    59.5329316    11.2758455    2.2290989     9.5329316    11.2758455    2.2290989
+0.7530    59.2884018    11.2331758    2.2214666     9.2884018    11.2331758    2.2214666
+0.7540    59.0450684    11.1907026    2.2138668     9.0450684    11.1907026    2.2138668
+0.7550    58.8029246    11.1484248    2.2062993     8.8029246    11.1484248    2.2062993
+0.7560    58.5619634    11.1063413    2.1987638     8.5619634    11.1063413    2.1987638
+0.7570    58.3221779    11.0644510    2.1912603     8.3221779    11.0644510    2.1912603
+0.7580    58.0835612    11.0227529    2.1837885     8.0835612    11.0227529    2.1837885
+0.7590    57.8461067    10.9812459    2.1763483     7.8461067    10.9812459    2.1763483
+0.7600    57.6098075    10.9399289    2.1689396     7.6098075    10.9399289    2.1689396
+0.7610    57.3746570    10.8988008    2.1615622     7.3746570    10.8988008    2.1615622
+0.7620    57.1406485    10.8578608    2.1542159     7.1406485    10.8578608    2.1542159
+0.7630    56.9077755    10.8171077    2.1469005     6.9077755    10.8171077    2.1469005
+0.7640    56.6760313    10.7765404    2.1396160     6.6760313    10.7765404    2.1396160
+0.7650    56.4454095    10.7361581    2.1323621     6.4454095    10.7361581    2.1323621
+0.7660    56.2159036    10.6959596    2.1251387     6.2159036    10.6959596    2.1251387
+0.7670    55.9875072    10.6559440    2.1179456     5.9875072    10.6559440    2.1179456
+0.7680    55.7602140    10.6161102    2.1107827     5.7602140    10.6161102    2.1107827
+0.7690    55.5340174    10.5764573    2.1036499     5.5340174    10.5764573    2.1036499
+0.7700    55.3089114    10.5369842    2.0965469     5.3089114    10.5369842    2.0965469
+0.7710    55.0848896    10.4976901    2.0894736     5.0848896    10.4976901    2.0894736
+0.7720    54.8619458    10.4585739    2.0824299     4.8619458    10.4585739    2.0824299
+0.7730    54.6400739    10.4196346    2.0754157     4.6400739    10.4196346    2.0754157
+0.7740    54.4192677    10.3808713    2.0684307     4.4192677    10.3808713    2.0684307
+0.7750    54.1995211    10.3422831    2.0614748     4.1995211    10.3422831    2.0614748
+0.7760    53.9808282    10.3038690    2.0545479     3.9808282    10.3038690    2.0545479
+0.7770    53.7631829    10.2656280    2.0476498     3.7631829    10.2656280    2.0476498
+0.7780    53.5465792    10.2275592    2.0407804     3.5465792    10.2275592    2.0407804
+0.7790    53.3310112    10.1896616    2.0339396     3.3310112    10.1896616    2.0339396
+0.7800    53.1164730    10.1519345    2.0271272     3.1164730    10.1519345    2.0271272
+0.7810    52.9029589    10.1143767    2.0203430     2.9029589    10.1143767    2.0203430
+0.7820    52.6904629    10.0769875    2.0135869     2.6904629    10.0769875    2.0135869
+0.7830    52.4789794    10.0397659    2.0068588     2.4789794    10.0397659    2.0068588
+0.7840    52.2685025    10.0027109    2.0001585     2.2685025    10.0027109    2.0001585
+0.7850    52.0590267    9.9658218    1.9934859      52.0590267    9.9658218    1.9934859
+0.7860    51.8505462    9.9290976    1.9868409      51.8505462    9.9290976    1.9868409
+0.7870    51.6430554    9.8925374    1.9802232      51.6430554    9.8925374    1.9802232
+0.7880    51.4365489    9.8561404    1.9736329      51.4365489    9.8561404    1.9736329
+0.7890    51.2310209    9.8199057    1.9670697      51.2310209    9.8199057    1.9670697
+0.7900    51.0264660    9.7838323    1.9605335      51.0264660    9.7838323    1.9605335
+0.7910    50.8228788    9.7479195    1.9540241      50.8228788    9.7479195    1.9540241
+0.7920    50.6202538    9.7121664    1.9475415      50.6202538    9.7121664    1.9475415
+0.7930    50.4185857    9.6765721    1.9410855      50.4185857    9.6765721    1.9410855
+0.7940    50.2178690    9.6411358    1.9346560      50.2178690    9.6411358    1.9346560
+0.7950    50.0180984    9.6058567    1.9282528      50.0180984    9.6058567    1.9282528
+0.7960    49.8192687    9.5707338    1.9218759      49.8192687    9.5707338    1.9218759
+0.7970    49.6213746    9.5357665    1.9155250      49.6213746    9.5357665    1.9155250
+0.7980    49.4244109    9.5009538    1.9092000      49.4244109    9.5009538    1.9092000
+0.7990    49.2283723    9.4662949    1.9029009      49.2283723    9.4662949    1.9029009
+0.8000    49.0332538    9.4317890    1.8966275      49.0332538    9.4317890    1.8966275
+0.8010    48.8390502    9.3974354    1.8903796      48.8390502    9.3974354    1.8903796
+0.8020    48.6457564    9.3632331    1.8841572      48.6457564    9.3632331    1.8841572
+0.8030    48.4533674    9.3291814    1.8779600      48.4533674    9.3291814    1.8779600
+0.8040    48.2618782    9.2952795    1.8717881      48.2618782    9.2952795    1.8717881
+0.8050    48.0712838    9.2615267    1.8656413      48.0712838    9.2615267    1.8656413
+0.8060    47.8815791    9.2279220    1.8595194      47.8815791    9.2279220    1.8595194
+0.8070    47.6927594    9.1944649    1.8534223      47.6927594    9.1944649    1.8534223
+0.8080    47.5048196    9.1611543    1.8473499      47.5048196    9.1611543    1.8473499
+0.8090    47.3177550    9.1279897    1.8413020      47.3177550    9.1279897    1.8413020
+0.8100    47.1315608    9.0949702    1.8352787      47.1315608    9.0949702    1.8352787
+0.8110    46.9462320    9.0620951    1.8292797      46.9462320    9.0620951    1.8292797
+0.8120    46.7617641    9.0293636    1.8233049      46.7617641    9.0293636    1.8233049
+0.8130    46.5781521    8.9967749    1.8173541      46.5781521    8.9967749    1.8173541
+0.8140    46.3953915    8.9643283    1.8114274      46.3953915    8.9643283    1.8114274
+0.8150    46.2134775    8.9320232    1.8055246      46.2134775    8.9320232    1.8055246
+0.8160    46.0324056    8.8998586    1.7996454      46.0324056    8.8998586    1.7996454
+0.8170    45.8521710    8.8678339    1.7937900      45.8521710    8.8678339    1.7937900
+0.8180    45.6727692    8.8359484    1.7879580      45.6727692    8.8359484    1.7879580
+0.8190    45.4941957    8.8042014    1.7821494      45.4941957    8.8042014    1.7821494
+0.8200    45.3164460    8.7725920    1.7763642      45.3164460    8.7725920    1.7763642
+0.8210    45.1395155    8.7411197    1.7706021      45.1395155    8.7411197    1.7706021
+0.8220    44.9633997    8.7097837    1.7648630      44.9633997    8.7097837    1.7648630
+0.8230    44.7880943    8.6785832    1.7591470      44.7880943    8.6785832    1.7591470
+0.8240    44.6135948    8.6475176    1.7534537      44.6135948    8.6475176    1.7534537
+0.8250    44.4398968    8.6165862    1.7477832      44.4398968    8.6165862    1.7477832
+0.8260    44.2669961    8.5857883    1.7421353      44.2669961    8.5857883    1.7421353
+0.8270    44.0948882    8.5551232    1.7365100      44.0948882    8.5551232    1.7365100
+0.8280    43.9235689    8.5245902    1.7309070      43.9235689    8.5245902    1.7309070
+0.8290    43.7530338    8.4941886    1.7253263      43.7530338    8.4941886    1.7253263
+0.8300    43.5832788    8.4639178    1.7197678      43.5832788    8.4639178    1.7197678
+0.8310    43.4142997    8.4337771    1.7142314      43.4142997    8.4337771    1.7142314
+0.8320    43.2460921    8.4037657    1.7087170      43.2460921    8.4037657    1.7087170
+0.8330    43.0786521    8.3738831    1.7032245      43.0786521    8.3738831    1.7032245
+0.8340    42.9119754    8.3441286    1.6977537      42.9119754    8.3441286    1.6977537
+0.8350    42.7460579    8.3145015    1.6923045      42.7460579    8.3145015    1.6923045
+0.8360    42.5808956    8.2850012    1.6868770      42.5808956    8.2850012    1.6868770
+0.8370    42.4164843    8.2556269    1.6814708      42.4164843    8.2556269    1.6814708
+0.8380    42.2528201    8.2263782    1.6760861      42.2528201    8.2263782    1.6760861
+0.8390    42.0898989    8.1972543    1.6707225      42.0898989    8.1972543    1.6707225
+0.8400    41.9277168    8.1682546    1.6653802      41.9277168    8.1682546    1.6653802
+0.8410    41.7662698    8.1393784    1.6600588      41.7662698    8.1393784    1.6600588
+0.8420    41.6055540    8.1106252    1.6547585      41.6055540    8.1106252    1.6547585
+0.8430    41.4455654    8.0819943    1.6494789      41.4455654    8.0819943    1.6494789
+0.8440    41.2863002    8.0534850    1.6442201      41.2863002    8.0534850    1.6442201
+0.8450    41.1277545    8.0250968    1.6389820      41.1277545    8.0250968    1.6389820
+0.8460    40.9699245    7.9968291    1.6337644      40.9699245    7.9968291    1.6337644
+0.8470    40.8128063    7.9686812    1.6285673      40.8128063    7.9686812    1.6285673
+0.8480    40.6563962    7.9406525    1.6233905      40.6563962    7.9406525    1.6233905
+0.8490    40.5006904    7.9127424    1.6182340      40.5006904    7.9127424    1.6182340
+0.8500    40.3456852    7.8849503    1.6130976      40.3456852    7.8849503    1.6130976
+0.8510    40.1913769    7.8572757    1.6079813      40.1913769    7.8572757    1.6079813
+0.8520    40.0377617    7.8297179    1.6028850      40.0377617    7.8297179    1.6028850
+0.8530    39.8848360    7.8022763    1.5978086      39.8848360    7.8022763    1.5978086
+0.8540    39.7325961    7.7749504    1.5927520      39.7325961    7.7749504    1.5927520
+0.8550    39.5810385    7.7477396    1.5877150      39.5810385    7.7477396    1.5877150
+0.8560    39.4301594    7.7206432    1.5826977      39.4301594    7.7206432    1.5826977
+0.8570    39.2799554    7.6936608    1.5776998      39.2799554    7.6936608    1.5776998
+0.8580    39.1304228    7.6667916    1.5727214      39.1304228    7.6667916    1.5727214
+0.8590    38.9815582    7.6400353    1.5677623      38.9815582    7.6400353    1.5677623
+0.8600    38.8333580    7.6133912    1.5628224      38.8333580    7.6133912    1.5628224
+0.8610    38.6858187    7.5868587    1.5579017      38.6858187    7.5868587    1.5579017
+0.8620    38.5389369    7.5604372    1.5530001      38.5389369    7.5604372    1.5530001
+0.8630    38.3927091    7.5341263    1.5481174      38.3927091    7.5341263    1.5481174
+0.8640    38.2471318    7.5079254    1.5432535      38.2471318    7.5079254    1.5432535
+0.8650    38.1022017    7.4818339    1.5384085      38.1022017    7.4818339    1.5384085
+0.8660    37.9579153    7.4558513    1.5335822      37.9579153    7.4558513    1.5335822
+0.8670    37.8142694    7.4299770    1.5287744      37.8142694    7.4299770    1.5287744
+0.8680    37.6712605    7.4042105    1.5239853      37.6712605    7.4042105    1.5239853
+0.8690    37.5288854    7.3785512    1.5192145      37.5288854    7.3785512    1.5192145
+0.8700    37.3871406    7.3529987    1.5144621      37.3871406    7.3529987    1.5144621
+0.8710    37.2460231    7.3275524    1.5097280      37.2460231    7.3275524    1.5097280
+0.8720    37.1055294    7.3022117    1.5050120      37.1055294    7.3022117    1.5050120
+0.8730    36.9656563    7.2769761    1.5003142      36.9656563    7.2769761    1.5003142
+0.8740    36.8264006    7.2518452    1.4956344      36.8264006    7.2518452    1.4956344
+0.8750    36.6877592    7.2268184    1.4909725      36.6877592    7.2268184    1.4909725
+0.8760    36.5497287    7.2018952    1.4863284      36.5497287    7.2018952    1.4863284
+0.8770    36.4123061    7.1770750    1.4817022      36.4123061    7.1770750    1.4817022
+0.8780    36.2754882    7.1523574    1.4770936      36.2754882    7.1523574    1.4770936
+0.8790    36.1392719    7.1277418    1.4725026      36.1392719    7.1277418    1.4725026
+0.8800    36.0036540    7.1032278    1.4679291      36.0036540    7.1032278    1.4679291
+0.8810    35.8686315    7.0788149    1.4633731      35.8686315    7.0788149    1.4633731
+0.8820    35.7342013    7.0545025    1.4588345      35.7342013    7.0545025    1.4588345
+0.8830    35.6003603    7.0302902    1.4543131      35.6003603    7.0302902    1.4543131
+0.8840    35.4671056    7.0061774    1.4498089      35.4671056    7.0061774    1.4498089
+0.8850    35.3344340    6.9821637    1.4453219      35.3344340    6.9821637    1.4453219
+0.8860    35.2023426    6.9582486    1.4408519      35.2023426    6.9582486    1.4408519
+0.8870    35.0708284    6.9344316    1.4363988      35.0708284    6.9344316    1.4363988
+0.8880    34.9398885    6.9107122    1.4319627      34.9398885    6.9107122    1.4319627
+0.8890    34.8095199    6.8870900    1.4275434      34.8095199    6.8870900    1.4275434
+0.8900    34.6797197    6.8635645    1.4231408      34.6797197    6.8635645    1.4231408
+0.8910    34.5504849    6.8401351    1.4187548      34.5504849    6.8401351    1.4187548
+0.8920    34.4218127    6.8168015    1.4143854      34.4218127    6.8168015    1.4143854
+0.8930    34.2937002    6.7935632    1.4100326      34.2937002    6.7935632    1.4100326
+0.8940    34.1661445    6.7704197    1.4056962      34.1661445    6.7704197    1.4056962
+0.8950    34.0391429    6.7473705    1.4013761      34.0391429    6.7473705    1.4013761
+0.8960    33.9126924    6.7244151    1.3970723      33.9126924    6.7244151    1.3970723
+0.8970    33.7867903    6.7015532    1.3927847      33.7867903    6.7015532    1.3927847
+0.8980    33.6614338    6.6787843    1.3885133      33.6614338    6.6787843    1.3885133
+0.8990    33.5366200    6.6561079    1.3842579      33.5366200    6.6561079    1.3842579
+0.9000    33.4123463    6.6335236    1.3800185      33.4123463    6.6335236    1.3800185
+0.9010    33.2886100    6.6110309    1.3757950      33.2886100    6.6110309    1.3757950
+0.9020    33.1654082    6.5886293    1.3715874      33.1654082    6.5886293    1.3715874
+0.9030    33.0427382    6.5663185    1.3673955      33.0427382    6.5663185    1.3673955
+0.9040    32.9205975    6.5440981    1.3632194      32.9205975    6.5440981    1.3632194
+0.9050    32.7989833    6.5219674    1.3590588      32.7989833    6.5219674    1.3590588
+0.9060    32.6778929    6.4999262    1.3549139      32.6778929    6.4999262    1.3549139
+0.9070    32.5573237    6.4779741    1.3507844      32.5573237    6.4779741    1.3507844
+0.9080    32.4372731    6.4561104    1.3466703      32.4372731    6.4561104    1.3466703
+0.9090    32.3177384    6.4343350    1.3425716      32.3177384    6.4343350    1.3425716
+0.9100    32.1987171    6.4126472    1.3384881      32.1987171    6.4126472    1.3384881
+0.9110    32.0802066    6.3910468    1.3344199      32.0802066    6.3910468    1.3344199
+0.9120    31.9622042    6.3695332    1.3303668      31.9622042    6.3695332    1.3303668
+0.9130    31.8447076    6.3481061    1.3263288      31.8447076    6.3481061    1.3263288
+0.9140    31.7277140    6.3267651    1.3223058      31.7277140    6.3267651    1.3223058
+0.9150    31.6112211    6.3055096    1.3182978      31.6112211    6.3055096    1.3182978
+0.9160    31.4952262    6.2843395    1.3143046      31.4952262    6.2843395    1.3143046
+0.9170    31.3797269    6.2632541    1.3103262      31.3797269    6.2632541    1.3103262
+0.9180    31.2647207    6.2422532    1.3063625      31.2647207    6.2422532    1.3063625
+0.9190    31.1502052    6.2213362    1.3024136      31.1502052    6.2213362    1.3024136
+0.9200    31.0361779    6.2005029    1.2984792      31.0361779    6.2005029    1.2984792
+0.9210    30.9226363    6.1797528    1.2945594      30.9226363    6.1797528    1.2945594
+0.9220    30.8095781    6.1590855    1.2906541      30.8095781    6.1590855    1.2906541
+0.9230    30.6970008    6.1385007    1.2867632      30.6970008    6.1385007    1.2867632
+0.9240    30.5849021    6.1179979    1.2828866      30.5849021    6.1179979    1.2828866
+0.9250    30.4732795    6.0975767    1.2790243      30.4732795    6.0975767    1.2790243
+0.9260    30.3621308    6.0772368    1.2751763      30.3621308    6.0772368    1.2751763
+0.9270    30.2514534    6.0569777    1.2713424      30.2514534    6.0569777    1.2713424
+0.9280    30.1412452    6.0367991    1.2675226      30.1412452    6.0367991    1.2675226
+0.9290    30.0315037    6.0167007    1.2637168      30.0315037    6.0167007    1.2637168
+0.9300    29.9222268    5.9966820    1.2599250      29.9222268    5.9966820    1.2599250
+0.9310    29.8134120    5.9767426    1.2561471      29.8134120    5.9767426    1.2561471
+0.9320    29.7050570    5.9568822    1.2523831      29.7050570    5.9568822    1.2523831
+0.9330    29.5971597    5.9371004    1.2486328      29.5971597    5.9371004    1.2486328
+0.9340    29.4897178    5.9173969    1.2448963      29.4897178    5.9173969    1.2448963
+0.9350    29.3827290    5.8977712    1.2411735      29.3827290    5.8977712    1.2411735
+0.9360    29.2761910    5.8782230    1.2374642      29.2761910    5.8782230    1.2374642
+0.9370    29.1701017    5.8587520    1.2337685      29.1701017    5.8587520    1.2337685
+0.9380    29.0644589    5.8393577    1.2300863      29.0644589    5.8393577    1.2300863
+0.9390    28.9592603    5.8200398    1.2264175      28.9592603    5.8200398    1.2264175
+0.9400    28.8545038    5.8007980    1.2227621      28.8545038    5.8007980    1.2227621
+0.9410    28.7501872    5.7816319    1.2191200      28.7501872    5.7816319    1.2191200
+0.9420    28.6463084    5.7625412    1.2154912      28.6463084    5.7625412    1.2154912
+0.9430    28.5428651    5.7435254    1.2118755      28.5428651    5.7435254    1.2118755
+0.9440    28.4398554    5.7245843    1.2082730      28.4398554    5.7245843    1.2082730
+0.9450    28.3372770    5.7057175    1.2046836      28.3372770    5.7057175    1.2046836
+0.9460    28.2351278    5.6869246    1.2011071      28.2351278    5.6869246    1.2011071
+0.9470    28.1334058    5.6682053    1.1975437      28.1334058    5.6682053    1.1975437
+0.9480    28.0321089    5.6495593    1.1939931      28.0321089    5.6495593    1.1939931
+0.9490    27.9312350    5.6309862    1.1904554      27.9312350    5.6309862    1.1904554
+0.9500    27.8307820    5.6124856    1.1869305      27.8307820    5.6124856    1.1869305
+0.9510    27.7307479    5.5940574    1.1834183      27.7307479    5.5940574    1.1834183
+0.9520    27.6311306    5.5757010    1.1799189      27.6311306    5.5757010    1.1799189
+0.9530    27.5319282    5.5574162    1.1764320      27.5319282    5.5574162    1.1764320
+0.9540    27.4331386    5.5392026    1.1729577      27.4331386    5.5392026    1.1729577
+0.9550    27.3347598    5.5210600    1.1694960      27.3347598    5.5210600    1.1694960
+0.9560    27.2367898    5.5029879    1.1660467      27.2367898    5.5029879    1.1660467
+0.9570    27.1392266    5.4849861    1.1626098      27.1392266    5.4849861    1.1626098
+0.9580    27.0420683    5.4670542    1.1591853      27.0420683    5.4670542    1.1591853
+0.9590    26.9453130    5.4491919    1.1557730      26.9453130    5.4491919    1.1557730
+0.9600    26.8489586    5.4313990    1.1523731      26.8489586    5.4313990    1.1523731
+0.9610    26.7530032    5.4136749    1.1489853      26.7530032    5.4136749    1.1489853
+0.9620    26.6574449    5.3960196    1.1456096      26.6574449    5.3960196    1.1456096
+0.9630    26.5622819    5.3784326    1.1422461      26.5622819    5.3784326    1.1422461
+0.9640    26.4675121    5.3609136    1.1388946      26.4675121    5.3609136    1.1388946
+0.9650    26.3731337    5.3434623    1.1355551      26.3731337    5.3434623    1.1355551
+0.9660    26.2791448    5.3260784    1.1322275      26.2791448    5.3260784    1.1322275
+0.9670    26.1855436    5.3087616    1.1289118      26.1855436    5.3087616    1.1289118
+0.9680    26.0923281    5.2915115    1.1256080      26.0923281    5.2915115    1.1256080
+0.9690    25.9994966    5.2743280    1.1223159      25.9994966    5.2743280    1.1223159
+0.9700    25.9070472    5.2572106    1.1190356      25.9070472    5.2572106    1.1190356
+0.9710    25.8149780    5.2401591    1.1157669      25.8149780    5.2401591    1.1157669
+0.9720    25.7232873    5.2231731    1.1125099      25.7232873    5.2231731    1.1125099
+0.9730    25.6319732    5.2062525    1.1092644      25.6319732    5.2062525    1.1092644
+0.9740    25.5410340    5.1893967    1.1060305      25.5410340    5.1893967    1.1060305
+0.9750    25.4504678    5.1726057    1.1028081      25.4504678    5.1726057    1.1028081
+0.9760    25.3602728    5.1558791    1.0995971      25.3602728    5.1558791    1.0995971
+0.9770    25.2704474    5.1392165    1.0963975      25.2704474    5.1392165    1.0963975
+0.9780    25.1809897    5.1226177    1.0932092      25.1809897    5.1226177    1.0932092
+0.9790    25.0918980    5.1060825    1.0900323      25.0918980    5.1060825    1.0900323
+0.9800    25.0031705    5.0896104    1.0868665      25.0031705    5.0896104    1.0868665
+0.9810    24.9148055    5.0732013    1.0837120      24.9148055    5.0732013    1.0837120
+0.9820    24.8268013    5.0568548    1.0805686      24.8268013    5.0568548    1.0805686
+0.9830    24.7391563    5.0405707    1.0774363      24.7391563    5.0405707    1.0774363
+0.9840    24.6518686    5.0243487    1.0743150      24.6518686    5.0243487    1.0743150
+0.9850    24.5649365    5.0081885    1.0712048      24.5649365    5.0081885    1.0712048
+0.9860    24.4783585    4.9920898    1.0681055      24.4783585    4.9920898    1.0681055
+0.9870    24.3921328    4.9760523    1.0650171      24.3921328    4.9760523    1.0650171
+0.9880    24.3062578    4.9600758    1.0619396      24.3062578    4.9600758    1.0619396
+0.9890    24.2207318    4.9441600    1.0588729      24.2207318    4.9441600    1.0588729
+0.9900    24.1355532    4.9283046    1.0558169      24.1355532    4.9283046    1.0558169
+0.9910    24.0507203    4.9125093    1.0527717      24.0507203    4.9125093    1.0527717
+0.9920    23.9662314    4.8967740    1.0497372      23.9662314    4.8967740    1.0497372
+0.9930    23.8820851    4.8810982    1.0467134      23.8820851    4.8810982    1.0467134
+0.9940    23.7982796    4.8654818    1.0437001      23.7982796    4.8654818    1.0437001
+0.9950    23.7148134    4.8499244    1.0406973      23.7148134    4.8499244    1.0406973
+0.9960    23.6316848    4.8344259    1.0377051      23.6316848    4.8344259    1.0377051
+0.9970    23.5488924    4.8189859    1.0347233      23.5488924    4.8189859    1.0347233
+0.9980    23.4664344    4.8036042    1.0317519      23.4664344    4.8036042    1.0317519
+0.9990    23.3843094    4.7882805    1.0287909      23.3843094    4.7882805    1.0287909
+1.0000    23.3025158    4.7730145    1.0258403      23.3025158    4.7730145    1.0258403
diff --git a/source/tests/pt/model/water/lkf.json b/source/tests/pt/model/water/lkf.json
new file mode 100644
index 0000000000..4385d02136
--- /dev/null
+++ b/source/tests/pt/model/water/lkf.json
@@ -0,0 +1,79 @@
+{
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 0.50,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        25,
+        25
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "fitting_net": {
+      "neuron": [
+        100,
+        100,
+        100
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "data_stat_nbatch": 20,
+    "_comment": " that's all"
+  },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-8,
+    "_comment": "that's all"
+  },
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "_comment": " that's all"
+  },
+  "training": {
+    "training_data": {
+      "systems": [
+        "pt/water/data/data_0"
+      ],
+      "batch_size": 3,
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "pt/water/data/data_0"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment": "that's all"
+    },
+    "numb_steps": 1,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 1,
+    "save_freq": 1,
+    "opt_type": "LKF",
+    "kf_blocksize": 1024,
+    "_comment": "that's all"
+  },
+  "_comment": "that's all"
+}
diff --git a/source/tests/pt/model/water/multitask.json b/source/tests/pt/model/water/multitask.json
new file mode 100644
index 0000000000..2f706e4cd9
--- /dev/null
+++ b/source/tests/pt/model/water/multitask.json
@@ -0,0 +1,141 @@
+{
+  "model": {
+    "shared_dict": {
+      "my_type_map": [
+        "O",
+        "H",
+        "B"
+      ],
+      "my_descriptor": {
+        "type": "se_e2_a",
+        "sel": [
+          46,
+          92
+        ],
+        "rcut_smth": 0.50,
+        "rcut": 6.00,
+        "neuron": [
+          25,
+          50,
+          100
+        ],
+        "resnet_dt": false,
+        "axis_neuron": 16,
+        "seed": 1,
+        "_comment": " that's all"
+      },
+      "_comment": "that's all"
+    },
+    "model_dict": {
+      "model_1": {
+        "type_map": "my_type_map",
+        "descriptor": "my_descriptor",
+        "fitting_net": {
+          "neuron": [
+            240,
+            240,
+            240
+          ],
+          "resnet_dt": true,
+          "seed": 1,
+          "_comment": " that's all"
+        },
+        "data_stat_nbatch": 1
+      },
+      "model_2": {
+        "type_map": "my_type_map",
+        "descriptor": "my_descriptor",
+        "fitting_net": {
+          "neuron": [
+            240,
+            240,
+            240
+          ],
+          "resnet_dt": true,
+          "seed": 1,
+          "_comment": " that's all"
+        },
+        "data_stat_nbatch": 1
+      }
+    }
+  },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.0002,
+    "decay_rate": 0.98,
+    "stop_lr": 3.51e-08,
+    "_comment": "that's all"
+  },
+  "loss_dict": {
+    "_comment": " that's all",
+    "model_1": {
+      "type": "ener",
+      "start_pref_e": 0.02,
+      "limit_pref_e": 1,
+      "start_pref_f": 1000,
+      "limit_pref_f": 1,
+      "start_pref_v": 0,
+      "limit_pref_v": 0
+    },
+    "model_2": {
+      "type": "ener",
+      "start_pref_e": 0.02,
+      "limit_pref_e": 1,
+      "start_pref_f": 1000,
+      "limit_pref_f": 1,
+      "start_pref_v": 0,
+      "limit_pref_v": 0
+    }
+  },
+  "training": {
+    "model_prob": {
+      "model_1": 0.5,
+      "model_2": 0.5
+    },
+    "data_dict": {
+      "model_1": {
+        "stat_file": "./stat_files/model_1",
+        "training_data": {
+          "systems": [
+            "pt/water/data/data_0"
+          ],
+          "batch_size": 1,
+          "_comment": "that's all"
+        },
+        "validation_data": {
+          "systems": [
+            "pt/water/data/data_0"
+          ],
+          "batch_size": 1,
+          "_comment": "that's all"
+        }
+      },
+      "model_2": {
+        "stat_file": "./stat_files/model_2",
+        "training_data": {
+          "systems": [
+            "pt/water/data/data_0"
+          ],
+          "batch_size": 1,
+          "_comment": "that's all"
+        },
+        "validation_data": {
+          "systems": [
+            "pt/water/data/data_0"
+          ],
+          "batch_size": 1,
+          "_comment": "that's all"
+        }
+      }
+    },
+    "numb_steps": 100000,
+    "warmup_steps": 0,
+    "gradient_max_norm": 5.0,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 100,
+    "_comment": "that's all"
+  }
+}
diff --git a/source/tests/pt/model/water/se_atten.json b/source/tests/pt/model/water/se_atten.json
new file mode 100644
index 0000000000..6b6fca50d3
--- /dev/null
+++ b/source/tests/pt/model/water/se_atten.json
@@ -0,0 +1,86 @@
+{
+  "_comment": "that's all",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "se_atten",
+      "sel": 40,
+      "rcut_smth": 0.5,
+      "rcut": 4.0,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "axis_neuron": 16,
+      "type_one_side": true,
+      "attn": 64,
+      "attn_layer": 2,
+      "attn_dotr": true,
+      "attn_mask": false,
+      "post_ln": true,
+      "ffn": false,
+      "ffn_embed_dim": 512,
+      "activation_function": "tanh",
+      "scaling_factor": 1.0,
+      "head_num": 1,
+      "normalize": false,
+      "temperature": 1.0
+    },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "_comment": " that's all"
+  },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-08,
+    "_comment": "that's all"
+  },
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0,
+    "_comment": " that's all"
+  },
+  "training": {
+    "training_data": {
+      "systems": [
+        "pt/water/data/data_0"
+      ],
+      "batch_size": 1,
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "pt/water/data/data_0"
+      ],
+      "batch_size": 1,
+      "numb_btch": 1,
+      "_comment": "that's all"
+    },
+    "numb_steps": 1000000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "save_ckpt": "model",
+    "_comment": "that's all"
+  }
+}
diff --git a/source/tests/pt/model/water/se_e2_a.json b/source/tests/pt/model/water/se_e2_a.json
new file mode 100644
index 0000000000..425ca3cbf5
--- /dev/null
+++ b/source/tests/pt/model/water/se_e2_a.json
@@ -0,0 +1,77 @@
+{
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 0.50,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "data_stat_nbatch": 20,
+    "_comment": " that's all"
+  },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-8,
+    "_comment": "that's all"
+  },
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "_comment": " that's all"
+  },
+  "training": {
+    "training_data": {
+      "systems": [
+        "pt/water/data/data_0"
+      ],
+      "batch_size": 1,
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "pt/water/data/data_0"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment": "that's all"
+    },
+    "numb_steps": 100000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 10000,
+    "_comment": "that's all"
+  },
+  "_comment": "that's all"
+}
diff --git a/source/tests/pt/model/water/zbl.json b/source/tests/pt/model/water/zbl.json
new file mode 100644
index 0000000000..cb5602d92d
--- /dev/null
+++ b/source/tests/pt/model/water/zbl.json
@@ -0,0 +1,92 @@
+{
+  "_comment1": " model parameters",
+  "model": {
+    "use_srtab": "H2O_tab_potential.txt",
+    "smin_alpha": 0.1,
+    "sw_rmin": 0.8,
+    "sw_rmax": 1.0,
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 0.50,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "type_one_side": true,
+      "precision": "float64",
+      "seed": 1,
+      "_comment2": " that's all"
+    },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "precision": "float64",
+      "seed": 1,
+      "_comment3": " that's all"
+    },
+    "_comment4": " that's all"
+  },
+
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-8,
+    "_comment5": "that's all"
+  },
+
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0,
+    "_comment6": " that's all"
+  },
+
+  "training": {
+    "training_data": {
+      "systems": [
+        "../data/data_0/",
+        "../data/data_1/",
+        "../data/data_2/"
+      ],
+      "batch_size": "auto",
+      "_comment7": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "../data/data_3"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment8": "that's all"
+    },
+    "numb_steps": 1000000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "_comment9": "that's all"
+  },
+
+  "_comment10": "that's all"
+}
diff --git a/source/tests/pt/requirements.txt b/source/tests/pt/requirements.txt
new file mode 100644
index 0000000000..74abad719e
--- /dev/null
+++ b/source/tests/pt/requirements.txt
@@ -0,0 +1,6 @@
+tensorflow>=2.14.0
+deepmd-kit>=2.2.7
+dpdata
+ase
+coverage
+pytest
diff --git a/source/tests/pt/test_LKF.py b/source/tests/pt/test_LKF.py
new file mode 100644
index 0000000000..33aeac7f4f
--- /dev/null
+++ b/source/tests/pt/test_LKF.py
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import unittest
+from pathlib import (
+    Path,
+)
+
+from deepmd.pt.entrypoints.main import (
+    main,
+)
+
+
+class TestLKF(unittest.TestCase):
+    def test_lkf(self):
+        with open(str(Path(__file__).parent / "water/lkf.json")) as fin:
+            content = fin.read()
+        self.config = json.loads(content)
+        self.config["training"]["training_data"]["systems"] = [
+            str(Path(__file__).parent / "water/data/data_0")
+        ]
+        self.config["training"]["validation_data"]["systems"] = [
+            str(Path(__file__).parent / "water/data/data_0")
+        ]
+        self.input_json = "test_lkf.json"
+        with open(self.input_json, "w") as fp:
+            json.dump(self.config, fp, indent=4)
+        main(["train", self.input_json])
+
+    def tearDown(self):
+        os.remove(self.input_json)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/test_auto_batch_size.py b/source/tests/pt/test_auto_batch_size.py
new file mode 100644
index 0000000000..71194e001e
--- /dev/null
+++ b/source/tests/pt/test_auto_batch_size.py
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+
+from deepmd.pt.utils.auto_batch_size import (
+    AutoBatchSize,
+)
+
+
+class TestAutoBatchSize(unittest.TestCase):
+    def test_execute_all(self):
+        dd0 = np.zeros((10000, 2, 1, 3, 4))
+        dd1 = np.ones((10000, 2, 1, 3, 4))
+        auto_batch_size = AutoBatchSize(256, 2.0)
+
+        def func(dd1):
+            return np.zeros_like(dd1), np.ones_like(dd1)
+
+        dd2 = auto_batch_size.execute_all(func, 10000, 2, dd1)
+        np.testing.assert_equal(dd0, dd2[0])
+        np.testing.assert_equal(dd1, dd2[1])
+
+    def test_execute_all_dict(self):
+        dd0 = np.zeros((10000, 2, 1, 3, 4))
+        dd1 = np.ones((10000, 2, 1, 3, 4))
+        auto_batch_size = AutoBatchSize(256, 2.0)
+
+        def func(dd1):
+            return {
+                "foo": np.zeros_like(dd1),
+                "bar": np.ones_like(dd1),
+            }
+
+        dd2 = auto_batch_size.execute_all(func, 10000, 2, dd1)
+        np.testing.assert_equal(dd0, dd2["foo"])
+        np.testing.assert_equal(dd1, dd2["bar"])
diff --git a/source/tests/pt/test_calculator.py b/source/tests/pt/test_calculator.py
new file mode 100644
index 0000000000..52b4b6cbbe
--- /dev/null
+++ b/source/tests/pt/test_calculator.py
@@ -0,0 +1,99 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import unittest
+from copy import (
+    deepcopy,
+)
+from pathlib import (
+    Path,
+)
+
+import numpy as np
+import torch
+
+from deepmd.pt.entrypoints.main import (
+    get_trainer,
+)
+from deepmd.pt.utils.ase_calc import (
+    DPCalculator,
+)
+
+dtype = torch.float64
+
+
+class TestCalculator(unittest.TestCase):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water/se_atten.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = [
+            str(Path(__file__).parent / "water/data/single")
+        ]
+        self.input_json = "test_dp_test.json"
+        with open(self.input_json, "w") as fp:
+            json.dump(self.config, fp, indent=4)
+
+        trainer = get_trainer(deepcopy(self.config))
+        trainer.run()
+
+        with torch.device("cpu"):
+            input_dict, label_dict, _ = trainer.get_data(is_train=False)
+        _, _, more_loss = trainer.wrapper(**input_dict, label=label_dict, cur_lr=1.0)
+
+        self.calculator = DPCalculator("model.pt")
+
+    def test_calculator(self):
+        from ase import (
+            Atoms,
+        )
+
+        natoms = 5
+        cell = torch.eye(3, dtype=dtype, device="cpu") * 10
+        coord = torch.rand([natoms, 3], dtype=dtype, device="cpu")
+        coord = torch.matmul(coord, cell)
+        atype = torch.IntTensor([0, 0, 0, 1, 1])
+        atomic_numbers = [1, 1, 1, 8, 8]
+        idx_perm = [1, 0, 4, 3, 2]
+
+        prec = 1e-10
+        low_prec = 1e-4
+
+        ase_atoms0 = Atoms(
+            numbers=atomic_numbers,
+            positions=coord,
+            # positions=[tuple(item) for item in coordinate],
+            cell=cell,
+            calculator=self.calculator,
+            pbc=True,
+        )
+        e0, f0 = ase_atoms0.get_potential_energy(), ase_atoms0.get_forces()
+        s0, v0 = (
+            ase_atoms0.get_stress(voigt=True),
+            -ase_atoms0.get_stress(voigt=False) * ase_atoms0.get_volume(),
+        )
+
+        ase_atoms1 = Atoms(
+            numbers=[atomic_numbers[i] for i in idx_perm],
+            positions=coord[idx_perm, :],
+            # positions=[tuple(item) for item in coordinate],
+            cell=cell,
+            calculator=self.calculator,
+            pbc=True,
+        )
+        e1, f1 = ase_atoms1.get_potential_energy(), ase_atoms1.get_forces()
+        s1, v1 = (
+            ase_atoms1.get_stress(voigt=True),
+            -ase_atoms1.get_stress(voigt=False) * ase_atoms1.get_volume(),
+        )
+
+        assert isinstance(e0, float)
+        assert f0.shape == (natoms, 3)
+        assert v0.shape == (3, 3)
+        np.testing.assert_allclose(e0, e1, rtol=low_prec, atol=prec)
+        np.testing.assert_allclose(f0[idx_perm, :], f1, rtol=low_prec, atol=prec)
+        np.testing.assert_allclose(s0, s1, rtol=low_prec, atol=prec)
+        np.testing.assert_allclose(v0, v1, rtol=low_prec, atol=prec)
diff --git a/source/tests/pt/test_dp_test.py b/source/tests/pt/test_dp_test.py
new file mode 100644
index 0000000000..271b8f1082
--- /dev/null
+++ b/source/tests/pt/test_dp_test.py
@@ -0,0 +1,143 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import shutil
+import tempfile
+import unittest
+from copy import (
+    deepcopy,
+)
+from pathlib import (
+    Path,
+)
+
+import numpy as np
+import torch
+
+from deepmd.entrypoints.test import test as dp_test
+from deepmd.pt.entrypoints.main import (
+    get_trainer,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+)
+
+from .model.test_permutation import (
+    model_se_e2_a,
+    model_spin,
+)
+
+
+class DPTest:
+    def test_dp_test_1_frame(self):
+        trainer = get_trainer(deepcopy(self.config))
+        with torch.device("cpu"):
+            input_dict, label_dict, _ = trainer.get_data(is_train=False)
+        has_spin = getattr(trainer.model, "has_spin", False)
+        if callable(has_spin):
+            has_spin = has_spin()
+        if not has_spin:
+            input_dict.pop("spin", None)
+        input_dict["do_atomic_virial"] = True
+        result = trainer.model(**input_dict)
+        model = torch.jit.script(trainer.model)
+        tmp_model = tempfile.NamedTemporaryFile(delete=False, suffix=".pth")
+        torch.jit.save(model, tmp_model.name)
+        dp_test(
+            model=tmp_model.name,
+            system=self.config["training"]["validation_data"]["systems"][0],
+            datafile=None,
+            set_prefix="set",
+            numb_test=0,
+            rand_seed=None,
+            shuffle_test=False,
+            detail_file=self.detail_file,
+            atomic=False,
+        )
+        os.unlink(tmp_model.name)
+        natom = input_dict["atype"].shape[1]
+        pred_e = np.loadtxt(self.detail_file + ".e.out", ndmin=2)[0, 1]
+        np.testing.assert_almost_equal(
+            pred_e,
+            to_numpy_array(result["energy"])[0][0],
+        )
+        pred_e_peratom = np.loadtxt(self.detail_file + ".e_peratom.out", ndmin=2)[0, 1]
+        np.testing.assert_almost_equal(pred_e_peratom, pred_e / natom)
+        if not has_spin:
+            pred_f = np.loadtxt(self.detail_file + ".f.out", ndmin=2)[:, 3:6]
+            np.testing.assert_almost_equal(
+                pred_f,
+                to_numpy_array(result["force"]).reshape(-1, 3),
+            )
+            pred_v = np.loadtxt(self.detail_file + ".v.out", ndmin=2)[:, 9:18]
+            np.testing.assert_almost_equal(
+                pred_v,
+                to_numpy_array(result["virial"]),
+            )
+            pred_v_peratom = np.loadtxt(self.detail_file + ".v_peratom.out", ndmin=2)[
+                :, 9:18
+            ]
+            np.testing.assert_almost_equal(pred_v_peratom, pred_v / natom)
+        else:
+            pred_fr = np.loadtxt(self.detail_file + ".fr.out", ndmin=2)[:, 3:6]
+            np.testing.assert_almost_equal(
+                pred_fr,
+                to_numpy_array(result["force"]).reshape(-1, 3),
+            )
+            pred_fm = np.loadtxt(self.detail_file + ".fm.out", ndmin=2)[:, 3:6]
+            np.testing.assert_almost_equal(
+                pred_fm,
+                to_numpy_array(
+                    result["force_mag"][result["mask_mag"].bool().squeeze(-1)]
+                ).reshape(-1, 3),
+            )
+
+    def tearDown(self):
+        for f in os.listdir("."):
+            if f.startswith("model") and f.endswith(".pt"):
+                os.remove(f)
+            if f.startswith(self.detail_file):
+                os.remove(f)
+            if f in ["lcurve.out", self.input_json]:
+                os.remove(f)
+            if f in ["stat_files"]:
+                shutil.rmtree(f)
+
+
+class TestDPTestSeA(DPTest, unittest.TestCase):
+    def setUp(self):
+        self.detail_file = "test_dp_test_ener_detail"
+        input_json = str(Path(__file__).parent / "water/se_atten.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+        data_file = [str(Path(__file__).parent / "water/data/single")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.config["model"] = deepcopy(model_se_e2_a)
+        self.input_json = "test_dp_test.json"
+        with open(self.input_json, "w") as fp:
+            json.dump(self.config, fp, indent=4)
+
+
+class TestDPTestSeASpin(DPTest, unittest.TestCase):
+    def setUp(self):
+        self.detail_file = "test_dp_test_ener_spin_detail"
+        input_json = str(Path(__file__).parent / "water/se_atten.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+        data_file = [str(Path(__file__).parent / "NiO/data/single")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.config["model"] = deepcopy(model_spin)
+        self.config["model"]["type_map"] = ["Ni", "O", "B"]
+        self.input_json = "test_dp_test.json"
+        with open(self.input_json, "w") as fp:
+            json.dump(self.config, fp, indent=4)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/test_finetune.py b/source/tests/pt/test_finetune.py
new file mode 100644
index 0000000000..79f8c57cb8
--- /dev/null
+++ b/source/tests/pt/test_finetune.py
@@ -0,0 +1,149 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import tempfile
+import unittest
+from copy import (
+    deepcopy,
+)
+from pathlib import (
+    Path,
+)
+
+import numpy as np
+import torch
+
+from deepmd.infer.deep_eval import (
+    DeepEval,
+)
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.utils.dataloader import (
+    DpLoaderSet,
+)
+from deepmd.pt.utils.stat import (
+    make_stat_input,
+)
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+)
+
+from .model.test_permutation import (
+    model_dpa2,
+    model_se_e2_a,
+    model_zbl,
+)
+from .test_stat import (
+    energy_data_requirement,
+)
+
+
+class FinetuneTest:
+    def test_finetune_change_out_bias(self):
+        # get model
+        model = get_model(self.model_config)
+        fitting_net = model.get_fitting_net()
+        fitting_net["bias_atom_e"] = torch.rand_like(fitting_net["bias_atom_e"])
+        energy_bias_before = deepcopy(
+            to_numpy_array(fitting_net["bias_atom_e"]).reshape(-1)
+        )
+
+        # prepare original model for test
+        dp = torch.jit.script(model)
+        tmp_model = tempfile.NamedTemporaryFile(delete=False, suffix=".pth")
+        torch.jit.save(dp, tmp_model.name)
+        dp = DeepEval(tmp_model.name)
+        origin_type_map = ["O", "H"]
+        full_type_map = ["O", "H", "B"]
+
+        # change energy bias
+        model.atomic_model.change_out_bias(
+            self.sampled,
+            bias_adjust_mode="change-by-statistic",
+            origin_type_map=origin_type_map,
+            full_type_map=full_type_map,
+        )
+        energy_bias_after = deepcopy(
+            to_numpy_array(fitting_net["bias_atom_e"]).reshape(-1)
+        )
+
+        # get ground-truth energy bias change
+        sorter = np.argsort(full_type_map)
+        idx_type_map = sorter[
+            np.searchsorted(full_type_map, origin_type_map, sorter=sorter)
+        ]
+        ntest = 1
+        atom_nums = np.tile(
+            np.bincount(to_numpy_array(self.sampled[0]["atype"][0]))[idx_type_map],
+            (ntest, 1),
+        )
+        energy = dp.eval(
+            to_numpy_array(self.sampled[0]["coord"][:ntest]),
+            to_numpy_array(self.sampled[0]["box"][:ntest]),
+            to_numpy_array(self.sampled[0]["atype"][0]),
+        )[0]
+        energy_diff = to_numpy_array(self.sampled[0]["energy"][:ntest]) - energy
+        finetune_shift = (
+            energy_bias_after[idx_type_map] - energy_bias_before[idx_type_map]
+        )
+        ground_truth_shift = np.linalg.lstsq(atom_nums, energy_diff, rcond=None)[
+            0
+        ].reshape(-1)
+
+        # check values
+        np.testing.assert_almost_equal(finetune_shift, ground_truth_shift, decimal=10)
+
+
+class TestEnergyModelSeA(unittest.TestCase, FinetuneTest):
+    def setUp(self):
+        self.data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.model_config = model_se_e2_a
+        self.data = DpLoaderSet(
+            self.data_file,
+            batch_size=1,
+            type_map=self.model_config["type_map"],
+        )
+        self.data.add_data_requirement(energy_data_requirement)
+        self.sampled = make_stat_input(
+            self.data.systems,
+            self.data.dataloaders,
+            nbatches=1,
+        )
+
+
+@unittest.skip("change bias not implemented yet.")
+class TestEnergyZBLModelSeA(unittest.TestCase, FinetuneTest):
+    def setUp(self):
+        self.data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.model_config = model_zbl
+        self.data = DpLoaderSet(
+            self.data_file,
+            batch_size=1,
+            type_map=self.model_config["type_map"],
+        )
+        self.data.add_data_requirement(energy_data_requirement)
+        self.sampled = make_stat_input(
+            self.data.systems,
+            self.data.dataloaders,
+            nbatches=1,
+        )
+
+
+class TestEnergyModelDPA2(unittest.TestCase, FinetuneTest):
+    def setUp(self):
+        self.data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.model_config = model_dpa2
+        self.data = DpLoaderSet(
+            self.data_file,
+            batch_size=1,
+            type_map=self.model_config["type_map"],
+        )
+        self.data.add_data_requirement(energy_data_requirement)
+        self.sampled = make_stat_input(
+            self.data.systems,
+            self.data.dataloaders,
+            nbatches=1,
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/test_init_frz_model.py b/source/tests/pt/test_init_frz_model.py
new file mode 100644
index 0000000000..223b28515d
--- /dev/null
+++ b/source/tests/pt/test_init_frz_model.py
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import unittest
+from argparse import (
+    Namespace,
+)
+from copy import (
+    deepcopy,
+)
+from pathlib import (
+    Path,
+)
+
+import numpy as np
+
+from deepmd.pt.entrypoints.main import (
+    freeze,
+    get_trainer,
+)
+from deepmd.pt.infer.deep_eval import (
+    DeepPot,
+)
+
+
+class TestInitFrzModel(unittest.TestCase):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water/se_atten.json")
+        with open(input_json) as f:
+            config = json.load(f)
+        config["training"]["numb_steps"] = 1
+        config["training"]["save_freq"] = 1
+        config["learning_rate"]["start_lr"] = 1.0
+        config["training"]["training_data"]["systems"] = [
+            str(Path(__file__).parent / "water/data/single")
+        ]
+        config["training"]["validation_data"]["systems"] = [
+            str(Path(__file__).parent / "water/data/single")
+        ]
+
+        self.models = []
+        for imodel in range(2):
+            if imodel == 1:
+                config["training"]["numb_steps"] = 0
+                trainer = get_trainer(deepcopy(config), init_frz_model=self.models[-1])
+            else:
+                trainer = get_trainer(deepcopy(config))
+            trainer.run()
+
+            frozen_model = f"frozen_model{imodel}.pth"
+            ns = Namespace(
+                model="model.pt",
+                output=frozen_model,
+                head=None,
+            )
+            freeze(ns)
+            self.models.append(frozen_model)
+
+    def test_dp_test(self):
+        dp1 = DeepPot(str(self.models[0]))
+        dp2 = DeepPot(str(self.models[1]))
+        cell = np.array(
+            [
+                5.122106549439247480e00,
+                4.016537340154059388e-01,
+                6.951654033828678081e-01,
+                4.016537340154059388e-01,
+                6.112136112297989143e00,
+                8.178091365465004481e-01,
+                6.951654033828678081e-01,
+                8.178091365465004481e-01,
+                6.159552512682983760e00,
+            ]
+        ).reshape(1, 3, 3)
+        coord = np.array(
+            [
+                2.978060152121375648e00,
+                3.588469695887098077e00,
+                2.792459820604495491e00,
+                3.895592322591093115e00,
+                2.712091020667753760e00,
+                1.366836847133650501e00,
+                9.955616170888935690e-01,
+                4.121324820711413039e00,
+                1.817239061889086571e00,
+                3.553661462345699906e00,
+                5.313046969500791583e00,
+                6.635182659098815883e00,
+                6.088601018589653080e00,
+                6.575011420004332585e00,
+                6.825240650611076099e00,
+            ]
+        ).reshape(1, -1, 3)
+        atype = np.array([0, 0, 0, 1, 1]).reshape(1, -1)
+
+        ret1 = dp1.eval(coord, cell, atype, atomic=True)
+        e1, f1, v1, ae1, av1 = ret1[0], ret1[1], ret1[2], ret1[3], ret1[4]
+        ret2 = dp2.eval(coord, cell, atype, atomic=True)
+        e2, f2, v2, ae2, av2 = ret2[0], ret2[1], ret2[2], ret2[3], ret2[4]
+        np.testing.assert_allclose(e1, e2, rtol=1e-10, atol=1e-10)
+        np.testing.assert_allclose(f1, f2, rtol=1e-10, atol=1e-10)
+        np.testing.assert_allclose(v1, v2, rtol=1e-10, atol=1e-10)
+        np.testing.assert_allclose(ae1, ae2, rtol=1e-10, atol=1e-10)
+        np.testing.assert_allclose(av1, av2, rtol=1e-10, atol=1e-10)
diff --git a/source/tests/pt/test_loss.py b/source/tests/pt/test_loss.py
new file mode 100644
index 0000000000..17b05dadc6
--- /dev/null
+++ b/source/tests/pt/test_loss.py
@@ -0,0 +1,411 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import os
+import unittest
+
+import numpy as np
+import tensorflow.compat.v1 as tf
+import torch
+
+tf.disable_eager_execution()
+from copy import (
+    deepcopy,
+)
+from pathlib import (
+    Path,
+)
+
+from deepmd.pt.loss import (
+    EnergySpinLoss,
+    EnergyStdLoss,
+)
+from deepmd.pt.utils.dataset import (
+    DeepmdDataSetForLoader,
+)
+from deepmd.tf.loss.ener import (
+    EnerSpinLoss,
+    EnerStdLoss,
+)
+from deepmd.utils.data import (
+    DataRequirementItem,
+)
+
+from .model.test_embedding_net import (
+    get_single_batch,
+)
+from .test_stat import (
+    energy_data_requirement,
+)
+
+CUR_DIR = os.path.dirname(__file__)
+
+
+def get_batch(system, type_map, data_requirement):
+    dataset = DeepmdDataSetForLoader(system, type_map)
+    dataset.add_data_requirement(data_requirement)
+    np_batch, pt_batch = get_single_batch(dataset)
+    return np_batch, pt_batch
+
+
+class TestEnerStdLoss(unittest.TestCase):
+    def setUp(self):
+        self.system = str(Path(__file__).parent / "water/data/data_0")
+        self.type_map = ["H", "O"]
+        self.start_lr = 1.1
+        self.start_pref_e = 0.02
+        self.limit_pref_e = 1.0
+        self.start_pref_f = 1000.0
+        self.limit_pref_f = 1.0
+        self.start_pref_v = 0.02
+        self.limit_pref_v = 1.0
+        self.cur_lr = 1.2
+        # data
+        np_batch, pt_batch = get_batch(
+            self.system, self.type_map, energy_data_requirement
+        )
+        natoms = np_batch["natoms"]
+        self.nloc = natoms[0]
+        l_energy, l_force, l_virial = (
+            np_batch["energy"],
+            np_batch["force"],
+            np_batch["virial"],
+        )
+        p_energy, p_force, p_virial = (
+            np.ones_like(l_energy),
+            np.ones_like(l_force),
+            np.ones_like(l_virial),
+        )
+        nloc = natoms[0]
+        batch_size = pt_batch["coord"].shape[0]
+        atom_energy = np.zeros(shape=[batch_size, nloc])
+        atom_pref = np.zeros(shape=[batch_size, nloc * 3])
+        # tf
+        base = EnerStdLoss(
+            self.start_lr,
+            self.start_pref_e,
+            self.limit_pref_e,
+            self.start_pref_f,
+            self.limit_pref_f,
+            self.start_pref_v,
+            self.limit_pref_v,
+        )
+        self.g = tf.Graph()
+        with self.g.as_default():
+            t_cur_lr = tf.placeholder(shape=[], dtype=tf.float64)
+            t_natoms = tf.placeholder(shape=[None], dtype=tf.int32)
+            t_penergy = tf.placeholder(shape=[None, 1], dtype=tf.float64)
+            t_pforce = tf.placeholder(shape=[None, None], dtype=tf.float64)
+            t_pvirial = tf.placeholder(shape=[None, 9], dtype=tf.float64)
+            t_patom_energy = tf.placeholder(shape=[None, None], dtype=tf.float64)
+            t_lenergy = tf.placeholder(shape=[None, 1], dtype=tf.float64)
+            t_lforce = tf.placeholder(shape=[None, None], dtype=tf.float64)
+            t_lvirial = tf.placeholder(shape=[None, 9], dtype=tf.float64)
+            t_latom_energy = tf.placeholder(shape=[None, None], dtype=tf.float64)
+            t_atom_pref = tf.placeholder(shape=[None, None], dtype=tf.float64)
+            find_energy = tf.constant(1.0, dtype=tf.float64)
+            find_force = tf.constant(1.0, dtype=tf.float64)
+            find_virial = tf.constant(1.0, dtype=tf.float64)
+            find_atom_energy = tf.constant(0.0, dtype=tf.float64)
+            find_atom_pref = tf.constant(0.0, dtype=tf.float64)
+            model_dict = {
+                "energy": t_penergy,
+                "force": t_pforce,
+                "virial": t_pvirial,
+                "atom_ener": t_patom_energy,
+            }
+            label_dict = {
+                "energy": t_lenergy,
+                "force": t_lforce,
+                "virial": t_lvirial,
+                "atom_ener": t_latom_energy,
+                "atom_pref": t_atom_pref,
+                "find_energy": find_energy,
+                "find_force": find_force,
+                "find_virial": find_virial,
+                "find_atom_ener": find_atom_energy,
+                "find_atom_pref": find_atom_pref,
+            }
+            self.base_loss_sess = base.build(
+                t_cur_lr, t_natoms, model_dict, label_dict, ""
+            )
+        # torch
+        self.feed_dict = {
+            t_cur_lr: self.cur_lr,
+            t_natoms: natoms,
+            t_penergy: p_energy,
+            t_pforce: p_force,
+            t_pvirial: p_virial.reshape(-1, 9),
+            t_patom_energy: atom_energy,
+            t_lenergy: l_energy,
+            t_lforce: l_force,
+            t_lvirial: l_virial.reshape(-1, 9),
+            t_latom_energy: atom_energy,
+            t_atom_pref: atom_pref,
+        }
+        self.model_pred = {
+            "energy": torch.from_numpy(p_energy),
+            "force": torch.from_numpy(p_force),
+            "virial": torch.from_numpy(p_virial),
+        }
+        self.label = {
+            "energy": torch.from_numpy(l_energy),
+            "find_energy": 1.0,
+            "force": torch.from_numpy(l_force),
+            "find_force": 1.0,
+            "virial": torch.from_numpy(l_virial),
+            "find_virial": 1.0,
+        }
+        self.label_absent = {
+            "energy": torch.from_numpy(l_energy),
+            "force": torch.from_numpy(l_force),
+            "virial": torch.from_numpy(l_virial),
+        }
+        self.natoms = pt_batch["natoms"]
+
+    def tearDown(self) -> None:
+        tf.reset_default_graph()
+        return super().tearDown()
+
+    def test_consistency(self):
+        with tf.Session(graph=self.g) as sess:
+            base_loss, base_more_loss = sess.run(
+                self.base_loss_sess, feed_dict=self.feed_dict
+            )
+        mine = EnergyStdLoss(
+            self.start_lr,
+            self.start_pref_e,
+            self.limit_pref_e,
+            self.start_pref_f,
+            self.limit_pref_f,
+            self.start_pref_v,
+            self.limit_pref_v,
+        )
+
+        def fake_model():
+            return self.model_pred
+
+        _, my_loss, my_more_loss = mine(
+            {},
+            fake_model,
+            self.label,
+            self.nloc,
+            self.cur_lr,
+        )
+        _, my_loss_absent, my_more_loss_absent = mine(
+            {},
+            fake_model,
+            self.label_absent,
+            self.nloc,
+            self.cur_lr,
+        )
+        my_loss = my_loss.detach().cpu()
+        my_loss_absent = my_loss_absent.detach().cpu()
+        self.assertTrue(np.allclose(base_loss, my_loss.numpy()))
+        self.assertTrue(np.allclose(0.0, my_loss_absent.numpy()))
+        for key in ["ener", "force", "virial"]:
+            self.assertTrue(
+                np.allclose(
+                    base_more_loss["l2_%s_loss" % key], my_more_loss["l2_%s_loss" % key]
+                )
+            )
+            self.assertTrue(np.isnan(my_more_loss_absent["l2_%s_loss" % key]))
+
+
+class TestEnerSpinLoss(unittest.TestCase):
+    def setUp(self):
+        self.system = str(Path(__file__).parent / "NiO/data/data_0")
+        self.type_map = ["Ni", "O"]
+        self.start_lr = 1.1
+        self.start_pref_e = 0.02
+        self.limit_pref_e = 1.0
+        self.start_pref_fr = 1000.0
+        self.limit_pref_fr = 1.0
+        self.start_pref_fm = 1000.0
+        self.limit_pref_fm = 1.0
+        self.cur_lr = 1.2
+        self.use_spin = [1, 0]
+        # data
+        spin_data_requirement = deepcopy(energy_data_requirement)
+        spin_data_requirement.append(
+            DataRequirementItem(
+                "force_mag",
+                ndof=3,
+                atomic=True,
+                must=False,
+                high_prec=False,
+            )
+        )
+        np_batch, pt_batch = get_batch(
+            self.system, self.type_map, spin_data_requirement
+        )
+        natoms = np_batch["natoms"]
+        self.nloc = natoms[0]
+        nframes = np_batch["energy"].shape[0]
+        l_energy, l_force_real, l_force_mag, l_virial = (
+            np_batch["energy"],
+            np_batch["force"],
+            np_batch["force_mag"],
+            np_batch["virial"],
+        )
+        # merged force for tf old implement
+        l_force_merge_tf = np.concatenate(
+            [
+                l_force_real.reshape(nframes, self.nloc, 3),
+                l_force_mag.reshape(nframes, self.nloc, 3)[
+                    np_batch["atype"] == 0
+                ].reshape(nframes, -1, 3),
+            ],
+            axis=1,
+        ).reshape(nframes, -1)
+        p_energy, p_force_real, p_force_mag, p_force_merge_tf, p_virial = (
+            np.ones_like(l_energy),
+            np.ones_like(l_force_real),
+            np.ones_like(l_force_mag),
+            np.ones_like(l_force_merge_tf),
+            np.ones_like(l_virial),
+        )
+        virt_nloc = (np_batch["atype"] == 0).sum(-1)
+        natoms_tf = np.concatenate([natoms, virt_nloc], axis=0)
+        natoms_tf[:2] += virt_nloc
+        nloc = natoms_tf[0]
+        batch_size = pt_batch["coord"].shape[0]
+        atom_energy = np.zeros(shape=[batch_size, nloc])
+        atom_pref = np.zeros(shape=[batch_size, nloc * 3])
+        self.nloc_tf = nloc
+        # tf
+        base = EnerSpinLoss(
+            self.start_lr,
+            self.start_pref_e,
+            self.limit_pref_e,
+            self.start_pref_fr,
+            self.limit_pref_fr,
+            self.start_pref_fm,
+            self.limit_pref_fm,
+            use_spin=self.use_spin,
+        )
+        self.g = tf.Graph()
+        with self.g.as_default():
+            t_cur_lr = tf.placeholder(shape=[], dtype=tf.float64)
+            t_natoms = tf.placeholder(shape=[None], dtype=tf.int32)
+            t_penergy = tf.placeholder(shape=[None, 1], dtype=tf.float64)
+            t_pforce = tf.placeholder(shape=[None, None], dtype=tf.float64)
+            t_pvirial = tf.placeholder(shape=[None, 9], dtype=tf.float64)
+            t_patom_energy = tf.placeholder(shape=[None, None], dtype=tf.float64)
+            t_lenergy = tf.placeholder(shape=[None, 1], dtype=tf.float64)
+            t_lforce = tf.placeholder(shape=[None, None], dtype=tf.float64)
+            t_lvirial = tf.placeholder(shape=[None, 9], dtype=tf.float64)
+            t_latom_energy = tf.placeholder(shape=[None, None], dtype=tf.float64)
+            t_atom_pref = tf.placeholder(shape=[None, None], dtype=tf.float64)
+            find_energy = tf.constant(1.0, dtype=tf.float64)
+            find_force = tf.constant(1.0, dtype=tf.float64)
+            find_virial = tf.constant(0.0, dtype=tf.float64)
+            find_atom_energy = tf.constant(0.0, dtype=tf.float64)
+            find_atom_pref = tf.constant(0.0, dtype=tf.float64)
+            model_dict = {
+                "energy": t_penergy,
+                "force": t_pforce,
+                "virial": t_pvirial,
+                "atom_ener": t_patom_energy,
+            }
+            label_dict = {
+                "energy": t_lenergy,
+                "force": t_lforce,
+                "virial": t_lvirial,
+                "atom_ener": t_latom_energy,
+                "atom_pref": t_atom_pref,
+                "find_energy": find_energy,
+                "find_force": find_force,
+                "find_virial": find_virial,
+                "find_atom_ener": find_atom_energy,
+                "find_atom_pref": find_atom_pref,
+            }
+            self.base_loss_sess = base.build(
+                t_cur_lr, t_natoms, model_dict, label_dict, ""
+            )
+        # torch
+        self.feed_dict = {
+            t_cur_lr: self.cur_lr,
+            t_natoms: natoms_tf,
+            t_penergy: p_energy,
+            t_pforce: p_force_merge_tf,
+            t_pvirial: p_virial.reshape(-1, 9),
+            t_patom_energy: atom_energy,
+            t_lenergy: l_energy,
+            t_lforce: l_force_merge_tf,
+            t_lvirial: l_virial.reshape(-1, 9),
+            t_latom_energy: atom_energy,
+            t_atom_pref: atom_pref,
+        }
+        self.model_pred = {
+            "energy": torch.from_numpy(p_energy),
+            "force": torch.from_numpy(p_force_real).reshape(nframes, self.nloc, 3),
+            "force_mag": torch.from_numpy(p_force_mag).reshape(nframes, self.nloc, 3),
+            "mask_mag": torch.from_numpy(np_batch["atype"] == 0).reshape(
+                nframes, self.nloc, 1
+            ),
+        }
+        self.label = {
+            "energy": torch.from_numpy(l_energy),
+            "find_energy": 1.0,
+            "force": torch.from_numpy(l_force_real).reshape(nframes, self.nloc, 3),
+            "find_force": 1.0,
+            "force_mag": torch.from_numpy(l_force_mag).reshape(nframes, self.nloc, 3),
+            "find_force_mag": 1.0,
+        }
+        self.label_absent = {
+            "energy": torch.from_numpy(l_energy),
+            "force": torch.from_numpy(l_force_real).reshape(nframes, self.nloc, 3),
+            "force_mag": torch.from_numpy(l_force_mag).reshape(nframes, self.nloc, 3),
+        }
+        self.natoms = pt_batch["natoms"]
+
+    def tearDown(self) -> None:
+        tf.reset_default_graph()
+        return super().tearDown()
+
+    def test_consistency(self):
+        with tf.Session(graph=self.g) as sess:
+            base_loss, base_more_loss = sess.run(
+                self.base_loss_sess, feed_dict=self.feed_dict
+            )
+        mine = EnergySpinLoss(
+            self.start_lr,
+            self.start_pref_e,
+            self.limit_pref_e,
+            self.start_pref_fr,
+            self.limit_pref_fr,
+            self.start_pref_fm,
+            self.limit_pref_fm,
+        )
+
+        def fake_model():
+            return self.model_pred
+
+        _, my_loss, my_more_loss = mine(
+            {},
+            fake_model,
+            self.label,
+            self.nloc_tf,  # use tf natoms pref
+            self.cur_lr,
+        )
+        _, my_loss_absent, my_more_loss_absent = mine(
+            {},
+            fake_model,
+            self.label_absent,
+            self.nloc_tf,  # use tf natoms pref
+            self.cur_lr,
+        )
+        my_loss = my_loss.detach().cpu()
+        my_loss_absent = my_loss_absent.detach().cpu()
+        self.assertTrue(np.allclose(base_loss, my_loss.numpy()))
+        self.assertTrue(np.allclose(0.0, my_loss_absent.numpy()))
+        for key in ["ener", "force_r", "force_m"]:
+            self.assertTrue(
+                np.allclose(
+                    base_more_loss["l2_%s_loss" % key], my_more_loss["l2_%s_loss" % key]
+                )
+            )
+            self.assertTrue(np.isnan(my_more_loss_absent["l2_%s_loss" % key]))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/test_lr.py b/source/tests/pt/test_lr.py
new file mode 100644
index 0000000000..9fbde599bb
--- /dev/null
+++ b/source/tests/pt/test_lr.py
@@ -0,0 +1,106 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+import tensorflow.compat.v1 as tf
+
+tf.disable_eager_execution()
+
+from deepmd.pt.utils.learning_rate import (
+    LearningRateExp,
+)
+from deepmd.tf.utils import (
+    learning_rate,
+)
+
+
+class TestLearningRate(unittest.TestCase):
+    def setUp(self):
+        self.start_lr = 0.001
+        self.stop_lr = 3.51e-8
+        self.decay_steps = np.arange(400, 601, 100)
+        self.stop_steps = np.arange(500, 1600, 500)
+
+    def test_consistency(self):
+        for decay_step in self.decay_steps:
+            for stop_step in self.stop_steps:
+                self.decay_step = decay_step
+                self.stop_step = stop_step
+                self.judge_it()
+                self.decay_rate_pt()
+
+    def judge_it(self):
+        base_lr = learning_rate.LearningRateExp(
+            self.start_lr, self.stop_lr, self.decay_step
+        )
+        g = tf.Graph()
+        with g.as_default():
+            global_step = tf.placeholder(shape=[], dtype=tf.int32)
+            t_lr = base_lr.build(global_step, self.stop_step)
+
+        my_lr = LearningRateExp(
+            self.start_lr, self.stop_lr, self.decay_step, self.stop_step
+        )
+        with tf.Session(graph=g) as sess:
+            base_vals = [
+                sess.run(t_lr, feed_dict={global_step: step_id})
+                for step_id in range(self.stop_step)
+                if step_id % self.decay_step != 0
+            ]
+        my_vals = [
+            my_lr.value(step_id)
+            for step_id in range(self.stop_step)
+            if step_id % self.decay_step != 0
+        ]
+        self.assertTrue(np.allclose(base_vals, my_vals))
+        tf.reset_default_graph()
+
+    def decay_rate_pt(self):
+        my_lr = LearningRateExp(
+            self.start_lr, self.stop_lr, self.decay_step, self.stop_step
+        )
+
+        default_ds = 100 if self.stop_step // 10 > 100 else self.stop_step // 100 + 1
+        if self.decay_step >= self.stop_step:
+            self.decay_step = default_ds
+        decay_rate = np.exp(
+            np.log(self.stop_lr / self.start_lr) / (self.stop_step / self.decay_step)
+        )
+        my_lr_decay = LearningRateExp(
+            self.start_lr,
+            1e-10,
+            self.decay_step,
+            self.stop_step,
+            decay_rate=decay_rate,
+        )
+        min_lr = 1e-5
+        my_lr_decay_trunc = LearningRateExp(
+            self.start_lr,
+            min_lr,
+            self.decay_step,
+            self.stop_step,
+            decay_rate=decay_rate,
+        )
+        my_vals = [
+            my_lr.value(step_id)
+            for step_id in range(self.stop_step)
+            if step_id % self.decay_step != 0
+        ]
+        my_vals_decay = [
+            my_lr_decay.value(step_id)
+            for step_id in range(self.stop_step)
+            if step_id % self.decay_step != 0
+        ]
+        my_vals_decay_trunc = [
+            my_lr_decay_trunc.value(step_id)
+            for step_id in range(self.stop_step)
+            if step_id % self.decay_step != 0
+        ]
+        self.assertTrue(np.allclose(my_vals_decay, my_vals))
+        self.assertTrue(
+            np.allclose(my_vals_decay_trunc, np.clip(my_vals, a_min=min_lr, a_max=None))
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/test_multitask.py b/source/tests/pt/test_multitask.py
new file mode 100644
index 0000000000..8bdb42df52
--- /dev/null
+++ b/source/tests/pt/test_multitask.py
@@ -0,0 +1,279 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import shutil
+import unittest
+from copy import (
+    deepcopy,
+)
+from pathlib import (
+    Path,
+)
+
+import torch
+
+from deepmd.pt.entrypoints.main import (
+    get_trainer,
+)
+from deepmd.pt.utils.multi_task import (
+    preprocess_shared_params,
+)
+
+from .model.test_permutation import (
+    model_dpa1,
+    model_dpa2,
+    model_se_e2_a,
+)
+
+multitask_template_json = str(Path(__file__).parent / "water/multitask.json")
+with open(multitask_template_json) as f:
+    multitask_template = json.load(f)
+
+
+class MultiTaskTrainTest:
+    def test_multitask_train(self):
+        # test multitask training
+        trainer = get_trainer(deepcopy(self.config), shared_links=self.shared_links)
+        trainer.run()
+        # check model keys
+        self.assertEqual(len(trainer.wrapper.model), 2)
+        self.assertIn("model_1", trainer.wrapper.model)
+        self.assertIn("model_2", trainer.wrapper.model)
+
+        # check shared parameters
+        multi_state_dict = trainer.wrapper.model.state_dict()
+        for state_key in multi_state_dict:
+            if "model_1" in state_key:
+                self.assertIn(state_key.replace("model_1", "model_2"), multi_state_dict)
+            if "model_2" in state_key:
+                self.assertIn(state_key.replace("model_2", "model_1"), multi_state_dict)
+            if "model_1.descriptor" in state_key:
+                torch.testing.assert_close(
+                    multi_state_dict[state_key],
+                    multi_state_dict[state_key.replace("model_1", "model_2")],
+                )
+
+        # test multitask fine-tuning
+        # add model_3
+        self.origin_config["model"]["model_dict"]["model_3"] = deepcopy(
+            self.origin_config["model"]["model_dict"]["model_2"]
+        )
+        self.origin_config["loss_dict"]["model_3"] = deepcopy(
+            self.origin_config["loss_dict"]["model_2"]
+        )
+        self.origin_config["training"]["model_prob"]["model_3"] = deepcopy(
+            self.origin_config["training"]["model_prob"]["model_2"]
+        )
+        self.origin_config["training"]["data_dict"]["model_3"] = deepcopy(
+            self.origin_config["training"]["data_dict"]["model_2"]
+        )
+        self.origin_config["training"]["data_dict"]["model_3"]["stat_file"] = (
+            self.origin_config[
+                "training"
+            ]["data_dict"]["model_3"]["stat_file"].replace("model_2", "model_3")
+        )
+
+        # add model_4
+        self.origin_config["model"]["model_dict"]["model_4"] = deepcopy(
+            self.origin_config["model"]["model_dict"]["model_2"]
+        )
+        self.origin_config["loss_dict"]["model_4"] = deepcopy(
+            self.origin_config["loss_dict"]["model_2"]
+        )
+        self.origin_config["training"]["model_prob"]["model_4"] = deepcopy(
+            self.origin_config["training"]["model_prob"]["model_2"]
+        )
+        self.origin_config["training"]["data_dict"]["model_4"] = deepcopy(
+            self.origin_config["training"]["data_dict"]["model_2"]
+        )
+        self.origin_config["training"]["data_dict"]["model_4"]["stat_file"] = (
+            self.origin_config[
+                "training"
+            ]["data_dict"]["model_4"]["stat_file"].replace("model_2", "model_4")
+        )
+
+        # set finetune rules
+        # model_1 resuming from model_1
+        # pass
+
+        # model_2 fine-tuning from model_2
+        self.origin_config["model"]["model_dict"]["model_2"]["finetune_head"] = (
+            "model_2"
+        )
+
+        # new model_3 fine-tuning from model_2
+        self.origin_config["model"]["model_dict"]["model_3"]["finetune_head"] = (
+            "model_2"
+        )
+
+        # new model_4 fine-tuning with randomly initialized fitting net
+        # pass
+
+        self.origin_config["model"], shared_links_finetune = preprocess_shared_params(
+            self.origin_config["model"]
+        )
+
+        trainer_finetune = get_trainer(
+            deepcopy(self.origin_config),
+            finetune_model=self.config["training"].get("save_ckpt", "model.ckpt")
+            + ".pt",
+            shared_links=shared_links_finetune,
+        )
+
+        # check parameters
+        multi_state_dict_finetuned = trainer_finetune.wrapper.model.state_dict()
+        for state_key in multi_state_dict_finetuned:
+            if "model_1" in state_key:
+                torch.testing.assert_close(
+                    multi_state_dict[state_key],
+                    multi_state_dict_finetuned[state_key],
+                )
+            elif "model_2" in state_key and "bias_atom_e" not in state_key:
+                torch.testing.assert_close(
+                    multi_state_dict[state_key],
+                    multi_state_dict_finetuned[state_key],
+                )
+            elif "model_3" in state_key and "bias_atom_e" not in state_key:
+                torch.testing.assert_close(
+                    multi_state_dict[state_key.replace("model_3", "model_2")],
+                    multi_state_dict_finetuned[state_key],
+                )
+            elif "model_4" in state_key and "fitting_net" not in state_key:
+                torch.testing.assert_close(
+                    multi_state_dict[state_key.replace("model_4", "model_2")],
+                    multi_state_dict_finetuned[state_key],
+                )
+
+        # check running
+        trainer_finetune.run()
+        self.tearDown()
+
+    def tearDown(self):
+        for f in os.listdir("."):
+            if f.startswith("model") and f.endswith(".pt"):
+                os.remove(f)
+            if f in ["lcurve.out"]:
+                os.remove(f)
+            if f in [self.stat_files]:
+                shutil.rmtree(f)
+
+
+class TestMultiTaskSeA(unittest.TestCase, MultiTaskTrainTest):
+    def setUp(self):
+        multitask_se_e2_a = deepcopy(multitask_template)
+        multitask_se_e2_a["model"]["shared_dict"]["my_descriptor"] = model_se_e2_a[
+            "descriptor"
+        ]
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.stat_files = "se_e2_a"
+        os.makedirs(self.stat_files, exist_ok=True)
+        self.config = multitask_se_e2_a
+        self.config["training"]["data_dict"]["model_1"]["training_data"]["systems"] = (
+            data_file
+        )
+        self.config["training"]["data_dict"]["model_1"]["validation_data"][
+            "systems"
+        ] = data_file
+        self.config["training"]["data_dict"]["model_1"]["stat_file"] = (
+            f"{self.stat_files}/model_1"
+        )
+        self.config["training"]["data_dict"]["model_2"]["training_data"]["systems"] = (
+            data_file
+        )
+        self.config["training"]["data_dict"]["model_2"]["validation_data"][
+            "systems"
+        ] = data_file
+        self.config["training"]["data_dict"]["model_2"]["stat_file"] = (
+            f"{self.stat_files}/model_2"
+        )
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+        self.origin_config = deepcopy(self.config)
+        self.config["model"], self.shared_links = preprocess_shared_params(
+            self.config["model"]
+        )
+
+    def tearDown(self) -> None:
+        MultiTaskTrainTest.tearDown(self)
+
+
+class TestMultiTaskDPA1(unittest.TestCase, MultiTaskTrainTest):
+    def setUp(self):
+        multitask_DPA1 = deepcopy(multitask_template)
+        multitask_DPA1["model"]["shared_dict"]["my_descriptor"] = model_dpa1[
+            "descriptor"
+        ]
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.stat_files = "DPA1"
+        os.makedirs(self.stat_files, exist_ok=True)
+        self.config = multitask_DPA1
+        self.config["training"]["data_dict"]["model_1"]["training_data"]["systems"] = (
+            data_file
+        )
+        self.config["training"]["data_dict"]["model_1"]["validation_data"][
+            "systems"
+        ] = data_file
+        self.config["training"]["data_dict"]["model_1"]["stat_file"] = (
+            f"{self.stat_files}/model_1"
+        )
+        self.config["training"]["data_dict"]["model_2"]["training_data"]["systems"] = (
+            data_file
+        )
+        self.config["training"]["data_dict"]["model_2"]["validation_data"][
+            "systems"
+        ] = data_file
+        self.config["training"]["data_dict"]["model_2"]["stat_file"] = (
+            f"{self.stat_files}/model_2"
+        )
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+        self.origin_config = deepcopy(self.config)
+        self.config["model"], self.shared_links = preprocess_shared_params(
+            self.config["model"]
+        )
+
+    def tearDown(self) -> None:
+        MultiTaskTrainTest.tearDown(self)
+
+
+class TestMultiTaskDPA2(unittest.TestCase, MultiTaskTrainTest):
+    def setUp(self):
+        multitask_DPA2 = deepcopy(multitask_template)
+        multitask_DPA2["model"]["shared_dict"]["my_descriptor"] = model_dpa2[
+            "descriptor"
+        ]
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.stat_files = "DPA2"
+        os.makedirs(self.stat_files, exist_ok=True)
+        self.config = multitask_DPA2
+        self.config["training"]["data_dict"]["model_1"]["training_data"]["systems"] = (
+            data_file
+        )
+        self.config["training"]["data_dict"]["model_1"]["validation_data"][
+            "systems"
+        ] = data_file
+        self.config["training"]["data_dict"]["model_1"]["stat_file"] = (
+            f"{self.stat_files}/model_1"
+        )
+        self.config["training"]["data_dict"]["model_2"]["training_data"]["systems"] = (
+            data_file
+        )
+        self.config["training"]["data_dict"]["model_2"]["validation_data"][
+            "systems"
+        ] = data_file
+        self.config["training"]["data_dict"]["model_2"]["stat_file"] = (
+            f"{self.stat_files}/model_2"
+        )
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+        self.origin_config = deepcopy(self.config)
+        self.config["model"], self.shared_links = preprocess_shared_params(
+            self.config["model"]
+        )
+
+    def tearDown(self) -> None:
+        MultiTaskTrainTest.tearDown(self)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/test_neighbor_stat.py b/source/tests/pt/test_neighbor_stat.py
new file mode 100644
index 0000000000..4cbb46f66b
--- /dev/null
+++ b/source/tests/pt/test_neighbor_stat.py
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import shutil
+import unittest
+
+import dpdata
+import numpy as np
+
+from deepmd.entrypoints.neighbor_stat import (
+    neighbor_stat,
+)
+
+
+def gen_sys(nframes):
+    natoms = 1000
+    data = {}
+    X, Y, Z = np.mgrid[0:2:3j, 0:2:3j, 0:2:3j]
+    positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T  # + 0.1
+    data["coords"] = np.repeat(positions[np.newaxis, :, :], nframes, axis=0)
+    data["forces"] = np.random.default_rng().random([nframes, natoms, 3])
+    data["cells"] = np.array([3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 3.0]).reshape(
+        1, 3, 3
+    )
+    data["energies"] = np.random.default_rng().random([nframes, 1])
+    data["atom_names"] = ["TYPE"]
+    data["atom_numbs"] = [27]
+    data["atom_types"] = np.repeat(0, 27)
+    return data
+
+
+class TestNeighborStat(unittest.TestCase):
+    def setUp(self):
+        data0 = gen_sys(1)
+        sys0 = dpdata.LabeledSystem()
+        sys0.data = data0
+        sys0.to_deepmd_npy("system_0", set_size=1)
+
+    def tearDown(self):
+        shutil.rmtree("system_0")
+
+    def test_neighbor_stat(self):
+        for rcut in (0.0, 1.0, 2.0, 4.0):
+            for mixed_type in (True, False):
+                with self.subTest(rcut=rcut, mixed_type=mixed_type):
+                    rcut += 1e-3  # prevent numerical errors
+                    min_nbor_dist, max_nbor_size = neighbor_stat(
+                        system="system_0",
+                        rcut=rcut,
+                        type_map=["TYPE", "NO_THIS_TYPE"],
+                        mixed_type=mixed_type,
+                        backend="pytorch",
+                    )
+                    upper = np.ceil(rcut) + 1
+                    X, Y, Z = np.mgrid[-upper:upper, -upper:upper, -upper:upper]
+                    positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T
+                    # distance to (0,0,0)
+                    distance = np.linalg.norm(positions, axis=1)
+                    expected_neighbors = np.count_nonzero(
+                        np.logical_and(distance > 0, distance <= rcut)
+                    )
+                    self.assertAlmostEqual(min_nbor_dist, 1.0, 6)
+                    ret = [expected_neighbors]
+                    if not mixed_type:
+                        ret.append(0)
+                    np.testing.assert_array_equal(max_nbor_size, ret)
diff --git a/source/tests/pt/test_sampler.py b/source/tests/pt/test_sampler.py
new file mode 100644
index 0000000000..4f1091c936
--- /dev/null
+++ b/source/tests/pt/test_sampler.py
@@ -0,0 +1,110 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import unittest
+from pathlib import (
+    Path,
+)
+
+import numpy as np
+import torch
+from torch.utils.data import (
+    DataLoader,
+)
+
+from deepmd.pt.utils.dataloader import (
+    DpLoaderSet,
+    get_weighted_sampler,
+)
+from deepmd.tf.common import (
+    expand_sys_str,
+)
+from deepmd.tf.utils import random as tf_random
+from deepmd.tf.utils.data_system import (
+    DeepmdDataSystem,
+)
+
+CUR_DIR = os.path.dirname(__file__)
+
+
+class TestSampler(unittest.TestCase):
+    def setUp(self):
+        with open(str(Path(__file__).parent / "water/se_e2_a.json")) as fin:
+            content = fin.read()
+        config = json.loads(content)
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        config["training"]["training_data"]["systems"] = data_file
+        config["training"]["validation_data"]["systems"] = data_file
+        model_config = config["model"]
+        self.rcut = model_config["descriptor"]["rcut"]
+        self.rcut_smth = model_config["descriptor"]["rcut_smth"]
+        self.sel = model_config["descriptor"]["sel"]
+        self.batch_size = config["training"]["training_data"]["batch_size"]
+        self.systems = config["training"]["validation_data"]["systems"]
+        if isinstance(self.systems, str):
+            self.systems = expand_sys_str(self.systems)
+        self.my_dataset = DpLoaderSet(
+            self.systems,
+            self.batch_size,
+            model_config["type_map"],
+            seed=10,
+            shuffle=False,
+        )
+
+        tf_random.seed(10)
+        self.dp_dataset = DeepmdDataSystem(self.systems, self.batch_size, 1, self.rcut)
+
+    def test_sampler_debug_info(self):
+        dataloader = DataLoader(
+            self.my_dataset,
+            sampler=get_weighted_sampler(self.my_dataset, prob_style="prob_sys_size"),
+            batch_size=None,
+            num_workers=0,  # setting to 0 diverges the behavior of its iterator; should be >=1
+            drop_last=False,
+            pin_memory=True,
+        )
+        with torch.device("cpu"):
+            batch_data = next(iter(dataloader))
+        sid = batch_data["sid"]
+        fid = batch_data["fid"][0]
+        coord = batch_data["coord"].squeeze(0)
+        frame = self.my_dataset.systems[sid].__getitem__(fid)
+        self.assertTrue(np.allclose(coord, frame["coord"]))
+
+    def test_auto_prob_uniform(self):
+        auto_prob_style = "prob_uniform"
+        sampler = get_weighted_sampler(self.my_dataset, prob_style=auto_prob_style)
+        my_probs = np.array(sampler.weights)
+        self.dp_dataset.set_sys_probs(auto_prob_style=auto_prob_style)
+        dp_probs = np.array(self.dp_dataset.sys_probs)
+        self.assertTrue(np.allclose(my_probs, dp_probs))
+
+    def test_auto_prob_sys_size(self):
+        auto_prob_style = "prob_sys_size"
+        sampler = get_weighted_sampler(self.my_dataset, prob_style=auto_prob_style)
+        my_probs = np.array(sampler.weights)
+        self.dp_dataset.set_sys_probs(auto_prob_style=auto_prob_style)
+        dp_probs = np.array(self.dp_dataset.sys_probs)
+        self.assertTrue(np.allclose(my_probs, dp_probs))
+
+    def test_auto_prob_sys_size_ext(self):
+        auto_prob_style = "prob_sys_size;0:1:0.2;1:3:0.8"
+        sampler = get_weighted_sampler(self.my_dataset, prob_style=auto_prob_style)
+        my_probs = np.array(sampler.weights)
+        self.dp_dataset.set_sys_probs(auto_prob_style=auto_prob_style)
+        dp_probs = np.array(self.dp_dataset.sys_probs)
+        self.assertTrue(np.allclose(my_probs, dp_probs))
+
+    def test_sys_probs(self):
+        sys_probs = [0.1, 0.4, 0.5]
+        sampler = get_weighted_sampler(
+            self.my_dataset, prob_style=sys_probs, sys_prob=True
+        )
+        my_probs = np.array(sampler.weights)
+        self.dp_dataset.set_sys_probs(sys_probs=sys_probs)
+        dp_probs = np.array(self.dp_dataset.sys_probs)
+        self.assertTrue(np.allclose(my_probs, dp_probs))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/test_stat.py b/source/tests/pt/test_stat.py
new file mode 100644
index 0000000000..2362821dfa
--- /dev/null
+++ b/source/tests/pt/test_stat.py
@@ -0,0 +1,429 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import tempfile
+import unittest
+from abc import (
+    ABC,
+    abstractmethod,
+)
+from pathlib import (
+    Path,
+)
+
+import dpdata
+import h5py
+import numpy as np
+import torch
+
+from deepmd.pt.model.descriptor import (
+    DescrptSeA,
+)
+from deepmd.pt.model.descriptor.dpa1 import (
+    DescrptDPA1,
+)
+from deepmd.pt.model.task.ener import (
+    EnergyFittingNet,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.pt.utils.dataloader import (
+    DpLoaderSet,
+)
+from deepmd.pt.utils.stat import (
+    compute_output_stats,
+)
+from deepmd.pt.utils.stat import make_stat_input
+from deepmd.pt.utils.stat import make_stat_input as my_make
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+)
+from deepmd.tf.common import (
+    expand_sys_str,
+)
+from deepmd.tf.descriptor.se_a import DescrptSeA as DescrptSeA_tf
+from deepmd.tf.descriptor.se_atten import DescrptSeAtten as DescrptSeAtten_tf
+from deepmd.tf.fit.ener import (
+    EnerFitting,
+)
+from deepmd.tf.model.model_stat import make_stat_input as dp_make
+from deepmd.tf.model.model_stat import merge_sys_stat as dp_merge
+from deepmd.tf.utils import random as tf_random
+from deepmd.tf.utils.data_system import (
+    DeepmdDataSystem,
+)
+from deepmd.utils.data import (
+    DataRequirementItem,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+
+CUR_DIR = os.path.dirname(__file__)
+
+energy_data_requirement = [
+    DataRequirementItem(
+        "energy",
+        ndof=1,
+        atomic=False,
+        must=False,
+        high_prec=True,
+    ),
+    DataRequirementItem(
+        "force",
+        ndof=3,
+        atomic=True,
+        must=False,
+        high_prec=False,
+    ),
+    DataRequirementItem(
+        "virial",
+        ndof=9,
+        atomic=False,
+        must=False,
+        high_prec=False,
+    ),
+    DataRequirementItem(
+        "atom_ener",
+        ndof=1,
+        atomic=True,
+        must=False,
+        high_prec=False,
+    ),
+    DataRequirementItem(
+        "atom_pref",
+        ndof=1,
+        atomic=True,
+        must=False,
+        high_prec=False,
+        repeat=3,
+    ),
+]
+
+
+def compare(ut, base, given):
+    if isinstance(base, list):
+        ut.assertEqual(len(base), len(given))
+        for idx in range(len(base)):
+            compare(ut, base[idx], given[idx])
+    elif isinstance(base, np.ndarray):
+        ut.assertTrue(np.allclose(base.reshape(-1), given.reshape(-1)))
+    else:
+        ut.assertEqual(base, given)
+
+
+class DatasetTest(ABC):
+    @abstractmethod
+    def setup_data(self):
+        pass
+
+    @abstractmethod
+    def setup_tf(self):
+        pass
+
+    @abstractmethod
+    def setup_pt(self):
+        pass
+
+    @abstractmethod
+    def tf_compute_input_stats(self):
+        pass
+
+    def setUp(self):
+        with open(str(Path(__file__).parent / "water/se_e2_a.json")) as fin:
+            content = fin.read()
+        config = json.loads(content)
+        data_file = [self.setup_data()]
+
+        config["training"]["training_data"]["systems"] = data_file
+        config["training"]["validation_data"]["systems"] = data_file
+        model_config = config["model"]
+        self.rcut = model_config["descriptor"]["rcut"]
+        self.rcut_smth = model_config["descriptor"]["rcut_smth"]
+        self.sel = model_config["descriptor"]["sel"]
+        self.batch_size = config["training"]["training_data"]["batch_size"]
+        self.systems = config["training"]["validation_data"]["systems"]
+        if isinstance(self.systems, str):
+            self.systems = expand_sys_str(self.systems)
+        self.my_dataset = DpLoaderSet(
+            self.systems,
+            self.batch_size,
+            model_config["type_map"],
+            seed=10,
+        )
+        self.filter_neuron = model_config["descriptor"]["neuron"]
+        self.axis_neuron = model_config["descriptor"]["axis_neuron"]
+        self.data_stat_nbatch = 2
+        self.filter_neuron = model_config["descriptor"]["neuron"]
+        self.axis_neuron = model_config["descriptor"]["axis_neuron"]
+        self.n_neuron = model_config["fitting_net"]["neuron"]
+        self.my_dataset.add_data_requirement(energy_data_requirement)
+
+        self.my_sampled = my_make(
+            self.my_dataset.systems, self.my_dataset.dataloaders, self.data_stat_nbatch
+        )
+
+        tf_random.seed(10)
+        dp_dataset = DeepmdDataSystem(self.systems, self.batch_size, 1, self.rcut)
+        dp_dataset.add("energy", 1, atomic=False, must=False, high_prec=True)
+        dp_dataset.add("force", 3, atomic=True, must=False, high_prec=False)
+        self.dp_sampled = dp_make(dp_dataset, self.data_stat_nbatch, False)
+        self.dp_merged = dp_merge(self.dp_sampled)
+        self.dp_mesh = self.dp_merged.pop("default_mesh")
+        self.dp_d = self.setup_tf()
+
+    def test_stat_output(self):
+        def my_merge(energy, natoms):
+            energy_lst = []
+            natoms_lst = []
+            for i in range(len(energy)):
+                for j in range(len(energy[i])):
+                    energy_lst.append(torch.tensor(energy[i][j], device="cpu"))
+                    natoms_lst.append(
+                        torch.tensor(natoms[i][j], device="cpu")
+                        .unsqueeze(0)
+                        .expand(energy[i][j].shape[0], -1)
+                    )
+            energy_merge = torch.cat(energy_lst)
+            natoms_merge = torch.cat(natoms_lst)
+            return energy_merge, natoms_merge
+
+        energy = self.dp_sampled["energy"]
+        natoms = self.dp_sampled["natoms_vec"]
+        energy, natoms = my_merge(energy, natoms)
+        dp_fn = EnerFitting(
+            self.dp_d.get_ntypes(), self.dp_d.get_dim_out(), self.n_neuron
+        )
+        dp_fn.compute_output_stats(self.dp_sampled, mixed_type=self.mixed_type)
+        pt_fn = EnergyFittingNet(
+            self.dp_d.get_ntypes(), self.dp_d.get_dim_out(), self.n_neuron
+        )
+        pt_fn.compute_output_stats(self.my_sampled)
+        np.testing.assert_allclose(
+            dp_fn.bias_atom_e, pt_fn.bias_atom_e.detach().cpu().numpy().ravel()
+        )
+
+    # temporarily delete this function for performance of seeds in tf and pytorch may be different
+    """
+    def test_stat_input(self):
+        my_sampled = self.my_sampled
+        # list of dicts, each dict contains samples from a system
+        dp_keys = set(self.dp_merged.keys()) # dict of list of batches
+        self.dp_merged['natoms'] = self.dp_merged['natoms_vec']
+        for key in dp_keys:
+            if not key in my_sampled[0] or key in 'coord':
+                # coord is pre-normalized
+                continue
+            lst = []
+            for item in my_sampled:
+                bsz = item['energy'].shape[0]//self.data_stat_nbatch
+                for j in range(self.data_stat_nbatch):
+                    lst.append(item[key][j*bsz:(j+1)*bsz].cpu().numpy())
+                compare(self, self.dp_merged[key], lst)
+    """
+
+    def test_descriptor(self):
+        self.tf_compute_input_stats()
+
+        my_en = self.setup_pt()
+        sampled = self.my_sampled
+        for sys in sampled:
+            for key in [
+                "coord",
+                "atype",
+                "natoms",
+                "box",
+            ]:
+                if key in sys.keys():
+                    sys[key] = sys[key].to(env.DEVICE)
+        stat_dict = my_en.compute_input_stats(sampled)
+        my_en.mean = my_en.mean
+        my_en.stddev = my_en.stddev
+        np.testing.assert_allclose(
+            self.dp_d.davg.reshape([-1]),
+            my_en.mean.cpu().reshape([-1]),
+            rtol=1e-14,
+            atol=1e-14,
+        )
+        np.testing.assert_allclose(
+            self.dp_d.dstd.reshape([-1]),
+            my_en.stddev.cpu().reshape([-1]),
+            rtol=1e-14,
+            atol=1e-14,
+        )
+
+
+class TestDatasetNoMixed(DatasetTest, unittest.TestCase):
+    def setup_data(self):
+        original_data = str(Path(__file__).parent / "water/data/data_0")
+        picked_data = str(Path(__file__).parent / "picked_data_for_test_stat")
+        dpdata.LabeledSystem(original_data, fmt="deepmd/npy")[:2].to_deepmd_npy(
+            picked_data
+        )
+        self.mixed_type = False
+        return picked_data
+
+    def setup_tf(self):
+        return DescrptSeA_tf(
+            rcut=self.rcut,
+            rcut_smth=self.rcut_smth,
+            sel=self.sel,
+            neuron=self.filter_neuron,
+            axis_neuron=self.axis_neuron,
+        )
+
+    def setup_pt(self):
+        return DescrptSeA(
+            self.rcut, self.rcut_smth, self.sel, self.filter_neuron, self.axis_neuron
+        ).sea  # get the block who has stat as private vars
+
+    def tf_compute_input_stats(self):
+        coord = self.dp_merged["coord"]
+        atype = self.dp_merged["type"]
+        natoms = self.dp_merged["natoms_vec"]
+        box = self.dp_merged["box"]
+        self.dp_d.compute_input_stats(coord, box, atype, natoms, self.dp_mesh, {})
+
+
+class TestDatasetMixed(DatasetTest, unittest.TestCase):
+    def setup_data(self):
+        original_data = str(Path(__file__).parent / "water/data/data_0")
+        picked_data = str(Path(__file__).parent / "picked_data_for_test_stat")
+        dpdata.LabeledSystem(original_data, fmt="deepmd/npy")[:2].to_deepmd_npy_mixed(
+            picked_data
+        )
+        self.mixed_type = True
+        return picked_data
+
+    def setup_tf(self):
+        return DescrptSeAtten_tf(
+            ntypes=2,
+            rcut=self.rcut,
+            rcut_smth=self.rcut_smth,
+            sel=sum(self.sel),
+            neuron=self.filter_neuron,
+            axis_neuron=self.axis_neuron,
+            set_davg_zero=False,
+        )
+
+    def setup_pt(self):
+        return DescrptDPA1(
+            self.rcut,
+            self.rcut_smth,
+            sum(self.sel),
+            2,
+            self.filter_neuron,
+            self.axis_neuron,
+            set_davg_zero=False,
+        ).se_atten
+
+    def tf_compute_input_stats(self):
+        coord = self.dp_merged["coord"]
+        atype = self.dp_merged["type"]
+        natoms = self.dp_merged["natoms_vec"]
+        box = self.dp_merged["box"]
+        real_natoms_vec = self.dp_merged["real_natoms_vec"]
+
+        self.dp_d.compute_input_stats(
+            coord,
+            box,
+            atype,
+            natoms,
+            self.dp_mesh,
+            {},
+            mixed_type=True,
+            real_natoms_vec=real_natoms_vec,
+        )
+
+
+class TestOutputStat(unittest.TestCase):
+    def setUp(self):
+        self.data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.type_map = ["O", "H"]  # by dataset
+        self.data = DpLoaderSet(
+            self.data_file,
+            batch_size=1,
+            type_map=self.type_map,
+        )
+        self.data.add_data_requirement(energy_data_requirement)
+        self.sampled = make_stat_input(
+            self.data.systems,
+            self.data.dataloaders,
+            nbatches=1,
+        )
+        self.tempdir = tempfile.TemporaryDirectory()
+        h5file = str((Path(self.tempdir.name) / "testcase.h5").resolve())
+        with h5py.File(h5file, "w") as f:
+            pass
+        self.stat_file_path = DPPath(h5file, "a")
+
+    def tearDown(self):
+        self.tempdir.cleanup()
+
+    def test_calc_and_load(self):
+        stat_file_path = self.stat_file_path
+        type_map = self.type_map
+
+        # compute from sample
+        ret0 = compute_output_stats(
+            self.sampled,
+            len(type_map),
+            keys=["energy"],
+            stat_file_path=stat_file_path,
+            atom_ener=None,
+            model_forward=None,
+        )
+        # ground truth
+        ntest = 1
+        atom_nums = np.tile(
+            np.bincount(to_numpy_array(self.sampled[0]["atype"][0])),
+            (ntest, 1),
+        )
+        energy_diff = to_numpy_array(self.sampled[0]["energy"][:ntest])
+        ground_truth_shift = np.linalg.lstsq(atom_nums, energy_diff, rcond=None)[0]
+
+        # check values
+        np.testing.assert_almost_equal(
+            to_numpy_array(ret0["energy"]), ground_truth_shift, decimal=10
+        )
+        # self.assertTrue(stat_file_path.is_dir())
+
+        def raise_error():
+            raise RuntimeError
+
+        # hack!!!
+        # suppose to load stat from file, if from sample, an error will raise.
+        ret1 = compute_output_stats(
+            raise_error,
+            len(type_map),
+            keys=["energy"],
+            stat_file_path=stat_file_path,
+            atom_ener=None,
+            model_forward=None,
+        )
+        np.testing.assert_almost_equal(
+            to_numpy_array(ret0["energy"]), to_numpy_array(ret1["energy"]), decimal=10
+        )
+
+    def test_assigned(self):
+        atom_ener = np.array([3.0, 5.0]).reshape(2, 1)
+        stat_file_path = self.stat_file_path
+        type_map = self.type_map
+
+        # from assigned atom_ener
+        ret2 = compute_output_stats(
+            self.sampled,
+            len(type_map),
+            keys=["energy"],
+            stat_file_path=stat_file_path,
+            atom_ener=atom_ener,
+            model_forward=None,
+        )
+        np.testing.assert_almost_equal(
+            to_numpy_array(ret2["energy"]), atom_ener, decimal=10
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py
new file mode 100644
index 0000000000..1635ad56ea
--- /dev/null
+++ b/source/tests/pt/test_training.py
@@ -0,0 +1,394 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import shutil
+import unittest
+from copy import (
+    deepcopy,
+)
+from pathlib import (
+    Path,
+)
+
+import torch
+
+from deepmd.pt.entrypoints.main import (
+    get_trainer,
+)
+
+from .model.test_permutation import (
+    model_dos,
+    model_dpa1,
+    model_dpa2,
+    model_hybrid,
+    model_se_e2_a,
+    model_zbl,
+)
+
+
+class DPTrainTest:
+    def test_dp_train(self):
+        # test training from scratch
+        trainer = get_trainer(deepcopy(self.config))
+        trainer.run()
+
+        # test fine-tuning
+        trainer_finetune = get_trainer(
+            deepcopy(self.config),
+            finetune_model=self.config["training"].get("save_ckpt", "model.ckpt")
+            + ".pt",
+        )
+        trainer_finetune.run()
+        self.tearDown()
+
+    def test_trainable(self):
+        fix_params = deepcopy(self.config)
+        fix_params["model"]["descriptor"]["trainable"] = False
+        fix_params["model"]["fitting_net"]["trainable"] = False
+        free_descriptor = hasattr(self, "not_all_grad") and self.not_all_grad
+        if free_descriptor:
+            # can not set requires_grad false for all parameters,
+            # because the input coord has no grad, thus the loss if all set to false
+            # we only check trainable for fitting net
+            fix_params["model"]["descriptor"]["trainable"] = True
+            trainer_fix = get_trainer(fix_params)
+            model_dict_before_training = deepcopy(
+                trainer_fix.model.get_fitting_net().state_dict()
+            )
+            trainer_fix.run()
+            model_dict_after_training = deepcopy(
+                trainer_fix.model.get_fitting_net().state_dict()
+            )
+        else:
+            trainer_fix = get_trainer(fix_params)
+            model_dict_before_training = deepcopy(trainer_fix.model.state_dict())
+            trainer_fix.run()
+            model_dict_after_training = deepcopy(trainer_fix.model.state_dict())
+        for key in model_dict_before_training:
+            torch.testing.assert_close(
+                model_dict_before_training[key], model_dict_after_training[key]
+            )
+
+        self.tearDown()
+
+    def tearDown(self):
+        for f in os.listdir("."):
+            if f.startswith("model") and f.endswith(".pt"):
+                os.remove(f)
+            if f in ["lcurve.out"]:
+                os.remove(f)
+            if f in ["stat_files"]:
+                shutil.rmtree(f)
+
+
+class TestEnergyModelSeA(unittest.TestCase, DPTrainTest):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water/se_atten.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.config["model"] = deepcopy(model_se_e2_a)
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+
+    def tearDown(self) -> None:
+        DPTrainTest.tearDown(self)
+
+
+class TestDOSModelSeA(unittest.TestCase, DPTrainTest):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "dos/input.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        data_file = [str(Path(__file__).parent / "dos/data/atomic_system")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.config["model"] = deepcopy(model_dos)
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+        self.not_all_grad = True
+
+    def tearDown(self) -> None:
+        DPTrainTest.tearDown(self)
+
+
+class TestEnergyZBLModelSeA(unittest.TestCase, DPTrainTest):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water/zbl.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.config["model"] = deepcopy(model_zbl)
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+
+    def tearDown(self) -> None:
+        DPTrainTest.tearDown(self)
+
+
+class TestFparam(unittest.TestCase, DPTrainTest):
+    """Test if `fparam` can be loaded correctly."""
+
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water/se_atten.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.config["model"] = deepcopy(model_se_e2_a)
+        self.config["model"]["fitting_net"]["numb_fparam"] = 1
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+        self.set_path = Path(__file__).parent / "water/data/data_0" / "set.000"
+        shutil.copyfile(self.set_path / "energy.npy", self.set_path / "fparam.npy")
+
+    def tearDown(self) -> None:
+        (self.set_path / "fparam.npy").unlink(missing_ok=True)
+        DPTrainTest.tearDown(self)
+
+
+class TestEnergyModelDPA1(unittest.TestCase, DPTrainTest):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water/se_atten.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.config["model"] = deepcopy(model_dpa1)
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+
+    def tearDown(self) -> None:
+        DPTrainTest.tearDown(self)
+
+
+class TestEnergyModelDPA2(unittest.TestCase, DPTrainTest):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water/se_atten.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.config["model"] = deepcopy(model_dpa2)
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+
+    def tearDown(self) -> None:
+        DPTrainTest.tearDown(self)
+
+
+@unittest.skip("hybrid not supported at the moment")
+class TestEnergyModelHybrid(unittest.TestCase, DPTrainTest):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water/se_atten.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.config["model"] = deepcopy(model_hybrid)
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+
+    def tearDown(self) -> None:
+        DPTrainTest.tearDown(self)
+
+
+class TestDipoleModelSeA(unittest.TestCase, DPTrainTest):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        data_file_atomic = str(
+            Path(__file__).parent / "water_tensor/dipole/atomic_system"
+        )
+        data_file_global = str(
+            Path(__file__).parent / "water_tensor/dipole/global_system"
+        )
+        self.config["training"]["training_data"]["systems"] = [
+            data_file_atomic,
+            data_file_global,
+        ]
+        self.config["training"]["validation_data"]["systems"] = [
+            data_file_atomic,
+            data_file_global,
+        ]
+        self.config["model"] = deepcopy(model_se_e2_a)
+        self.config["model"]["atom_exclude_types"] = [1]
+        self.config["model"]["fitting_net"]["type"] = "dipole"
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+
+    def tearDown(self) -> None:
+        DPTrainTest.tearDown(self)
+
+
+class TestDipoleModelDPA1(unittest.TestCase, DPTrainTest):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        data_file_atomic = str(
+            Path(__file__).parent / "water_tensor/dipole/atomic_system"
+        )
+        data_file_global = str(
+            Path(__file__).parent / "water_tensor/dipole/global_system"
+        )
+        self.config["training"]["training_data"]["systems"] = [
+            data_file_atomic,
+            data_file_global,
+        ]
+        self.config["training"]["validation_data"]["systems"] = [
+            data_file_atomic,
+            data_file_global,
+        ]
+        self.config["model"] = deepcopy(model_dpa1)
+        self.config["model"]["atom_exclude_types"] = [1]
+        self.config["model"]["fitting_net"]["type"] = "dipole"
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+
+    def tearDown(self) -> None:
+        DPTrainTest.tearDown(self)
+
+
+class TestDipoleModelDPA2(unittest.TestCase, DPTrainTest):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        data_file_atomic = str(
+            Path(__file__).parent / "water_tensor/dipole/atomic_system"
+        )
+        data_file_global = str(
+            Path(__file__).parent / "water_tensor/dipole/global_system"
+        )
+        self.config["training"]["training_data"]["systems"] = [
+            data_file_atomic,
+            data_file_global,
+        ]
+        self.config["training"]["validation_data"]["systems"] = [
+            data_file_atomic,
+            data_file_global,
+        ]
+        self.config["model"] = deepcopy(model_dpa2)
+        self.config["model"]["atom_exclude_types"] = [1]
+        self.config["model"]["fitting_net"]["type"] = "dipole"
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+
+    def tearDown(self) -> None:
+        DPTrainTest.tearDown(self)
+
+
+class TestPolarModelSeA(unittest.TestCase, DPTrainTest):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        data_file_atomic = str(
+            Path(__file__).parent / "water_tensor/polar/atomic_system"
+        )
+        data_file_global = str(
+            Path(__file__).parent / "water_tensor/polar/global_system"
+        )
+        self.config["training"]["training_data"]["systems"] = [
+            data_file_atomic,
+            data_file_global,
+        ]
+        self.config["training"]["validation_data"]["systems"] = [
+            data_file_atomic,
+            data_file_global,
+        ]
+        self.config["model"] = deepcopy(model_se_e2_a)
+        self.config["model"]["atom_exclude_types"] = [1]
+        self.config["model"]["fitting_net"]["type"] = "polar"
+        self.config["model"]["fitting_net"]["fit_diag"] = False
+        self.config["model"]["fitting_net"]["shift_diag"] = False
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+        # can not set requires_grad false for all parameters,
+        # because the input coord has no grad, thus the loss if all set to false
+        self.not_all_grad = True
+
+    def tearDown(self) -> None:
+        DPTrainTest.tearDown(self)
+
+
+class TestPolarModelDPA1(unittest.TestCase, DPTrainTest):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        data_file_atomic = str(
+            Path(__file__).parent / "water_tensor/polar/atomic_system"
+        )
+        data_file_global = str(
+            Path(__file__).parent / "water_tensor/polar/global_system"
+        )
+        self.config["training"]["training_data"]["systems"] = [
+            data_file_atomic,
+            data_file_global,
+        ]
+        self.config["training"]["validation_data"]["systems"] = [
+            data_file_atomic,
+            data_file_global,
+        ]
+        self.config["model"] = deepcopy(model_dpa1)
+        self.config["model"]["atom_exclude_types"] = [1]
+        self.config["model"]["fitting_net"]["type"] = "polar"
+        self.config["model"]["fitting_net"]["fit_diag"] = False
+        self.config["model"]["fitting_net"]["shift_diag"] = False
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+        # can not set requires_grad false for all parameters,
+        # because the input coord has no grad, thus the loss if all set to false
+        self.not_all_grad = True
+
+    def tearDown(self) -> None:
+        DPTrainTest.tearDown(self)
+
+
+class TestPolarModelDPA2(unittest.TestCase, DPTrainTest):
+    def setUp(self):
+        input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        data_file_atomic = str(
+            Path(__file__).parent / "water_tensor/polar/atomic_system"
+        )
+        data_file_global = str(
+            Path(__file__).parent / "water_tensor/polar/global_system"
+        )
+        self.config["training"]["training_data"]["systems"] = [
+            data_file_atomic,
+            data_file_global,
+        ]
+        self.config["training"]["validation_data"]["systems"] = [
+            data_file_atomic,
+            data_file_global,
+        ]
+        self.config["model"] = deepcopy(model_dpa2)
+        self.config["model"]["atom_exclude_types"] = [1]
+        self.config["model"]["fitting_net"]["type"] = "polar"
+        self.config["model"]["fitting_net"]["fit_diag"] = False
+        self.config["model"]["fitting_net"]["shift_diag"] = False
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+        # can not set requires_grad false for all parameters,
+        # because the input coord has no grad, thus the loss if all set to false
+        self.not_all_grad = True
+
+    def tearDown(self) -> None:
+        DPTrainTest.tearDown(self)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/pt/test_utils.py b/source/tests/pt/test_utils.py
new file mode 100644
index 0000000000..145fe6c510
--- /dev/null
+++ b/source/tests/pt/test_utils.py
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+import torch
+
+from deepmd.pt.utils.utils import (
+    to_numpy_array,
+    to_torch_tensor,
+)
+
+
+class TestCvt(unittest.TestCase):
+    def test_to_numpy(self):
+        rng = np.random.default_rng()
+        foo = rng.normal([3, 4])
+        for ptp, npp in zip(
+            [torch.float16, torch.float32, torch.float64],
+            [np.float16, np.float32, np.float64],
+        ):
+            foo = foo.astype(npp)
+            bar = to_torch_tensor(foo)
+            self.assertEqual(bar.dtype, ptp)
+            onk = to_numpy_array(bar)
+            self.assertEqual(onk.dtype, npp)
+        with self.assertRaises(ValueError) as ee:
+            foo = foo.astype(np.int8)
+            bar = to_torch_tensor(foo)
+        with self.assertRaises(ValueError) as ee:
+            bar = to_torch_tensor(foo)
+            bar = to_numpy_array(bar.int())
diff --git a/source/tests/pt/water b/source/tests/pt/water
new file mode 120000
index 0000000000..7e5219651f
--- /dev/null
+++ b/source/tests/pt/water
@@ -0,0 +1 @@
+model/water
\ No newline at end of file
diff --git a/source/tests/pt/water_tensor/dipole/atomic_system/nopbc b/source/tests/pt/water_tensor/dipole/atomic_system/nopbc
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/source/tests/pt/water_tensor/dipole/atomic_system/set.000/atomic_dipole.npy b/source/tests/pt/water_tensor/dipole/atomic_system/set.000/atomic_dipole.npy
new file mode 100644
index 0000000000..2cabc71e21
Binary files /dev/null and b/source/tests/pt/water_tensor/dipole/atomic_system/set.000/atomic_dipole.npy differ
diff --git a/examples/water_tensor/polar/training_data/atomic_system/set.000/box.npy b/source/tests/pt/water_tensor/dipole/atomic_system/set.000/box.npy
similarity index 100%
rename from examples/water_tensor/polar/training_data/atomic_system/set.000/box.npy
rename to source/tests/pt/water_tensor/dipole/atomic_system/set.000/box.npy
diff --git a/examples/water_tensor/polar/training_data/atomic_system/set.000/coord.npy b/source/tests/pt/water_tensor/dipole/atomic_system/set.000/coord.npy
similarity index 100%
rename from examples/water_tensor/polar/training_data/atomic_system/set.000/coord.npy
rename to source/tests/pt/water_tensor/dipole/atomic_system/set.000/coord.npy
diff --git a/source/tests/pt/water_tensor/dipole/atomic_system/type.raw b/source/tests/pt/water_tensor/dipole/atomic_system/type.raw
new file mode 100644
index 0000000000..6c71c85e58
--- /dev/null
+++ b/source/tests/pt/water_tensor/dipole/atomic_system/type.raw
@@ -0,0 +1 @@
+0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1
diff --git a/source/tests/init_frz_model/data/type_map.raw b/source/tests/pt/water_tensor/dipole/atomic_system/type_map.raw
similarity index 100%
rename from source/tests/init_frz_model/data/type_map.raw
rename to source/tests/pt/water_tensor/dipole/atomic_system/type_map.raw
diff --git a/source/tests/pt/water_tensor/dipole/global_system/nopbc b/source/tests/pt/water_tensor/dipole/global_system/nopbc
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/source/tests/pt/water_tensor/dipole/global_system/set.000/box.npy b/source/tests/pt/water_tensor/dipole/global_system/set.000/box.npy
new file mode 100644
index 0000000000..652530cfe8
Binary files /dev/null and b/source/tests/pt/water_tensor/dipole/global_system/set.000/box.npy differ
diff --git a/source/tests/pt/water_tensor/dipole/global_system/set.000/coord.npy b/source/tests/pt/water_tensor/dipole/global_system/set.000/coord.npy
new file mode 100644
index 0000000000..4f6c37e77a
Binary files /dev/null and b/source/tests/pt/water_tensor/dipole/global_system/set.000/coord.npy differ
diff --git a/source/tests/pt/water_tensor/dipole/global_system/set.000/dipole.npy b/source/tests/pt/water_tensor/dipole/global_system/set.000/dipole.npy
new file mode 100644
index 0000000000..c16efad029
Binary files /dev/null and b/source/tests/pt/water_tensor/dipole/global_system/set.000/dipole.npy differ
diff --git a/source/tests/pt/water_tensor/dipole/global_system/type.raw b/source/tests/pt/water_tensor/dipole/global_system/type.raw
new file mode 100644
index 0000000000..6c71c85e58
--- /dev/null
+++ b/source/tests/pt/water_tensor/dipole/global_system/type.raw
@@ -0,0 +1 @@
+0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1
diff --git a/source/tests/model_compression/data/type_map.raw b/source/tests/pt/water_tensor/dipole/global_system/type_map.raw
similarity index 100%
rename from source/tests/model_compression/data/type_map.raw
rename to source/tests/pt/water_tensor/dipole/global_system/type_map.raw
diff --git a/source/tests/pt/water_tensor/polar/atomic_system/set.000/atomic_polarizability.npy b/source/tests/pt/water_tensor/polar/atomic_system/set.000/atomic_polarizability.npy
new file mode 100644
index 0000000000..2aa2cdd4f2
Binary files /dev/null and b/source/tests/pt/water_tensor/polar/atomic_system/set.000/atomic_polarizability.npy differ
diff --git a/source/tests/pt/water_tensor/polar/atomic_system/set.000/box.npy b/source/tests/pt/water_tensor/polar/atomic_system/set.000/box.npy
new file mode 100644
index 0000000000..a0ce7ef9a7
Binary files /dev/null and b/source/tests/pt/water_tensor/polar/atomic_system/set.000/box.npy differ
diff --git a/source/tests/pt/water_tensor/polar/atomic_system/set.000/coord.npy b/source/tests/pt/water_tensor/polar/atomic_system/set.000/coord.npy
new file mode 100644
index 0000000000..baa2c0a7c3
Binary files /dev/null and b/source/tests/pt/water_tensor/polar/atomic_system/set.000/coord.npy differ
diff --git a/source/tests/pt/water_tensor/polar/atomic_system/type.raw b/source/tests/pt/water_tensor/polar/atomic_system/type.raw
new file mode 100644
index 0000000000..6c71c85e58
--- /dev/null
+++ b/source/tests/pt/water_tensor/polar/atomic_system/type.raw
@@ -0,0 +1 @@
+0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1
diff --git a/source/tests/pt/water_tensor/polar/atomic_system/type_map.raw b/source/tests/pt/water_tensor/polar/atomic_system/type_map.raw
new file mode 100644
index 0000000000..e900768b1d
--- /dev/null
+++ b/source/tests/pt/water_tensor/polar/atomic_system/type_map.raw
@@ -0,0 +1,2 @@
+O
+H
diff --git a/source/tests/pt/water_tensor/polar/global_system/set.000/box.npy b/source/tests/pt/water_tensor/polar/global_system/set.000/box.npy
new file mode 100644
index 0000000000..652530cfe8
Binary files /dev/null and b/source/tests/pt/water_tensor/polar/global_system/set.000/box.npy differ
diff --git a/source/tests/pt/water_tensor/polar/global_system/set.000/coord.npy b/source/tests/pt/water_tensor/polar/global_system/set.000/coord.npy
new file mode 100644
index 0000000000..4f6c37e77a
Binary files /dev/null and b/source/tests/pt/water_tensor/polar/global_system/set.000/coord.npy differ
diff --git a/source/tests/pt/water_tensor/polar/global_system/set.000/polarizability.npy b/source/tests/pt/water_tensor/polar/global_system/set.000/polarizability.npy
new file mode 100644
index 0000000000..893767e565
Binary files /dev/null and b/source/tests/pt/water_tensor/polar/global_system/set.000/polarizability.npy differ
diff --git a/source/tests/pt/water_tensor/polar/global_system/type.raw b/source/tests/pt/water_tensor/polar/global_system/type.raw
new file mode 100644
index 0000000000..6c71c85e58
--- /dev/null
+++ b/source/tests/pt/water_tensor/polar/global_system/type.raw
@@ -0,0 +1 @@
+0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1
diff --git a/source/tests/pt/water_tensor/polar/global_system/type_map.raw b/source/tests/pt/water_tensor/polar/global_system/type_map.raw
new file mode 100644
index 0000000000..e900768b1d
--- /dev/null
+++ b/source/tests/pt/water_tensor/polar/global_system/type_map.raw
@@ -0,0 +1,2 @@
+O
+H
diff --git a/source/tests/pt/water_tensor/se_e2_a.json b/source/tests/pt/water_tensor/se_e2_a.json
new file mode 100644
index 0000000000..e53caafc96
--- /dev/null
+++ b/source/tests/pt/water_tensor/se_e2_a.json
@@ -0,0 +1,85 @@
+{
+  "_comment1": " model parameters",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "atom_exclude_types": [
+      1
+    ],
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 3.80,
+      "rcut": 4.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 6,
+      "type_one_side": true,
+      "precision": "float64",
+      "seed": 1,
+      "_comment2": " that's all"
+    },
+    "fitting_net": {
+      "type": "dipole",
+      "neuron": [
+        100,
+        100,
+        100
+      ],
+      "resnet_dt": true,
+      "precision": "float64",
+      "seed": 1,
+      "_comment3": " that's all"
+    },
+    "_comment4": " that's all"
+  },
+  "learning_rate": {
+    "type": "exp",
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-8,
+    "decay_steps": 5000,
+    "_comment5": "that's all"
+  },
+  "loss": {
+    "type": "tensor",
+    "pref": 1.0,
+    "pref_atomic": 1.0,
+    "_comment6": " that's all"
+  },
+  "_comment7": " traing controls",
+  "training": {
+    "training_data": {
+      "systems": [
+        "pt/water_tensor/dipole/atomic_system",
+        "pt/water_tensor/dipole/global_system"
+      ],
+      "batch_size": "auto",
+      "_comment8": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "pt/water_tensor/dipole/atomic_system",
+        "pt/water_tensor/dipole/global_system"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment9": "that's all"
+    },
+    "numb_steps": 2000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "_comment10": "that's all"
+  },
+  "_comment11": "that's all"
+}
diff --git a/source/tests/test_cluster.py b/source/tests/test_cluster.py
deleted file mode 100644
index c946177cb5..0000000000
--- a/source/tests/test_cluster.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-import unittest
-from unittest import (
-    mock,
-)
-
-from deepmd.cluster import (
-    local,
-    slurm,
-)
-
-kHostName = "compute-b24-1"
-
-
-class FakePopen:
-    def __init__(self, stdout=b"", stderr=b"", returncode=0):
-        self._stdout = stdout
-        self._stderr = stderr
-        self._returncode = returncode
-
-    def communicate(self):
-        return self._stdout, self._stderr
-
-    @property
-    def returncode(self):
-        return self._returncode
-
-
-class TestGPU(unittest.TestCase):
-    @mock.patch("tensorflow.compat.v1.test.is_built_with_cuda")
-    @mock.patch("subprocess.Popen")
-    def test_none(self, mock_Popen, mock_is_built_with_cuda):
-        mock_Popen.return_value.__enter__.return_value = FakePopen(b"0", b"")
-        mock_is_built_with_cuda.return_value = True
-        gpus = local.get_gpus()
-        self.assertIsNone(gpus)
-
-    @mock.patch("tensorflow.compat.v1.test.is_built_with_cuda")
-    @mock.patch("subprocess.Popen")
-    def test_valid(self, mock_Popen, mock_is_built_with_cuda):
-        mock_Popen.return_value.__enter__.return_value = FakePopen(b"2", b"")
-        mock_is_built_with_cuda.return_value = True
-        gpus = local.get_gpus()
-        self.assertEqual(gpus, [0, 1])
-
-    @mock.patch("tensorflow.compat.v1.test.is_built_with_cuda")
-    @mock.patch("subprocess.Popen")
-    def test_error(self, mock_Popen, mock_is_built_with_cuda):
-        mock_Popen.return_value.__enter__.return_value = FakePopen(
-            stderr=b"!", returncode=1
-        )
-        mock_is_built_with_cuda.return_value = True
-        with self.assertRaises(RuntimeError) as cm:
-            _ = local.get_gpus()
-            self.assertIn("Failed to detect", str(cm.exception))
-
-    @mock.patch("tensorflow.compat.v1.test.is_built_with_rocm", create=True)
-    @mock.patch("tensorflow.compat.v1.test.is_built_with_cuda")
-    def test_cpu(self, mock_is_built_with_cuda, mock_is_built_with_rocm):
-        mock_is_built_with_cuda.return_value = False
-        mock_is_built_with_rocm.return_value = False
-        gpus = local.get_gpus()
-        self.assertIsNone(gpus)
-
-
-class TestLocal(unittest.TestCase):
-    @mock.patch("socket.gethostname")
-    def test_resource(self, mock_gethostname):
-        mock_gethostname.return_value = kHostName
-        nodename, nodelist, _ = local.get_resource()
-        self.assertEqual(nodename, kHostName)
-        self.assertEqual(nodelist, [kHostName])
-
-
-class TestSlurm(unittest.TestCase):
-    @mock.patch.dict(
-        "os.environ",
-        values={
-            "SLURM_JOB_NODELIST": kHostName,
-            "SLURMD_NODENAME": kHostName,
-            "SLURM_JOB_NUM_NODES": "1",
-        },
-    )
-    def test_single(self):
-        nodename, nodelist, _ = slurm.get_resource()
-        self.assertEqual(nodename, kHostName)
-        self.assertEqual(nodelist, [kHostName])
-
-    @mock.patch.dict(
-        "os.environ",
-        values={
-            "SLURM_JOB_NODELIST": "compute-b24-[1-3,5-9],compute-b25-[4,8]",
-            "SLURMD_NODENAME": "compute-b24-2",
-            "SLURM_JOB_NUM_NODES": "10",
-        },
-    )
-    def test_multiple(self):
-        nodename, nodelist, _ = slurm.get_resource()
-        self.assertEqual(nodename, "compute-b24-2")
-        self.assertEqual(
-            nodelist,
-            [
-                "compute-b24-1",
-                "compute-b24-2",
-                "compute-b24-3",
-                "compute-b24-5",
-                "compute-b24-6",
-                "compute-b24-7",
-                "compute-b24-8",
-                "compute-b24-9",
-                "compute-b25-4",
-                "compute-b25-8",
-            ],
-        )
-
-    def test_illegal(self):
-        environ = {
-            "SLURM_JOB_NODELIST": "compute-b24-[3-5]",
-            "SLURMD_NODENAME": "compute-b24-4",
-        }
-        with mock.patch.dict("os.environ", environ):
-            with self.assertRaises(RuntimeError) as cm:
-                _ = slurm.get_resource()
-                self.assertIn("Could not get SLURM number", str(cm.exception))
-
-        environ = {
-            "SLURM_JOB_NODELIST": "compute-b24-1,compute-b25-2",
-            "SLURMD_NODENAME": "compute-b25-2",
-            "SLURM_JOB_NUM_NODES": "4",
-        }
-        with mock.patch.dict("os.environ", environ):
-            with self.assertRaises(ValueError) as cm:
-                _ = slurm.get_resource()
-                self.assertIn("Number of slurm nodes 2", str(cm.exception))
-
-        environ = {
-            "SLURM_JOB_NODELIST": "compute-b24-1,compute-b25-3",
-            "SLURMD_NODENAME": "compute-b25-2",
-            "SLURM_JOB_NUM_NODES": "2",
-        }
-        with mock.patch.dict("os.environ", environ):
-            with self.assertRaises(ValueError) as cm:
-                _ = slurm.get_resource()
-                self.assertIn("Nodename(compute-b25-2", str(cm.exception))
diff --git a/source/tests/test_neighbor_stat.py b/source/tests/test_neighbor_stat.py
deleted file mode 100644
index 49ace29f53..0000000000
--- a/source/tests/test_neighbor_stat.py
+++ /dev/null
@@ -1,58 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-import shutil
-import unittest
-
-import dpdata
-import numpy as np
-
-from deepmd.entrypoints.neighbor_stat import (
-    neighbor_stat,
-)
-
-
-def gen_sys(nframes):
-    natoms = 1000
-    data = {}
-    X, Y, Z = np.mgrid[0:2:3j, 0:2:3j, 0:2:3j]
-    positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T  # + 0.1
-    data["coords"] = np.repeat(positions[np.newaxis, :, :], nframes, axis=0)
-    data["forces"] = np.random.random([nframes, natoms, 3])
-    data["cells"] = np.array([3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 3.0]).reshape(
-        1, 3, 3
-    )
-    data["energies"] = np.random.random([nframes, 1])
-    data["atom_names"] = ["TYPE"]
-    data["atom_numbs"] = [27]
-    data["atom_types"] = np.repeat(0, 27)
-    return data
-
-
-class TestNeighborStat(unittest.TestCase):
-    def setUp(self):
-        data0 = gen_sys(1)
-        sys0 = dpdata.LabeledSystem()
-        sys0.data = data0
-        sys0.to_deepmd_npy("system_0", set_size=1)
-
-    def tearDown(self):
-        shutil.rmtree("system_0")
-
-    def test_neighbor_stat(self):
-        # set rcut to 0. will cause a core dumped
-        # TODO: check what is wrong
-        for rcut in (1.0, 2.0, 4.0):
-            with self.subTest():
-                rcut += 1e-3  # prevent numerical errors
-                min_nbor_dist, max_nbor_size = neighbor_stat(
-                    system="system_0", rcut=rcut, type_map=["TYPE"]
-                )
-                upper = np.ceil(rcut) + 1
-                X, Y, Z = np.mgrid[-upper:upper, -upper:upper, -upper:upper]
-                positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T
-                # distance to (0,0,0)
-                distance = np.linalg.norm(positions, axis=1)
-                expected_neighbors = np.count_nonzero(
-                    np.logical_and(distance > 0, distance <= rcut)
-                )
-                self.assertAlmostEqual(min_nbor_dist, 1.0, 6)
-                self.assertEqual(max_nbor_size, [expected_neighbors])
diff --git a/source/tests/test_output_def.py b/source/tests/test_output_def.py
deleted file mode 100644
index 82d1b13a80..0000000000
--- a/source/tests/test_output_def.py
+++ /dev/null
@@ -1,304 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-import unittest
-from typing import (
-    List,
-)
-
-import numpy as np
-
-from deepmd_utils.model_format import (
-    FittingOutputDef,
-    ModelOutputDef,
-    NativeOP,
-    OutputVariableDef,
-    fitting_check_output,
-    model_check_output,
-)
-from deepmd_utils.model_format.output_def import (
-    check_var,
-)
-
-
-class VariableDef:
-    def __init__(
-        self,
-        name: str,
-        shape: List[int],
-        atomic: bool = True,
-    ):
-        self.name = name
-        self.shape = list(shape)
-        self.atomic = atomic
-
-
-class TestDef(unittest.TestCase):
-    def test_model_output_def(self):
-        defs = [
-            OutputVariableDef("energy", [1], True, True),
-            OutputVariableDef("dos", [10], True, False),
-            OutputVariableDef("foo", [3], False, False),
-        ]
-        # fitting definition
-        fd = FittingOutputDef(defs)
-        expected_keys = ["energy", "dos", "foo"]
-        self.assertEqual(
-            set(expected_keys),
-            set(fd.keys()),
-        )
-        # shape
-        self.assertEqual(fd["energy"].shape, [1])
-        self.assertEqual(fd["dos"].shape, [10])
-        self.assertEqual(fd["foo"].shape, [3])
-        # atomic
-        self.assertEqual(fd["energy"].atomic, True)
-        self.assertEqual(fd["dos"].atomic, True)
-        self.assertEqual(fd["foo"].atomic, True)
-        # reduce
-        self.assertEqual(fd["energy"].reduciable, True)
-        self.assertEqual(fd["dos"].reduciable, True)
-        self.assertEqual(fd["foo"].reduciable, False)
-        # derivative
-        self.assertEqual(fd["energy"].differentiable, True)
-        self.assertEqual(fd["dos"].differentiable, False)
-        self.assertEqual(fd["foo"].differentiable, False)
-        # model definition
-        md = ModelOutputDef(fd)
-        expected_keys = [
-            "energy",
-            "dos",
-            "foo",
-            "energy_redu",
-            "energy_derv_r",
-            "energy_derv_c",
-            "dos_redu",
-        ]
-        self.assertEqual(
-            set(expected_keys),
-            set(md.keys()),
-        )
-        for kk in expected_keys:
-            self.assertEqual(md[kk].name, kk)
-        # reduce
-        self.assertEqual(md["energy"].reduciable, True)
-        self.assertEqual(md["dos"].reduciable, True)
-        self.assertEqual(md["foo"].reduciable, False)
-        # derivative
-        self.assertEqual(md["energy"].differentiable, True)
-        self.assertEqual(md["dos"].differentiable, False)
-        self.assertEqual(md["foo"].differentiable, False)
-        # shape
-        self.assertEqual(md["energy"].shape, [1])
-        self.assertEqual(md["dos"].shape, [10])
-        self.assertEqual(md["foo"].shape, [3])
-        self.assertEqual(md["energy_redu"].shape, [1])
-        self.assertEqual(md["energy_derv_r"].shape, [1, 3])
-        self.assertEqual(md["energy_derv_c"].shape, [1, 3, 3])
-        # atomic
-        self.assertEqual(md["energy"].atomic, True)
-        self.assertEqual(md["dos"].atomic, True)
-        self.assertEqual(md["foo"].atomic, True)
-        self.assertEqual(md["energy_redu"].atomic, False)
-        self.assertEqual(md["energy_derv_r"].atomic, True)
-        self.assertEqual(md["energy_derv_c"].atomic, True)
-
-    def test_raise_no_redu_deriv(self):
-        with self.assertRaises(ValueError) as context:
-            (OutputVariableDef("energy", [1], False, True),)
-
-    def test_model_decorator(self):
-        nf = 2
-        nloc = 3
-        nall = 4
-
-        @model_check_output
-        class Foo(NativeOP):
-            def output_def(self):
-                defs = [
-                    OutputVariableDef("energy", [1], True, True),
-                ]
-                return ModelOutputDef(FittingOutputDef(defs))
-
-            def call(self):
-                return {
-                    "energy": np.zeros([nf, nloc, 1]),
-                    "energy_redu": np.zeros([nf, 1]),
-                    "energy_derv_r": np.zeros([nf, nall, 1, 3]),
-                    "energy_derv_c": np.zeros([nf, nall, 1, 3, 3]),
-                }
-
-        ff = Foo()
-        ff()
-
-    def test_model_decorator_keyerror(self):
-        nf = 2
-        nloc = 3
-        nall = 4
-
-        @model_check_output
-        class Foo(NativeOP):
-            def __init__(self):
-                super().__init__()
-
-            def output_def(self):
-                defs = [
-                    OutputVariableDef("energy", [1], True, True),
-                ]
-                return ModelOutputDef(FittingOutputDef(defs))
-
-            def call(self):
-                return {
-                    "energy": np.zeros([nf, nloc, 1]),
-                    "energy_redu": np.zeros([nf, 1]),
-                    "energy_derv_c": np.zeros([nf, nall, 1, 3, 3]),
-                }
-
-        ff = Foo()
-        with self.assertRaises(KeyError) as context:
-            ff()
-            self.assertIn("energy_derv_r", context.exception)
-
-    def test_model_decorator_shapeerror(self):
-        nf = 2
-        nloc = 3
-        nall = 4
-
-        @model_check_output
-        class Foo(NativeOP):
-            def __init__(
-                self,
-                shape_rd=[nf, 1],
-                shape_dr=[nf, nall, 1, 3],
-            ):
-                self.shape_rd, self.shape_dr = shape_rd, shape_dr
-
-            def output_def(self):
-                defs = [
-                    OutputVariableDef("energy", [1], True, True),
-                ]
-                return ModelOutputDef(FittingOutputDef(defs))
-
-            def call(self):
-                return {
-                    "energy": np.zeros([nf, nloc, 1]),
-                    "energy_redu": np.zeros(self.shape_rd),
-                    "energy_derv_r": np.zeros(self.shape_dr),
-                    "energy_derv_c": np.zeros([nf, nall, 1, 3, 3]),
-                }
-
-        ff = Foo()
-        ff()
-        # shape of reduced energy
-        with self.assertRaises(ValueError) as context:
-            ff = Foo(shape_rd=[nf, nloc, 1])
-            ff()
-            self.assertIn("not matching", context.exception)
-        with self.assertRaises(ValueError) as context:
-            ff = Foo(shape_rd=[nf, 2])
-            ff()
-            self.assertIn("not matching", context.exception)
-        # shape of dr
-        with self.assertRaises(ValueError) as context:
-            ff = Foo(shape_dr=[nf, nloc, 1])
-            ff()
-            self.assertIn("not matching", context.exception)
-        with self.assertRaises(ValueError) as context:
-            ff = Foo(shape_dr=[nf, nloc, 1, 3, 3])
-            ff()
-            self.assertIn("not matching", context.exception)
-        with self.assertRaises(ValueError) as context:
-            ff = Foo(shape_dr=[nf, nloc, 1, 4])
-            ff()
-            self.assertIn("not matching", context.exception)
-
-    def test_fitting_decorator(self):
-        nf = 2
-        nloc = 3
-        nall = 4
-
-        @fitting_check_output
-        class Foo(NativeOP):
-            def output_def(self):
-                defs = [
-                    OutputVariableDef("energy", [1], True, True),
-                ]
-                return FittingOutputDef(defs)
-
-            def call(self):
-                return {
-                    "energy": np.zeros([nf, nloc, 1]),
-                }
-
-        ff = Foo()
-        ff()
-
-    def test_fitting_decorator_shapeerror(self):
-        nf = 2
-        nloc = 3
-
-        @fitting_check_output
-        class Foo(NativeOP):
-            def __init__(
-                self,
-                shape=[nf, nloc, 1],
-            ):
-                self.shape = shape
-
-            def output_def(self):
-                defs = [
-                    OutputVariableDef("energy", [1], True, True),
-                ]
-                return FittingOutputDef(defs)
-
-            def call(self):
-                return {
-                    "energy": np.zeros(self.shape),
-                }
-
-        ff = Foo()
-        ff()
-        # shape of reduced energy
-        with self.assertRaises(ValueError) as context:
-            ff = Foo(shape=[nf, 1])
-            ff()
-            self.assertIn("not matching", context.exception)
-        with self.assertRaises(ValueError) as context:
-            ff = Foo(shape=[nf, nloc, 2])
-            ff()
-            self.assertIn("not matching", context.exception)
-
-    def test_check_var(self):
-        var_def = VariableDef("foo", [2, 3], atomic=True)
-        with self.assertRaises(ValueError) as context:
-            check_var(np.zeros([2, 3, 4, 5, 6]), var_def)
-            self.assertIn("length not matching", context.exception)
-        with self.assertRaises(ValueError) as context:
-            check_var(np.zeros([2, 3, 4, 5]), var_def)
-            self.assertIn("shape not matching", context.exception)
-        check_var(np.zeros([2, 3, 2, 3]), var_def)
-
-        var_def = VariableDef("foo", [2, 3], atomic=False)
-        with self.assertRaises(ValueError) as context:
-            check_var(np.zeros([2, 3, 4, 5]), var_def)
-            self.assertIn("length not matching", context.exception)
-        with self.assertRaises(ValueError) as context:
-            check_var(np.zeros([2, 3, 4]), var_def)
-            self.assertIn("shape not matching", context.exception)
-        check_var(np.zeros([2, 2, 3]), var_def)
-
-        var_def = VariableDef("foo", [2, -1], atomic=True)
-        with self.assertRaises(ValueError) as context:
-            check_var(np.zeros([2, 3, 4, 5, 6]), var_def)
-            self.assertIn("length not matching", context.exception)
-        with self.assertRaises(ValueError) as context:
-            check_var(np.zeros([2, 3, 4, 5]), var_def)
-            self.assertIn("shape not matching", context.exception)
-        check_var(np.zeros([2, 3, 2, 8]), var_def)
-
-        var_def = VariableDef("foo", [2, -1], atomic=False)
-        with self.assertRaises(ValueError) as context:
-            check_var(np.zeros([2, 3, 4, 5]), var_def)
-            self.assertIn("length not matching", context.exception)
-        with self.assertRaises(ValueError) as context:
-            check_var(np.zeros([2, 3, 4]), var_def)
-            self.assertIn("shape not matching", context.exception)
-        check_var(np.zeros([2, 2, 8]), var_def)
diff --git a/source/tests/tf/__init__.py b/source/tests/tf/__init__.py
new file mode 100644
index 0000000000..6ceb116d85
--- /dev/null
+++ b/source/tests/tf/__init__.py
@@ -0,0 +1 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
diff --git a/source/tests/common.py b/source/tests/tf/common.py
similarity index 98%
rename from source/tests/common.py
rename to source/tests/tf/common.py
index 9af324896f..d4f3cc8392 100644
--- a/source/tests/common.py
+++ b/source/tests/tf/common.py
@@ -4,19 +4,23 @@
 import os
 import pathlib
 import shutil
+import warnings
 
 import dpdata
 import numpy as np
 
-from deepmd.common import j_loader as dp_j_loader
-from deepmd.entrypoints.main import (
+from deepmd.tf.common import j_loader as dp_j_loader
+from deepmd.tf.entrypoints.main import (
     main,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     tf,
 )
-from deepmd.utils import random as dp_random
+from deepmd.tf.utils import random as dp_random
+from deepmd.utils.out_stat import (
+    compute_stats_from_redu,
+)
 
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     global_default_fv_hh = 1e-2
@@ -28,6 +32,7 @@
     global_default_places = 5
 
 tests_path = pathlib.Path(__file__).parent.absolute()
+infer_path = (tests_path.parent / "infer").absolute()
 
 
 def j_loader(filename):
@@ -965,7 +970,7 @@ def __init__(self, systems, set_prefix, batch_size, test_size, rcut, run_opt=Non
                 )
             chk_ret = self.data_systems[ii].check_test_size(test_size)
             if chk_ret is not None:
-                print(
+                warnings.warn(
                     "WARNNING: system %s required test size %d is larger than the size %d of the dataset %s"
                     % (self.system_dirs[ii], test_size, chk_ret[1], chk_ret[0])
                 )
@@ -1040,10 +1045,12 @@ def compute_energy_shift(self):
         sys_tynatom = np.array(self.natoms_vec, dtype=GLOBAL_NP_FLOAT_PRECISION)
         sys_tynatom = np.reshape(sys_tynatom, [self.nsystems, -1])
         sys_tynatom = sys_tynatom[:, 2:]
-        energy_shift, resd, rank, s_value = np.linalg.lstsq(
-            sys_tynatom, sys_ener, rcond=None
+        energy_shift, _ = compute_stats_from_redu(
+            sys_ener.reshape(-1, 1),
+            sys_tynatom,
+            rcond=None,
         )
-        return energy_shift
+        return energy_shift.ravel()
 
     def process_sys_weights(self, sys_weights):
         sys_weights = np.array(sys_weights)
diff --git a/source/tests/compat_inputs/water_se_a_v0.json b/source/tests/tf/compat_inputs/water_se_a_v0.json
similarity index 100%
rename from source/tests/compat_inputs/water_se_a_v0.json
rename to source/tests/tf/compat_inputs/water_se_a_v0.json
diff --git a/source/tests/compat_inputs/water_se_a_v1.json b/source/tests/tf/compat_inputs/water_se_a_v1.json
similarity index 100%
rename from source/tests/compat_inputs/water_se_a_v1.json
rename to source/tests/tf/compat_inputs/water_se_a_v1.json
diff --git a/source/tests/compat_inputs/water_v0.json b/source/tests/tf/compat_inputs/water_v0.json
similarity index 100%
rename from source/tests/compat_inputs/water_v0.json
rename to source/tests/tf/compat_inputs/water_v0.json
diff --git a/source/tests/compat_inputs/water_v1.json b/source/tests/tf/compat_inputs/water_v1.json
similarity index 100%
rename from source/tests/compat_inputs/water_v1.json
rename to source/tests/tf/compat_inputs/water_v1.json
diff --git a/source/tests/compat_inputs/water_v2.json b/source/tests/tf/compat_inputs/water_v2.json
similarity index 100%
rename from source/tests/compat_inputs/water_v2.json
rename to source/tests/tf/compat_inputs/water_v2.json
diff --git a/source/tests/tf/data_dp_mask/nopbc b/source/tests/tf/data_dp_mask/nopbc
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/source/tests/data_dp_mask/set.000/aparam.npy b/source/tests/tf/data_dp_mask/set.000/aparam.npy
similarity index 100%
rename from source/tests/data_dp_mask/set.000/aparam.npy
rename to source/tests/tf/data_dp_mask/set.000/aparam.npy
diff --git a/source/tests/data_dp_mask/set.000/atom_pref.npy b/source/tests/tf/data_dp_mask/set.000/atom_pref.npy
similarity index 100%
rename from source/tests/data_dp_mask/set.000/atom_pref.npy
rename to source/tests/tf/data_dp_mask/set.000/atom_pref.npy
diff --git a/source/tests/data_dp_mask/set.000/box.npy b/source/tests/tf/data_dp_mask/set.000/box.npy
similarity index 100%
rename from source/tests/data_dp_mask/set.000/box.npy
rename to source/tests/tf/data_dp_mask/set.000/box.npy
diff --git a/source/tests/data_dp_mask/set.000/coord.npy b/source/tests/tf/data_dp_mask/set.000/coord.npy
similarity index 100%
rename from source/tests/data_dp_mask/set.000/coord.npy
rename to source/tests/tf/data_dp_mask/set.000/coord.npy
diff --git a/source/tests/data_dp_mask/set.000/energy.npy b/source/tests/tf/data_dp_mask/set.000/energy.npy
similarity index 100%
rename from source/tests/data_dp_mask/set.000/energy.npy
rename to source/tests/tf/data_dp_mask/set.000/energy.npy
diff --git a/source/tests/data_dp_mask/set.000/force.npy b/source/tests/tf/data_dp_mask/set.000/force.npy
similarity index 100%
rename from source/tests/data_dp_mask/set.000/force.npy
rename to source/tests/tf/data_dp_mask/set.000/force.npy
diff --git a/source/tests/data_dp_mask/type.raw b/source/tests/tf/data_dp_mask/type.raw
similarity index 100%
rename from source/tests/data_dp_mask/type.raw
rename to source/tests/tf/data_dp_mask/type.raw
diff --git a/source/tests/data_dp_mask/type_map.raw b/source/tests/tf/data_dp_mask/type_map.raw
similarity index 100%
rename from source/tests/data_dp_mask/type_map.raw
rename to source/tests/tf/data_dp_mask/type_map.raw
diff --git a/source/tests/data_modifier/dipole.json b/source/tests/tf/data_modifier/dipole.json
similarity index 100%
rename from source/tests/data_modifier/dipole.json
rename to source/tests/tf/data_modifier/dipole.json
diff --git a/source/tests/data_modifier/sys_10/set.000/atomic_dipole.npy b/source/tests/tf/data_modifier/sys_10/set.000/atomic_dipole.npy
similarity index 100%
rename from source/tests/data_modifier/sys_10/set.000/atomic_dipole.npy
rename to source/tests/tf/data_modifier/sys_10/set.000/atomic_dipole.npy
diff --git a/source/tests/data_modifier/sys_10/set.000/box.npy b/source/tests/tf/data_modifier/sys_10/set.000/box.npy
similarity index 100%
rename from source/tests/data_modifier/sys_10/set.000/box.npy
rename to source/tests/tf/data_modifier/sys_10/set.000/box.npy
diff --git a/source/tests/data_modifier/sys_10/set.000/coord.npy b/source/tests/tf/data_modifier/sys_10/set.000/coord.npy
similarity index 100%
rename from source/tests/data_modifier/sys_10/set.000/coord.npy
rename to source/tests/tf/data_modifier/sys_10/set.000/coord.npy
diff --git a/source/tests/data_modifier/sys_10/set.000/energy.npy b/source/tests/tf/data_modifier/sys_10/set.000/energy.npy
similarity index 100%
rename from source/tests/data_modifier/sys_10/set.000/energy.npy
rename to source/tests/tf/data_modifier/sys_10/set.000/energy.npy
diff --git a/source/tests/data_modifier/sys_10/set.000/force.npy b/source/tests/tf/data_modifier/sys_10/set.000/force.npy
similarity index 100%
rename from source/tests/data_modifier/sys_10/set.000/force.npy
rename to source/tests/tf/data_modifier/sys_10/set.000/force.npy
diff --git a/source/tests/data_modifier/sys_10/type.raw b/source/tests/tf/data_modifier/sys_10/type.raw
similarity index 100%
rename from source/tests/data_modifier/sys_10/type.raw
rename to source/tests/tf/data_modifier/sys_10/type.raw
diff --git a/source/tests/data_modifier/sys_10/type_map.raw b/source/tests/tf/data_modifier/sys_10/type_map.raw
similarity index 100%
rename from source/tests/data_modifier/sys_10/type_map.raw
rename to source/tests/tf/data_modifier/sys_10/type_map.raw
diff --git a/source/tests/finetune/data/set.000/box.npy b/source/tests/tf/finetune/data/set.000/box.npy
similarity index 100%
rename from source/tests/finetune/data/set.000/box.npy
rename to source/tests/tf/finetune/data/set.000/box.npy
diff --git a/source/tests/finetune/data/set.000/coord.npy b/source/tests/tf/finetune/data/set.000/coord.npy
similarity index 100%
rename from source/tests/finetune/data/set.000/coord.npy
rename to source/tests/tf/finetune/data/set.000/coord.npy
diff --git a/source/tests/finetune/data/set.000/energy.npy b/source/tests/tf/finetune/data/set.000/energy.npy
similarity index 100%
rename from source/tests/finetune/data/set.000/energy.npy
rename to source/tests/tf/finetune/data/set.000/energy.npy
diff --git a/source/tests/finetune/data/set.000/force.npy b/source/tests/tf/finetune/data/set.000/force.npy
similarity index 100%
rename from source/tests/finetune/data/set.000/force.npy
rename to source/tests/tf/finetune/data/set.000/force.npy
diff --git a/source/tests/tf/finetune/data/type.raw b/source/tests/tf/finetune/data/type.raw
new file mode 100644
index 0000000000..97e8fdfcf8
--- /dev/null
+++ b/source/tests/tf/finetune/data/type.raw
@@ -0,0 +1,192 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
diff --git a/source/tests/tf/finetune/data/type_map.raw b/source/tests/tf/finetune/data/type_map.raw
new file mode 100644
index 0000000000..e900768b1d
--- /dev/null
+++ b/source/tests/tf/finetune/data/type_map.raw
@@ -0,0 +1,2 @@
+O
+H
diff --git a/source/tests/finetune/data_mixed_type/set.000/box.npy b/source/tests/tf/finetune/data_mixed_type/set.000/box.npy
similarity index 100%
rename from source/tests/finetune/data_mixed_type/set.000/box.npy
rename to source/tests/tf/finetune/data_mixed_type/set.000/box.npy
diff --git a/source/tests/finetune/data_mixed_type/set.000/coord.npy b/source/tests/tf/finetune/data_mixed_type/set.000/coord.npy
similarity index 100%
rename from source/tests/finetune/data_mixed_type/set.000/coord.npy
rename to source/tests/tf/finetune/data_mixed_type/set.000/coord.npy
diff --git a/source/tests/finetune/data_mixed_type/set.000/energy.npy b/source/tests/tf/finetune/data_mixed_type/set.000/energy.npy
similarity index 100%
rename from source/tests/finetune/data_mixed_type/set.000/energy.npy
rename to source/tests/tf/finetune/data_mixed_type/set.000/energy.npy
diff --git a/source/tests/finetune/data_mixed_type/set.000/force.npy b/source/tests/tf/finetune/data_mixed_type/set.000/force.npy
similarity index 100%
rename from source/tests/finetune/data_mixed_type/set.000/force.npy
rename to source/tests/tf/finetune/data_mixed_type/set.000/force.npy
diff --git a/source/tests/finetune/data_mixed_type/set.000/real_atom_types.npy b/source/tests/tf/finetune/data_mixed_type/set.000/real_atom_types.npy
similarity index 100%
rename from source/tests/finetune/data_mixed_type/set.000/real_atom_types.npy
rename to source/tests/tf/finetune/data_mixed_type/set.000/real_atom_types.npy
diff --git a/source/tests/finetune/data_mixed_type/type.raw b/source/tests/tf/finetune/data_mixed_type/type.raw
similarity index 100%
rename from source/tests/finetune/data_mixed_type/type.raw
rename to source/tests/tf/finetune/data_mixed_type/type.raw
diff --git a/source/tests/tf/finetune/data_mixed_type/type_map.raw b/source/tests/tf/finetune/data_mixed_type/type_map.raw
new file mode 100644
index 0000000000..e900768b1d
--- /dev/null
+++ b/source/tests/tf/finetune/data_mixed_type/type_map.raw
@@ -0,0 +1,2 @@
+O
+H
diff --git a/source/tests/finetune/input_finetune.json b/source/tests/tf/finetune/input_finetune.json
similarity index 100%
rename from source/tests/finetune/input_finetune.json
rename to source/tests/tf/finetune/input_finetune.json
diff --git a/source/tests/finetune/input_pretrain.json b/source/tests/tf/finetune/input_pretrain.json
similarity index 100%
rename from source/tests/finetune/input_pretrain.json
rename to source/tests/tf/finetune/input_pretrain.json
diff --git a/source/tests/init_frz_model/data/set.000/box.npy b/source/tests/tf/init_frz_model/data/set.000/box.npy
similarity index 100%
rename from source/tests/init_frz_model/data/set.000/box.npy
rename to source/tests/tf/init_frz_model/data/set.000/box.npy
diff --git a/source/tests/init_frz_model/data/set.000/coord.npy b/source/tests/tf/init_frz_model/data/set.000/coord.npy
similarity index 100%
rename from source/tests/init_frz_model/data/set.000/coord.npy
rename to source/tests/tf/init_frz_model/data/set.000/coord.npy
diff --git a/source/tests/init_frz_model/data/set.000/energy.npy b/source/tests/tf/init_frz_model/data/set.000/energy.npy
similarity index 100%
rename from source/tests/init_frz_model/data/set.000/energy.npy
rename to source/tests/tf/init_frz_model/data/set.000/energy.npy
diff --git a/source/tests/init_frz_model/data/set.000/force.npy b/source/tests/tf/init_frz_model/data/set.000/force.npy
similarity index 100%
rename from source/tests/init_frz_model/data/set.000/force.npy
rename to source/tests/tf/init_frz_model/data/set.000/force.npy
diff --git a/source/tests/init_frz_model/data/type.raw b/source/tests/tf/init_frz_model/data/type.raw
similarity index 100%
rename from source/tests/init_frz_model/data/type.raw
rename to source/tests/tf/init_frz_model/data/type.raw
diff --git a/source/tests/tf/init_frz_model/data/type_map.raw b/source/tests/tf/init_frz_model/data/type_map.raw
new file mode 100644
index 0000000000..e900768b1d
--- /dev/null
+++ b/source/tests/tf/init_frz_model/data/type_map.raw
@@ -0,0 +1,2 @@
+O
+H
diff --git a/source/tests/init_frz_model/input.json b/source/tests/tf/init_frz_model/input.json
similarity index 100%
rename from source/tests/init_frz_model/input.json
rename to source/tests/tf/init_frz_model/input.json
diff --git a/source/tests/model_compression/data/set.000/box.npy b/source/tests/tf/model_compression/data/set.000/box.npy
similarity index 100%
rename from source/tests/model_compression/data/set.000/box.npy
rename to source/tests/tf/model_compression/data/set.000/box.npy
diff --git a/source/tests/model_compression/data/set.000/coord.npy b/source/tests/tf/model_compression/data/set.000/coord.npy
similarity index 100%
rename from source/tests/model_compression/data/set.000/coord.npy
rename to source/tests/tf/model_compression/data/set.000/coord.npy
diff --git a/source/tests/model_compression/data/set.000/energy.npy b/source/tests/tf/model_compression/data/set.000/energy.npy
similarity index 100%
rename from source/tests/model_compression/data/set.000/energy.npy
rename to source/tests/tf/model_compression/data/set.000/energy.npy
diff --git a/source/tests/model_compression/data/set.000/force.npy b/source/tests/tf/model_compression/data/set.000/force.npy
similarity index 100%
rename from source/tests/model_compression/data/set.000/force.npy
rename to source/tests/tf/model_compression/data/set.000/force.npy
diff --git a/source/tests/model_compression/data/type.raw b/source/tests/tf/model_compression/data/type.raw
similarity index 100%
rename from source/tests/model_compression/data/type.raw
rename to source/tests/tf/model_compression/data/type.raw
diff --git a/source/tests/tf/model_compression/data/type_map.raw b/source/tests/tf/model_compression/data/type_map.raw
new file mode 100644
index 0000000000..e900768b1d
--- /dev/null
+++ b/source/tests/tf/model_compression/data/type_map.raw
@@ -0,0 +1,2 @@
+O
+H
diff --git a/source/tests/model_compression/input.json b/source/tests/tf/model_compression/input.json
similarity index 100%
rename from source/tests/model_compression/input.json
rename to source/tests/tf/model_compression/input.json
diff --git a/source/tests/model_spin/set.000/box.npy b/source/tests/tf/model_spin/set.000/box.npy
similarity index 100%
rename from source/tests/model_spin/set.000/box.npy
rename to source/tests/tf/model_spin/set.000/box.npy
diff --git a/source/tests/model_spin/set.000/coord.npy b/source/tests/tf/model_spin/set.000/coord.npy
similarity index 100%
rename from source/tests/model_spin/set.000/coord.npy
rename to source/tests/tf/model_spin/set.000/coord.npy
diff --git a/source/tests/model_spin/set.000/energy.npy b/source/tests/tf/model_spin/set.000/energy.npy
similarity index 100%
rename from source/tests/model_spin/set.000/energy.npy
rename to source/tests/tf/model_spin/set.000/energy.npy
diff --git a/source/tests/model_spin/set.000/force.npy b/source/tests/tf/model_spin/set.000/force.npy
similarity index 100%
rename from source/tests/model_spin/set.000/force.npy
rename to source/tests/tf/model_spin/set.000/force.npy
diff --git a/source/tests/model_spin/type.raw b/source/tests/tf/model_spin/type.raw
similarity index 100%
rename from source/tests/model_spin/type.raw
rename to source/tests/tf/model_spin/type.raw
diff --git a/source/tests/nvnmd/ref/box.npy b/source/tests/tf/nvnmd/ref/box.npy
similarity index 100%
rename from source/tests/nvnmd/ref/box.npy
rename to source/tests/tf/nvnmd/ref/box.npy
diff --git a/source/tests/nvnmd/ref/config_v0_cnn.npy b/source/tests/tf/nvnmd/ref/config_v0_cnn.npy
similarity index 100%
rename from source/tests/nvnmd/ref/config_v0_cnn.npy
rename to source/tests/tf/nvnmd/ref/config_v0_cnn.npy
diff --git a/source/tests/nvnmd/ref/config_v1_cnn.npy b/source/tests/tf/nvnmd/ref/config_v1_cnn.npy
similarity index 100%
rename from source/tests/nvnmd/ref/config_v1_cnn.npy
rename to source/tests/tf/nvnmd/ref/config_v1_cnn.npy
diff --git a/source/tests/nvnmd/ref/coord.npy b/source/tests/tf/nvnmd/ref/coord.npy
similarity index 100%
rename from source/tests/nvnmd/ref/coord.npy
rename to source/tests/tf/nvnmd/ref/coord.npy
diff --git a/source/tests/nvnmd/ref/type.raw b/source/tests/tf/nvnmd/ref/type.raw
similarity index 100%
rename from source/tests/nvnmd/ref/type.raw
rename to source/tests/tf/nvnmd/ref/type.raw
diff --git a/source/tests/nvnmd/ref/weight_v0_cnn.npy b/source/tests/tf/nvnmd/ref/weight_v0_cnn.npy
similarity index 100%
rename from source/tests/nvnmd/ref/weight_v0_cnn.npy
rename to source/tests/tf/nvnmd/ref/weight_v0_cnn.npy
diff --git a/source/tests/nvnmd/ref/weight_v1_cnn.npy b/source/tests/tf/nvnmd/ref/weight_v1_cnn.npy
similarity index 100%
rename from source/tests/nvnmd/ref/weight_v1_cnn.npy
rename to source/tests/tf/nvnmd/ref/weight_v1_cnn.npy
diff --git a/source/tests/pairwise_dprc.json b/source/tests/tf/pairwise_dprc.json
similarity index 100%
rename from source/tests/pairwise_dprc.json
rename to source/tests/tf/pairwise_dprc.json
diff --git a/source/tests/polar_se_a.json b/source/tests/tf/polar_se_a.json
similarity index 100%
rename from source/tests/polar_se_a.json
rename to source/tests/tf/polar_se_a.json
diff --git a/source/tests/polar_se_a_tebd.json b/source/tests/tf/polar_se_a_tebd.json
similarity index 100%
rename from source/tests/polar_se_a_tebd.json
rename to source/tests/tf/polar_se_a_tebd.json
diff --git a/source/tests/test.hdf5 b/source/tests/tf/test.hdf5
similarity index 100%
rename from source/tests/test.hdf5
rename to source/tests/tf/test.hdf5
diff --git a/source/tests/test_activation_fn_gelu.py b/source/tests/tf/test_activation_fn_gelu.py
similarity index 96%
rename from source/tests/test_activation_fn_gelu.py
rename to source/tests/tf/test_activation_fn_gelu.py
index b1c30eeefc..9be0885b74 100644
--- a/source/tests/test_activation_fn_gelu.py
+++ b/source/tests/tf/test_activation_fn_gelu.py
@@ -3,13 +3,13 @@
 
 import numpy as np
 
-from deepmd.common import (
+from deepmd.tf.common import (
     get_activation_func,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.utils.network import (
+from deepmd.tf.utils.network import (
     embedding_net,
 )
 
diff --git a/source/tests/test_adjust_sel.py b/source/tests/tf/test_adjust_sel.py
similarity index 94%
rename from source/tests/test_adjust_sel.py
rename to source/tests/tf/test_adjust_sel.py
index 0ff6eb0792..435d17d959 100644
--- a/source/tests/test_adjust_sel.py
+++ b/source/tests/tf/test_adjust_sel.py
@@ -1,25 +1,24 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import json
 import os
-import subprocess as sp
 import unittest
 
 import numpy as np
 
-# from deepmd.entrypoints.compress import compress
-from common import (
-    j_loader,
-    run_dp,
-    tests_path,
-)
-
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPot,
 )
 
+# from deepmd.tf.entrypoints.compress import compress
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -33,17 +32,6 @@ def _file_delete(file):
         os.remove(file)
 
 
-def _subprocess_run(command):
-    popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
-    for line in iter(popen.stdout.readline, b""):
-        if hasattr(line, "decode"):
-            line = line.decode("utf-8")
-        line = line.rstrip()
-        print(line)
-    popen.wait()
-    return popen.returncode
-
-
 def _init_models():
     # we use the setting for model compression
     data_file = str(tests_path / os.path.join("model_compression", "data"))
@@ -82,12 +70,10 @@ def _init_models():
     return INPUT, frozen_model, decreased_model, increased_model
 
 
-INPUT, FROZEN_MODEL, DECREASED_MODEL, INCREASED_MODEL = _init_models()
-
-
 class TestDeepPotAAdjustSel(unittest.TestCase):
     @classmethod
     def setUpClass(self):
+        INPUT, FROZEN_MODEL, DECREASED_MODEL, INCREASED_MODEL = _init_models()
         self.dp_original = DeepPot(FROZEN_MODEL)
         self.dp_decreased = DeepPot(DECREASED_MODEL)
         self.dp_increased = DeepPot(INCREASED_MODEL)
diff --git a/source/tests/test_auto_batch_size.py b/source/tests/tf/test_auto_batch_size.py
similarity index 98%
rename from source/tests/test_auto_batch_size.py
rename to source/tests/tf/test_auto_batch_size.py
index 5a349f70b9..3316e186b6 100644
--- a/source/tests/test_auto_batch_size.py
+++ b/source/tests/tf/test_auto_batch_size.py
@@ -4,10 +4,10 @@
 
 import numpy as np
 
-from deepmd.utils.batch_size import (
+from deepmd.tf.utils.batch_size import (
     AutoBatchSize,
 )
-from deepmd.utils.errors import (
+from deepmd.tf.utils.errors import (
     OutOfMemoryError,
 )
 
diff --git a/source/tests/tf/test_cluster.py b/source/tests/tf/test_cluster.py
new file mode 100644
index 0000000000..ea90e1ea6d
--- /dev/null
+++ b/source/tests/tf/test_cluster.py
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+from unittest import (
+    mock,
+)
+
+from deepmd.tf.cluster import (
+    local,
+)
+
+kHostName = "compute-b24-1"
+
+
+class FakePopen:
+    def __init__(self, stdout=b"", stderr=b"", returncode=0):
+        self._stdout = stdout
+        self._stderr = stderr
+        self._returncode = returncode
+
+    def communicate(self):
+        return self._stdout, self._stderr
+
+    @property
+    def returncode(self):
+        return self._returncode
+
+
+class TestGPU(unittest.TestCase):
+    @mock.patch("tensorflow.compat.v1.test.is_built_with_cuda")
+    @mock.patch("subprocess.Popen")
+    def test_none(self, mock_Popen, mock_is_built_with_cuda):
+        mock_Popen.return_value.__enter__.return_value = FakePopen(b"0", b"")
+        mock_is_built_with_cuda.return_value = True
+        gpus = local.get_gpus()
+        self.assertIsNone(gpus)
+
+    @mock.patch("tensorflow.compat.v1.test.is_built_with_cuda")
+    @mock.patch("subprocess.Popen")
+    def test_valid(self, mock_Popen, mock_is_built_with_cuda):
+        mock_Popen.return_value.__enter__.return_value = FakePopen(b"2", b"")
+        mock_is_built_with_cuda.return_value = True
+        gpus = local.get_gpus()
+        self.assertEqual(gpus, [0, 1])
+
+    @mock.patch("tensorflow.compat.v1.test.is_built_with_cuda")
+    @mock.patch("subprocess.Popen")
+    def test_error(self, mock_Popen, mock_is_built_with_cuda):
+        mock_Popen.return_value.__enter__.return_value = FakePopen(
+            stderr=b"!", returncode=1
+        )
+        mock_is_built_with_cuda.return_value = True
+        with self.assertRaises(RuntimeError) as cm:
+            _ = local.get_gpus()
+            self.assertIn("Failed to detect", str(cm.exception))
+
+    @mock.patch("tensorflow.compat.v1.test.is_built_with_rocm", create=True)
+    @mock.patch("tensorflow.compat.v1.test.is_built_with_cuda")
+    def test_cpu(self, mock_is_built_with_cuda, mock_is_built_with_rocm):
+        mock_is_built_with_cuda.return_value = False
+        mock_is_built_with_rocm.return_value = False
+        gpus = local.get_gpus()
+        self.assertIsNone(gpus)
+
+
+class TestLocal(unittest.TestCase):
+    @mock.patch("socket.gethostname")
+    def test_resource(self, mock_gethostname):
+        mock_gethostname.return_value = kHostName
+        nodename, nodelist, _ = local.get_resource()
+        self.assertEqual(nodename, kHostName)
+        self.assertEqual(nodelist, [kHostName])
diff --git a/source/tests/test_common.py b/source/tests/tf/test_common.py
similarity index 95%
rename from source/tests/test_common.py
rename to source/tests/tf/test_common.py
index bf68e7056b..95948f29bb 100644
--- a/source/tests/test_common.py
+++ b/source/tests/tf/test_common.py
@@ -5,12 +5,12 @@
     Path,
 )
 
-from deepmd.common import (
+from deepmd.tf.common import (
     GLOBAL_TF_FLOAT_PRECISION,
     cast_precision,
     expand_sys_str,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
 
@@ -66,7 +66,7 @@ def test_expand(self):
 
 
 class TestCastPrecision(unittest.TestCase):
-    """This class tests `deepmd.common.cast_precision`."""
+    """This class tests `deepmd.tf.common.cast_precision`."""
 
     @property
     def precision(self):
diff --git a/source/tests/test_compat_input.py b/source/tests/tf/test_compat_input.py
similarity index 96%
rename from source/tests/test_compat_input.py
rename to source/tests/tf/test_compat_input.py
index 97172be9e7..f7c605380c 100644
--- a/source/tests/test_compat_input.py
+++ b/source/tests/tf/test_compat_input.py
@@ -2,15 +2,15 @@
 import os
 import unittest
 
-from common import (
-    j_loader,
-)
-
-from deepmd.utils.compat import (
+from deepmd.tf.utils.compat import (
     convert_input_v0_v1,
     convert_input_v1_v2,
 )
 
+from .common import (
+    j_loader,
+)
+
 
 class TestConvertInput(unittest.TestCase):
     def test_convert_smth(self):
diff --git a/source/tests/test_compressed_training.py b/source/tests/tf/test_compressed_training.py
similarity index 95%
rename from source/tests/test_compressed_training.py
rename to source/tests/tf/test_compressed_training.py
index 0a0bbeaadf..998ef8cb59 100644
--- a/source/tests/test_compressed_training.py
+++ b/source/tests/tf/test_compressed_training.py
@@ -3,18 +3,19 @@
 import os
 import unittest
 
-# from deepmd.entrypoints.compress import compress
-from common import (
-    j_loader,
-    run_dp,
-    tests_path,
-)
 from packaging.version import parse as parse_version
 
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
 
+# from deepmd.tf.entrypoints.compress import compress
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
 
 @unittest.skipIf(
     parse_version(tf.__version__) < parse_version("2"),
diff --git a/source/tests/test_data_large_batch.py b/source/tests/tf/test_data_large_batch.py
similarity index 95%
rename from source/tests/test_data_large_batch.py
rename to source/tests/tf/test_data_large_batch.py
index 5750f956f8..dad6bbf252 100644
--- a/source/tests/test_data_large_batch.py
+++ b/source/tests/tf/test_data_large_batch.py
@@ -3,34 +3,35 @@
 import unittest
 
 import numpy as np
-from common import (
-    gen_data,
-    j_loader,
-)
 from packaging.version import parse as parse_version
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeAtten,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     EnerFitting,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     EnerModel,
 )
-from deepmd.utils.data_system import (
+from deepmd.tf.utils.data_system import (
     DeepmdDataSystem,
 )
-from deepmd.utils.type_embed import (
+from deepmd.tf.utils.type_embed import (
     TypeEmbedNet,
 )
 
+from .common import (
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -111,10 +112,13 @@ def test_data_mixed_type(self):
         jdata["model"]["descriptor"].pop("type", None)
         jdata["model"]["descriptor"]["ntypes"] = 2
         descrpt = DescrptSeAtten(**jdata["model"]["descriptor"], uniform_seed=True)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["dim_rot_mat_1"] = descrpt.get_dim_rot_mat_1()
         fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         typeebd_param = jdata["model"]["type_embedding"]
         typeebd = TypeEmbedNet(
+            ntypes=descrpt.get_ntypes(),
             neuron=typeebd_param["neuron"],
             resnet_dt=typeebd_param["resnet_dt"],
             activation_function=None,
@@ -307,10 +311,13 @@ def test_stripped_data_mixed_type(self):
         jdata["model"]["descriptor"]["ntypes"] = 2
         jdata["model"]["descriptor"]["stripped_type_embedding"] = True
         descrpt = DescrptSeAtten(**jdata["model"]["descriptor"], uniform_seed=True)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["dim_rot_mat_1"] = descrpt.get_dim_rot_mat_1()
         fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         typeebd_param = jdata["model"]["type_embedding"]
         typeebd = TypeEmbedNet(
+            ntypes=descrpt.get_ntypes(),
             neuron=typeebd_param["neuron"],
             resnet_dt=typeebd_param["resnet_dt"],
             activation_function=None,
@@ -503,10 +510,13 @@ def test_compressible_data_mixed_type(self):
         jdata["model"]["descriptor"]["stripped_type_embedding"] = True
         jdata["model"]["descriptor"]["attn_layer"] = 0
         descrpt = DescrptSeAtten(**jdata["model"]["descriptor"], uniform_seed=True)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["dim_rot_mat_1"] = descrpt.get_dim_rot_mat_1()
         fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         typeebd_param = jdata["model"]["type_embedding"]
         typeebd = TypeEmbedNet(
+            ntypes=descrpt.get_ntypes(),
             neuron=typeebd_param["neuron"],
             resnet_dt=typeebd_param["resnet_dt"],
             activation_function=None,
diff --git a/source/tests/test_data_modifier.py b/source/tests/tf/test_data_modifier.py
similarity index 95%
rename from source/tests/test_data_modifier.py
rename to source/tests/tf/test_data_modifier.py
index 368a60d68a..cf2c50b761 100644
--- a/source/tests/test_data_modifier.py
+++ b/source/tests/tf/test_data_modifier.py
@@ -2,33 +2,34 @@
 import os
 
 import numpy as np
-from common import (
-    Data,
-    j_loader,
-    tests_path,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     data_requirement,
     j_must_have,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     tf,
 )
-from deepmd.infer.data_modifier import (
+from deepmd.tf.infer.data_modifier import (
     DipoleChargeModifier,
 )
-from deepmd.train.run_options import (
+from deepmd.tf.train.run_options import (
     RunOptions,
 )
-from deepmd.train.trainer import (
+from deepmd.tf.train.trainer import (
     DPTrainer,
 )
-from deepmd.utils.data_system import (
+from deepmd.tf.utils.data_system import (
     DeepmdDataSystem,
 )
 
+from .common import (
+    Data,
+    j_loader,
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     global_default_fv_hh = 1e-2
     global_default_dw_hh = 1e-2
@@ -56,7 +57,6 @@ def _setUp(self):
             restart=None, init_model=None, log_path=None, log_level=30, mpi_log="master"
         )
         jdata = j_loader(INPUT)
-
         # init model
         model = DPTrainer(jdata, run_opt=run_opt)
         rcut = model.model.get_rcut()
diff --git a/source/tests/test_data_modifier_shuffle.py b/source/tests/tf/test_data_modifier_shuffle.py
similarity index 91%
rename from source/tests/test_data_modifier_shuffle.py
rename to source/tests/tf/test_data_modifier_shuffle.py
index 9ddbb8ee29..41086cc775 100644
--- a/source/tests/test_data_modifier_shuffle.py
+++ b/source/tests/tf/test_data_modifier_shuffle.py
@@ -4,27 +4,27 @@
 
 import numpy as np
 
-from deepmd.common import (
+from deepmd.tf.common import (
     data_requirement,
     j_must_have,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     tf,
 )
-from deepmd.infer.data_modifier import (
+from deepmd.tf.infer.data_modifier import (
     DipoleChargeModifier,
 )
-from deepmd.infer.deep_dipole import (
+from deepmd.tf.infer.deep_dipole import (
     DeepDipole,
 )
-from deepmd.train.run_options import (
+from deepmd.tf.train.run_options import (
     RunOptions,
 )
-from deepmd.train.trainer import (
+from deepmd.tf.train.trainer import (
     DPTrainer,
 )
-from deepmd.utils.data_system import (
+from deepmd.tf.utils.data_system import (
     DeepmdDataSystem,
 )
 
@@ -105,8 +105,10 @@ def _setUp_data(self):
         self.nsel = 0
         for ii in self.sel_type:
             self.nsel += np.sum(self.atom_types0 == ii)
-        self.coords0 = np.random.random([self.nframes, self.natoms * 3]) * scale
-        self.dipoles0 = np.random.random([self.nframes, self.nsel * 3])
+        self.coords0 = (
+            np.random.default_rng().random([self.nframes, self.natoms * 3]) * scale
+        )
+        self.dipoles0 = np.random.default_rng().random([self.nframes, self.nsel * 3])
         self.box0 = np.reshape(np.eye(3) * scale, [-1, 9])
         self.box0 = np.tile(self.box0, [self.nframes, 1])
         self._write_sys_data(
@@ -125,6 +127,8 @@ def _setUp_data(self):
         self.coords1 = np.reshape(self.coords1, [self.nframes, self.natoms * 3])
         self.dipoles1 = self.dipoles0[:, self.sel_idx_map]
         self.box1 = self.box0
+        self.sel_mask0 = np.isin(self.atom_types0, self.sel_type)
+        self.sel_mask1 = np.isin(self.atom_types1, self.sel_type)
 
     def _write_sys_data(self, dirname, atom_types, coords, dipoles, box):
         os.makedirs(dirname, exist_ok=True)
@@ -183,8 +187,8 @@ def _setUp_jdata(self):
     def test_z_dipole(self):
         dd = DeepDipole(os.path.join(modifier_datapath, "dipole.pb"))
 
-        dv0 = dd.eval(self.coords0, self.box0, self.atom_types0)
-        dv1 = dd.eval(self.coords1, self.box1, self.atom_types1)
+        dv0 = dd.eval(self.coords0, self.box0, self.atom_types0)[:, self.sel_mask0]
+        dv1 = dd.eval(self.coords1, self.box1, self.atom_types1)[:, self.sel_mask1]
 
         dv01 = dv0.reshape([self.nframes, -1, 3])
         dv01 = dv01[:, self.sel_idx_map, :]
diff --git a/source/tests/test_data_requirement.py b/source/tests/tf/test_data_requirement.py
similarity index 83%
rename from source/tests/test_data_requirement.py
rename to source/tests/tf/test_data_requirement.py
index 956cee8ccb..e825bc3f92 100644
--- a/source/tests/test_data_requirement.py
+++ b/source/tests/tf/test_data_requirement.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import unittest
 
-from deepmd.common import (
+from deepmd.tf.common import (
     add_data_requirement,
     data_requirement,
 )
@@ -16,3 +16,4 @@ def test_add(self):
         self.assertEqual(data_requirement["test"]["high_prec"], False)
         self.assertEqual(data_requirement["test"]["repeat"], 1)
         self.assertEqual(data_requirement["test"]["default"], 0.0)
+        self.assertEqual(data_requirement["test"]["output_natoms_for_type_sel"], False)
diff --git a/source/tests/test_deepdipole.py b/source/tests/tf/test_deepdipole.py
similarity index 96%
rename from source/tests/test_deepdipole.py
rename to source/tests/tf/test_deepdipole.py
index 1d06b5fe92..1e2f6dd45a 100644
--- a/source/tests/test_deepdipole.py
+++ b/source/tests/tf/test_deepdipole.py
@@ -4,24 +4,25 @@
 
 import ase.neighborlist
 import numpy as np
-from common import (
-    finite_difference,
-    strerch_box,
-    tests_path,
-    tf,
-)
 from packaging.version import parse as parse_version
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepDipole,
 )
-from deepmd.utils.convert import (
+from deepmd.tf.utils.convert import (
     convert_pbtxt_to_pb,
 )
 
+from .common import (
+    finite_difference,
+    infer_path,
+    strerch_box,
+    tf,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -32,7 +33,7 @@ class TestDeepDipolePBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deepdipole.pbtxt")), "deepdipole.pb"
+            str(infer_path / os.path.join("deepdipole.pbtxt")), "deepdipole.pb"
         )
         cls.dp = DeepDipole("deepdipole.pb")
 
@@ -71,6 +72,7 @@ def setUp(self):
                 1.667785136187720063e00,
             ]
         )
+        self.sel_mask = np.isin(self.atype, self.dp.get_sel_type())
 
     @classmethod
     def tearDownClass(cls):
@@ -84,7 +86,7 @@ def test_attrs(self):
         self.assertEqual(self.dp.get_sel_type(), [0])
 
     def test_1frame_atm(self):
-        dd = self.dp.eval(self.coords, self.box, self.atype)
+        dd = self.dp.eval(self.coords, self.box, self.atype)[:, self.sel_mask]
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
@@ -96,7 +98,7 @@ def test_1frame_atm(self):
     def test_2frame_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
         box2 = np.concatenate((self.box, self.box))
-        dd = self.dp.eval(coords2, box2, self.atype)
+        dd = self.dp.eval(coords2, box2, self.atype)[:, self.sel_mask]
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
@@ -111,7 +113,7 @@ class TestDeepDipoleNoPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deepdipole.pbtxt")), "deepdipole.pb"
+            str(infer_path / os.path.join("deepdipole.pbtxt")), "deepdipole.pb"
         )
         cls.dp = DeepDipole("deepdipole.pb")
 
@@ -150,6 +152,7 @@ def setUp(self):
                 1.667798310054391e00,
             ]
         )
+        self.sel_mask = np.isin(self.atype, self.dp.get_sel_type())
 
     @classmethod
     def tearDownClass(cls):
@@ -157,7 +160,7 @@ def tearDownClass(cls):
         cls.dp = None
 
     def test_1frame_atm(self):
-        dd = self.dp.eval(self.coords, None, self.atype)
+        dd = self.dp.eval(self.coords, None, self.atype)[:, self.sel_mask]
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
@@ -167,7 +170,7 @@ def test_1frame_atm(self):
         np.testing.assert_almost_equal(dd.ravel(), self.expected_d, default_places)
 
     def test_1frame_atm_large_box(self):
-        dd = self.dp.eval(self.coords, self.box, self.atype)
+        dd = self.dp.eval(self.coords, self.box, self.atype)[:, self.sel_mask]
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
@@ -185,7 +188,7 @@ class TestDeepDipoleNewPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deepdipole_new.pbtxt")),
+            str(infer_path / os.path.join("deepdipole_new.pbtxt")),
             "deepdipole_new.pb",
         )
         cls.dp = DeepDipole("deepdipole_new.pb")
@@ -454,6 +457,7 @@ def setUp(self):
         self.expected_gv = (
             self.expected_v.reshape(1, self.nout, 6, 9).sum(-2).reshape(-1)
         )
+        self.sel_mask = np.isin(self.atype, self.dp.get_sel_type())
 
     @classmethod
     def tearDownClass(cls):
@@ -475,7 +479,7 @@ def test_1frame_old(self):
         np.testing.assert_almost_equal(gt.ravel(), self.expected_gt, default_places)
 
     def test_1frame_old_atm(self):
-        at = self.dp.eval(self.coords, self.box, self.atype)
+        at = self.dp.eval(self.coords, self.box, self.atype)[:, self.sel_mask]
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
@@ -487,7 +491,7 @@ def test_1frame_old_atm(self):
     def test_2frame_old_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
         box2 = np.concatenate((self.box, self.box))
-        at = self.dp.eval(coords2, box2, self.atype)
+        at = self.dp.eval(coords2, box2, self.atype)[:, self.sel_mask]
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
@@ -514,6 +518,7 @@ def test_1frame_full_atm(self):
         gt, ff, vv, at, av = self.dp.eval_full(
             self.coords, self.box, self.atype, atomic=True
         )
+        at = at[:, self.sel_mask]
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
@@ -549,6 +554,7 @@ def test_1frame_full_atm_shuffle(self):
             self.atype[i_sf],
             atomic=True,
         )
+        at = at[:, self.sel_mask[i_sf]]
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
@@ -616,6 +622,7 @@ def test_2frame_full_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
         box2 = np.concatenate((self.box, self.box))
         gt, ff, vv, at, av = self.dp.eval_full(coords2, box2, self.atype, atomic=True)
+        at = at[:, self.sel_mask]
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
@@ -656,7 +663,7 @@ class TestDeepDipoleFakePBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deepdipole_fake.pbtxt")),
+            str(infer_path / os.path.join("deepdipole_fake.pbtxt")),
             "deepdipole_fake.pb",
         )
         cls.dp = DeepDipole("deepdipole_fake.pb")
@@ -948,6 +955,7 @@ def setUp(self):
         )
         fake_target = fake_target - 13 * np.rint(fake_target / 13)
         self.target_t = fake_target.reshape(-1)
+        self.sel_mask = np.isin(self.atype, self.dp.get_sel_type())
 
     @classmethod
     def tearDownClass(cls):
@@ -965,6 +973,7 @@ def test_1frame_full_atm(self):
         gt, ff, vv, at, av = self.dp.eval_full(
             self.coords, self.box, self.atype, atomic=True
         )
+        at = at[:, self.sel_mask]
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
@@ -1000,6 +1009,7 @@ def test_1frame_full_atm_shuffle(self):
             self.atype[i_sf],
             atomic=True,
         )
+        at = at[:, self.sel_mask[i_sf]]
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
@@ -1042,7 +1052,7 @@ class TestDeepDipoleNewPBCNeighborList(TestDeepDipoleNewPBC):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deepdipole_new.pbtxt")),
+            str(infer_path / os.path.join("deepdipole_new.pbtxt")),
             "deepdipole_new.pb",
         )
         cls.dp = DeepDipole(
diff --git a/source/tests/test_deepdos.py b/source/tests/tf/test_deepdos.py
similarity index 99%
rename from source/tests/test_deepdos.py
rename to source/tests/tf/test_deepdos.py
index c5e100f80e..d94c2c3f2d 100644
--- a/source/tests/test_deepdos.py
+++ b/source/tests/tf/test_deepdos.py
@@ -3,20 +3,21 @@
 import unittest
 
 import numpy as np
-from common import (
-    tests_path,
-)
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepDOS,
 )
-from deepmd.utils.convert import (
+from deepmd.tf.utils.convert import (
     convert_pbtxt_to_pb,
 )
 
+from .common import (
+    infer_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -27,7 +28,7 @@ class TestDeepDOS(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deepdos.pbtxt")), "deepdos.pb"
+            str(infer_path / os.path.join("deepdos.pbtxt")), "deepdos.pb"
         )
         cls.dp = DeepDOS("deepdos.pb")
 
diff --git a/source/tests/test_deepmd_data.py b/source/tests/tf/test_deepmd_data.py
similarity index 79%
rename from source/tests/test_deepmd_data.py
rename to source/tests/tf/test_deepmd_data.py
index 92d89665b1..94e1f4c571 100644
--- a/source/tests/test_deepmd_data.py
+++ b/source/tests/tf/test_deepmd_data.py
@@ -5,17 +5,18 @@
 import unittest
 
 import numpy as np
-from common import (
-    tests_path,
-)
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
 )
-from deepmd.utils.data import (
+from deepmd.tf.utils.data import (
     DeepmdData,
 )
 
+from .common import (
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     places = 6
 else:
@@ -36,13 +37,13 @@ def setUp(self):
         self.natoms = 6
         # coord
         path = os.path.join(self.data_name, "set.foo", "coord.npy")
-        self.coord = np.random.random([self.nframes, self.natoms, 3])
+        self.coord = np.random.default_rng().random([self.nframes, self.natoms, 3])
         np.save(path, np.reshape(self.coord, [self.nframes, -1]))
         self.coord = self.coord[:, [0, 3, 1, 2, 4, 5], :]
         self.coord = self.coord.reshape([self.nframes, -1])
         # box
         path = os.path.join(self.data_name, "set.foo", "box.npy")
-        self.box = np.random.random([self.nframes, 9])
+        self.box = np.random.default_rng().random([self.nframes, 9])
         np.save(path, self.box)
         # value
         path = os.path.join(self.data_name, "set.foo", "value_1.npy")
@@ -82,6 +83,7 @@ def setUp(self):
         os.makedirs(os.path.join(self.data_name, "set.foo"), exist_ok=True)
         os.makedirs(os.path.join(self.data_name, "set.bar"), exist_ok=True)
         os.makedirs(os.path.join(self.data_name, "set.tar"), exist_ok=True)
+        os.makedirs(os.path.join(self.data_name, "set.foo"), exist_ok=True)
         np.savetxt(os.path.join(self.data_name, "type.raw"), np.array([1, 0]), fmt="%d")
         np.savetxt(
             os.path.join(self.data_name, "type_map.raw"),
@@ -92,52 +94,64 @@ def setUp(self):
         self.natoms = 2
         # coord
         path = os.path.join(self.data_name, "set.foo", "coord.npy")
-        self.coord = np.random.random([self.nframes, self.natoms, 3])
+        self.coord = np.random.default_rng().random([self.nframes, self.natoms, 3])
         np.save(path, np.reshape(self.coord, [self.nframes, -1]))
         self.coord = self.coord[:, [1, 0], :]
         self.coord = self.coord.reshape([self.nframes, -1])
         # coord bar
         path = os.path.join(self.data_name, "set.bar", "coord.npy")
-        self.coord_bar = np.random.random([self.nframes, 3 * self.natoms])
+        self.coord_bar = np.random.default_rng().random([self.nframes, 3 * self.natoms])
         np.save(path, self.coord_bar)
         self.coord_bar = self.coord_bar.reshape([self.nframes, self.natoms, 3])
         self.coord_bar = self.coord_bar[:, [1, 0], :]
         self.coord_bar = self.coord_bar.reshape([self.nframes, -1])
         # coord tar
         path = os.path.join(self.data_name, "set.tar", "coord.npy")
-        self.coord_tar = np.random.random([2, 3 * self.natoms])
+        self.coord_tar = np.random.default_rng().random([2, 3 * self.natoms])
         np.save(path, self.coord_tar)
         self.coord_tar = self.coord_tar.reshape([2, self.natoms, 3])
         self.coord_tar = self.coord_tar[:, [1, 0], :]
         self.coord_tar = self.coord_tar.reshape([2, -1])
         # box
         path = os.path.join(self.data_name, "set.foo", "box.npy")
-        self.box = np.random.random([self.nframes, 9])
+        self.box = np.random.default_rng().random([self.nframes, 9])
         np.save(path, self.box)
         # box bar
         path = os.path.join(self.data_name, "set.bar", "box.npy")
-        self.box_bar = np.random.random([self.nframes, 9])
+        self.box_bar = np.random.default_rng().random([self.nframes, 9])
         np.save(path, self.box_bar)
         # box tar
         path = os.path.join(self.data_name, "set.tar", "box.npy")
-        self.box_tar = np.random.random([2, 9])
+        self.box_tar = np.random.default_rng().random([2, 9])
         np.save(path, self.box_tar)
         # t a
         path = os.path.join(self.data_name, "set.foo", "test_atomic.npy")
-        self.test_atomic = np.random.random([self.nframes, self.natoms, 7])
+        self.test_atomic = np.random.default_rng().random(
+            [self.nframes, self.natoms, 7]
+        )
         self.redu_atomic = np.sum(self.test_atomic, axis=1)
         np.save(path, np.reshape(self.test_atomic, [self.nframes, -1]))
         self.test_atomic = self.test_atomic[:, [1, 0], :]
         self.test_atomic = self.test_atomic.reshape([self.nframes, -1])
         # t f
         path = os.path.join(self.data_name, "set.foo", "test_frame.npy")
-        self.test_frame = np.random.random([self.nframes, 5])
+        self.test_frame = np.random.default_rng().random([self.nframes, 5])
         np.save(path, self.test_frame)
         path = os.path.join(self.data_name, "set.bar", "test_frame.npy")
-        self.test_frame_bar = np.random.random([self.nframes, 5])
+        self.test_frame_bar = np.random.default_rng().random([self.nframes, 5])
         np.save(path, self.test_frame_bar)
         # t n
         self.test_null = np.zeros([self.nframes, 2 * self.natoms])
+        # tensor shape
+        path = os.path.join(self.data_name, "set.foo", "tensor_natoms.npy")
+        self.tensor_natoms = np.random.default_rng().random(
+            [self.nframes, self.natoms, 6]
+        )
+        self.tensor_natoms[:, 0, :] = 0
+        np.save(path, self.tensor_natoms)
+        path = os.path.join(self.data_name, "set.foo", "tensor_nsel.npy")
+        self.tensor_nsel = self.tensor_natoms[:, 1, :]
+        np.save(path, self.tensor_nsel)
 
     def tearDown(self):
         shutil.rmtree(self.data_name)
@@ -289,6 +303,58 @@ def test_get_nbatch(self):
         nb = dd.get_numb_batch(2, 0)
         self.assertEqual(nb, 2)
 
+    def test_get_tensor(self):
+        dd_natoms = (
+            DeepmdData(self.data_name)
+            .add(
+                "tensor_nsel",
+                6,
+                atomic=True,
+                must=True,
+                type_sel=[0],
+                output_natoms_for_type_sel=True,
+            )
+            .add(
+                "tensor_natoms",
+                6,
+                atomic=True,
+                must=True,
+                type_sel=[0],
+                output_natoms_for_type_sel=True,
+            )
+        )
+        data_natoms = dd_natoms._load_set(os.path.join(self.data_name, "set.foo"))
+        dd_nsel = (
+            DeepmdData(self.data_name)
+            .add(
+                "tensor_nsel",
+                6,
+                atomic=True,
+                must=True,
+                type_sel=[0],
+                output_natoms_for_type_sel=False,
+            )
+            .add(
+                "tensor_natoms",
+                6,
+                atomic=True,
+                must=True,
+                type_sel=[0],
+                output_natoms_for_type_sel=False,
+            )
+        )
+        data_nsel = dd_nsel._load_set(os.path.join(self.data_name, "set.foo"))
+        np.testing.assert_allclose(
+            data_natoms["tensor_natoms"], data_natoms["tensor_nsel"]
+        )
+        np.testing.assert_allclose(data_nsel["tensor_natoms"], data_nsel["tensor_nsel"])
+        np.testing.assert_allclose(
+            data_natoms["tensor_natoms"].reshape(self.nframes, self.natoms, -1)[
+                :, 0, :
+            ],
+            data_nsel["tensor_natoms"],
+        )
+
     def _comp_np_mat2(self, first, second):
         np.testing.assert_almost_equal(first, second, places)
 
diff --git a/source/tests/test_deepmd_data_sys.py b/source/tests/tf/test_deepmd_data_sys.py
similarity index 97%
rename from source/tests/test_deepmd_data_sys.py
rename to source/tests/tf/test_deepmd_data_sys.py
index abfa7d7e48..84b5d39a05 100644
--- a/source/tests/test_deepmd_data_sys.py
+++ b/source/tests/tf/test_deepmd_data_sys.py
@@ -5,13 +5,13 @@
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
 )
-from deepmd.utils import (
+from deepmd.tf.utils import (
     random,
 )
-from deepmd.utils.data_system import (
+from deepmd.tf.utils.data_system import (
     DeepmdDataSystem,
     prob_sys_size_ext,
 )
@@ -40,13 +40,15 @@ def setUp(self):
                 set_name = os.path.join(sys_name, "set.%03d" % jj)
                 os.makedirs(set_name, exist_ok=True)
                 path = os.path.join(set_name, "coord.npy")
-                val = np.random.random([self.nframes[ii] + jj, self.natoms[ii] * 3])
+                val = np.random.default_rng().random(
+                    [self.nframes[ii] + jj, self.natoms[ii] * 3]
+                )
                 np.save(path, val)
                 path = os.path.join(set_name, "box.npy")
-                val = np.random.random([self.nframes[ii] + jj, 9]) * 10
+                val = np.random.default_rng().random([self.nframes[ii] + jj, 9]) * 10
                 np.save(path, val)
                 path = os.path.join(set_name, "test.npy")
-                val = np.random.random(
+                val = np.random.default_rng().random(
                     [self.nframes[ii] + jj, self.natoms[ii] * self.test_ndof]
                 )
                 np.save(path, val)
diff --git a/source/tests/test_deeppolar.py b/source/tests/tf/test_deeppolar.py
similarity index 97%
rename from source/tests/test_deeppolar.py
rename to source/tests/tf/test_deeppolar.py
index 9627851de4..b4f3fe7d0d 100644
--- a/source/tests/test_deeppolar.py
+++ b/source/tests/tf/test_deeppolar.py
@@ -4,22 +4,23 @@
 
 import ase.neighborlist
 import numpy as np
-from common import (
-    tests_path,
-    tf,
-)
 from packaging.version import parse as parse_version
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPolar,
 )
-from deepmd.utils.convert import (
+from deepmd.tf.utils.convert import (
     convert_pbtxt_to_pb,
 )
 
+from .common import (
+    infer_path,
+    tf,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -30,7 +31,7 @@ class TestDeepPolarPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deeppolar.pbtxt")), "deeppolar.pb"
+            str(infer_path / os.path.join("deeppolar.pbtxt")), "deeppolar.pb"
         )
         cls.dp = DeepPolar("deeppolar.pb")
 
@@ -81,6 +82,7 @@ def setUp(self):
                 4.448255365635306879e-01,
             ]
         )
+        self.sel_mask = np.isin(self.atype, self.dp.get_sel_type())
 
     @classmethod
     def tearDownClass(cls):
@@ -94,7 +96,7 @@ def test_attrs(self):
         self.assertEqual(self.dp.get_sel_type(), [0])
 
     def test_1frame_atm(self):
-        dd = self.dp.eval(self.coords, self.box, self.atype)
+        dd = self.dp.eval(self.coords, self.box, self.atype)[:, self.sel_mask]
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
@@ -106,7 +108,7 @@ def test_1frame_atm(self):
     def test_2frame_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
         box2 = np.concatenate((self.box, self.box))
-        dd = self.dp.eval(coords2, box2, self.atype)
+        dd = self.dp.eval(coords2, box2, self.atype)[:, self.sel_mask]
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
@@ -121,7 +123,7 @@ class TestDeepPolarNoPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deeppolar.pbtxt")), "deeppolar.pb"
+            str(infer_path / os.path.join("deeppolar.pbtxt")), "deeppolar.pb"
         )
         cls.dp = DeepPolar("deeppolar.pb")
 
@@ -172,6 +174,7 @@ def setUp(self):
                 4.382376148484938e-01,
             ]
         )
+        self.sel_mask = np.isin(self.atype, self.dp.get_sel_type())
 
     @classmethod
     def tearDownClass(cls):
@@ -179,7 +182,7 @@ def tearDownClass(cls):
         cls.dp = None
 
     def test_1frame_atm(self):
-        dd = self.dp.eval(self.coords, None, self.atype)
+        dd = self.dp.eval(self.coords, None, self.atype)[:, self.sel_mask]
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
@@ -189,7 +192,7 @@ def test_1frame_atm(self):
         np.testing.assert_almost_equal(dd.ravel(), self.expected_d, default_places)
 
     def test_1frame_atm_large_box(self):
-        dd = self.dp.eval(self.coords, self.box, self.atype)
+        dd = self.dp.eval(self.coords, self.box, self.atype)[:, self.sel_mask]
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
@@ -207,7 +210,7 @@ class TestDeepPolarNewPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deeppolar_new.pbtxt")),
+            str(infer_path / os.path.join("deeppolar_new.pbtxt")),
             "deeppolar_new.pb",
         )
         cls.dp = DeepPolar("deeppolar_new.pb")
@@ -920,6 +923,7 @@ def setUp(self):
         self.expected_gv = (
             self.expected_v.reshape(1, self.nout, 6, 9).sum(-2).reshape(-1)
         )
+        self.sel_mask = np.isin(self.atype, self.dp.get_sel_type())
 
     @classmethod
     def tearDownClass(cls):
@@ -941,7 +945,7 @@ def test_1frame_old(self):
         np.testing.assert_almost_equal(gt.ravel(), self.expected_gt, default_places)
 
     def test_1frame_old_atm(self):
-        at = self.dp.eval(self.coords, self.box, self.atype)
+        at = self.dp.eval(self.coords, self.box, self.atype)[:, self.sel_mask]
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
@@ -953,7 +957,7 @@ def test_1frame_old_atm(self):
     def test_2frame_old_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
         box2 = np.concatenate((self.box, self.box))
-        at = self.dp.eval(coords2, box2, self.atype)
+        at = self.dp.eval(coords2, box2, self.atype)[:, self.sel_mask]
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
@@ -980,6 +984,7 @@ def test_1frame_full_atm(self):
         gt, ff, vv, at, av = self.dp.eval_full(
             self.coords, self.box, self.atype, atomic=True
         )
+        at = at[:, self.sel_mask]
 
         # check shape of the returns
         nframes = 1
@@ -1016,6 +1021,7 @@ def test_1frame_full_atm_shuffle(self):
             self.atype[i_sf],
             atomic=True,
         )
+        at = at[:, self.sel_mask[i_sf]]
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
@@ -1053,6 +1059,7 @@ def test_2frame_full_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
         box2 = np.concatenate((self.box, self.box))
         gt, ff, vv, at, av = self.dp.eval_full(coords2, box2, self.atype, atomic=True)
+        at = at[:, self.sel_mask]
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
@@ -1093,7 +1100,7 @@ class TestDeepPolarNewPBCNeighborList(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deeppolar_new.pbtxt")),
+            str(infer_path / os.path.join("deeppolar_new.pbtxt")),
             "deeppolar_new.pb",
         )
         cls.dp = DeepPolar(
diff --git a/source/tests/test_deeppot_a.py b/source/tests/tf/test_deeppot_a.py
similarity index 90%
rename from source/tests/test_deeppot_a.py
rename to source/tests/tf/test_deeppot_a.py
index c229b4302c..f40b57c213 100644
--- a/source/tests/test_deeppot_a.py
+++ b/source/tests/tf/test_deeppot_a.py
@@ -4,22 +4,19 @@
 import unittest
 
 import ase.neighborlist
+import dpdata
 import numpy as np
-from common import (
-    run_dp,
-    tests_path,
-)
 from packaging.version import parse as parse_version
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     MODEL_VERSION,
     tf,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPot,
 )
-from deepmd.utils.convert import (
+from deepmd.tf.utils.convert import (
     convert_dp10_to_dp11,
     convert_dp012_to_dp10,
     convert_dp12_to_dp13,
@@ -29,6 +26,12 @@
     detect_model_version,
 )
 
+from .common import (
+    infer_path,
+    run_dp,
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -37,7 +40,7 @@
 
 class TestModelMajorCompatability(unittest.TestCase):
     def setUp(self):
-        model_file = str(tests_path / os.path.join("infer", "deeppot.pbtxt"))
+        model_file = str(infer_path / os.path.join("deeppot.pbtxt"))
         with open(model_file) as fp:
             # data = fp.read().replace('\n', '')
             data = fp.read().split("\n")
@@ -67,7 +70,7 @@ def test(self):
 
 class TestModelMinorCompatability(unittest.TestCase):
     def setUp(self):
-        model_file = str(tests_path / os.path.join("infer", "deeppot.pbtxt"))
+        model_file = str(infer_path / os.path.join("deeppot.pbtxt"))
         with open(model_file) as fp:
             # data = fp.read().replace('\n', '')
             data = fp.read().split("\n")
@@ -99,7 +102,7 @@ class TestDeepPotAPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deeppot.pbtxt")), "deeppot.pb"
+            str(infer_path / os.path.join("deeppot.pbtxt")), "deeppot.pb"
         )
         cls.dp = DeepPot("deeppot.pb")
 
@@ -277,7 +280,7 @@ def test_1frame_atm(self):
 
     def test_descriptor(self):
         descpt = self.dp.eval_descriptor(self.coords, self.box, self.atype)
-        expected_descpt = np.loadtxt(str(tests_path / "infer" / "deeppot_descpt.txt"))
+        expected_descpt = np.loadtxt(str(infer_path / "deeppot_descpt.txt"))
         np.testing.assert_almost_equal(descpt.ravel(), expected_descpt.ravel())
 
     def test_2frame_atm(self):
@@ -326,7 +329,7 @@ class TestDeepPotANoPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deeppot.pbtxt")), "deeppot.pb"
+            str(infer_path / os.path.join("deeppot.pbtxt")), "deeppot.pb"
         )
         cls.dp = DeepPot("deeppot.pb")
 
@@ -518,12 +521,38 @@ def test_2frame_atm(self):
         expected_sv = np.sum(expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
+    def test_dpdata_driver(self):
+        nframes = 1
+        system = dpdata.System(
+            data={
+                "coords": self.coords.reshape((nframes, -1, 3)),
+                "cells": np.zeros((nframes, 3, 3)),
+                "atom_types": np.array(self.atype),
+                "orig": np.zeros((3,)),
+                "atom_names": ["O", "H"],
+                "atom_numbs": [2, 4],
+                "nopbc": True,
+            }
+        )
+        system_predicted = system.predict(self.dp, driver="dp")
+        np.testing.assert_almost_equal(
+            system_predicted["forces"].ravel(), self.expected_f.ravel(), default_places
+        )
+        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
+        np.testing.assert_almost_equal(
+            system_predicted["energies"].ravel(), expected_se.ravel(), default_places
+        )
+        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis=1)
+        np.testing.assert_almost_equal(
+            system_predicted["virials"].ravel(), expected_sv.ravel(), default_places
+        )
+
 
 class TestDeepPotALargeBoxNoPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deeppot.pbtxt")), "deeppot.pb"
+            str(infer_path / os.path.join("deeppot.pbtxt")), "deeppot.pb"
         )
         cls.dp = DeepPot("deeppot.pb")
 
@@ -697,7 +726,7 @@ def test_ase(self):
             Atoms,
         )
 
-        from deepmd.calculator import (
+        from deepmd.tf.calculator import (
             DP,
         )
 
@@ -716,6 +745,31 @@ def test_ase(self):
         expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
 
+    def test_dpdata_driver(self):
+        nframes = 1
+        system = dpdata.System(
+            data={
+                "coords": self.coords.reshape((nframes, -1, 3)),
+                "cells": self.box.reshape((nframes, 3, 3)),
+                "atom_types": np.array(self.atype),
+                "orig": np.zeros((3,)),
+                "atom_names": ["O", "H"],
+                "atom_numbs": [2, 4],
+            }
+        )
+        system_predicted = system.predict("deeppot.pb", driver="dp")
+        np.testing.assert_almost_equal(
+            system_predicted["forces"].ravel(), self.expected_f.ravel(), default_places
+        )
+        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
+        np.testing.assert_almost_equal(
+            system_predicted["energies"].ravel(), expected_se.ravel(), default_places
+        )
+        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis=1)
+        np.testing.assert_almost_equal(
+            system_predicted["virials"].ravel(), expected_sv.ravel(), default_places
+        )
+
 
 class TestModelConvert(unittest.TestCase):
     def setUp(self):
@@ -747,20 +801,20 @@ def setUp(self):
     def test_convert_012(self):
         old_model = "deeppot.pb"
         new_model = "deeppot-new.pb"
-        convert_pbtxt_to_pb(str(tests_path / "infer" / "sea_012.pbtxt"), old_model)
+        convert_pbtxt_to_pb(str(infer_path / "sea_012.pbtxt"), old_model)
         run_dp(f"dp convert-from 0.12 -i {old_model} -o {new_model}")
         dp = DeepPot(new_model)
-        _, _, _, _, _ = dp.eval(self.coords, self.box, self.atype, atomic=True)
+        _ = dp.eval(self.coords, self.box, self.atype, atomic=True)
         os.remove(old_model)
         os.remove(new_model)
 
     def test_convert(self):
         old_model = "deeppot.pb"
         new_model = "deeppot-new.pb"
-        convert_pbtxt_to_pb(str(tests_path / "infer" / "sea_012.pbtxt"), old_model)
+        convert_pbtxt_to_pb(str(infer_path / "sea_012.pbtxt"), old_model)
         run_dp(f"dp convert-from -i {old_model} -o {new_model}")
         dp = DeepPot(new_model)
-        _, _, _, _, _ = dp.eval(self.coords, self.box, self.atype, atomic=True)
+        _ = dp.eval(self.coords, self.box, self.atype, atomic=True)
         os.remove(old_model)
         os.remove(new_model)
 
@@ -768,11 +822,11 @@ def test_detect(self):
         old_model = "deeppot.pb"
         new_model_txt = "deeppot_new.pbtxt"
         new_model_pb = "deeppot_new.pb"
-        convert_pbtxt_to_pb(str(tests_path / "infer" / "sea_012.pbtxt"), old_model)
+        convert_pbtxt_to_pb(str(infer_path / "sea_012.pbtxt"), old_model)
         version = detect_model_version(old_model)
         self.assertEqual(version, parse_version("0.12"))
         os.remove(old_model)
-        shutil.copyfile(str(tests_path / "infer" / "sea_012.pbtxt"), new_model_txt)
+        shutil.copyfile(str(infer_path / "sea_012.pbtxt"), new_model_txt)
         convert_dp012_to_dp10(new_model_txt)
         convert_pbtxt_to_pb(new_model_txt, new_model_pb)
         version = detect_model_version(new_model_pb)
@@ -805,7 +859,7 @@ class TestTypeEmbed(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "se_e2_a_tebd.pbtxt")),
+            str(infer_path / os.path.join("se_e2_a_tebd.pbtxt")),
             "se_e2_a_tebd.pb",
         )
         cls.dp = DeepPot("se_e2_a_tebd.pb")
@@ -840,17 +894,9 @@ def test_eval_typeebd(self):
         np.testing.assert_almost_equal(eval_typeebd, expected_typeebd, default_places)
 
 
-class TestFparamAparam(unittest.TestCase):
+class FparamAparamCommonTest:
     """Test fparam and aparam."""
 
-    @classmethod
-    def setUpClass(cls):
-        convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "fparam_aparam.pbtxt")),
-            "fparam_aparam.pb",
-        )
-        cls.dp = DeepPot("fparam_aparam.pb")
-
     def setUp(self):
         self.coords = np.array(
             [
@@ -968,15 +1014,11 @@ def setUp(self):
                 2.875323131744185121e-02,
             ]
         )
-
-    @classmethod
-    def tearDownClass(cls):
-        os.remove("fparam_aparam.pb")
-        cls.dp = None
+        self.places = default_places
 
     def test_attrs(self):
         self.assertEqual(self.dp.get_ntypes(), 1)
-        self.assertAlmostEqual(self.dp.get_rcut(), 6.0, places=default_places)
+        self.assertAlmostEqual(self.dp.get_rcut(), 6.0, places=self.places)
         self.assertEqual(self.dp.get_dim_fparam(), 1)
         self.assertEqual(self.dp.get_dim_aparam(), 1)
 
@@ -996,13 +1038,11 @@ def test_1frame(self):
         self.assertEqual(ff.shape, (nframes, natoms, 3))
         self.assertEqual(vv.shape, (nframes, 9))
         # check values
-        np.testing.assert_almost_equal(
-            ff.ravel(), self.expected_f.ravel(), default_places
-        )
+        np.testing.assert_almost_equal(ff.ravel(), self.expected_f.ravel(), self.places)
         expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
-        np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
+        np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), self.places)
         expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis=1)
-        np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
+        np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), self.places)
 
     def test_1frame_atm(self):
         ee, ff, vv, ae, av = self.dp.eval(
@@ -1022,19 +1062,13 @@ def test_1frame_atm(self):
         self.assertEqual(ae.shape, (nframes, natoms, 1))
         self.assertEqual(av.shape, (nframes, natoms, 9))
         # check values
-        np.testing.assert_almost_equal(
-            ff.ravel(), self.expected_f.ravel(), default_places
-        )
-        np.testing.assert_almost_equal(
-            ae.ravel(), self.expected_e.ravel(), default_places
-        )
-        np.testing.assert_almost_equal(
-            av.ravel(), self.expected_v.ravel(), default_places
-        )
+        np.testing.assert_almost_equal(ff.ravel(), self.expected_f.ravel(), self.places)
+        np.testing.assert_almost_equal(ae.ravel(), self.expected_e.ravel(), self.places)
+        np.testing.assert_almost_equal(av.ravel(), self.expected_v.ravel(), self.places)
         expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
-        np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
+        np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), self.places)
         expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis=1)
-        np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
+        np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), self.places)
 
     def test_2frame_atm_single_param(self):
         coords2 = np.concatenate((self.coords, self.coords))
@@ -1059,13 +1093,13 @@ def test_2frame_atm_single_param(self):
         expected_f = np.concatenate((self.expected_f, self.expected_f), axis=0)
         expected_e = np.concatenate((self.expected_e, self.expected_e), axis=0)
         expected_v = np.concatenate((self.expected_v, self.expected_v), axis=0)
-        np.testing.assert_almost_equal(ff.ravel(), expected_f.ravel(), default_places)
-        np.testing.assert_almost_equal(ae.ravel(), expected_e.ravel(), default_places)
-        np.testing.assert_almost_equal(av.ravel(), expected_v.ravel(), default_places)
+        np.testing.assert_almost_equal(ff.ravel(), expected_f.ravel(), self.places)
+        np.testing.assert_almost_equal(ae.ravel(), expected_e.ravel(), self.places)
+        np.testing.assert_almost_equal(av.ravel(), expected_v.ravel(), self.places)
         expected_se = np.sum(expected_e.reshape([nframes, -1]), axis=1)
-        np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
+        np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), self.places)
         expected_sv = np.sum(expected_v.reshape([nframes, -1, 9]), axis=1)
-        np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
+        np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), self.places)
 
     def test_2frame_atm_all_param(self):
         coords2 = np.concatenate((self.coords, self.coords))
@@ -1090,20 +1124,35 @@ def test_2frame_atm_all_param(self):
         expected_f = np.concatenate((self.expected_f, self.expected_f), axis=0)
         expected_e = np.concatenate((self.expected_e, self.expected_e), axis=0)
         expected_v = np.concatenate((self.expected_v, self.expected_v), axis=0)
-        np.testing.assert_almost_equal(ff.ravel(), expected_f.ravel(), default_places)
-        np.testing.assert_almost_equal(ae.ravel(), expected_e.ravel(), default_places)
-        np.testing.assert_almost_equal(av.ravel(), expected_v.ravel(), default_places)
+        np.testing.assert_almost_equal(ff.ravel(), expected_f.ravel(), self.places)
+        np.testing.assert_almost_equal(ae.ravel(), expected_e.ravel(), self.places)
+        np.testing.assert_almost_equal(av.ravel(), expected_v.ravel(), self.places)
         expected_se = np.sum(expected_e.reshape([nframes, -1]), axis=1)
-        np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
+        np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), self.places)
         expected_sv = np.sum(expected_v.reshape([nframes, -1, 9]), axis=1)
-        np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
+        np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), self.places)
+
+
+class TestFparamAparam(FparamAparamCommonTest, unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        convert_pbtxt_to_pb(
+            str(infer_path / os.path.join("fparam_aparam.pbtxt")),
+            "fparam_aparam.pb",
+        )
+        cls.dp = DeepPot("fparam_aparam.pb")
+
+    @classmethod
+    def tearDownClass(cls):
+        os.remove("fparam_aparam.pb")
+        cls.dp = None
 
 
 class TestDeepPotAPBCNeighborList(TestDeepPotAPBC):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deeppot.pbtxt")), "deeppot.pb"
+            str(infer_path / os.path.join("deeppot.pbtxt")), "deeppot.pb"
         )
         cls.dp = DeepPot(
             "deeppot.pb",
diff --git a/source/tests/test_deeppot_r.py b/source/tests/tf/test_deeppot_r.py
similarity index 98%
rename from source/tests/test_deeppot_r.py
rename to source/tests/tf/test_deeppot_r.py
index 44c6e3c167..482a8c42ee 100644
--- a/source/tests/test_deeppot_r.py
+++ b/source/tests/tf/test_deeppot_r.py
@@ -3,21 +3,22 @@
 import unittest
 
 import numpy as np
-from common import (
-    tests_path,
-)
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     tf,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPot,
 )
-from deepmd.utils.convert import (
+from deepmd.tf.utils.convert import (
     convert_pbtxt_to_pb,
 )
 
+from .common import (
+    infer_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -28,7 +29,7 @@ class TestDeepPotRPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deeppot-r.pbtxt")), "deeppot.pb"
+            str(infer_path / os.path.join("deeppot-r.pbtxt")), "deeppot.pb"
         )
         cls.dp = DeepPot("deeppot.pb")
 
@@ -239,7 +240,7 @@ class TestDeepPotRNoPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deeppot-r.pbtxt")), "deeppot.pb"
+            str(infer_path / os.path.join("deeppot-r.pbtxt")), "deeppot.pb"
         )
         cls.dp = DeepPot("deeppot.pb")
 
@@ -453,7 +454,7 @@ class TestDeepPotRLargeBoxNoPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deeppot-r.pbtxt")), "deeppot.pb"
+            str(infer_path / os.path.join("deeppot-r.pbtxt")), "deeppot.pb"
         )
         cls.dp = DeepPot("deeppot.pb")
 
diff --git a/source/tests/test_deeppot_spin.py b/source/tests/tf/test_deeppot_spin.py
similarity index 97%
rename from source/tests/test_deeppot_spin.py
rename to source/tests/tf/test_deeppot_spin.py
index 9ab119a54e..d64cdf2dd6 100644
--- a/source/tests/test_deeppot_spin.py
+++ b/source/tests/tf/test_deeppot_spin.py
@@ -3,20 +3,21 @@
 import unittest
 
 import numpy as np
-from common import (
-    tests_path,
-)
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPot,
 )
-from deepmd.utils.convert import (
+from deepmd.tf.utils.convert import (
     convert_pbtxt_to_pb,
 )
 
+from .common import (
+    infer_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -27,7 +28,7 @@ class TestDeepPotAPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deepspin.pbtxt")), "deepspin.pb"
+            str(infer_path / os.path.join("deepspin.pbtxt")), "deepspin.pb"
         )
         cls.dp = DeepPot("deepspin.pb")
 
diff --git a/source/tests/test_descrpt_hybrid.py b/source/tests/tf/test_descrpt_hybrid.py
similarity index 97%
rename from source/tests/test_descrpt_hybrid.py
rename to source/tests/tf/test_descrpt_hybrid.py
index 317f6ea5a0..6aa04118da 100644
--- a/source/tests/test_descrpt_hybrid.py
+++ b/source/tests/tf/test_descrpt_hybrid.py
@@ -2,26 +2,27 @@
 import unittest
 
 import numpy as np
-from common import (
-    DataSystem,
-    gen_data,
-    j_loader,
-)
 from packaging.version import parse as parse_version
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptHybrid,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.utils.type_embed import (
+from deepmd.tf.utils.type_embed import (
     TypeEmbedNet,
 )
 
+from .common import (
+    DataSystem,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -62,6 +63,7 @@ def test_descriptor_hybrid(self):
 
         # init models
         typeebd = TypeEmbedNet(
+            ntypes=ntypes,
             neuron=typeebd_param["neuron"],
             activation_function=None,
             resnet_dt=typeebd_param["resnet_dt"],
diff --git a/source/tests/test_descrpt_nonsmth.py b/source/tests/tf/test_descrpt_nonsmth.py
similarity index 99%
rename from source/tests/test_descrpt_nonsmth.py
rename to source/tests/tf/test_descrpt_nonsmth.py
index fd3bb0b2f7..63c5f15c85 100644
--- a/source/tests/test_descrpt_nonsmth.py
+++ b/source/tests/tf/test_descrpt_nonsmth.py
@@ -2,22 +2,23 @@
 import unittest
 
 import numpy as np
-from common import (
-    Data,
-    force_dw_test,
-    force_test,
-    virial_dw_test,
-    virial_test,
-)
 
 # load grad of force module
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     op_module,
     tf,
 )
 
+from .common import (
+    Data,
+    force_dw_test,
+    force_test,
+    virial_dw_test,
+    virial_test,
+)
+
 
 class Inter:
     def setUp(self, data, comp=0, pbc=True, sess=None):
diff --git a/source/tests/test_descrpt_se_a_mask.py b/source/tests/tf/test_descrpt_se_a_mask.py
similarity index 92%
rename from source/tests/test_descrpt_se_a_mask.py
rename to source/tests/tf/test_descrpt_se_a_mask.py
index 85cd1cc2a1..b6488d88c6 100644
--- a/source/tests/test_descrpt_se_a_mask.py
+++ b/source/tests/tf/test_descrpt_se_a_mask.py
@@ -1,44 +1,44 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import os
-import pathlib
 
 import numpy as np
-from common import (
-    DataSystem,
-    j_loader,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeAMask,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPot,
 )
-from deepmd.utils.convert import (
+from deepmd.tf.utils.convert import (
     convert_pbtxt_to_pb,
 )
 
+from .common import (
+    DataSystem,
+    infer_path,
+    j_loader,
+    tests_path,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
 
-tests_path = pathlib.Path(__file__).parent.absolute()
-
 
 class TestModel(tf.test.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "dp4mask.pbtxt")),
-            str(tests_path / os.path.join("infer", "dp4mask.pb")),
+            str(infer_path / os.path.join("dp4mask.pbtxt")),
+            str(infer_path / os.path.join("dp4mask.pb")),
         )
-        cls.dp = DeepPot(str(tests_path / os.path.join("infer", "dp4mask.pb")))
+        cls.dp = DeepPot(str(infer_path / os.path.join("dp4mask.pb")))
 
     def test_dp_mask_model(self):
         dcoord = np.array(
@@ -225,6 +225,12 @@ def test_descriptor_se_a_mask(self):
         jfile = "zinc_se_a_mask.json"
         jdata = j_loader(jfile)
 
+        jdata["training"]["training_data"]["systems"] = [
+            str(tests_path / "data_dp_mask")
+        ]
+        jdata["training"]["validation_data"]["systems"] = [
+            str(tests_path / "data_dp_mask")
+        ]
         systems = j_must_have(jdata["training"]["validation_data"], "systems")
         # set_pfx = j_must_have(jdata['validation_data'], "set_prefix")
         set_pfx = "set"
diff --git a/source/tests/test_descrpt_se_a_type.py b/source/tests/tf/test_descrpt_se_a_type.py
similarity index 98%
rename from source/tests/test_descrpt_se_a_type.py
rename to source/tests/tf/test_descrpt_se_a_type.py
index aeab18f149..87c2adcca7 100644
--- a/source/tests/test_descrpt_se_a_type.py
+++ b/source/tests/tf/test_descrpt_se_a_type.py
@@ -1,24 +1,25 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
-from common import (
-    DataSystem,
-    gen_data,
-    j_loader,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.utils.type_embed import (
+from deepmd.tf.utils.type_embed import (
     TypeEmbedNet,
 )
 
+from .common import (
+    DataSystem,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -59,6 +60,7 @@ def test_descriptor_two_sides(self):
 
         # init models
         typeebd = TypeEmbedNet(
+            ntypes=ntypes,
             neuron=typeebd_param["neuron"],
             resnet_dt=typeebd_param["resnet_dt"],
             seed=typeebd_param["seed"],
@@ -223,6 +225,7 @@ def test_descriptor_one_side(self):
 
         # init models
         typeebd = TypeEmbedNet(
+            ntypes=ntypes,
             neuron=typeebd_param["neuron"],
             resnet_dt=typeebd_param["resnet_dt"],
             seed=typeebd_param["seed"],
diff --git a/source/tests/test_descrpt_se_atten.py b/source/tests/tf/test_descrpt_se_atten.py
similarity index 99%
rename from source/tests/test_descrpt_se_atten.py
rename to source/tests/tf/test_descrpt_se_atten.py
index 76df651a46..7a1bfd18f6 100644
--- a/source/tests/test_descrpt_se_atten.py
+++ b/source/tests/tf/test_descrpt_se_atten.py
@@ -3,26 +3,27 @@
 import unittest
 
 import numpy as np
-from common import (
-    DataSystem,
-    gen_data,
-    j_loader,
-)
 from packaging.version import parse as parse_version
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeAtten,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.utils.type_embed import (
+from deepmd.tf.utils.type_embed import (
     TypeEmbedNet,
 )
 
+from .common import (
+    DataSystem,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -68,6 +69,7 @@ def test_descriptor_two_sides(self):
 
         # init models
         typeebd = TypeEmbedNet(
+            ntypes=ntypes,
             neuron=typeebd_param["neuron"],
             activation_function=None,
             resnet_dt=typeebd_param["resnet_dt"],
@@ -245,6 +247,7 @@ def test_descriptor_one_side(self):
 
         # init models
         typeebd = TypeEmbedNet(
+            ntypes=ntypes,
             neuron=typeebd_param["neuron"],
             activation_function=None,
             resnet_dt=typeebd_param["resnet_dt"],
@@ -422,6 +425,7 @@ def test_stripped_type_embedding_descriptor_two_sides(self):
 
         # init models
         typeebd = TypeEmbedNet(
+            ntypes=ntypes,
             neuron=typeebd_param["neuron"],
             activation_function=None,
             resnet_dt=typeebd_param["resnet_dt"],
@@ -593,6 +597,7 @@ def test_compressible_descriptor_two_sides(self):
 
         # init models
         typeebd = TypeEmbedNet(
+            ntypes=ntypes,
             neuron=typeebd_param["neuron"],
             activation_function=None,
             resnet_dt=typeebd_param["resnet_dt"],
diff --git a/source/tests/test_descrpt_se_r.py b/source/tests/tf/test_descrpt_se_r.py
similarity index 99%
rename from source/tests/test_descrpt_se_r.py
rename to source/tests/tf/test_descrpt_se_r.py
index 779954a545..d95c8fbb21 100644
--- a/source/tests/test_descrpt_se_r.py
+++ b/source/tests/tf/test_descrpt_se_r.py
@@ -2,22 +2,23 @@
 import unittest
 
 import numpy as np
-from common import (
-    Data,
-    force_dw_test,
-    force_test,
-    virial_dw_test,
-    virial_test,
-)
 
 # load grad of force module
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     op_module,
     tf,
 )
 
+from .common import (
+    Data,
+    force_dw_test,
+    force_test,
+    virial_dw_test,
+    virial_test,
+)
+
 
 class Inter:
     def setUp(self, data, pbc=True, sess=None):
diff --git a/source/tests/test_descrpt_sea_ef.py b/source/tests/tf/test_descrpt_sea_ef.py
similarity index 99%
rename from source/tests/test_descrpt_sea_ef.py
rename to source/tests/tf/test_descrpt_sea_ef.py
index efd86854c7..e9e990a659 100644
--- a/source/tests/test_descrpt_sea_ef.py
+++ b/source/tests/tf/test_descrpt_sea_ef.py
@@ -2,22 +2,23 @@
 import unittest
 
 import numpy as np
-from common import (
-    Data,
-    force_dw_test,
-    force_test,
-    virial_dw_test,
-    virial_test,
-)
 
 # load grad of force module
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     op_module,
     tf,
 )
 
+from .common import (
+    Data,
+    force_dw_test,
+    force_test,
+    virial_dw_test,
+    virial_test,
+)
+
 
 class Inter:
     def setUp(self, data, pbc=True, sess=None):
diff --git a/source/tests/test_descrpt_sea_ef_para.py b/source/tests/tf/test_descrpt_sea_ef_para.py
similarity index 99%
rename from source/tests/test_descrpt_sea_ef_para.py
rename to source/tests/tf/test_descrpt_sea_ef_para.py
index 1a109013cb..1af6ea648a 100644
--- a/source/tests/test_descrpt_sea_ef_para.py
+++ b/source/tests/tf/test_descrpt_sea_ef_para.py
@@ -2,22 +2,23 @@
 import unittest
 
 import numpy as np
-from common import (
-    Data,
-    force_dw_test,
-    force_test,
-    virial_dw_test,
-    virial_test,
-)
 
 # load grad of force module
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     op_module,
     tf,
 )
 
+from .common import (
+    Data,
+    force_dw_test,
+    force_test,
+    virial_dw_test,
+    virial_test,
+)
+
 
 class Inter:
     def setUp(self, data, pbc=True, sess=None):
diff --git a/source/tests/test_descrpt_sea_ef_rot.py b/source/tests/tf/test_descrpt_sea_ef_rot.py
similarity index 98%
rename from source/tests/test_descrpt_sea_ef_rot.py
rename to source/tests/tf/test_descrpt_sea_ef_rot.py
index 56cdb357b0..6ebc067211 100644
--- a/source/tests/test_descrpt_sea_ef_rot.py
+++ b/source/tests/tf/test_descrpt_sea_ef_rot.py
@@ -3,11 +3,11 @@
 
 import numpy as np
 
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
     DescrptSeAEfLower,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     op_module,
@@ -97,7 +97,7 @@ def build_efv(self, dcoord, dbox, dtype, tnatoms, name, op, reuse=None):
         return energy, force, virial, atom_ener, atom_vir
 
     def make_test_data(self, nframes):
-        dcoord = np.random.random([nframes, self.natoms[0], 3])
+        dcoord = np.random.default_rng().random([nframes, self.natoms[0], 3])
         for ii in range(nframes):
             dcoord[ii, :, :] = dcoord[ii, :, :] - np.tile(
                 dcoord[ii, 0, :], [self.natoms[0], 1]
@@ -111,7 +111,7 @@ def make_test_data(self, nframes):
         np.random.shuffle(one_type)  # noqa: NPY002
         one_type = np.array(one_type, dtype=int).reshape([1, -1])
         dtype = np.tile(one_type, [nframes, 1])
-        defield = np.random.random(dcoord.shape)
+        defield = np.random.default_rng().random(dcoord.shape)
         return dcoord, dbox, dtype, defield
 
     def rotate_mat(self, axis_, theta):
diff --git a/source/tests/test_descrpt_sea_ef_vert.py b/source/tests/tf/test_descrpt_sea_ef_vert.py
similarity index 99%
rename from source/tests/test_descrpt_sea_ef_vert.py
rename to source/tests/tf/test_descrpt_sea_ef_vert.py
index 77ffb3150c..09bca9b754 100644
--- a/source/tests/test_descrpt_sea_ef_vert.py
+++ b/source/tests/tf/test_descrpt_sea_ef_vert.py
@@ -2,22 +2,23 @@
 import unittest
 
 import numpy as np
-from common import (
-    Data,
-    force_dw_test,
-    force_test,
-    virial_dw_test,
-    virial_test,
-)
 
 # load grad of force module
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     op_module,
     tf,
 )
 
+from .common import (
+    Data,
+    force_dw_test,
+    force_test,
+    virial_dw_test,
+    virial_test,
+)
+
 
 class Inter:
     def setUp(self, data, pbc=True, sess=None):
diff --git a/source/tests/test_descrpt_smooth.py b/source/tests/tf/test_descrpt_smooth.py
similarity index 99%
rename from source/tests/test_descrpt_smooth.py
rename to source/tests/tf/test_descrpt_smooth.py
index 59076e366e..91a3f7dbf0 100644
--- a/source/tests/test_descrpt_smooth.py
+++ b/source/tests/tf/test_descrpt_smooth.py
@@ -2,22 +2,23 @@
 import unittest
 
 import numpy as np
-from common import (
-    Data,
-    force_dw_test,
-    force_test,
-    virial_dw_test,
-    virial_test,
-)
 
 # load grad of force module
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     op_module,
     tf,
 )
 
+from .common import (
+    Data,
+    force_dw_test,
+    force_test,
+    virial_dw_test,
+    virial_test,
+)
+
 
 class Inter:
     def setUp(self, data, pbc=True, sess=None):
diff --git a/source/tests/test_dipole_se_a.py b/source/tests/tf/test_dipole_se_a.py
similarity index 93%
rename from source/tests/test_dipole_se_a.py
rename to source/tests/tf/test_dipole_se_a.py
index 687e68c2be..6905d94371 100644
--- a/source/tests/test_dipole_se_a.py
+++ b/source/tests/tf/test_dipole_se_a.py
@@ -1,29 +1,30 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
-from common import (
-    DataSystem,
-    finite_difference,
-    gen_data,
-    j_loader,
-    strerch_box,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     DipoleFittingSeA,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     DipoleModel,
 )
 
+from .common import (
+    DataSystem,
+    finite_difference,
+    gen_data,
+    j_loader,
+    strerch_box,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -55,7 +56,9 @@ def test_model(self):
         descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
         jdata["model"]["fitting_net"].pop("type", None)
         jdata["model"]["fitting_net"].pop("fit_diag", None)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["embedding_width"] = descrpt.get_dim_rot_mat_1()
         fitting = DipoleFittingSeA(**jdata["model"]["fitting_net"], uniform_seed=True)
         model = DipoleModel(descrpt, fitting)
 
diff --git a/source/tests/test_dipole_se_a_tebd.py b/source/tests/tf/test_dipole_se_a_tebd.py
similarity index 93%
rename from source/tests/test_dipole_se_a_tebd.py
rename to source/tests/tf/test_dipole_se_a_tebd.py
index 4b2e6d0688..3db90bc3f8 100644
--- a/source/tests/test_dipole_se_a_tebd.py
+++ b/source/tests/tf/test_dipole_se_a_tebd.py
@@ -2,34 +2,35 @@
 import unittest
 
 import numpy as np
-from common import (
-    DataSystem,
-    finite_difference,
-    gen_data,
-    j_loader,
-    strerch_box,
-)
 from packaging.version import parse as parse_version
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     DipoleFittingSeA,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     DipoleModel,
 )
-from deepmd.utils.type_embed import (
+from deepmd.tf.utils.type_embed import (
     TypeEmbedNet,
 )
 
+from .common import (
+    DataSystem,
+    finite_difference,
+    gen_data,
+    j_loader,
+    strerch_box,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -65,10 +66,13 @@ def test_model(self):
         descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
         jdata["model"]["fitting_net"].pop("type", None)
         jdata["model"]["fitting_net"].pop("fit_diag", None)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["embedding_width"] = descrpt.get_dim_rot_mat_1()
         fitting = DipoleFittingSeA(**jdata["model"]["fitting_net"], uniform_seed=True)
         typeebd_param = jdata["model"]["type_embedding"]
         typeebd = TypeEmbedNet(
+            ntypes=descrpt.get_ntypes(),
             neuron=typeebd_param["neuron"],
             resnet_dt=typeebd_param["resnet_dt"],
             seed=typeebd_param["seed"],
diff --git a/source/tests/test_dipolecharge.py b/source/tests/tf/test_dipolecharge.py
similarity index 96%
rename from source/tests/test_dipolecharge.py
rename to source/tests/tf/test_dipolecharge.py
index 58459d6845..408b1bbdf2 100644
--- a/source/tests/test_dipolecharge.py
+++ b/source/tests/tf/test_dipolecharge.py
@@ -3,20 +3,21 @@
 import unittest
 
 import numpy as np
-from common import (
-    tests_path,
-)
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DipoleChargeModifier,
 )
-from deepmd.utils.convert import (
+from deepmd.tf.utils.convert import (
     convert_pbtxt_to_pb,
 )
 
+from .common import (
+    infer_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -27,7 +28,7 @@ class TestDipoleCharge(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "dipolecharge_d.pbtxt")),
+            str(infer_path / os.path.join("dipolecharge_d.pbtxt")),
             "dipolecharge_d.pb",
         )
         cls.dp = DipoleChargeModifier(
diff --git a/source/tests/test_dp_test.py b/source/tests/tf/test_dp_test.py
similarity index 96%
rename from source/tests/test_dp_test.py
rename to source/tests/tf/test_dp_test.py
index a07706acfe..9a3dde3da0 100644
--- a/source/tests/test_dp_test.py
+++ b/source/tests/tf/test_dp_test.py
@@ -8,15 +8,16 @@
 
 import dpdata
 import numpy as np
-from common import (
-    tests_path,
-)
 
-from deepmd.entrypoints.test import test as dp_test
-from deepmd.utils.convert import (
+from deepmd.tf.entrypoints.test import test as dp_test
+from deepmd.tf.utils.convert import (
     convert_pbtxt_to_pb,
 )
 
+from .common import (
+    infer_path,
+)
+
 default_places = 6
 
 
@@ -71,7 +72,7 @@ class TestDPTestEner(unittest.TestCase, TestDPTest):
     def setUpClass(cls):
         cls.model_name = "deeppot.pb"
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deeppot.pbtxt")), cls.model_name
+            str(infer_path / os.path.join("deeppot.pbtxt")), cls.model_name
         )
 
     def setUp(self):
@@ -207,7 +208,7 @@ class TestDPTestDipole(unittest.TestCase, TestDPTest):
     def setUpClass(cls):
         cls.model_name = "deepdipole.pb"
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deepdipole.pbtxt")), cls.model_name
+            str(infer_path / os.path.join("deepdipole.pbtxt")), cls.model_name
         )
 
     def setUp(self):
@@ -266,7 +267,7 @@ class TestDPTestPolar(unittest.TestCase, TestDPTest):
     def setUpClass(cls):
         cls.model_name = "deeppolar.pb"
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deeppolar.pbtxt")), cls.model_name
+            str(infer_path / os.path.join("deeppolar.pbtxt")), cls.model_name
         )
 
     def setUp(self):
diff --git a/source/tests/test_embedding_net.py b/source/tests/tf/test_embedding_net.py
similarity index 98%
rename from source/tests/test_embedding_net.py
rename to source/tests/tf/test_embedding_net.py
index 1b8c68c089..f766fff8b3 100644
--- a/source/tests/test_embedding_net.py
+++ b/source/tests/tf/test_embedding_net.py
@@ -3,10 +3,10 @@
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.utils.network import (
+from deepmd.tf.utils.network import (
     embedding_net,
 )
 
diff --git a/source/tests/test_env.py b/source/tests/tf/test_env.py
similarity index 87%
rename from source/tests/test_env.py
rename to source/tests/tf/test_env.py
index d575c3cf93..cd066b06a5 100644
--- a/source/tests/test_env.py
+++ b/source/tests/tf/test_env.py
@@ -4,7 +4,7 @@
     mock,
 )
 
-from deepmd import (
+from deepmd.tf import (
     env,
 )
 
@@ -19,8 +19,8 @@ def test_empty(self):
     @mock.patch.dict(
         "os.environ",
         values={
-            "TF_INTRA_OP_PARALLELISM_THREADS": "5",
-            "TF_INTER_OP_PARALLELISM_THREADS": "3",
+            "DP_INTRA_OP_PARALLELISM_THREADS": "5",
+            "DP_INTER_OP_PARALLELISM_THREADS": "3",
         },
     )
     def test_given(self):
@@ -35,7 +35,7 @@ def test_default(self):
         new = env.get_tf_session_config()
         self.assertNotEqual(id(shared), id(new))
 
-    @mock.patch("deepmd.env.get_tf_default_nthreads")
+    @mock.patch("deepmd.tf.env.get_tf_default_nthreads")
     def test_get(self, mock_method):
         mock_method.return_value = (5, 3)
         config = env.get_tf_session_config()
diff --git a/source/tests/test_ewald.py b/source/tests/tf/test_ewald.py
similarity index 96%
rename from source/tests/test_ewald.py
rename to source/tests/tf/test_ewald.py
index ef2ace39a4..c68a0c84ee 100644
--- a/source/tests/test_ewald.py
+++ b/source/tests/tf/test_ewald.py
@@ -1,12 +1,12 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     tf,
 )
-from deepmd.infer.ewald_recp import (
+from deepmd.tf.infer.ewald_recp import (
     EwaldRecp,
     op_module,
 )
@@ -38,16 +38,16 @@ def setUp(self):
             box = np.eye(3) * boxl
             box[1][1] += 1
             box[2][2] += 2
-            box += np.random.random([3, 3]) * box_pert
+            box += np.random.default_rng().random([3, 3]) * box_pert
             box = 0.5 * (box + box.T)
             self.dbox.append(box)
             # scaled
-            coord = np.random.random([self.natoms, 3])
+            coord = np.random.default_rng().random([self.natoms, 3])
             self.rcoord.append(coord)
             # real coords
             self.dcoord.append(np.matmul(coord, box))
             # charge
-            dcharge = np.random.random([self.natoms])
+            dcharge = np.random.default_rng().random([self.natoms])
             dcharge -= np.average(dcharge)
             assert np.abs(np.sum(self.dcharge) - 0) < 1e-12
             self.dcharge.append(dcharge)
diff --git a/source/tests/test_finetune_se_atten.py b/source/tests/tf/test_finetune_se_atten.py
similarity index 67%
rename from source/tests/test_finetune_se_atten.py
rename to source/tests/tf/test_finetune_se_atten.py
index f4689aacb3..40fc5b68a3 100644
--- a/source/tests/test_finetune_se_atten.py
+++ b/source/tests/tf/test_finetune_se_atten.py
@@ -1,37 +1,37 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import json
 import os
-import subprocess as sp
 import unittest
 
 import numpy as np
-from common import (
-    j_loader,
-    run_dp,
-    tests_path,
-)
 from packaging.version import parse as parse_version
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     tf,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPotential,
 )
-from deepmd.utils.argcheck import (
+from deepmd.tf.utils.argcheck import (
     normalize,
 )
-from deepmd.utils.compat import (
+from deepmd.tf.utils.compat import (
     update_deepmd_input,
 )
-from deepmd.utils.data_system import (
+from deepmd.tf.utils.data_system import (
     DeepmdDataSystem,
 )
-from deepmd.utils.graph import (
+from deepmd.tf.utils.graph import (
     get_tensor_by_name,
 )
 
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -45,17 +45,6 @@ def _file_delete(file):
         os.remove(file)
 
 
-def _subprocess_run(command):
-    popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
-    for line in iter(popen.stdout.readline, b""):
-        if hasattr(line, "decode"):
-            line = line.decode("utf-8")
-        line = line.rstrip()
-        print(line)
-    popen.wait()
-    return popen.returncode
-
-
 def _init_models(setup_model, i):
     data_file = str(tests_path / os.path.join("finetune", "data"))
     data_file_mixed_type = str(tests_path / os.path.join("finetune", "data_mixed_type"))
@@ -147,67 +136,77 @@ def _init_models(setup_model, i):
     )
 
 
-if not parse_version(tf.__version__) < parse_version("1.15"):
-
-    def previous_se_atten(jdata):
-        jdata["model"]["descriptor"]["stripped_type_embedding"] = False
-        jdata["model"]["descriptor"]["attn_layer"] = 2
-
-    def stripped_model(jdata):
-        jdata["model"]["descriptor"]["stripped_type_embedding"] = True
-        jdata["model"]["descriptor"]["attn_layer"] = 2
-
-    def compressible_model(jdata):
-        jdata["model"]["descriptor"]["stripped_type_embedding"] = True
-        jdata["model"]["descriptor"]["attn_layer"] = 0
-
-    models = [previous_se_atten, stripped_model, compressible_model]
-    INPUT_PRES = []
-    INPUT_FINETUNES = []
-    INPUT_FINETUNE_MIXS = []
-    PRE_MODELS = []
-    FINETUNED_MODELS = []
-    FINETUNED_MODEL_MIXS = []
-    PRE_MAPS = []
-    FINETUNED_MAPS = []
-    VALID_DATAS = []
-    for i, model in enumerate(models):
-        (
-            INPUT_PRE,
-            INPUT_FINETUNE,
-            INPUT_FINETUNE_MIX,
-            PRE_MODEL,
-            FINETUNED_MODEL,
-            FINETUNED_MODEL_MIX,
-            PRE_MAP,
-            FINETUNED_MAP,
-            VALID_DATA,
-        ) = _init_models(model, i)
-        INPUT_PRES.append(INPUT_PRE)
-        INPUT_FINETUNES.append(INPUT_FINETUNE)
-        INPUT_FINETUNE_MIXS.append(INPUT_FINETUNE_MIX)
-        PRE_MODELS.append(PRE_MODEL)
-        FINETUNED_MODELS.append(FINETUNED_MODEL)
-        FINETUNED_MODEL_MIXS.append(FINETUNED_MODEL_MIX)
-        PRE_MAPS.append(PRE_MAP)
-        FINETUNED_MAPS.append(FINETUNED_MAP)
-        VALID_DATAS.append(VALID_DATA)
-
-
 @unittest.skipIf(
     parse_version(tf.__version__) < parse_version("1.15"),
     f"The current tf version {tf.__version__} is too low to run the new testing model.",
 )
 class TestFinetuneSeAtten(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:
+        if not parse_version(tf.__version__) < parse_version("1.15"):
+
+            def previous_se_atten(jdata):
+                jdata["model"]["descriptor"]["stripped_type_embedding"] = False
+                jdata["model"]["descriptor"]["attn_layer"] = 2
+
+            def stripped_model(jdata):
+                jdata["model"]["descriptor"]["stripped_type_embedding"] = True
+                jdata["model"]["descriptor"]["attn_layer"] = 2
+
+            def compressible_model(jdata):
+                jdata["model"]["descriptor"]["stripped_type_embedding"] = True
+                jdata["model"]["descriptor"]["attn_layer"] = 0
+
+            models = [previous_se_atten, stripped_model, compressible_model]
+            INPUT_PRES = []
+            INPUT_FINETUNES = []
+            INPUT_FINETUNE_MIXS = []
+            PRE_MODELS = []
+            FINETUNED_MODELS = []
+            FINETUNED_MODEL_MIXS = []
+            PRE_MAPS = []
+            FINETUNED_MAPS = []
+            VALID_DATAS = []
+            for i, model in enumerate(models):
+                (
+                    INPUT_PRE,
+                    INPUT_FINETUNE,
+                    INPUT_FINETUNE_MIX,
+                    PRE_MODEL,
+                    FINETUNED_MODEL,
+                    FINETUNED_MODEL_MIX,
+                    PRE_MAP,
+                    FINETUNED_MAP,
+                    VALID_DATA,
+                ) = _init_models(model, i)
+                INPUT_PRES.append(INPUT_PRE)
+                INPUT_FINETUNES.append(INPUT_FINETUNE)
+                INPUT_FINETUNE_MIXS.append(INPUT_FINETUNE_MIX)
+                PRE_MODELS.append(PRE_MODEL)
+                FINETUNED_MODELS.append(FINETUNED_MODEL)
+                FINETUNED_MODEL_MIXS.append(FINETUNED_MODEL_MIX)
+                PRE_MAPS.append(PRE_MAP)
+                FINETUNED_MAPS.append(FINETUNED_MAP)
+                VALID_DATAS.append(VALID_DATA)
+        cls.INPUT_PRES = INPUT_PRES
+        cls.INPUT_FINETUNES = INPUT_FINETUNES
+        cls.INPUT_FINETUNE_MIXS = INPUT_FINETUNE_MIXS
+        cls.PRE_MODELS = PRE_MODELS
+        cls.FINETUNED_MODELS = FINETUNED_MODELS
+        cls.FINETUNED_MODEL_MIXS = FINETUNED_MODEL_MIXS
+        cls.PRE_MAPS = PRE_MAPS
+        cls.FINETUNED_MAPS = FINETUNED_MAPS
+        cls.VALID_DATAS = VALID_DATAS
+
     @classmethod
     def tearDownClass(self):
-        for i in range(len(INPUT_PRES)):
-            _file_delete(INPUT_PRES[i])
-            _file_delete(INPUT_FINETUNES[i])
-            _file_delete(INPUT_FINETUNE_MIXS[i])
-            _file_delete(PRE_MODELS[i])
-            _file_delete(FINETUNED_MODELS[i])
-            _file_delete(FINETUNED_MODEL_MIXS[i])
+        for i in range(len(self.INPUT_PRES)):
+            _file_delete(self.INPUT_PRES[i])
+            _file_delete(self.INPUT_FINETUNES[i])
+            _file_delete(self.INPUT_FINETUNE_MIXS[i])
+            _file_delete(self.PRE_MODELS[i])
+            _file_delete(self.FINETUNED_MODELS[i])
+            _file_delete(self.FINETUNED_MODEL_MIXS[i])
             _file_delete("out.json")
             _file_delete("model.ckpt.meta")
             _file_delete("model.ckpt.index")
@@ -223,22 +222,22 @@ def tearDownClass(self):
             _file_delete("lcurve.out")
 
     def test_finetune_standard(self):
-        for i in range(len(INPUT_PRES)):
-            self.valid_data = VALID_DATAS[i]
+        for i in range(len(self.INPUT_PRES)):
+            self.valid_data = self.VALID_DATAS[i]
             pretrained_bias = get_tensor_by_name(
-                PRE_MODELS[i], "fitting_attr/t_bias_atom_e"
+                self.PRE_MODELS[i], "fitting_attr/t_bias_atom_e"
             )
             finetuned_bias = get_tensor_by_name(
-                FINETUNED_MODELS[i], "fitting_attr/t_bias_atom_e"
+                self.FINETUNED_MODELS[i], "fitting_attr/t_bias_atom_e"
             )
-            sorter = np.argsort(PRE_MAPS[i])
+            sorter = np.argsort(self.PRE_MAPS[i])
             idx_type_map = sorter[
-                np.searchsorted(PRE_MAPS[i], FINETUNED_MAPS[i], sorter=sorter)
+                np.searchsorted(self.PRE_MAPS[i], self.FINETUNED_MAPS[i], sorter=sorter)
             ]
             test_data = self.valid_data.get_test()
             atom_nums = np.tile(np.bincount(test_data["type"][0])[idx_type_map], (4, 1))
 
-            dp = DeepPotential(PRE_MODELS[i])
+            dp = DeepPotential(self.PRE_MODELS[i])
             energy = dp.eval(
                 test_data["coord"], test_data["box"], test_data["type"][0]
             )[0]
@@ -250,7 +249,7 @@ def test_finetune_standard(self):
                 0
             ].reshape(-1)
 
-            dp_finetuned = DeepPotential(FINETUNED_MODELS[i])
+            dp_finetuned = DeepPotential(self.FINETUNED_MODELS[i])
             energy_finetuned = dp_finetuned.eval(
                 test_data["coord"], test_data["box"], test_data["type"][0]
             )[0]
@@ -266,22 +265,22 @@ def test_finetune_standard(self):
             np.testing.assert_almost_equal(finetune_results, 0.0, default_places)
 
     def test_finetune_mixed_type(self):
-        for i in range(len(INPUT_PRES)):
-            self.valid_data = VALID_DATAS[i]
+        for i in range(len(self.INPUT_PRES)):
+            self.valid_data = self.VALID_DATAS[i]
             pretrained_bias = get_tensor_by_name(
-                PRE_MODELS[i], "fitting_attr/t_bias_atom_e"
+                self.PRE_MODELS[i], "fitting_attr/t_bias_atom_e"
             )
             finetuned_bias_mixed_type = get_tensor_by_name(
-                FINETUNED_MODEL_MIXS[i], "fitting_attr/t_bias_atom_e"
+                self.FINETUNED_MODEL_MIXS[i], "fitting_attr/t_bias_atom_e"
             )
-            sorter = np.argsort(PRE_MAPS[i])
+            sorter = np.argsort(self.PRE_MAPS[i])
             idx_type_map = sorter[
-                np.searchsorted(PRE_MAPS[i], FINETUNED_MAPS[i], sorter=sorter)
+                np.searchsorted(self.PRE_MAPS[i], self.FINETUNED_MAPS[i], sorter=sorter)
             ]
             test_data = self.valid_data.get_test()
             atom_nums = np.tile(np.bincount(test_data["type"][0])[idx_type_map], (4, 1))
 
-            dp = DeepPotential(PRE_MODELS[i])
+            dp = DeepPotential(self.PRE_MODELS[i])
             energy = dp.eval(
                 test_data["coord"], test_data["box"], test_data["type"][0]
             )[0]
@@ -293,7 +292,7 @@ def test_finetune_mixed_type(self):
                 0
             ].reshape(-1)
 
-            dp_finetuned_mixed_type = DeepPotential(FINETUNED_MODEL_MIXS[i])
+            dp_finetuned_mixed_type = DeepPotential(self.FINETUNED_MODEL_MIXS[i])
             energy_finetuned = dp_finetuned_mixed_type.eval(
                 test_data["coord"], test_data["box"], test_data["type"][0]
             )[0]
diff --git a/source/tests/test_fitting_dos.py b/source/tests/tf/test_fitting_dos.py
similarity index 93%
rename from source/tests/test_fitting_dos.py
rename to source/tests/tf/test_fitting_dos.py
index 60a0ee4158..f9df5fc126 100644
--- a/source/tests/test_fitting_dos.py
+++ b/source/tests/tf/test_fitting_dos.py
@@ -1,24 +1,25 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
-from common import (
-    DataSystem,
-    gen_data,
-    j_loader,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     DOSFitting,
 )
 
+from .common import (
+    DataSystem,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -58,7 +59,8 @@ def test_fitting(self):
         descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
 
         jdata["model"]["fitting_net"].pop("type", None)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
         fitting = DOSFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
 
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
@@ -188,21 +190,20 @@ def test_fitting(self):
 
         ref_atom_dos_1 = [
             -0.32495014,
-            -0.87979356,
-            -0.26630668,
             -0.32495882,
-            -0.87979767,
-            -0.2663072,
+            -0.32496842,
+            -0.32495892,
+            -0.32495469,
+            -0.32496075,
         ]
         ref_atom_dos_2 = [
-            -0.26630917,
             0.21549911,
-            -0.87979638,
-            -0.26630564,
             0.21550413,
-            -0.87979585,
+            0.21551077,
+            0.21550547,
+            0.21550303,
+            0.21550645,
         ]
         places = 4
-
         np.testing.assert_almost_equal(pred_atom_dos[:, 0], ref_atom_dos_1, places)
         np.testing.assert_almost_equal(pred_atom_dos[:, 50], ref_atom_dos_2, places)
diff --git a/source/tests/test_fitting_ener_type.py b/source/tests/tf/test_fitting_ener_type.py
similarity index 95%
rename from source/tests/test_fitting_ener_type.py
rename to source/tests/tf/test_fitting_ener_type.py
index 42190ef557..c1c1698d4f 100644
--- a/source/tests/test_fitting_ener_type.py
+++ b/source/tests/tf/test_fitting_ener_type.py
@@ -1,24 +1,25 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
-from common import (
-    DataSystem,
-    gen_data,
-    j_loader,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     EnerFitting,
 )
 
+from .common import (
+    DataSystem,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -53,7 +54,9 @@ def test_fitting(self):
 
         jdata["model"]["descriptor"].pop("type", None)
         descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["dim_rot_mat_1"] = descrpt.get_dim_rot_mat_1()
         fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
 
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
diff --git a/source/tests/test_fitting_stat.py b/source/tests/tf/test_fitting_stat.py
similarity index 95%
rename from source/tests/test_fitting_stat.py
rename to source/tests/tf/test_fitting_stat.py
index ad62c89f2a..100868fd18 100644
--- a/source/tests/test_fitting_stat.py
+++ b/source/tests/tf/test_fitting_stat.py
@@ -5,17 +5,18 @@
 )
 
 import numpy as np
-from common import (
-    j_loader,
-)
 
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     EnerFitting,
 )
 
+from .common import (
+    j_loader,
+)
+
 input_json = "water_se_a_afparam.json"
 
 
@@ -80,7 +81,8 @@ def test(self):
         # fitting = EnerFitting(jdata['fitting_net'], descrpt)
         descrpt = DescrptSeA(6.0, 5.8, [46, 92], neuron=[25, 50, 100], axis_neuron=16)
         fitting = EnerFitting(
-            descrpt,
+            descrpt.get_ntypes(),
+            descrpt.get_dim_out(),
             neuron=[240, 240, 240],
             resnet_dt=True,
             numb_fparam=2,
diff --git a/source/tests/test_gen_stat_data.py b/source/tests/tf/test_gen_stat_data.py
similarity index 84%
rename from source/tests/test_gen_stat_data.py
rename to source/tests/tf/test_gen_stat_data.py
index 6667aa15fd..5442fded75 100644
--- a/source/tests/test_gen_stat_data.py
+++ b/source/tests/tf/test_gen_stat_data.py
@@ -5,19 +5,19 @@
 import dpdata
 import numpy as np
 
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     EnerFitting,
 )
-from deepmd.model.model_stat import (
+from deepmd.tf.model.model_stat import (
     _make_all_stat_ref,
     make_stat_input,
     merge_sys_stat,
 )
-from deepmd.utils import random as dp_random
-from deepmd.utils.data_system import (
+from deepmd.tf.utils import random as dp_random
+from deepmd.tf.utils.data_system import (
     DeepmdDataSystem,
 )
 
@@ -25,10 +25,10 @@
 def gen_sys(nframes, atom_types):
     natoms = len(atom_types)
     data = {}
-    data["coords"] = np.random.random([nframes, natoms, 3])
-    data["forces"] = np.random.random([nframes, natoms, 3])
-    data["cells"] = np.random.random([nframes, 9])
-    data["energies"] = np.random.random([nframes, 1])
+    data["coords"] = np.random.default_rng().random([nframes, natoms, 3])
+    data["forces"] = np.random.default_rng().random([nframes, natoms, 3])
+    data["cells"] = np.random.default_rng().random([nframes, 9])
+    data["energies"] = np.random.default_rng().random([nframes, 1])
     types = list(set(atom_types))
     types.sort()
     data["atom_names"] = []
@@ -119,7 +119,12 @@ def test_ener_shift(self):
         ener_shift0 = data.compute_energy_shift(rcond=1)
         all_stat = make_stat_input(data, 4, merge_sys=False)
         descrpt = DescrptSeA(6.0, 5.8, [46, 92], neuron=[25, 50, 100], axis_neuron=16)
-        fitting = EnerFitting(descrpt, neuron=[240, 240, 240], resnet_dt=True)
+        fitting = EnerFitting(
+            descrpt.get_ntypes(),
+            descrpt.get_dim_out(),
+            neuron=[240, 240, 240],
+            resnet_dt=True,
+        )
         ener_shift1 = fitting._compute_output_stats(all_stat, rcond=1)
         np.testing.assert_almost_equal(ener_shift0, ener_shift1)
 
@@ -131,7 +136,11 @@ def test_ener_shift_assigned(self):
         all_stat = make_stat_input(data, 4, merge_sys=False)
         descrpt = DescrptSeA(6.0, 5.8, [46, 92], neuron=[25, 50, 100], axis_neuron=16)
         fitting = EnerFitting(
-            descrpt, neuron=[240, 240, 240], resnet_dt=True, atom_ener=[ae0, None, None]
+            descrpt.get_ntypes(),
+            descrpt.get_dim_out(),
+            neuron=[240, 240, 240],
+            resnet_dt=True,
+            atom_ener=[ae0, None, None],
         )
         ener_shift1 = fitting._compute_output_stats(all_stat, rcond=1)
         # check assigned energy
diff --git a/source/tests/test_get_potential.py b/source/tests/tf/test_get_potential.py
similarity index 93%
rename from source/tests/test_get_potential.py
rename to source/tests/tf/test_get_potential.py
index e2f342537a..47462a20a3 100644
--- a/source/tests/test_get_potential.py
+++ b/source/tests/tf/test_get_potential.py
@@ -2,24 +2,25 @@
 """Test if `DeepPotential` facto function returns the right type of potential."""
 
 import unittest
-from pathlib import (
-    Path,
-)
 
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepDipole,
     DeepPolar,
     DeepPot,
     DeepPotential,
 )
-from deepmd.utils.convert import (
+from deepmd.tf.utils.convert import (
     convert_pbtxt_to_pb,
 )
 
+from .common import (
+    infer_path,
+)
+
 
 class TestGetPotential(unittest.TestCase):
     def setUp(self):
-        self.work_dir = Path(__file__).parent / "infer"
+        self.work_dir = infer_path
 
         convert_pbtxt_to_pb(
             str(self.work_dir / "deeppot.pbtxt"), str(self.work_dir / "deep_pot.pb")
diff --git a/source/tests/test_init_frz_model_multi.py b/source/tests/tf/test_init_frz_model_multi.py
similarity index 82%
rename from source/tests/test_init_frz_model_multi.py
rename to source/tests/tf/test_init_frz_model_multi.py
index 6696f39319..b6209a7e69 100644
--- a/source/tests/test_init_frz_model_multi.py
+++ b/source/tests/tf/test_init_frz_model_multi.py
@@ -4,35 +4,36 @@
 import unittest
 
 import numpy as np
-from common import (
-    j_loader,
-    run_dp,
-    tests_path,
-)
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     tf,
 )
-from deepmd.train.run_options import (
+from deepmd.tf.train.run_options import (
     RunOptions,
 )
-from deepmd.train.trainer import (
+from deepmd.tf.train.trainer import (
     DPTrainer,
 )
-from deepmd.utils.argcheck import (
+from deepmd.tf.utils.argcheck import (
     normalize,
 )
-from deepmd.utils.compat import (
+from deepmd.tf.utils.compat import (
     update_deepmd_input,
 )
-from deepmd.utils.data_system import (
+from deepmd.tf.utils.data_system import (
     DeepmdDataSystem,
 )
-from deepmd.utils.multi_init import (
+from deepmd.tf.utils.multi_init import (
     replace_model_params_with_frz_multi_model,
 )
 
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -63,12 +64,12 @@ def _init_models():
     jdata["training"]["data_dict"]["water_ener"] = {}
     jdata["training"]["data_dict"]["water_ener"]["training_data"] = training_data_config
     jdata["training"]["data_dict"]["water_ener"]["training_data"]["systems"] = data_file
-    jdata["training"]["data_dict"]["water_ener"][
-        "validation_data"
-    ] = validation_data_config
-    jdata["training"]["data_dict"]["water_ener"]["validation_data"][
-        "systems"
-    ] = data_file
+    jdata["training"]["data_dict"]["water_ener"]["validation_data"] = (
+        validation_data_config
+    )
+    jdata["training"]["data_dict"]["water_ener"]["validation_data"]["systems"] = (
+        data_file
+    )
     jdata["training"]["save_ckpt"] = ckpt
     jdata["model"]["fitting_net_dict"] = {}
     jdata["model"]["fitting_net_dict"]["water_ener"] = fitting_config
@@ -97,18 +98,18 @@ def _init_models():
     jdata["learning_rate_dict"]["water_ener_new"] = learning_rate_config
     jdata["training"]["data_dict"] = {}
     jdata["training"]["data_dict"]["water_ener_new"] = {}
-    jdata["training"]["data_dict"]["water_ener_new"][
-        "training_data"
-    ] = training_data_config
-    jdata["training"]["data_dict"]["water_ener_new"]["training_data"][
-        "systems"
-    ] = data_file
-    jdata["training"]["data_dict"]["water_ener_new"][
-        "validation_data"
-    ] = validation_data_config
-    jdata["training"]["data_dict"]["water_ener_new"]["validation_data"][
-        "systems"
-    ] = data_file
+    jdata["training"]["data_dict"]["water_ener_new"]["training_data"] = (
+        training_data_config
+    )
+    jdata["training"]["data_dict"]["water_ener_new"]["training_data"]["systems"] = (
+        data_file
+    )
+    jdata["training"]["data_dict"]["water_ener_new"]["validation_data"] = (
+        validation_data_config
+    )
+    jdata["training"]["data_dict"]["water_ener_new"]["validation_data"]["systems"] = (
+        data_file
+    )
     jdata["training"].pop("fitting_weight")
 
     jdata = replace_model_params_with_frz_multi_model(jdata, frozen_model)
@@ -180,20 +181,19 @@ def _init_models():
     return INPUT, ckpt, frozen_model, model_ckpt, model_frz, data, stop_batch
 
 
-(
-    INPUT,
-    CKPT,
-    FROZEN_MODEL,
-    CKPT_TRAINER,
-    FRZ_TRAINER,
-    VALID_DATA,
-    STOP_BATCH,
-) = _init_models()
-
-
 class TestInitFrzModelMulti(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
+        (
+            cls.INPUT,
+            cls.CKPT,
+            cls.FROZEN_MODEL,
+            CKPT_TRAINER,
+            FRZ_TRAINER,
+            VALID_DATA,
+            STOP_BATCH,
+        ) = _init_models()
+
         cls.dp_ckpt = CKPT_TRAINER
         cls.dp_frz = FRZ_TRAINER
         cls.valid_data_dict = {"water_ener": VALID_DATA}
@@ -205,19 +205,19 @@ def setUpClass(cls):
 
     @classmethod
     def tearDownClass(cls):
-        _file_delete(INPUT)
-        _file_delete(FROZEN_MODEL)
+        _file_delete(cls.INPUT)
+        _file_delete(cls.FROZEN_MODEL)
         _file_delete("out.json")
         _file_delete(str(tests_path / "checkpoint"))
-        _file_delete(CKPT + ".meta")
-        _file_delete(CKPT + ".index")
-        _file_delete(CKPT + ".data-00000-of-00001")
-        _file_delete(CKPT + "-0.meta")
-        _file_delete(CKPT + "-0.index")
-        _file_delete(CKPT + "-0.data-00000-of-00001")
-        _file_delete(CKPT + "-1.meta")
-        _file_delete(CKPT + "-1.index")
-        _file_delete(CKPT + "-1.data-00000-of-00001")
+        _file_delete(cls.CKPT + ".meta")
+        _file_delete(cls.CKPT + ".index")
+        _file_delete(cls.CKPT + ".data-00000-of-00001")
+        _file_delete(cls.CKPT + "-0.meta")
+        _file_delete(cls.CKPT + "-0.index")
+        _file_delete(cls.CKPT + "-0.data-00000-of-00001")
+        _file_delete(cls.CKPT + "-1.meta")
+        _file_delete(cls.CKPT + "-1.index")
+        _file_delete(cls.CKPT + "-1.data-00000-of-00001")
         _file_delete("input_v2_compat.json")
         _file_delete("lcurve.out")
 
diff --git a/source/tests/test_init_frz_model_se_a.py b/source/tests/tf/test_init_frz_model_se_a.py
similarity index 83%
rename from source/tests/test_init_frz_model_se_a.py
rename to source/tests/tf/test_init_frz_model_se_a.py
index 06532009d1..5d4ed1063c 100644
--- a/source/tests/test_init_frz_model_se_a.py
+++ b/source/tests/tf/test_init_frz_model_se_a.py
@@ -4,32 +4,33 @@
 import unittest
 
 import numpy as np
-from common import (
-    j_loader,
-    run_dp,
-    tests_path,
-)
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     tf,
 )
-from deepmd.train.run_options import (
+from deepmd.tf.train.run_options import (
     RunOptions,
 )
-from deepmd.train.trainer import (
+from deepmd.tf.train.trainer import (
     DPTrainer,
 )
-from deepmd.utils.argcheck import (
+from deepmd.tf.utils.argcheck import (
     normalize,
 )
-from deepmd.utils.compat import (
+from deepmd.tf.utils.compat import (
     update_deepmd_input,
 )
-from deepmd.utils.data_system import (
+from deepmd.tf.utils.data_system import (
     DeepmdDataSystem,
 )
 
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -128,20 +129,18 @@ def _init_models():
     return INPUT, ckpt, frozen_model, model_ckpt, model_frz, data, stop_batch
 
 
-(
-    INPUT,
-    CKPT,
-    FROZEN_MODEL,
-    CKPT_TRAINER,
-    FRZ_TRAINER,
-    VALID_DATA,
-    STOP_BATCH,
-) = _init_models()
-
-
 class TestInitFrzModelA(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
+        (
+            cls.INPUT,
+            cls.CKPT,
+            cls.FROZEN_MODEL,
+            CKPT_TRAINER,
+            FRZ_TRAINER,
+            VALID_DATA,
+            STOP_BATCH,
+        ) = _init_models()
         cls.dp_ckpt = CKPT_TRAINER
         cls.dp_frz = FRZ_TRAINER
         cls.valid_data = VALID_DATA
@@ -149,19 +148,19 @@ def setUpClass(cls):
 
     @classmethod
     def tearDownClass(cls):
-        _file_delete(INPUT)
-        _file_delete(FROZEN_MODEL)
+        _file_delete(cls.INPUT)
+        _file_delete(cls.FROZEN_MODEL)
         _file_delete("out.json")
         _file_delete(str(tests_path / "checkpoint"))
-        _file_delete(CKPT + ".meta")
-        _file_delete(CKPT + ".index")
-        _file_delete(CKPT + ".data-00000-of-00001")
-        _file_delete(CKPT + "-0.meta")
-        _file_delete(CKPT + "-0.index")
-        _file_delete(CKPT + "-0.data-00000-of-00001")
-        _file_delete(CKPT + "-1.meta")
-        _file_delete(CKPT + "-1.index")
-        _file_delete(CKPT + "-1.data-00000-of-00001")
+        _file_delete(cls.CKPT + ".meta")
+        _file_delete(cls.CKPT + ".index")
+        _file_delete(cls.CKPT + ".data-00000-of-00001")
+        _file_delete(cls.CKPT + "-0.meta")
+        _file_delete(cls.CKPT + "-0.index")
+        _file_delete(cls.CKPT + "-0.data-00000-of-00001")
+        _file_delete(cls.CKPT + "-1.meta")
+        _file_delete(cls.CKPT + "-1.index")
+        _file_delete(cls.CKPT + "-1.data-00000-of-00001")
         _file_delete("input_v2_compat.json")
         _file_delete("lcurve.out")
 
diff --git a/source/tests/test_init_frz_model_se_a_tebd.py b/source/tests/tf/test_init_frz_model_se_a_tebd.py
similarity index 83%
rename from source/tests/test_init_frz_model_se_a_tebd.py
rename to source/tests/tf/test_init_frz_model_se_a_tebd.py
index e54cae9781..afc1e46ed8 100644
--- a/source/tests/test_init_frz_model_se_a_tebd.py
+++ b/source/tests/tf/test_init_frz_model_se_a_tebd.py
@@ -4,32 +4,33 @@
 import unittest
 
 import numpy as np
-from common import (
-    j_loader,
-    run_dp,
-    tests_path,
-)
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     tf,
 )
-from deepmd.train.run_options import (
+from deepmd.tf.train.run_options import (
     RunOptions,
 )
-from deepmd.train.trainer import (
+from deepmd.tf.train.trainer import (
     DPTrainer,
 )
-from deepmd.utils.argcheck import (
+from deepmd.tf.utils.argcheck import (
     normalize,
 )
-from deepmd.utils.compat import (
+from deepmd.tf.utils.compat import (
     update_deepmd_input,
 )
-from deepmd.utils.data_system import (
+from deepmd.tf.utils.data_system import (
     DeepmdDataSystem,
 )
 
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -129,20 +130,19 @@ def _init_models():
     return INPUT, ckpt, frozen_model, model_ckpt, model_frz, data, stop_batch
 
 
-(
-    INPUT,
-    CKPT,
-    FROZEN_MODEL,
-    CKPT_TRAINER,
-    FRZ_TRAINER,
-    VALID_DATA,
-    STOP_BATCH,
-) = _init_models()
-
-
 class TestInitFrzModelA(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
+        (
+            cls.INPUT,
+            cls.CKPT,
+            cls.FROZEN_MODEL,
+            CKPT_TRAINER,
+            FRZ_TRAINER,
+            VALID_DATA,
+            STOP_BATCH,
+        ) = _init_models()
+
         cls.dp_ckpt = CKPT_TRAINER
         cls.dp_frz = FRZ_TRAINER
         cls.valid_data = VALID_DATA
@@ -150,19 +150,19 @@ def setUpClass(cls):
 
     @classmethod
     def tearDownClass(cls):
-        _file_delete(INPUT)
-        _file_delete(FROZEN_MODEL)
+        _file_delete(cls.INPUT)
+        _file_delete(cls.FROZEN_MODEL)
         _file_delete("out.json")
         _file_delete(str(tests_path / "checkpoint"))
-        _file_delete(CKPT + ".meta")
-        _file_delete(CKPT + ".index")
-        _file_delete(CKPT + ".data-00000-of-00001")
-        _file_delete(CKPT + "-0.meta")
-        _file_delete(CKPT + "-0.index")
-        _file_delete(CKPT + "-0.data-00000-of-00001")
-        _file_delete(CKPT + "-1.meta")
-        _file_delete(CKPT + "-1.index")
-        _file_delete(CKPT + "-1.data-00000-of-00001")
+        _file_delete(cls.CKPT + ".meta")
+        _file_delete(cls.CKPT + ".index")
+        _file_delete(cls.CKPT + ".data-00000-of-00001")
+        _file_delete(cls.CKPT + "-0.meta")
+        _file_delete(cls.CKPT + "-0.index")
+        _file_delete(cls.CKPT + "-0.data-00000-of-00001")
+        _file_delete(cls.CKPT + "-1.meta")
+        _file_delete(cls.CKPT + "-1.index")
+        _file_delete(cls.CKPT + "-1.data-00000-of-00001")
         _file_delete("input_v2_compat.json")
         _file_delete("lcurve.out")
 
diff --git a/source/tests/test_init_frz_model_se_a_type.py b/source/tests/tf/test_init_frz_model_se_a_type.py
similarity index 84%
rename from source/tests/test_init_frz_model_se_a_type.py
rename to source/tests/tf/test_init_frz_model_se_a_type.py
index 9d2c49579a..48ff4eb294 100644
--- a/source/tests/test_init_frz_model_se_a_type.py
+++ b/source/tests/tf/test_init_frz_model_se_a_type.py
@@ -4,32 +4,33 @@
 import unittest
 
 import numpy as np
-from common import (
-    j_loader,
-    run_dp,
-    tests_path,
-)
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     tf,
 )
-from deepmd.train.run_options import (
+from deepmd.tf.train.run_options import (
     RunOptions,
 )
-from deepmd.train.trainer import (
+from deepmd.tf.train.trainer import (
     DPTrainer,
 )
-from deepmd.utils.argcheck import (
+from deepmd.tf.utils.argcheck import (
     normalize,
 )
-from deepmd.utils.compat import (
+from deepmd.tf.utils.compat import (
     update_deepmd_input,
 )
-from deepmd.utils.data_system import (
+from deepmd.tf.utils.data_system import (
     DeepmdDataSystem,
 )
 
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -132,20 +133,18 @@ def _init_models():
     return INPUT, ckpt, frozen_model, model_ckpt, model_frz, data, stop_batch
 
 
-(
-    INPUT,
-    CKPT,
-    FROZEN_MODEL,
-    CKPT_TRAINER,
-    FRZ_TRAINER,
-    VALID_DATA,
-    STOP_BATCH,
-) = _init_models()
-
-
 class TestInitFrzModelAType(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
+        (
+            cls.INPUT,
+            cls.CKPT,
+            cls.FROZEN_MODEL,
+            CKPT_TRAINER,
+            FRZ_TRAINER,
+            VALID_DATA,
+            STOP_BATCH,
+        ) = _init_models()
         cls.dp_ckpt = CKPT_TRAINER
         cls.dp_frz = FRZ_TRAINER
         cls.valid_data = VALID_DATA
@@ -153,19 +152,19 @@ def setUpClass(cls):
 
     @classmethod
     def tearDownClass(cls):
-        _file_delete(INPUT)
-        _file_delete(FROZEN_MODEL)
+        _file_delete(cls.INPUT)
+        _file_delete(cls.FROZEN_MODEL)
         _file_delete("out.json")
         _file_delete(str(tests_path / "checkpoint"))
-        _file_delete(CKPT + ".meta")
-        _file_delete(CKPT + ".index")
-        _file_delete(CKPT + ".data-00000-of-00001")
-        _file_delete(CKPT + "-0.meta")
-        _file_delete(CKPT + "-0.index")
-        _file_delete(CKPT + "-0.data-00000-of-00001")
-        _file_delete(CKPT + "-1.meta")
-        _file_delete(CKPT + "-1.index")
-        _file_delete(CKPT + "-1.data-00000-of-00001")
+        _file_delete(cls.CKPT + ".meta")
+        _file_delete(cls.CKPT + ".index")
+        _file_delete(cls.CKPT + ".data-00000-of-00001")
+        _file_delete(cls.CKPT + "-0.meta")
+        _file_delete(cls.CKPT + "-0.index")
+        _file_delete(cls.CKPT + "-0.data-00000-of-00001")
+        _file_delete(cls.CKPT + "-1.meta")
+        _file_delete(cls.CKPT + "-1.index")
+        _file_delete(cls.CKPT + "-1.data-00000-of-00001")
         _file_delete("input_v2_compat.json")
         _file_delete("lcurve.out")
 
diff --git a/source/tests/test_init_frz_model_se_atten.py b/source/tests/tf/test_init_frz_model_se_atten.py
similarity index 74%
rename from source/tests/test_init_frz_model_se_atten.py
rename to source/tests/tf/test_init_frz_model_se_atten.py
index 01956e51c4..a114deffc8 100644
--- a/source/tests/test_init_frz_model_se_atten.py
+++ b/source/tests/tf/test_init_frz_model_se_atten.py
@@ -4,33 +4,34 @@
 import unittest
 
 import numpy as np
-from common import (
-    j_loader,
-    run_dp,
-    tests_path,
-)
 from packaging.version import parse as parse_version
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     tf,
 )
-from deepmd.train.run_options import (
+from deepmd.tf.train.run_options import (
     RunOptions,
 )
-from deepmd.train.trainer import (
+from deepmd.tf.train.trainer import (
     DPTrainer,
 )
-from deepmd.utils.argcheck import (
+from deepmd.tf.utils.argcheck import (
     normalize,
 )
-from deepmd.utils.compat import (
+from deepmd.tf.utils.compat import (
     update_deepmd_input,
 )
-from deepmd.utils.data_system import (
+from deepmd.tf.utils.data_system import (
     DeepmdDataSystem,
 )
 
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -146,32 +147,6 @@ def compressible_model(jdata):
         jdata["model"]["descriptor"]["stripped_type_embedding"] = True
         jdata["model"]["descriptor"]["attn_layer"] = 0
 
-    models = [previous_se_atten, stripped_model, compressible_model]
-    INPUTS = []
-    CKPTS = []
-    FROZEN_MODELS = []
-    CKPT_TRAINERS = []
-    FRZ_TRAINERS = []
-    VALID_DATAS = []
-    STOP_BATCHS = []
-    for i, model in enumerate(models):
-        (
-            INPUT,
-            CKPT,
-            FROZEN_MODEL,
-            CKPT_TRAINER,
-            FRZ_TRAINER,
-            VALID_DATA,
-            STOP_BATCH,
-        ) = _init_models(model, i)
-        INPUTS.append(INPUT)
-        CKPTS.append(CKPT)
-        FROZEN_MODELS.append(FROZEN_MODEL)
-        CKPT_TRAINERS.append(CKPT_TRAINER)
-        FRZ_TRAINERS.append(FRZ_TRAINER)
-        VALID_DATAS.append(VALID_DATA)
-        STOP_BATCHS.append(STOP_BATCH)
-
 
 @unittest.skipIf(
     parse_version(tf.__version__) < parse_version("1.15"),
@@ -180,6 +155,38 @@ def compressible_model(jdata):
 class TestInitFrzModelAtten(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
+        models = [previous_se_atten, stripped_model, compressible_model]
+        INPUTS = []
+        CKPTS = []
+        FROZEN_MODELS = []
+        CKPT_TRAINERS = []
+        FRZ_TRAINERS = []
+        VALID_DATAS = []
+        STOP_BATCHS = []
+        for i, model in enumerate(models):
+            (
+                INPUT,
+                CKPT,
+                FROZEN_MODEL,
+                CKPT_TRAINER,
+                FRZ_TRAINER,
+                VALID_DATA,
+                STOP_BATCH,
+            ) = _init_models(model, i)
+            INPUTS.append(INPUT)
+            CKPTS.append(CKPT)
+            FROZEN_MODELS.append(FROZEN_MODEL)
+            CKPT_TRAINERS.append(CKPT_TRAINER)
+            FRZ_TRAINERS.append(FRZ_TRAINER)
+            VALID_DATAS.append(VALID_DATA)
+            STOP_BATCHS.append(STOP_BATCH)
+        cls.INPUTS = INPUTS
+        cls.CKPTS = CKPTS
+        cls.FROZEN_MODELS = FROZEN_MODELS
+        cls.CKPT_TRAINERS = CKPT_TRAINERS
+        cls.FRZ_TRAINERS = FRZ_TRAINERS
+        cls.VALID_DATAS = VALID_DATAS
+        cls.STOP_BATCHS = STOP_BATCHS
         cls.dp_ckpts = CKPT_TRAINERS
         cls.dp_frzs = FRZ_TRAINERS
         cls.valid_datas = VALID_DATAS
@@ -188,28 +195,28 @@ def setUpClass(cls):
     @classmethod
     def tearDownClass(cls):
         for i in range(len(cls.dp_ckpts)):
-            _file_delete(INPUTS[i])
-            _file_delete(FROZEN_MODELS[i])
+            _file_delete(cls.INPUTS[i])
+            _file_delete(cls.FROZEN_MODELS[i])
             _file_delete("out.json")
             _file_delete(str(tests_path / "checkpoint"))
-            _file_delete(CKPT[i] + ".meta")
-            _file_delete(CKPT[i] + ".index")
-            _file_delete(CKPT[i] + ".data-00000-of-00001")
-            _file_delete(CKPT[i] + "-0.meta")
-            _file_delete(CKPT[i] + "-0.index")
-            _file_delete(CKPT[i] + "-0.data-00000-of-00001")
-            _file_delete(CKPT[i] + "-1.meta")
-            _file_delete(CKPT[i] + "-1.index")
-            _file_delete(CKPT[i] + "-1.data-00000-of-00001")
+            _file_delete(cls.CKPTS[i] + ".meta")
+            _file_delete(cls.CKPTS[i] + ".index")
+            _file_delete(cls.CKPTS[i] + ".data-00000-of-00001")
+            _file_delete(cls.CKPTS[i] + "-0.meta")
+            _file_delete(cls.CKPTS[i] + "-0.index")
+            _file_delete(cls.CKPTS[i] + "-0.data-00000-of-00001")
+            _file_delete(cls.CKPTS[i] + "-1.meta")
+            _file_delete(cls.CKPTS[i] + "-1.index")
+            _file_delete(cls.CKPTS[i] + "-1.data-00000-of-00001")
             _file_delete(f"input_v2_compat{i}.json")
             _file_delete("lcurve.out")
 
     def test_single_frame(self):
         for i in range(len(self.dp_ckpts)):
-            self.dp_ckpt = CKPT_TRAINERS[i]
-            self.dp_frz = FRZ_TRAINERS[i]
-            self.valid_data = VALID_DATAS[i]
-            self.stop_batch = STOP_BATCHS[i]
+            self.dp_ckpt = self.CKPT_TRAINERS[i]
+            self.dp_frz = self.FRZ_TRAINERS[i]
+            self.valid_data = self.VALID_DATAS[i]
+            self.stop_batch = self.STOP_BATCHS[i]
 
             valid_batch = self.valid_data.get_batch()
             natoms = valid_batch["natoms_vec"]
diff --git a/source/tests/test_init_frz_model_se_r.py b/source/tests/tf/test_init_frz_model_se_r.py
similarity index 84%
rename from source/tests/test_init_frz_model_se_r.py
rename to source/tests/tf/test_init_frz_model_se_r.py
index 34eca9bd05..100c09196e 100644
--- a/source/tests/test_init_frz_model_se_r.py
+++ b/source/tests/tf/test_init_frz_model_se_r.py
@@ -4,32 +4,33 @@
 import unittest
 
 import numpy as np
-from common import (
-    j_loader,
-    run_dp,
-    tests_path,
-)
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     tf,
 )
-from deepmd.train.run_options import (
+from deepmd.tf.train.run_options import (
     RunOptions,
 )
-from deepmd.train.trainer import (
+from deepmd.tf.train.trainer import (
     DPTrainer,
 )
-from deepmd.utils.argcheck import (
+from deepmd.tf.utils.argcheck import (
     normalize,
 )
-from deepmd.utils.compat import (
+from deepmd.tf.utils.compat import (
     update_deepmd_input,
 )
-from deepmd.utils.data_system import (
+from deepmd.tf.utils.data_system import (
     DeepmdDataSystem,
 )
 
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -136,20 +137,19 @@ def _init_models():
     return INPUT, ckpt, frozen_model, model_ckpt, model_frz, data, stop_batch
 
 
-(
-    INPUT,
-    CKPT,
-    FROZEN_MODEL,
-    CKPT_TRAINER,
-    FRZ_TRAINER,
-    VALID_DATA,
-    STOP_BATCH,
-) = _init_models()
-
-
 class TestInitFrzModelR(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
+        (
+            cls.INPUT,
+            cls.CKPT,
+            cls.FROZEN_MODEL,
+            CKPT_TRAINER,
+            FRZ_TRAINER,
+            VALID_DATA,
+            STOP_BATCH,
+        ) = _init_models()
+
         cls.dp_ckpt = CKPT_TRAINER
         cls.dp_frz = FRZ_TRAINER
         cls.valid_data = VALID_DATA
@@ -157,19 +157,19 @@ def setUpClass(cls):
 
     @classmethod
     def tearDownClass(cls):
-        _file_delete(INPUT)
-        _file_delete(FROZEN_MODEL)
+        _file_delete(cls.INPUT)
+        _file_delete(cls.FROZEN_MODEL)
         _file_delete("out.json")
         _file_delete(str(tests_path / "checkpoint"))
-        _file_delete(CKPT + ".meta")
-        _file_delete(CKPT + ".index")
-        _file_delete(CKPT + ".data-00000-of-00001")
-        _file_delete(CKPT + "-0.meta")
-        _file_delete(CKPT + "-0.index")
-        _file_delete(CKPT + "-0.data-00000-of-00001")
-        _file_delete(CKPT + "-1.meta")
-        _file_delete(CKPT + "-1.index")
-        _file_delete(CKPT + "-1.data-00000-of-00001")
+        _file_delete(cls.CKPT + ".meta")
+        _file_delete(cls.CKPT + ".index")
+        _file_delete(cls.CKPT + ".data-00000-of-00001")
+        _file_delete(cls.CKPT + "-0.meta")
+        _file_delete(cls.CKPT + "-0.index")
+        _file_delete(cls.CKPT + "-0.data-00000-of-00001")
+        _file_delete(cls.CKPT + "-1.meta")
+        _file_delete(cls.CKPT + "-1.index")
+        _file_delete(cls.CKPT + "-1.data-00000-of-00001")
         _file_delete("input_v2_compat.json")
         _file_delete("lcurve.out")
 
diff --git a/source/tests/test_init_frz_model_spin.py b/source/tests/tf/test_init_frz_model_spin.py
similarity index 85%
rename from source/tests/test_init_frz_model_spin.py
rename to source/tests/tf/test_init_frz_model_spin.py
index c6f257dd7b..c2c433cde0 100644
--- a/source/tests/test_init_frz_model_spin.py
+++ b/source/tests/tf/test_init_frz_model_spin.py
@@ -4,32 +4,33 @@
 import unittest
 
 import numpy as np
-from common import (
-    j_loader,
-    run_dp,
-    tests_path,
-)
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     tf,
 )
-from deepmd.train.run_options import (
+from deepmd.tf.train.run_options import (
     RunOptions,
 )
-from deepmd.train.trainer import (
+from deepmd.tf.train.trainer import (
     DPTrainer,
 )
-from deepmd.utils.argcheck import (
+from deepmd.tf.utils.argcheck import (
     normalize,
 )
-from deepmd.utils.compat import (
+from deepmd.tf.utils.compat import (
     update_deepmd_input,
 )
-from deepmd.utils.data_system import (
+from deepmd.tf.utils.data_system import (
     DeepmdDataSystem,
 )
 
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -140,20 +141,19 @@ def _init_models():
     return INPUT, ckpt, frozen_model, model_ckpt, model_frz, data, stop_batch
 
 
-(
-    INPUT,
-    CKPT,
-    FROZEN_MODEL,
-    CKPT_TRAINER,
-    FRZ_TRAINER,
-    VALID_DATA,
-    STOP_BATCH,
-) = _init_models()
-
-
 class TestInitFrzModelR(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
+        (
+            cls.INPUT,
+            cls.CKPT,
+            cls.FROZEN_MODEL,
+            CKPT_TRAINER,
+            FRZ_TRAINER,
+            VALID_DATA,
+            STOP_BATCH,
+        ) = _init_models()
+
         cls.dp_ckpt = CKPT_TRAINER
         cls.dp_frz = FRZ_TRAINER
         cls.valid_data = VALID_DATA
@@ -161,19 +161,19 @@ def setUpClass(cls):
 
     @classmethod
     def tearDownClass(cls):
-        _file_delete(INPUT)
-        _file_delete(FROZEN_MODEL)
+        _file_delete(cls.INPUT)
+        _file_delete(cls.FROZEN_MODEL)
         _file_delete("out.json")
         _file_delete(str(tests_path / "checkpoint"))
-        _file_delete(CKPT + ".meta")
-        _file_delete(CKPT + ".index")
-        _file_delete(CKPT + ".data-00000-of-00001")
-        _file_delete(CKPT + "-0.meta")
-        _file_delete(CKPT + "-0.index")
-        _file_delete(CKPT + "-0.data-00000-of-00001")
-        _file_delete(CKPT + "-1.meta")
-        _file_delete(CKPT + "-1.index")
-        _file_delete(CKPT + "-1.data-00000-of-00001")
+        _file_delete(cls.CKPT + ".meta")
+        _file_delete(cls.CKPT + ".index")
+        _file_delete(cls.CKPT + ".data-00000-of-00001")
+        _file_delete(cls.CKPT + "-0.meta")
+        _file_delete(cls.CKPT + "-0.index")
+        _file_delete(cls.CKPT + "-0.data-00000-of-00001")
+        _file_delete(cls.CKPT + "-1.meta")
+        _file_delete(cls.CKPT + "-1.index")
+        _file_delete(cls.CKPT + "-1.data-00000-of-00001")
         _file_delete("input_v2_compat.json")
         _file_delete("lcurve.out")
 
diff --git a/source/tests/test_lammps.py b/source/tests/tf/test_lammps.py
similarity index 82%
rename from source/tests/test_lammps.py
rename to source/tests/tf/test_lammps.py
index 19dbe70ade..b295d5212a 100644
--- a/source/tests/test_lammps.py
+++ b/source/tests/tf/test_lammps.py
@@ -2,14 +2,15 @@
 import os
 import subprocess
 import unittest
-from pathlib import (
-    Path,
-)
 
-from deepmd.utils.convert import (
+from deepmd.tf.utils.convert import (
     convert_pbtxt_to_pb,
 )
 
+from .common import (
+    infer_path,
+)
+
 
 @unittest.skipIf(
     os.environ.get("CIBUILDWHEEL", "0") != "1",
@@ -20,7 +21,7 @@ class TestLAMMPS(unittest.TestCase):
 
     @classmethod
     def setUpClass(cls):
-        cls.work_dir = (Path(__file__).parent / "infer").absolute()
+        cls.work_dir = infer_path
 
         convert_pbtxt_to_pb(
             str(cls.work_dir / "deeppot.pbtxt"), str(cls.work_dir / "deep_pot.pb")
diff --git a/source/tests/test_layer_name.py b/source/tests/tf/test_layer_name.py
similarity index 94%
rename from source/tests/test_layer_name.py
rename to source/tests/tf/test_layer_name.py
index c6a2f0b09c..089bd19dd1 100644
--- a/source/tests/test_layer_name.py
+++ b/source/tests/tf/test_layer_name.py
@@ -1,29 +1,30 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
-from common import (
-    DataSystem,
-    del_data,
-    gen_data,
-    j_loader,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     DipoleFittingSeA,
     EnerFitting,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     MultiModel,
 )
 
+from .common import (
+    DataSystem,
+    del_data,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -65,7 +66,8 @@ def test_model(self):
             fitting_type_dict[fitting_key] = item_fitting_type
             item_fitting_param.pop("type", None)
             item_fitting_param.pop("fit_diag", None)
-            item_fitting_param["descrpt"] = descrpt
+            item_fitting_param["ntypes"] = descrpt.get_ntypes()
+            item_fitting_param["dim_descrpt"] = descrpt.get_dim_out()
             if item_fitting_type == "ener":
                 fitting_dict[fitting_key] = EnerFitting(
                     **item_fitting_param, uniform_seed=True
diff --git a/source/tests/test_linear_model.py b/source/tests/tf/test_linear_model.py
similarity index 91%
rename from source/tests/test_linear_model.py
rename to source/tests/tf/test_linear_model.py
index 21f0f6efc8..95ece9c19f 100644
--- a/source/tests/test_linear_model.py
+++ b/source/tests/tf/test_linear_model.py
@@ -1,30 +1,28 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import os
-import sys
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_ENER_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     tf,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPotential,
 )
-from deepmd.model.linear import (
+from deepmd.tf.model.linear import (
     LinearEnergyModel,
 )
-from deepmd.utils.convert import (
+from deepmd.tf.utils.convert import (
     convert_pbtxt_to_pb,
 )
 
-sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
-from common import (
+from .common import (
     DataSystem,
     del_data,
     gen_data,
-    tests_path,
+    infer_path,
 )
 
 
@@ -35,8 +33,8 @@ def setUp(self):
         with open(os.path.join(self.data_dir, "type_map.raw"), "w") as f:
             f.write("O\nH")
         self.pbtxts = [
-            os.path.join(tests_path, "infer/deeppot.pbtxt"),
-            os.path.join(tests_path, "infer/deeppot-1.pbtxt"),
+            os.path.join(infer_path, "deeppot.pbtxt"),
+            os.path.join(infer_path, "deeppot-1.pbtxt"),
         ]
         self.graph_dirs = [pbtxt.replace("pbtxt", "pb") for pbtxt in self.pbtxts]
         for pbtxt, pb in zip(self.pbtxts, self.graph_dirs):
diff --git a/source/tests/test_loss_gf.py b/source/tests/tf/test_loss_gf.py
similarity index 98%
rename from source/tests/test_loss_gf.py
rename to source/tests/tf/test_loss_gf.py
index 04f40d943b..78e5404e03 100644
--- a/source/tests/test_loss_gf.py
+++ b/source/tests/tf/test_loss_gf.py
@@ -2,7 +2,7 @@
 import numpy as np
 import tensorflow as tf
 
-from deepmd.loss import (
+from deepmd.tf.loss import (
     EnerStdLoss,
 )
 
diff --git a/source/tests/test_mixed_prec_training.py b/source/tests/tf/test_mixed_prec_training.py
similarity index 80%
rename from source/tests/test_mixed_prec_training.py
rename to source/tests/tf/test_mixed_prec_training.py
index d4c859f958..4a4021771d 100644
--- a/source/tests/test_mixed_prec_training.py
+++ b/source/tests/tf/test_mixed_prec_training.py
@@ -1,25 +1,24 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import json
 import os
-import subprocess as sp
 import unittest
 
 import numpy as np
-
-# from deepmd.entrypoints.compress import compress
-from common import (
-    j_loader,
-    run_dp,
-    tests_path,
-)
 from packaging.version import (
     Version,
 )
 
-from deepmd.env import (
+from deepmd.tf.env import (
     TF_VERSION,
 )
 
+# from deepmd.tf.entrypoints.compress import compress
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
 
 def _file_delete(file):
     if os.path.isdir(file):
@@ -28,17 +27,6 @@ def _file_delete(file):
         os.remove(file)
 
 
-def _subprocess_run(command):
-    popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
-    for line in iter(popen.stdout.readline, b""):
-        if hasattr(line, "decode"):
-            line = line.decode("utf-8")
-        line = line.rstrip()
-        print(line)
-    popen.wait()
-    return popen.returncode
-
-
 class TestMixedPrecTraining(unittest.TestCase):
     def setUp(self):
         data_file = str(tests_path / os.path.join("model_compression", "data"))
diff --git a/source/tests/test_model_compression_se_a.py b/source/tests/tf/test_model_compression_se_a.py
similarity index 97%
rename from source/tests/test_model_compression_se_a.py
rename to source/tests/tf/test_model_compression_se_a.py
index 0e6e1361ad..4e49dd44e0 100644
--- a/source/tests/test_model_compression_se_a.py
+++ b/source/tests/tf/test_model_compression_se_a.py
@@ -1,25 +1,24 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import json
 import os
-import subprocess as sp
 import unittest
 
 import numpy as np
 
-# from deepmd.entrypoints.compress import compress
-from common import (
-    j_loader,
-    run_dp,
-    tests_path,
-)
-
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPot,
 )
 
+# from deepmd.tf.entrypoints.compress import compress
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -33,17 +32,6 @@ def _file_delete(file):
         os.remove(file)
 
 
-def _subprocess_run(command):
-    popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
-    for line in iter(popen.stdout.readline, b""):
-        if hasattr(line, "decode"):
-            line = line.decode("utf-8")
-        line = line.rstrip()
-        print(line)
-    popen.wait()
-    return popen.returncode
-
-
 def _init_models():
     data_file = str(tests_path / os.path.join("model_compression", "data"))
     frozen_model = str(tests_path / "dp-original.pb")
@@ -404,7 +392,7 @@ def test_ase(self):
             Atoms,
         )
 
-        from deepmd.calculator import (
+        from deepmd.tf.calculator import (
             DP,
         )
 
diff --git a/source/tests/test_model_compression_se_a_ebd.py b/source/tests/tf/test_model_compression_se_a_ebd.py
similarity index 97%
rename from source/tests/test_model_compression_se_a_ebd.py
rename to source/tests/tf/test_model_compression_se_a_ebd.py
index 2a3163b062..debae1f0ba 100644
--- a/source/tests/test_model_compression_se_a_ebd.py
+++ b/source/tests/tf/test_model_compression_se_a_ebd.py
@@ -1,25 +1,24 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import json
 import os
-import subprocess as sp
 import unittest
 
 import numpy as np
 
-# from deepmd.entrypoints.compress import compress
-from common import (
-    j_loader,
-    run_dp,
-    tests_path,
-)
-
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPot,
 )
 
+# from deepmd.tf.entrypoints.compress import compress
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -33,17 +32,6 @@ def _file_delete(file):
         os.remove(file)
 
 
-def _subprocess_run(command):
-    popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
-    for line in iter(popen.stdout.readline, b""):
-        if hasattr(line, "decode"):
-            line = line.decode("utf-8")
-        line = line.rstrip()
-        print(line)
-    popen.wait()
-    return popen.returncode
-
-
 def _init_models():
     data_file = str(tests_path / os.path.join("model_compression", "data"))
     frozen_model = str(tests_path / "dp-original-se-e2-a-v2.pb")
@@ -416,7 +404,7 @@ def test_ase(self):
             Atoms,
         )
 
-        from deepmd.calculator import (
+        from deepmd.tf.calculator import (
             DP,
         )
 
diff --git a/source/tests/test_model_compression_se_a_ebd_type_one_side.py b/source/tests/tf/test_model_compression_se_a_ebd_type_one_side.py
similarity index 96%
rename from source/tests/test_model_compression_se_a_ebd_type_one_side.py
rename to source/tests/tf/test_model_compression_se_a_ebd_type_one_side.py
index 2f3d16b05f..a24bf48398 100644
--- a/source/tests/test_model_compression_se_a_ebd_type_one_side.py
+++ b/source/tests/tf/test_model_compression_se_a_ebd_type_one_side.py
@@ -1,25 +1,24 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import json
 import os
-import subprocess as sp
 import unittest
 
 import numpy as np
 
-# from deepmd.entrypoints.compress import compress
-from common import (
-    j_loader,
-    run_dp,
-    tests_path,
-)
-
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPot,
 )
 
+# from deepmd.tf.entrypoints.compress import compress
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -33,17 +32,6 @@ def _file_delete(file):
         os.remove(file)
 
 
-def _subprocess_run(command):
-    popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
-    for line in iter(popen.stdout.readline, b""):
-        if hasattr(line, "decode"):
-            line = line.decode("utf-8")
-        line = line.rstrip()
-        print(line)
-    popen.wait()
-    return popen.returncode
-
-
 def _init_models():
     data_file = str(tests_path / os.path.join("model_compression", "data"))
     frozen_model = str(tests_path / "dp-original-se-e2-a-v2-one-side.pb")
@@ -98,7 +86,6 @@ def _init_models_exclude_types():
 
 
 INPUT, FROZEN_MODEL, COMPRESSED_MODEL = _init_models()
-INPUT_ET, FROZEN_MODEL_ET, COMPRESSED_MODEL_ET = _init_models_exclude_types()
 
 
 class TestDeepPotAPBC(unittest.TestCase):
@@ -416,7 +403,7 @@ def test_ase(self):
             Atoms,
         )
 
-        from deepmd.calculator import (
+        from deepmd.tf.calculator import (
             DP,
         )
 
@@ -444,8 +431,13 @@ def test_ase(self):
 class TestDeepPotAPBCExcludeTypes(unittest.TestCase):
     @classmethod
     def setUpClass(self):
-        self.dp_original = DeepPot(FROZEN_MODEL_ET)
-        self.dp_compressed = DeepPot(COMPRESSED_MODEL_ET)
+        (
+            self.INPUT_ET,
+            self.FROZEN_MODEL_ET,
+            self.COMPRESSED_MODEL_ET,
+        ) = _init_models_exclude_types()
+        self.dp_original = DeepPot(self.FROZEN_MODEL_ET)
+        self.dp_compressed = DeepPot(self.COMPRESSED_MODEL_ET)
         self.coords = np.array(
             [
                 12.83,
@@ -473,9 +465,9 @@ def setUpClass(self):
 
     @classmethod
     def tearDownClass(self):
-        _file_delete(INPUT_ET)
-        _file_delete(FROZEN_MODEL_ET)
-        _file_delete(COMPRESSED_MODEL_ET)
+        _file_delete(self.INPUT_ET)
+        _file_delete(self.FROZEN_MODEL_ET)
+        _file_delete(self.COMPRESSED_MODEL_ET)
         _file_delete("out.json")
         _file_delete("compress.json")
         _file_delete("checkpoint")
diff --git a/source/tests/test_model_compression_se_a_type_one_side_exclude_types.py b/source/tests/tf/test_model_compression_se_a_type_one_side_exclude_types.py
similarity index 92%
rename from source/tests/test_model_compression_se_a_type_one_side_exclude_types.py
rename to source/tests/tf/test_model_compression_se_a_type_one_side_exclude_types.py
index 10ce352b6c..a9de974e4d 100644
--- a/source/tests/test_model_compression_se_a_type_one_side_exclude_types.py
+++ b/source/tests/tf/test_model_compression_se_a_type_one_side_exclude_types.py
@@ -1,25 +1,24 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import json
 import os
-import subprocess as sp
 import unittest
 
 import numpy as np
 
-# from deepmd.entrypoints.compress import compress
-from common import (
-    j_loader,
-    run_dp,
-    tests_path,
-)
-
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPot,
 )
 
+# from deepmd.tf.entrypoints.compress import compress
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -33,17 +32,6 @@ def _file_delete(file):
         os.remove(file)
 
 
-def _subprocess_run(command):
-    popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
-    for line in iter(popen.stdout.readline, b""):
-        if hasattr(line, "decode"):
-            line = line.decode("utf-8")
-        line = line.rstrip()
-        print(line)
-    popen.wait()
-    return popen.returncode
-
-
 def _init_models():
     data_file = str(tests_path / os.path.join("model_compression", "data"))
     frozen_model = str(tests_path / "dp-original-type-one-side-exclude-types.pb")
@@ -66,12 +54,11 @@ def _init_models():
     return INPUT, frozen_model, compressed_model
 
 
-INPUT, FROZEN_MODEL, COMPRESSED_MODEL = _init_models()
-
-
 class TestDeepPotAPBCTypeOneSideExcludeTypes(unittest.TestCase):
     @classmethod
     def setUpClass(self):
+        INPUT, FROZEN_MODEL, COMPRESSED_MODEL = _init_models()
+
         self.dp_original = DeepPot(FROZEN_MODEL)
         self.dp_compressed = DeepPot(COMPRESSED_MODEL)
         self.coords = np.array(
diff --git a/source/tests/test_model_compression_se_atten.py b/source/tests/tf/test_model_compression_se_atten.py
similarity index 98%
rename from source/tests/test_model_compression_se_atten.py
rename to source/tests/tf/test_model_compression_se_atten.py
index 6bab1a3881..aa1f0afa38 100644
--- a/source/tests/test_model_compression_se_atten.py
+++ b/source/tests/tf/test_model_compression_se_atten.py
@@ -1,26 +1,25 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import json
 import os
-import subprocess as sp
 import unittest
 
 import numpy as np
-
-# from deepmd.entrypoints.compress import compress
-from common import (
-    j_loader,
-    run_dp,
-    tests_path,
-)
 from packaging.version import parse as parse_version
 
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPot,
 )
 
+# from deepmd.tf.entrypoints.compress import compress
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
 
 def _file_delete(file):
     if os.path.isdir(file):
@@ -29,17 +28,6 @@ def _file_delete(file):
         os.remove(file)
 
 
-def _subprocess_run(command):
-    popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
-    for line in iter(popen.stdout.readline, b""):
-        if hasattr(line, "decode"):
-            line = line.decode("utf-8")
-        line = line.rstrip()
-        print(line)
-    popen.wait()
-    return popen.returncode
-
-
 # 4 tests:
 # - type embedding FP64, se_atten FP64
 # - type embedding FP64, se_atten FP32
@@ -552,7 +540,7 @@ def test_ase(self):
                 Atoms,
             )
 
-            from deepmd.calculator import (
+            from deepmd.tf.calculator import (
                 DP,
             )
 
diff --git a/source/tests/test_model_compression_se_r.py b/source/tests/tf/test_model_compression_se_r.py
similarity index 97%
rename from source/tests/test_model_compression_se_r.py
rename to source/tests/tf/test_model_compression_se_r.py
index f79cdbee6c..26665e5354 100644
--- a/source/tests/test_model_compression_se_r.py
+++ b/source/tests/tf/test_model_compression_se_r.py
@@ -1,25 +1,24 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import json
 import os
-import subprocess as sp
 import unittest
 
 import numpy as np
 
-# from deepmd.entrypoints.compress import compress
-from common import (
-    j_loader,
-    run_dp,
-    tests_path,
-)
-
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPot,
 )
 
+# from deepmd.tf.entrypoints.compress import compress
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -33,17 +32,6 @@ def _file_delete(file):
         os.remove(file)
 
 
-def _subprocess_run(command):
-    popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
-    for line in iter(popen.stdout.readline, b""):
-        if hasattr(line, "decode"):
-            line = line.decode("utf-8")
-        line = line.rstrip()
-        print(line)
-    popen.wait()
-    return popen.returncode
-
-
 def _init_models():
     data_file = str(tests_path / os.path.join("model_compression", "data"))
     frozen_model = str(tests_path / "dp-original-se-r.pb")
@@ -390,7 +378,7 @@ def test_ase(self):
             Atoms,
         )
 
-        from deepmd.calculator import (
+        from deepmd.tf.calculator import (
             DP,
         )
 
diff --git a/source/tests/test_model_compression_se_t.py b/source/tests/tf/test_model_compression_se_t.py
similarity index 97%
rename from source/tests/test_model_compression_se_t.py
rename to source/tests/tf/test_model_compression_se_t.py
index 48fee4ea1d..ec68176cdb 100644
--- a/source/tests/test_model_compression_se_t.py
+++ b/source/tests/tf/test_model_compression_se_t.py
@@ -1,25 +1,24 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import json
 import os
-import subprocess as sp
 import unittest
 
 import numpy as np
 
-# from deepmd.entrypoints.compress import compress
-from common import (
-    j_loader,
-    run_dp,
-    tests_path,
-)
-
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPot,
 )
 
+# from deepmd.tf.entrypoints.compress import compress
+from .common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -33,17 +32,6 @@ def _file_delete(file):
         os.remove(file)
 
 
-def _subprocess_run(command):
-    popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
-    for line in iter(popen.stdout.readline, b""):
-        if hasattr(line, "decode"):
-            line = line.decode("utf-8")
-        line = line.rstrip()
-        print(line)
-    popen.wait()
-    return popen.returncode
-
-
 def _init_models():
     data_file = str(tests_path / os.path.join("model_compression", "data"))
     frozen_model = str(tests_path / "dp-original-se-t.pb")
@@ -412,7 +400,7 @@ def test_ase(self):
             Atoms,
         )
 
-        from deepmd.calculator import (
+        from deepmd.tf.calculator import (
             DP,
         )
 
diff --git a/source/tests/test_model_devi.py b/source/tests/tf/test_model_devi.py
similarity index 96%
rename from source/tests/test_model_devi.py
rename to source/tests/tf/test_model_devi.py
index c7d050cd76..58a6266ca9 100644
--- a/source/tests/test_model_devi.py
+++ b/source/tests/tf/test_model_devi.py
@@ -1,29 +1,27 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import os
-import sys
 import unittest
 
 import numpy as np
 
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPotential,
     calc_model_devi,
 )
-from deepmd.infer.model_devi import (
+from deepmd.tf.infer.model_devi import (
     make_model_devi,
 )
+from deepmd.tf.utils.convert import (
+    convert_pbtxt_to_pb,
+)
 
-sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
-from common import (
+from .common import (
     del_data,
     gen_data,
+    infer_path,
     tests_path,
 )
 
-from deepmd.utils.convert import (
-    convert_pbtxt_to_pb,
-)
-
 
 class TestMakeModelDevi(unittest.TestCase):
     def setUp(self):
@@ -39,8 +37,8 @@ def setUp(self):
         self.freq = 10
 
         self.pbtxts = [
-            os.path.join(tests_path, "infer/deeppot.pbtxt"),
-            os.path.join(tests_path, "infer/deeppot-1.pbtxt"),
+            os.path.join(infer_path, "deeppot.pbtxt"),
+            os.path.join(infer_path, "deeppot-1.pbtxt"),
         ]
         self.graph_dirs = [pbtxt.replace("pbtxt", "pb") for pbtxt in self.pbtxts]
         for pbtxt, pb in zip(self.pbtxts, self.graph_dirs):
@@ -215,7 +213,7 @@ class TestMakeModelDeviFparamAparam(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls.pbtxts = [
-            os.path.join(tests_path, "infer/fparam_aparam.pbtxt"),
+            os.path.join(infer_path, "fparam_aparam.pbtxt"),
         ]
         cls.graph_dirs = [pbtxt.replace("pbtxt", "pb") for pbtxt in cls.pbtxts]
         for pbtxt, pb in zip(cls.pbtxts, cls.graph_dirs):
diff --git a/source/tests/test_model_devi_mix.py b/source/tests/tf/test_model_devi_mix.py
similarity index 91%
rename from source/tests/test_model_devi_mix.py
rename to source/tests/tf/test_model_devi_mix.py
index 98caf409eb..d9062e939a 100644
--- a/source/tests/test_model_devi_mix.py
+++ b/source/tests/tf/test_model_devi_mix.py
@@ -1,32 +1,30 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import os
-import sys
 import unittest
 
 import numpy as np
+from packaging.version import parse as parse_version
 
-from deepmd.infer import (
+from deepmd.tf.env import (
+    tf,
+)
+from deepmd.tf.infer import (
     DeepPotential,
     calc_model_devi,
 )
-from deepmd.infer.model_devi import (
+from deepmd.tf.infer.model_devi import (
     make_model_devi,
 )
+from deepmd.tf.utils.convert import (
+    convert_pbtxt_to_pb,
+)
 
-sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
-from common import (
+from .common import (
     del_data,
     gen_data,
+    infer_path,
     tests_path,
 )
-from packaging.version import parse as parse_version
-
-from deepmd.env import (
-    tf,
-)
-from deepmd.utils.convert import (
-    convert_pbtxt_to_pb,
-)
 
 
 @unittest.skipIf(
@@ -56,8 +54,8 @@ def setUp(self):
         )
 
         self.pbtxts = [
-            os.path.join(tests_path, "infer/se_atten_no_atten_1.pbtxt"),
-            os.path.join(tests_path, "infer/se_atten_no_atten_2.pbtxt"),
+            os.path.join(infer_path, "se_atten_no_atten_1.pbtxt"),
+            os.path.join(infer_path, "se_atten_no_atten_2.pbtxt"),
         ]
         self.graph_dirs = [pbtxt.replace("pbtxt", "pb") for pbtxt in self.pbtxts]
         for pbtxt, pb in zip(self.pbtxts, self.graph_dirs):
diff --git a/source/tests/test_model_dos.py b/source/tests/tf/test_model_dos.py
similarity index 51%
rename from source/tests/test_model_dos.py
rename to source/tests/tf/test_model_dos.py
index c7160d4dda..9c01b14e32 100644
--- a/source/tests/test_model_dos.py
+++ b/source/tests/tf/test_model_dos.py
@@ -1,28 +1,29 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
-from common import (
-    DataSystem,
-    del_data,
-    gen_data,
-    j_loader,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     DOSFitting,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     DOSModel,
 )
 
+from .common import (
+    DataSystem,
+    del_data,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -65,7 +66,8 @@ def test_model(self):
         descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
 
         jdata["model"]["fitting_net"].pop("type", None)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
         fitting = DOSFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         model = DOSModel(descrpt, fitting)
 
@@ -122,106 +124,106 @@ def test_model(self):
 
         ref_dos = np.array(
             [
-                -2.98834333,
-                -0.63166985,
-                -3.37199568,
-                -1.88397887,
-                0.87560992,
-                4.85426159,
-                -1.22677731,
-                -0.60918118,
-                8.80472675,
-                -1.12006829,
-                -3.72653765,
-                -3.03698828,
-                3.50906891,
-                5.55140795,
-                -3.34920924,
-                -4.43507641,
-                -6.1729281,
-                -8.34865917,
-                0.14371788,
-                -4.38078479,
-                -6.43141133,
-                4.07791938,
-                7.14102837,
-                -0.52347718,
-                0.82663796,
-                -1.64225631,
-                -4.63088421,
-                3.3910594,
-                -9.09682274,
-                1.61104204,
-                4.45900773,
-                -2.44688559,
-                -2.83298183,
-                -2.00733658,
-                7.33444256,
-                7.09187373,
-                -1.97065392,
-                0.01623084,
-                -7.48861264,
-                -1.17790161,
-                2.77126775,
-                -2.55552037,
-                3.3518257,
-                -0.09316856,
-                -1.94521413,
-                0.50089251,
-                -2.75763233,
-                -1.94382637,
-                1.30562041,
-                5.08351043,
-                -1.90604837,
-                -0.80030045,
-                -4.87093267,
-                4.18009666,
-                -2.9011435,
-                2.58497143,
-                4.47495176,
-                -0.9639419,
-                8.15692179,
-                0.48758731,
-                -0.62264663,
-                -1.70677258,
-                -5.51641378,
-                3.98621565,
-                0.57749944,
-                2.9658081,
-                -4.10467591,
-                -7.14827888,
-                0.02838605,
-                -2.48630333,
-                -4.82178216,
-                -0.7444178,
-                2.48224802,
-                -1.54683936,
-                0.46969412,
-                -0.0960347,
-                -2.08290541,
-                6.357031,
-                -3.49716615,
-                3.28959028,
-                7.83932727,
-                1.51457023,
-                -4.14575033,
-                0.02007839,
-                4.20953773,
-                3.66456664,
-                -4.67441496,
-                -0.13296372,
-                -3.77145766,
-                1.49368976,
-                -2.53627817,
-                -3.14188618,
-                0.24991722,
-                0.8770123,
-                0.16635733,
-                -3.15391098,
-                -3.7733242,
-                -2.25134676,
-                1.00975552,
-                1.38717682,
+                -1.98049388,
+                -4.58033899,
+                -6.95508968,
+                -0.79619016,
+                15.58478599,
+                2.7636959,
+                -2.99147438,
+                -6.94430794,
+                -1.77877141,
+                -4.5000298,
+                -3.12026893,
+                -8.42191319,
+                3.8991195,
+                4.85271854,
+                8.30541908,
+                -1.0435944,
+                -4.42713079,
+                19.70011955,
+                -6.53945284,
+                0.85064846,
+                4.36868488,
+                4.77303801,
+                3.00829128,
+                0.70043584,
+                -7.69047143,
+                -0.0647043,
+                4.56830405,
+                -8.67154404,
+                -4.64015279,
+                -7.62202078,
+                -8.97078455,
+                -5.19685985,
+                -1.66080276,
+                -6.03225716,
+                -4.06780949,
+                -0.53046979,
+                8.3543131,
+                -1.84893576,
+                2.42669245,
+                -4.26357086,
+                -11.33995527,
+                10.98529887,
+                -10.70000829,
+                -4.50179402,
+                -1.34978505,
+                -8.83091676,
+                -11.85324773,
+                -3.6305035,
+                2.89933807,
+                4.65750153,
+                1.25464578,
+                -5.06196944,
+                10.05305042,
+                -1.83868447,
+                -11.57017913,
+                -2.03900316,
+                -3.37235187,
+                -1.37010554,
+                -2.93769471,
+                0.11905709,
+                6.99367431,
+                3.48640865,
+                -4.16242817,
+                4.44778342,
+                -0.98405367,
+                1.81581506,
+                -5.31481686,
+                8.72426364,
+                4.78954098,
+                7.67879332,
+                -5.00417706,
+                0.79717914,
+                -3.20581567,
+                -2.96034568,
+                6.31165294,
+                2.9891188,
+                -12.2013139,
+                -13.67496037,
+                4.77102881,
+                2.71353286,
+                6.83849229,
+                -3.50400312,
+                1.3839428,
+                -5.07550528,
+                -8.5623218,
+                17.64081151,
+                6.46051807,
+                2.89067584,
+                14.23057359,
+                17.85941763,
+                -6.46129295,
+                -3.43602528,
+                -3.13520203,
+                4.45313732,
+                -5.23012576,
+                -2.65929557,
+                -0.66191939,
+                4.47530191,
+                9.33992973,
+                -6.29808733,
             ]
         )
 
@@ -229,104 +231,104 @@ def test_model(self):
             [
                 -0.33019322,
                 -0.76332506,
-                -0.32665648,
-                -0.76601747,
-                -1.16441856,
-                -0.13627609,
                 -1.15916671,
                 -0.13280604,
-                2.60139518,
-                0.44470952,
-                -0.48316771,
-                -1.15926141,
                 2.59680457,
                 0.46049936,
-                -0.29459777,
-                -0.76433726,
-                -0.52091744,
-                -1.39903065,
                 -0.49890317,
                 -1.15747878,
-                0.66585524,
-                0.81804842,
-                1.38592217,
-                -0.18025826,
                 -0.2964021,
                 -0.74953328,
-                -0.7427461,
-                3.27935087,
-                -1.09340192,
-                0.1462458,
                 -0.51982728,
                 -1.40236941,
-                0.73902497,
-                0.79969456,
-                0.50726592,
-                0.11403234,
                 0.64964525,
                 0.8084967,
-                -1.27543102,
-                -0.00571457,
-                0.7748912,
-                -1.42492251,
                 1.38371838,
                 -0.17366078,
-                -0.76119888,
-                -1.26083707,
-                -1.48263244,
-                -0.85698727,
                 -0.7374573,
                 3.28274006,
-                -0.27029769,
-                -1.00478711,
-                -0.67481511,
-                -0.07978058,
                 -1.09001574,
                 0.14173437,
-                1.4092343,
-                -0.31785424,
-                0.40551362,
-                -0.71900495,
                 0.7269307,
                 0.79545851,
-                -1.88407155,
-                1.83983772,
-                -1.78413438,
-                -0.74852344,
                 0.50059876,
                 0.1165872,
-                -0.2139368,
-                -1.44989426,
-                -1.96651281,
-                -0.6031689,
                 -1.28106632,
                 -0.01107711,
-                0.48796663,
-                0.76500912,
-                0.21308153,
-                -0.85297893,
                 0.76139868,
                 -1.44547292,
-                1.68105021,
-                -0.30655702,
-                -1.93123,
-                -0.34294737,
                 -0.77352498,
                 -1.26982082,
-                -0.5562998,
-                -0.22048683,
-                -0.48641512,
-                0.01124872,
                 -1.49597963,
                 -0.86647985,
-                1.17310075,
-                0.59402879,
-                -0.705076,
-                0.72991794,
                 -0.27728806,
                 -1.00542829,
-                -0.16289102,
-                0.29464248,
+                -0.67794229,
+                -0.08898442,
+                1.39205396,
+                -0.30789099,
+                0.40393006,
+                -0.70982912,
+                -1.88961087,
+                1.830906,
+                -1.78326071,
+                -0.75013615,
+                -0.22537904,
+                -1.47257916,
+                -1.9756803,
+                -0.60493323,
+                0.48350014,
+                0.77676571,
+                0.20885468,
+                -0.84351691,
+                1.67501205,
+                -0.30662021,
+                -1.92884376,
+                -0.34021625,
+                -0.56212664,
+                -0.22884438,
+                -0.4891038,
+                0.0199886,
+                1.16506594,
+                0.58068956,
+                -0.69376438,
+                0.74156043,
+                -0.16360848,
+                0.30303168,
+                -0.88639571,
+                1.453683,
+                0.79818052,
+                1.2796414,
+                -0.8335433,
+                0.13359098,
+                -0.53425462,
+                -0.4939294,
+                1.05247266,
+                0.49770575,
+                -2.03320073,
+                -2.27918678,
+                0.79462598,
+                0.45187804,
+                1.13925239,
+                -0.58410808,
+                0.23092918,
+                -0.84611213,
+                -1.42726499,
+                2.93985879,
+                1.07635712,
+                0.48092082,
+                2.37197063,
+                2.97647126,
+                -1.07670667,
+                -0.57300341,
+                -0.52316403,
+                0.74274268,
+                -0.87188274,
+                -0.44279998,
+                -0.11060956,
+                0.74619435,
+                1.55646754,
+                -1.05043903,
             ]
         )
 
diff --git a/source/tests/test_model_loc_frame.py b/source/tests/tf/test_model_loc_frame.py
similarity index 94%
rename from source/tests/test_model_loc_frame.py
rename to source/tests/tf/test_model_loc_frame.py
index c493013316..84467b436a 100644
--- a/source/tests/test_model_loc_frame.py
+++ b/source/tests/tf/test_model_loc_frame.py
@@ -1,27 +1,28 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
-from common import (
-    DataSystem,
-    gen_data,
-    j_loader,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptLocFrame,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     EnerFitting,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     EnerModel,
 )
 
+from .common import (
+    DataSystem,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -52,7 +53,11 @@ def test_model(self):
         jdata["model"]["descriptor"].pop("_comment", None)
         descrpt = DescrptLocFrame(**jdata["model"]["descriptor"])
         fitting = EnerFitting(
-            descrpt, neuron=[240, 120, 60, 30, 10], seed=1, uniform_seed=True
+            descrpt.get_ntypes(),
+            descrpt.get_dim_out(),
+            neuron=[240, 120, 60, 30, 10],
+            seed=1,
+            uniform_seed=True,
         )
         model = EnerModel(
             descrpt,
diff --git a/source/tests/test_model_multi.py b/source/tests/tf/test_model_multi.py
similarity index 96%
rename from source/tests/test_model_multi.py
rename to source/tests/tf/test_model_multi.py
index 9017da22e7..66b0cce000 100644
--- a/source/tests/test_model_multi.py
+++ b/source/tests/tf/test_model_multi.py
@@ -1,31 +1,32 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
-from common import (
-    DataSystem,
-    del_data,
-    finite_difference,
-    gen_data,
-    j_loader,
-    strerch_box,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     DipoleFittingSeA,
     EnerFitting,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     MultiModel,
 )
 
+from .common import (
+    DataSystem,
+    del_data,
+    finite_difference,
+    gen_data,
+    j_loader,
+    strerch_box,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -68,6 +69,9 @@ def test_model(self):
             item_fitting_param.pop("type", None)
             item_fitting_param.pop("fit_diag", None)
             item_fitting_param["descrpt"] = descrpt
+            item_fitting_param["embedding_width"] = descrpt.get_dim_rot_mat_1()
+            item_fitting_param["ntypes"] = descrpt.get_ntypes()
+            item_fitting_param["dim_descrpt"] = descrpt.get_dim_out()
             if item_fitting_type == "ener":
                 fitting_dict[fitting_key] = EnerFitting(
                     **item_fitting_param, uniform_seed=True
diff --git a/source/tests/test_model_pairtab.py b/source/tests/tf/test_model_pairtab.py
similarity index 97%
rename from source/tests/test_model_pairtab.py
rename to source/tests/tf/test_model_pairtab.py
index fd678894b5..e2c45ee50c 100644
--- a/source/tests/test_model_pairtab.py
+++ b/source/tests/tf/test_model_pairtab.py
@@ -1,22 +1,23 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
 import scipy.spatial.distance
-from common import (
-    DataSystem,
-    gen_data,
-    j_loader,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.model.model import (
+from deepmd.tf.model.model import (
     Model,
 )
 
+from .common import (
+    DataSystem,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
diff --git a/source/tests/test_model_se_a.py b/source/tests/tf/test_model_se_a.py
similarity index 92%
rename from source/tests/test_model_se_a.py
rename to source/tests/tf/test_model_se_a.py
index d3b4323f0d..0b2f17e99c 100644
--- a/source/tests/test_model_se_a.py
+++ b/source/tests/tf/test_model_se_a.py
@@ -1,32 +1,33 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import dpdata
 import numpy as np
-from common import (
-    DataSystem,
-    del_data,
-    gen_data,
-    j_loader,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     EnerFitting,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     EnerModel,
 )
-from deepmd.utils.type_embed import (
+from deepmd.tf.utils.type_embed import (
     TypeEmbedNet,
 )
 
+from .common import (
+    DataSystem,
+    del_data,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -73,7 +74,9 @@ def test_model_atom_ener(self):
 
         jdata["model"]["descriptor"].pop("type", None)
         descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["dim_rot_mat_1"] = descrpt.get_dim_rot_mat_1()
         fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         model = EnerModel(descrpt, fitting)
 
@@ -153,7 +156,9 @@ def test_model(self):
 
         jdata["model"]["descriptor"].pop("type", None)
         descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["dim_rot_mat_1"] = descrpt.get_dim_rot_mat_1()
         fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         model = EnerModel(descrpt, fitting)
 
@@ -294,10 +299,15 @@ def test_model_atom_ener_type_embedding(self):
         test_data = data.get_test()
         numb_test = 1
 
-        typeebd = TypeEmbedNet(**jdata["model"]["type_embeding"])
+        typeebd = TypeEmbedNet(
+            ntypes=len(jdata["model"]["descriptor"]["sel"]),
+            **jdata["model"]["type_embeding"],
+        )
         jdata["model"]["descriptor"].pop("type", None)
         descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["dim_rot_mat_1"] = descrpt.get_dim_rot_mat_1()
         fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         model = EnerModel(descrpt, fitting, typeebd=typeebd)
 
diff --git a/source/tests/test_model_se_a_aparam.py b/source/tests/tf/test_model_se_a_aparam.py
similarity index 93%
rename from source/tests/test_model_se_a_aparam.py
rename to source/tests/tf/test_model_se_a_aparam.py
index 41111c57ee..00a71f9136 100644
--- a/source/tests/test_model_se_a_aparam.py
+++ b/source/tests/tf/test_model_se_a_aparam.py
@@ -1,27 +1,28 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
-from common import (
-    DataSystem,
-    gen_data,
-    j_loader,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     EnerFitting,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     EnerModel,
 )
 
+from .common import (
+    DataSystem,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -52,7 +53,9 @@ def test_model(self):
 
         jdata["model"]["descriptor"].pop("type", None)
         descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["dim_rot_mat_1"] = descrpt.get_dim_rot_mat_1()
         fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         model = EnerModel(descrpt, fitting)
 
diff --git a/source/tests/test_model_se_a_ebd.py b/source/tests/tf/test_model_se_a_ebd.py
similarity index 93%
rename from source/tests/test_model_se_a_ebd.py
rename to source/tests/tf/test_model_se_a_ebd.py
index bf856b7bc5..e4a6d78d65 100644
--- a/source/tests/test_model_se_a_ebd.py
+++ b/source/tests/tf/test_model_se_a_ebd.py
@@ -1,27 +1,28 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
-from common import (
-    DataSystem,
-    gen_data,
-    j_loader,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor.se_a_ebd import (
+from deepmd.tf.descriptor.se_a_ebd import (
     DescrptSeAEbd,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     EnerFitting,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     EnerModel,
 )
 
+from .common import (
+    DataSystem,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -53,7 +54,9 @@ def test_model(self):
         descrpt = DescrptSeAEbd(
             **jdata["model"]["descriptor"],
         )
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["dim_rot_mat_1"] = descrpt.get_dim_rot_mat_1()
         fitting = EnerFitting(
             **jdata["model"]["fitting_net"],
         )
diff --git a/source/tests/test_model_se_a_ebd_v2.py b/source/tests/tf/test_model_se_a_ebd_v2.py
similarity index 92%
rename from source/tests/test_model_se_a_ebd_v2.py
rename to source/tests/tf/test_model_se_a_ebd_v2.py
index 71860890ce..86aead5eef 100644
--- a/source/tests/test_model_se_a_ebd_v2.py
+++ b/source/tests/tf/test_model_se_a_ebd_v2.py
@@ -1,30 +1,31 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
-from common import (
-    DataSystem,
-    gen_data,
-    j_loader,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor.se_a_ebd_v2 import (
+from deepmd.tf.descriptor.se_a_ebd_v2 import (
     DescrptSeAEbdV2,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     EnerFitting,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     EnerModel,
 )
-from deepmd.utils.type_embed import (
+from deepmd.tf.utils.type_embed import (
     TypeEmbedNet,
 )
 
+from .common import (
+    DataSystem,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -59,6 +60,7 @@ def test_model(self):
         jdata["model"]["type_embedding"]["seed"] = 1
         typeebd_param = jdata["model"]["type_embedding"]
         typeebd = TypeEmbedNet(
+            ntypes=len(jdata["model"]["descriptor"]["sel"]),
             neuron=typeebd_param["neuron"],
             activation_function=None,
             resnet_dt=typeebd_param["resnet_dt"],
@@ -69,7 +71,9 @@ def test_model(self):
         descrpt = DescrptSeAEbdV2(
             **jdata["model"]["descriptor"],
         )
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["dim_rot_mat_1"] = descrpt.get_dim_rot_mat_1()
         fitting = EnerFitting(
             **jdata["model"]["fitting_net"],
         )
diff --git a/source/tests/test_model_se_a_fparam.py b/source/tests/tf/test_model_se_a_fparam.py
similarity index 93%
rename from source/tests/test_model_se_a_fparam.py
rename to source/tests/tf/test_model_se_a_fparam.py
index cdb85157a4..3045948480 100644
--- a/source/tests/test_model_se_a_fparam.py
+++ b/source/tests/tf/test_model_se_a_fparam.py
@@ -1,27 +1,28 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
-from common import (
-    DataSystem,
-    gen_data,
-    j_loader,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     EnerFitting,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     EnerModel,
 )
 
+from .common import (
+    DataSystem,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -51,7 +52,9 @@ def test_model(self):
 
         jdata["model"]["descriptor"].pop("type", None)
         descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["dim_rot_mat_1"] = descrpt.get_dim_rot_mat_1()
         fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         # descrpt = DescrptSeA(jdata['model']['descriptor'])
         # fitting = EnerFitting(jdata['model']['fitting_net'], descrpt)
diff --git a/source/tests/test_model_se_a_srtab.py b/source/tests/tf/test_model_se_a_srtab.py
similarity index 93%
rename from source/tests/test_model_se_a_srtab.py
rename to source/tests/tf/test_model_se_a_srtab.py
index 98cab9e073..2c4d5d70f9 100644
--- a/source/tests/test_model_se_a_srtab.py
+++ b/source/tests/tf/test_model_se_a_srtab.py
@@ -2,28 +2,29 @@
 import os
 
 import numpy as np
-from common import (
-    DataSystem,
-    gen_data,
-    j_loader,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     EnerFitting,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     EnerModel,
 )
 
+from .common import (
+    DataSystem,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -68,7 +69,9 @@ def test_model(self):
 
         jdata["model"]["descriptor"].pop("type", None)
         descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["dim_rot_mat_1"] = descrpt.get_dim_rot_mat_1()
         fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         # descrpt = DescrptSeA(jdata['model']['descriptor'])
         # fitting = EnerFitting(jdata['model']['fitting_net'], descrpt)
diff --git a/source/tests/test_model_se_a_type.py b/source/tests/tf/test_model_se_a_type.py
similarity index 92%
rename from source/tests/test_model_se_a_type.py
rename to source/tests/tf/test_model_se_a_type.py
index 85e4a2916d..9a42466766 100644
--- a/source/tests/test_model_se_a_type.py
+++ b/source/tests/tf/test_model_se_a_type.py
@@ -1,30 +1,31 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
-from common import (
-    DataSystem,
-    gen_data,
-    j_loader,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     EnerFitting,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     EnerModel,
 )
-from deepmd.utils.type_embed import (
+from deepmd.tf.utils.type_embed import (
     TypeEmbedNet,
 )
 
+from .common import (
+    DataSystem,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -54,10 +55,13 @@ def test_model(self):
 
         jdata["model"]["descriptor"].pop("type", None)
         descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["dim_rot_mat_1"] = descrpt.get_dim_rot_mat_1()
         fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         typeebd_param = jdata["model"]["type_embedding"]
         typeebd = TypeEmbedNet(
+            ntypes=descrpt.get_ntypes(),
             neuron=typeebd_param["neuron"],
             resnet_dt=typeebd_param["resnet_dt"],
             seed=typeebd_param["seed"],
diff --git a/source/tests/test_model_se_atten.py b/source/tests/tf/test_model_se_atten.py
similarity index 96%
rename from source/tests/test_model_se_atten.py
rename to source/tests/tf/test_model_se_atten.py
index 5417201a9f..1a5094c743 100644
--- a/source/tests/test_model_se_atten.py
+++ b/source/tests/tf/test_model_se_atten.py
@@ -3,34 +3,35 @@
 import unittest
 
 import numpy as np
-from common import (
-    DataSystem,
-    check_smooth_efv,
-    finite_difference_fv,
-    gen_data,
-    j_loader,
-)
 from packaging.version import parse as parse_version
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeAtten,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     EnerFitting,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     EnerModel,
 )
-from deepmd.utils.type_embed import (
+from deepmd.tf.utils.type_embed import (
     TypeEmbedNet,
 )
 
+from .common import (
+    DataSystem,
+    check_smooth_efv,
+    finite_difference_fv,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -66,10 +67,13 @@ def test_model(self):
         jdata["model"]["descriptor"].pop("type", None)
         jdata["model"]["descriptor"]["ntypes"] = 2
         descrpt = DescrptSeAtten(**jdata["model"]["descriptor"], uniform_seed=True)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["dim_rot_mat_1"] = descrpt.get_dim_rot_mat_1()
         fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         typeebd_param = jdata["model"]["type_embedding"]
         typeebd = TypeEmbedNet(
+            ntypes=descrpt.get_ntypes(),
             neuron=typeebd_param["neuron"],
             activation_function=None,
             resnet_dt=typeebd_param["resnet_dt"],
@@ -229,6 +233,7 @@ def test_exclude_types(self):
         descrpt = DescrptSeAtten(ntypes=ntypes, **jdata["model"]["descriptor"])
         typeebd_param = jdata["model"]["type_embedding"]
         typeebd = TypeEmbedNet(
+            ntypes=descrpt.get_ntypes(),
             neuron=typeebd_param["neuron"],
             activation_function=None,
             resnet_dt=typeebd_param["resnet_dt"],
@@ -291,10 +296,13 @@ def test_compressible_model(self):
         jdata["model"]["descriptor"]["stripped_type_embedding"] = True
         jdata["model"]["descriptor"]["attn_layer"] = 0
         descrpt = DescrptSeAtten(**jdata["model"]["descriptor"], uniform_seed=True)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["dim_rot_mat_1"] = descrpt.get_dim_rot_mat_1()
         fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         typeebd_param = jdata["model"]["type_embedding"]
         typeebd = TypeEmbedNet(
+            ntypes=descrpt.get_ntypes(),
             neuron=typeebd_param["neuron"],
             activation_function=None,
             resnet_dt=typeebd_param["resnet_dt"],
@@ -456,6 +464,7 @@ def test_compressible_exclude_types(self):
         jdata["model"]["descriptor"]["stripped_type_embedding"] = True
         jdata["model"]["descriptor"]["attn_layer"] = 0
         typeebd = TypeEmbedNet(
+            ntypes=descrpt.get_ntypes(),
             neuron=typeebd_param["neuron"],
             activation_function=None,
             resnet_dt=typeebd_param["resnet_dt"],
@@ -518,10 +527,13 @@ def test_stripped_type_embedding_model(self):
         jdata["model"]["descriptor"]["stripped_type_embedding"] = True
         jdata["model"]["descriptor"]["attn_layer"] = 2
         descrpt = DescrptSeAtten(**jdata["model"]["descriptor"], uniform_seed=True)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["dim_rot_mat_1"] = descrpt.get_dim_rot_mat_1()
         fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         typeebd_param = jdata["model"]["type_embedding"]
         typeebd = TypeEmbedNet(
+            ntypes=descrpt.get_ntypes(),
             neuron=typeebd_param["neuron"],
             activation_function=None,
             resnet_dt=typeebd_param["resnet_dt"],
@@ -686,6 +698,7 @@ def test_stripped_type_embedding_exclude_types(self):
         jdata["model"]["descriptor"]["stripped_type_embedding"] = True
         jdata["model"]["descriptor"]["attn_layer"] = 2
         typeebd = TypeEmbedNet(
+            ntypes=descrpt.get_ntypes(),
             neuron=typeebd_param["neuron"],
             activation_function=None,
             resnet_dt=typeebd_param["resnet_dt"],
@@ -756,10 +769,13 @@ def test_smoothness_of_stripped_type_embedding_smooth_model(self):
         jdata["model"]["descriptor"]["rcut"] = 6.0
         jdata["model"]["descriptor"]["rcut_smth"] = 4.0
         descrpt = DescrptSeAtten(**jdata["model"]["descriptor"], uniform_seed=True)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["dim_rot_mat_1"] = descrpt.get_dim_rot_mat_1()
         fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         typeebd_param = jdata["model"]["type_embedding"]
         typeebd = TypeEmbedNet(
+            ntypes=descrpt.get_ntypes(),
             neuron=typeebd_param["neuron"],
             activation_function=None,
             resnet_dt=typeebd_param["resnet_dt"],
diff --git a/source/tests/test_model_se_r.py b/source/tests/tf/test_model_se_r.py
similarity index 94%
rename from source/tests/test_model_se_r.py
rename to source/tests/tf/test_model_se_r.py
index 94812308c6..1e63922e19 100644
--- a/source/tests/test_model_se_r.py
+++ b/source/tests/tf/test_model_se_r.py
@@ -1,27 +1,28 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
-from common import (
-    DataSystem,
-    gen_data,
-    j_loader,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeR,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     EnerFitting,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     EnerModel,
 )
 
+from .common import (
+    DataSystem,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -51,7 +52,8 @@ def test_model(self):
 
         jdata["model"]["descriptor"].pop("type", None)
         descrpt = DescrptSeR(**jdata["model"]["descriptor"], uniform_seed=True)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
         fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         # fitting = EnerFitting(jdata['model']['fitting_net'], descrpt)
         model = EnerModel(descrpt, fitting)
diff --git a/source/tests/test_model_se_t.py b/source/tests/tf/test_model_se_t.py
similarity index 94%
rename from source/tests/test_model_se_t.py
rename to source/tests/tf/test_model_se_t.py
index 1d67e852c7..d75fac2f07 100644
--- a/source/tests/test_model_se_t.py
+++ b/source/tests/tf/test_model_se_t.py
@@ -1,27 +1,28 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
-from common import (
-    DataSystem,
-    gen_data,
-    j_loader,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeT,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     EnerFitting,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     EnerModel,
 )
 
+from .common import (
+    DataSystem,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -51,7 +52,8 @@ def test_model(self):
 
         jdata["model"]["descriptor"].pop("type", None)
         descrpt = DescrptSeT(**jdata["model"]["descriptor"], uniform_seed=True)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
         fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         model = EnerModel(descrpt, fitting)
 
diff --git a/source/tests/test_model_spin.json b/source/tests/tf/test_model_spin.json
similarity index 100%
rename from source/tests/test_model_spin.json
rename to source/tests/tf/test_model_spin.json
diff --git a/source/tests/test_model_spin.py b/source/tests/tf/test_model_spin.py
similarity index 97%
rename from source/tests/test_model_spin.py
rename to source/tests/tf/test_model_spin.py
index 9bdf1d780a..5d20c76c35 100644
--- a/source/tests/test_model_spin.py
+++ b/source/tests/tf/test_model_spin.py
@@ -2,33 +2,34 @@
 import unittest
 
 import numpy as np
-from common import (
-    DataSystem,
-    del_data,
-    gen_data,
-    j_loader,
-    tests_path,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     EnerFitting,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     EnerModel,
 )
-from deepmd.utils.spin import (
+from deepmd.tf.utils.spin import (
     Spin,
 )
 
+from .common import (
+    DataSystem,
+    del_data,
+    gen_data,
+    j_loader,
+    tests_path,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -46,7 +47,6 @@ def test_model_spin(self):
         jdata = j_loader(jfile)
 
         # set system information
-        systems = j_must_have(jdata["training"]["training_data"], "systems")
         set_pfx = j_must_have(jdata["training"], "set_prefix")
         batch_size = j_must_have(jdata["training"]["training_data"], "batch_size")
         batch_size = 2
@@ -59,6 +59,7 @@ def test_model_spin(self):
         jdata["training"]["validation_data"]["systems"] = [
             str(tests_path / "model_spin/")
         ]
+        systems = j_must_have(jdata["training"]["training_data"], "systems")
         data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
         test_data = data.get_test()
 
@@ -70,7 +71,8 @@ def test_model_spin(self):
         descrpt_param["spin"] = spin
         descrpt = DescrptSeA(**descrpt_param, uniform_seed=True)
         fitting_param.pop("type", None)
-        fitting_param["descrpt"] = descrpt
+        fitting_param["ntypes"] = descrpt.get_ntypes()
+        fitting_param["dim_descrpt"] = descrpt.get_dim_out()
         fitting_param["spin"] = spin
         fitting = EnerFitting(**fitting_param, uniform_seed=True)
         model = EnerModel(descrpt, fitting, spin=spin)
diff --git a/source/tests/tf/test_neighbor_stat.py b/source/tests/tf/test_neighbor_stat.py
new file mode 100644
index 0000000000..653634d674
--- /dev/null
+++ b/source/tests/tf/test_neighbor_stat.py
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import shutil
+import unittest
+
+import dpdata
+import numpy as np
+
+from deepmd.tf.entrypoints.neighbor_stat import (
+    neighbor_stat,
+)
+
+
+def gen_sys(nframes):
+    natoms = 1000
+    data = {}
+    X, Y, Z = np.mgrid[0:2:3j, 0:2:3j, 0:2:3j]
+    positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T  # + 0.1
+    data["coords"] = np.repeat(positions[np.newaxis, :, :], nframes, axis=0)
+    data["forces"] = np.random.default_rng().random([nframes, natoms, 3])
+    data["cells"] = np.array([3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 3.0]).reshape(
+        1, 3, 3
+    )
+    data["energies"] = np.random.default_rng().random([nframes, 1])
+    data["atom_names"] = ["TYPE"]
+    data["atom_numbs"] = [27]
+    data["atom_types"] = np.repeat(0, 27)
+    return data
+
+
+class TestNeighborStat(unittest.TestCase):
+    def setUp(self):
+        data0 = gen_sys(1)
+        sys0 = dpdata.LabeledSystem()
+        sys0.data = data0
+        sys0.to_deepmd_npy("system_0", set_size=1)
+
+    def tearDown(self):
+        shutil.rmtree("system_0")
+
+    def test_neighbor_stat(self):
+        for rcut in (0.0, 1.0, 2.0, 4.0):
+            for mixed_type in (True, False):
+                with self.subTest(rcut=rcut, mixed_type=mixed_type):
+                    rcut += 1e-3  # prevent numerical errors
+                    min_nbor_dist, max_nbor_size = neighbor_stat(
+                        system="system_0",
+                        rcut=rcut,
+                        type_map=["TYPE", "NO_THIS_TYPE"],
+                        mixed_type=mixed_type,
+                    )
+                    upper = np.ceil(rcut) + 1
+                    X, Y, Z = np.mgrid[-upper:upper, -upper:upper, -upper:upper]
+                    positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T
+                    # distance to (0,0,0)
+                    distance = np.linalg.norm(positions, axis=1)
+                    expected_neighbors = np.count_nonzero(
+                        np.logical_and(distance > 0, distance <= rcut)
+                    )
+                    self.assertAlmostEqual(min_nbor_dist, 1.0, 6)
+                    ret = [expected_neighbors]
+                    if not mixed_type:
+                        ret.append(0)
+                    np.testing.assert_array_equal(max_nbor_size, ret)
diff --git a/source/tests/tf/test_nlist.py b/source/tests/tf/test_nlist.py
new file mode 100644
index 0000000000..9e66b185ee
--- /dev/null
+++ b/source/tests/tf/test_nlist.py
@@ -0,0 +1,136 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+
+from deepmd.dpmodel.utils import (
+    inter2phys,
+)
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+from deepmd.tf.env import (
+    GLOBAL_TF_FLOAT_PRECISION,
+    default_tf_session_config,
+    tf,
+)
+from deepmd.tf.utils.nlist import (
+    extend_coord_with_ghosts,
+)
+
+
+class TestNeighList(unittest.TestCase):
+    def setUp(self):
+        self.nf = 3
+        self.nloc = 2
+        self.ns = 5 * 5 * 3
+        self.nall = self.ns * self.nloc
+        self.cell = np.array(
+            [[1, 0, 0], [0.4, 0.8, 0], [0.1, 0.3, 2.1]], dtype=GLOBAL_NP_FLOAT_PRECISION
+        )
+        self.icoord = np.array(
+            [[0, 0, 0], [0.5, 0.5, 0.1]], dtype=GLOBAL_NP_FLOAT_PRECISION
+        )
+        self.atype = np.array([0, 1], dtype=int)
+        [self.cell, self.icoord, self.atype] = [
+            np.expand_dims(ii, 0) for ii in [self.cell, self.icoord, self.atype]
+        ]
+        self.coord = inter2phys(self.icoord, self.cell).reshape([-1, self.nloc * 3])
+        self.cell = self.cell.reshape([-1, 9])
+        [self.cell, self.coord, self.atype] = [
+            np.tile(ii, [self.nf, 1]) for ii in [self.cell, self.coord, self.atype]
+        ]
+        self.rcut = 1.01
+        self.prec = 1e-10
+        self.nsel = [10, 10]
+        self.ref_nlist = np.array(
+            [
+                [0, 0, 0, 0, 0, 0, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1],
+                [0, 0, 0, 0, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1],
+            ]
+        )
+
+    def test_extend_coord(self):
+        t_coord = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None, None], name="i_coord"
+        )
+        t_atype = tf.placeholder(tf.int32, [None, None], name="i_atype")
+        t_cell = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, None], name="i_cell")
+        t_pbc = tf.placeholder(tf.bool, [], name="i_pbc")
+        t_ecoord, t_eatype, t_mapping = extend_coord_with_ghosts(
+            t_coord, t_atype, t_cell, self.rcut, t_pbc
+        )
+        with tf.Session(config=default_tf_session_config) as sess:
+            ecoord, eatype, mapping = sess.run(
+                [t_ecoord, t_eatype, t_mapping],
+                feed_dict={
+                    t_coord: self.coord,
+                    t_atype: self.atype,
+                    t_cell: self.cell,
+                    t_pbc: self.cell is not None,
+                },
+            )
+        # expected ncopy x nloc
+        self.assertEqual(list(ecoord.shape), [self.nf, self.nall * 3])
+        self.assertEqual(list(eatype.shape), [self.nf, self.nall])
+        self.assertEqual(list(mapping.shape), [self.nf, self.nall])
+        # check the nloc part is identical with original coord
+        np.testing.assert_allclose(
+            ecoord[:, : self.nloc * 3], self.coord, rtol=self.prec, atol=self.prec
+        )
+        # check the shift vectors are aligned with grid
+        shift_vec = (
+            ecoord.reshape([-1, self.ns, self.nloc, 3])
+            - self.coord.reshape([-1, self.nloc, 3])[:, None, :, :]
+        )
+        shift_vec = shift_vec.reshape([-1, self.nall, 3])
+        # hack!!! assumes identical cell across frames
+        shift_vec = np.matmul(
+            shift_vec, np.linalg.inv(self.cell.reshape([self.nf, 3, 3])[0])
+        )
+        # nf x nall x 3
+        shift_vec = np.round(shift_vec)
+        # check: identical shift vecs
+        np.testing.assert_allclose(
+            shift_vec[0], shift_vec[1], rtol=self.prec, atol=self.prec
+        )
+        # check: shift idx aligned with grid
+        mm, cc = np.unique(shift_vec[0][:, 0], axis=-1, return_counts=True)
+        np.testing.assert_allclose(
+            mm,
+            np.array([-2, -1, 0, 1, 2], dtype=GLOBAL_NP_FLOAT_PRECISION),
+            rtol=self.prec,
+            atol=self.prec,
+        )
+        np.testing.assert_allclose(
+            cc,
+            np.array([30, 30, 30, 30, 30], dtype=np.int64),
+            rtol=self.prec,
+            atol=self.prec,
+        )
+        mm, cc = np.unique(shift_vec[1][:, 1], axis=-1, return_counts=True)
+        np.testing.assert_allclose(
+            mm,
+            np.array([-2, -1, 0, 1, 2], dtype=GLOBAL_NP_FLOAT_PRECISION),
+            rtol=self.prec,
+            atol=self.prec,
+        )
+        np.testing.assert_allclose(
+            cc,
+            np.array([30, 30, 30, 30, 30], dtype=GLOBAL_NP_FLOAT_PRECISION),
+            rtol=self.prec,
+            atol=self.prec,
+        )
+        mm, cc = np.unique(shift_vec[1][:, 2], axis=-1, return_counts=True)
+        np.testing.assert_allclose(
+            mm,
+            np.array([-1, 0, 1], dtype=GLOBAL_NP_FLOAT_PRECISION),
+            rtol=self.prec,
+            atol=self.prec,
+        )
+        np.testing.assert_allclose(
+            cc,
+            np.array([50, 50, 50], dtype=np.int64),
+            rtol=self.prec,
+            atol=self.prec,
+        )
diff --git a/source/tests/test_nvnmd_entrypoints.py b/source/tests/tf/test_nvnmd_entrypoints.py
similarity index 98%
rename from source/tests/test_nvnmd_entrypoints.py
rename to source/tests/tf/test_nvnmd_entrypoints.py
index d82c905024..cc7a92c032 100644
--- a/source/tests/test_nvnmd_entrypoints.py
+++ b/source/tests/tf/test_nvnmd_entrypoints.py
@@ -3,47 +3,48 @@
 
 import numpy as np
 import pytest
-from common import (
-    tests_path,
-)
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     tf,
 )
-from deepmd.nvnmd.data.data import (
+from deepmd.tf.nvnmd.data.data import (
     jdata_deepmd_input_v0,
     jdata_deepmd_input_v1,
 )
-from deepmd.nvnmd.entrypoints.freeze import (
+from deepmd.tf.nvnmd.entrypoints.freeze import (
     save_weight,
 )
-from deepmd.nvnmd.entrypoints.mapt import (
+from deepmd.tf.nvnmd.entrypoints.mapt import (
     MapTable,
 )
-from deepmd.nvnmd.entrypoints.wrap import (
+from deepmd.tf.nvnmd.entrypoints.wrap import (
     wrap,
 )
-from deepmd.nvnmd.utils.config import (
+from deepmd.tf.nvnmd.utils.config import (
     nvnmd_cfg,
 )
-from deepmd.nvnmd.utils.fio import (
+from deepmd.tf.nvnmd.utils.fio import (
     FioBin,
     FioNpyDic,
 )
-from deepmd.train.run_options import (
+from deepmd.tf.train.run_options import (
     RunOptions,
 )
-from deepmd.train.trainer import (
+from deepmd.tf.train.trainer import (
     DPTrainer,
 )
-from deepmd.utils.argcheck import (
+from deepmd.tf.utils.argcheck import (
     normalize,
 )
-from deepmd.utils.compat import (
+from deepmd.tf.utils.compat import (
     update_deepmd_input,
 )
 
+from .common import (
+    tests_path,
+)
+
 
 class TestNvnmdEntrypointsV0(tf.test.TestCase):
     @pytest.mark.run(order=0)
diff --git a/source/tests/test_nvnmd_op.py b/source/tests/tf/test_nvnmd_op.py
similarity index 99%
rename from source/tests/test_nvnmd_op.py
rename to source/tests/tf/test_nvnmd_op.py
index 3419b375e4..beff8375b8 100644
--- a/source/tests/test_nvnmd_op.py
+++ b/source/tests/tf/test_nvnmd_op.py
@@ -4,7 +4,7 @@
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     op_module,
     tf,
 )
diff --git a/source/tests/test_pairwise_dprc.py b/source/tests/tf/test_pairwise_dprc.py
similarity index 99%
rename from source/tests/test_pairwise_dprc.py
rename to source/tests/tf/test_pairwise_dprc.py
index e95b66c7a0..38b8d8b775 100644
--- a/source/tests/test_pairwise_dprc.py
+++ b/source/tests/tf/test_pairwise_dprc.py
@@ -1,43 +1,45 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 """Test pairwise DPRc features."""
+
 import json
 import unittest
 
 import dpdata
 import numpy as np
-from common import (
-    run_dp,
-    tests_path,
-)
 from packaging.version import parse as parse_version
 
-from deepmd import (
+from deepmd.tf import (
     DeepPotential,
 )
-from deepmd.common import (
+from deepmd.tf.common import (
     j_loader,
     j_must_have,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_ENER_FLOAT_PRECISION,
     GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     op_module,
     tf,
 )
-from deepmd.model.model import (
+from deepmd.tf.model.model import (
     Model,
 )
-from deepmd.model.pairwise_dprc import (
+from deepmd.tf.model.pairwise_dprc import (
     gather_placeholder,
 )
-from deepmd.utils.data_system import (
+from deepmd.tf.utils.data_system import (
     DeepmdDataSystem,
 )
-from deepmd.utils.sess import (
+from deepmd.tf.utils.sess import (
     run_sess,
 )
 
+from .common import (
+    run_dp,
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
diff --git a/source/tests/test_parallel_training.py b/source/tests/tf/test_parallel_training.py
similarity index 94%
rename from source/tests/test_parallel_training.py
rename to source/tests/tf/test_parallel_training.py
index 0a1a63a29b..d190764695 100644
--- a/source/tests/test_parallel_training.py
+++ b/source/tests/tf/test_parallel_training.py
@@ -3,12 +3,12 @@
 import subprocess as sp
 import unittest
 
-from common import (
-    tests_path,
+from deepmd.tf.cluster.local import (
+    get_gpus,
 )
 
-from deepmd.cluster.local import (
-    get_gpus,
+from .common import (
+    tests_path,
 )
 
 
@@ -44,7 +44,6 @@ def test_two_workers(self):
             if hasattr(line, "decode"):
                 line = line.decode("utf-8")
             line = line.rstrip()
-            print(line)
         popen.wait()
         self.assertEqual(0, popen.returncode, "Parallel training failed!")
 
diff --git a/source/tests/test_polar_se_a.py b/source/tests/tf/test_polar_se_a.py
similarity index 75%
rename from source/tests/test_polar_se_a.py
rename to source/tests/tf/test_polar_se_a.py
index 2564dc0656..8ef967de55 100644
--- a/source/tests/test_polar_se_a.py
+++ b/source/tests/tf/test_polar_se_a.py
@@ -1,28 +1,36 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-import numpy as np
-from common import (
-    DataSystem,
-    finite_difference,
-    gen_data,
-    j_loader,
-    strerch_box,
+from pathlib import (
+    Path,
 )
 
-from deepmd.common import (
+import numpy as np
+
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     PolarFittingSeA,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     PolarModel,
 )
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+)
+
+from .common import (
+    DataSystem,
+    finite_difference,
+    gen_data,
+    j_loader,
+    strerch_box,
+)
 
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
@@ -54,7 +62,9 @@ def test_model(self):
         jdata["model"]["descriptor"].pop("type", None)
         jdata["model"]["fitting_net"].pop("type", None)
         descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["embedding_width"] = descrpt.get_dim_rot_mat_1()
         fitting = PolarFittingSeA(**jdata["model"]["fitting_net"], uniform_seed=True)
         model = PolarModel(descrpt, fitting)
 
@@ -195,3 +205,52 @@ def test_model(self):
         # make sure atomic virial sum to virial
         places = 10
         np.testing.assert_almost_equal(pv, spv, places)
+
+    def test_data_stat(self):
+        jfile = "polar_se_a.json"
+        jdata = j_loader(jfile)
+
+        systems = [
+            str(
+                Path(__file__).parent.parent
+                / "pt"
+                / "water_tensor"
+                / "polar"
+                / "global_system"
+            ),
+            str(
+                Path(__file__).parent.parent
+                / "pt"
+                / "water_tensor"
+                / "polar"
+                / "atomic_system"
+            ),
+        ]
+
+        batch_size = 1
+        test_size = 1
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+
+        data = DeepmdDataSystem(systems, batch_size, test_size, rcut)
+        data.add(
+            "atomic_polarizability",
+            9,
+            atomic=True,
+            type_sel=jdata["model"]["fitting_net"]["sel_type"],
+        )
+        data.add(
+            "polarizability",
+            9,
+            atomic=False,
+        )
+
+        jdata["model"]["descriptor"].pop("type", None)
+        jdata["model"]["fitting_net"].pop("type", None)
+        descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["embedding_width"] = descrpt.get_dim_rot_mat_1()
+        fitting = PolarFittingSeA(**jdata["model"]["fitting_net"], uniform_seed=True)
+        model = PolarModel(descrpt, fitting)
+
+        model.data_stat(data)
diff --git a/source/tests/test_polar_se_a_tebd.py b/source/tests/tf/test_polar_se_a_tebd.py
similarity index 93%
rename from source/tests/test_polar_se_a_tebd.py
rename to source/tests/tf/test_polar_se_a_tebd.py
index 570c4261d9..5a3c001004 100644
--- a/source/tests/test_polar_se_a_tebd.py
+++ b/source/tests/tf/test_polar_se_a_tebd.py
@@ -2,34 +2,35 @@
 import unittest
 
 import numpy as np
-from common import (
-    DataSystem,
-    finite_difference,
-    gen_data,
-    j_loader,
-    strerch_box,
-)
 from packaging.version import parse as parse_version
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     DescrptSeA,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.fit import (
+from deepmd.tf.fit import (
     PolarFittingSeA,
 )
-from deepmd.model import (
+from deepmd.tf.model import (
     PolarModel,
 )
-from deepmd.utils.type_embed import (
+from deepmd.tf.utils.type_embed import (
     TypeEmbedNet,
 )
 
+from .common import (
+    DataSystem,
+    finite_difference,
+    gen_data,
+    j_loader,
+    strerch_box,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
@@ -64,10 +65,13 @@ def test_model(self):
         jdata["model"]["descriptor"].pop("type", None)
         jdata["model"]["fitting_net"].pop("type", None)
         descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
-        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        jdata["model"]["fitting_net"]["ntypes"] = descrpt.get_ntypes()
+        jdata["model"]["fitting_net"]["dim_descrpt"] = descrpt.get_dim_out()
+        jdata["model"]["fitting_net"]["embedding_width"] = descrpt.get_dim_rot_mat_1()
         fitting = PolarFittingSeA(**jdata["model"]["fitting_net"], uniform_seed=True)
         typeebd_param = jdata["model"]["type_embedding"]
         typeebd = TypeEmbedNet(
+            ntypes=descrpt.get_ntypes(),
             neuron=typeebd_param["neuron"],
             resnet_dt=typeebd_param["resnet_dt"],
             seed=typeebd_param["seed"],
diff --git a/source/tests/test_prod_env_mat.py b/source/tests/tf/test_prod_env_mat.py
similarity index 99%
rename from source/tests/test_prod_env_mat.py
rename to source/tests/tf/test_prod_env_mat.py
index 663b991831..ac1c16bf97 100644
--- a/source/tests/test_prod_env_mat.py
+++ b/source/tests/tf/test_prod_env_mat.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     GLOBAL_TF_FLOAT_PRECISION,
     op_module,
diff --git a/source/tests/test_prod_force.py b/source/tests/tf/test_prod_force.py
similarity index 99%
rename from source/tests/test_prod_force.py
rename to source/tests/tf/test_prod_force.py
index 83a44c0be9..7d3bcee6ce 100644
--- a/source/tests/test_prod_force.py
+++ b/source/tests/tf/test_prod_force.py
@@ -4,7 +4,7 @@
 
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     op_module,
     tf,
diff --git a/source/tests/test_prod_force_grad.py b/source/tests/tf/test_prod_force_grad.py
similarity index 99%
rename from source/tests/test_prod_force_grad.py
rename to source/tests/tf/test_prod_force_grad.py
index 012def217f..49e63d161c 100644
--- a/source/tests/test_prod_force_grad.py
+++ b/source/tests/tf/test_prod_force_grad.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     op_grads_module,
     tf,
diff --git a/source/tests/test_prod_virial.py b/source/tests/tf/test_prod_virial.py
similarity index 99%
rename from source/tests/test_prod_virial.py
rename to source/tests/tf/test_prod_virial.py
index 2abcfcb1bf..fa6347382e 100644
--- a/source/tests/test_prod_virial.py
+++ b/source/tests/tf/test_prod_virial.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     op_module,
     tf,
diff --git a/source/tests/test_prod_virial_grad.py b/source/tests/tf/test_prod_virial_grad.py
similarity index 99%
rename from source/tests/test_prod_virial_grad.py
rename to source/tests/tf/test_prod_virial_grad.py
index 548b63a54b..470441a939 100644
--- a/source/tests/test_prod_virial_grad.py
+++ b/source/tests/tf/test_prod_virial_grad.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_TF_FLOAT_PRECISION,
     op_grads_module,
     tf,
diff --git a/source/tests/test_tab_nonsmth.py b/source/tests/tf/test_tab_nonsmth.py
similarity index 97%
rename from source/tests/test_tab_nonsmth.py
rename to source/tests/tf/test_tab_nonsmth.py
index 9e3f9ff640..7132d0c206 100644
--- a/source/tests/test_tab_nonsmth.py
+++ b/source/tests/tf/test_tab_nonsmth.py
@@ -3,27 +3,28 @@
 import unittest
 
 import numpy as np
-from common import (
+
+# load grad of force module
+import deepmd.tf.op  # noqa: F401
+from deepmd.tf.env import (
+    op_module,
+    tf,
+)
+from deepmd.tf.utils.pair_tab import (
+    PairTab,
+)
+
+from .common import (
     Data,
     force_dw_test,
     force_test,
     virial_dw_test,
     virial_test,
 )
-from test_descrpt_nonsmth import (
+from .test_descrpt_nonsmth import (
     Inter,
 )
 
-# load grad of force module
-import deepmd.op  # noqa: F401
-from deepmd.env import (
-    op_module,
-    tf,
-)
-from deepmd.utils.pair_tab import (
-    PairTab,
-)
-
 
 def _make_tab(ntype):
     xx = np.arange(0, 9, 0.001)
diff --git a/source/tests/test_tab_smooth.py b/source/tests/tf/test_tab_smooth.py
similarity index 97%
rename from source/tests/test_tab_smooth.py
rename to source/tests/tf/test_tab_smooth.py
index 49b18e14f3..e0cf564cd6 100644
--- a/source/tests/test_tab_smooth.py
+++ b/source/tests/tf/test_tab_smooth.py
@@ -3,26 +3,27 @@
 import unittest
 
 import numpy as np
-from common import (
+
+# load grad of force module
+from deepmd.tf.env import (
+    op_module,
+    tf,
+)
+from deepmd.tf.utils.pair_tab import (
+    PairTab,
+)
+
+from .common import (
     Data,
     force_dw_test,
     force_test,
     virial_dw_test,
     virial_test,
 )
-from test_descrpt_smooth import (
+from .test_descrpt_smooth import (
     Inter,
 )
 
-# load grad of force module
-from deepmd.env import (
-    op_module,
-    tf,
-)
-from deepmd.utils.pair_tab import (
-    PairTab,
-)
-
 
 def _make_tab(ntype):
     xx = np.arange(0, 9, 0.001)
diff --git a/source/tests/test_tabulate.py b/source/tests/tf/test_tabulate.py
similarity index 97%
rename from source/tests/test_tabulate.py
rename to source/tests/tf/test_tabulate.py
index 12c805fe79..0d46293b62 100644
--- a/source/tests/test_tabulate.py
+++ b/source/tests/tf/test_tabulate.py
@@ -3,10 +3,10 @@
 
 import numpy as np
 
-from deepmd.common import (
+from deepmd.tf.common import (
     gelu,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     op_module,
     tf,
 )
@@ -58,7 +58,7 @@ def test_op_tanh(self):
             ]
         )
 
-        places = 18
+        places = 15
         np.testing.assert_almost_equal(dy_array, answer, places)
 
     def test_op_gelu(self):
@@ -104,7 +104,7 @@ def test_op_gelu(self):
             ]
         )
 
-        places = 18
+        places = 15
         np.testing.assert_almost_equal(dy_array, answer, places)
 
 
diff --git a/source/tests/test_train.py b/source/tests/tf/test_train.py
similarity index 75%
rename from source/tests/test_train.py
rename to source/tests/tf/test_train.py
index 145457260f..3e22dc57bc 100644
--- a/source/tests/test_train.py
+++ b/source/tests/tf/test_train.py
@@ -4,47 +4,50 @@
     patch,
 )
 
-from deepmd.entrypoints.train import (
-    parse_auto_sel,
-    parse_auto_sel_ratio,
-    update_one_sel,
+from deepmd.tf.entrypoints.train import (
     update_sel,
-    wrap_up_4,
+)
+from deepmd.tf.utils.update_sel import (
+    UpdateSel,
 )
 
 
 class TestTrain(unittest.TestCase):
+    def setUp(self) -> None:
+        self.update_sel = UpdateSel()
+        return super().setUp()
+
     def test_train_parse_auto_sel(self):
-        self.assertTrue(parse_auto_sel("auto"))
-        self.assertTrue(parse_auto_sel("auto:12"))
-        self.assertTrue(parse_auto_sel("auto:12:13"))
-        self.assertFalse(parse_auto_sel([1, 2]))
-        self.assertFalse(parse_auto_sel("abc:12:13"))
+        self.assertTrue(self.update_sel.parse_auto_sel("auto"))
+        self.assertTrue(self.update_sel.parse_auto_sel("auto:12"))
+        self.assertTrue(self.update_sel.parse_auto_sel("auto:12:13"))
+        self.assertFalse(self.update_sel.parse_auto_sel([1, 2]))
+        self.assertFalse(self.update_sel.parse_auto_sel("abc:12:13"))
 
     def test_train_parse_auto_sel_ratio(self):
-        self.assertEqual(parse_auto_sel_ratio("auto"), 1.1)
-        self.assertEqual(parse_auto_sel_ratio("auto:1.2"), 1.2)
+        self.assertEqual(self.update_sel.parse_auto_sel_ratio("auto"), 1.1)
+        self.assertEqual(self.update_sel.parse_auto_sel_ratio("auto:1.2"), 1.2)
         with self.assertRaises(RuntimeError):
-            parse_auto_sel_ratio("auto:1.2:1.3")
+            self.update_sel.parse_auto_sel_ratio("auto:1.2:1.3")
         with self.assertRaises(RuntimeError):
-            parse_auto_sel_ratio("abc")
+            self.update_sel.parse_auto_sel_ratio("abc")
         with self.assertRaises(RuntimeError):
-            parse_auto_sel_ratio([1, 2, 3])
+            self.update_sel.parse_auto_sel_ratio([1, 2, 3])
 
-    @patch("deepmd.entrypoints.train.get_sel")
+    @patch("deepmd.tf.utils.update_sel.UpdateSel.get_sel")
     def test_update_one_sel(self, sel_mock):
         sel_mock.return_value = [10, 20]
         jdata = {}
         descriptor = {"type": "se_e2_a", "rcut": 6, "sel": "auto"}
-        descriptor = update_one_sel(jdata, descriptor)
+        descriptor = self.update_sel.update_one_sel(jdata, descriptor)
         # self.assertEqual(descriptor['sel'], [11,22])
         self.assertEqual(descriptor["sel"], [12, 24])
         descriptor = {"type": "se_e2_a", "rcut": 6, "sel": "auto:1.5"}
-        descriptor = update_one_sel(jdata, descriptor)
+        descriptor = self.update_sel.update_one_sel(jdata, descriptor)
         # self.assertEqual(descriptor['sel'], [15,30])
         self.assertEqual(descriptor["sel"], [16, 32])
 
-    @patch("deepmd.entrypoints.train.get_sel")
+    @patch("deepmd.tf.utils.update_sel.UpdateSel.get_sel")
     def test_update_sel_hybrid(self, sel_mock):
         sel_mock.return_value = [10, 20]
         jdata = {
@@ -72,7 +75,7 @@ def test_update_sel_hybrid(self, sel_mock):
         jdata = update_sel(jdata)
         self.assertEqual(jdata, expected_out)
 
-    @patch("deepmd.entrypoints.train.get_sel")
+    @patch("deepmd.tf.utils.update_sel.UpdateSel.get_sel")
     def test_update_sel(self, sel_mock):
         sel_mock.return_value = [10, 20]
         jdata = {"model": {"descriptor": {"type": "se_e2_a", "rcut": 6, "sel": "auto"}}}
@@ -82,7 +85,7 @@ def test_update_sel(self, sel_mock):
         jdata = update_sel(jdata)
         self.assertEqual(jdata, expected_out)
 
-    @patch("deepmd.entrypoints.train.get_sel")
+    @patch("deepmd.tf.utils.update_sel.UpdateSel.get_sel")
     def test_update_sel_atten_auto(self, sel_mock):
         sel_mock.return_value = [25]
         jdata = {
@@ -106,7 +109,7 @@ def test_update_sel_atten_auto(self, sel_mock):
         jdata = update_sel(jdata)
         self.assertEqual(jdata, expected_out)
 
-    @patch("deepmd.entrypoints.train.get_sel")
+    @patch("deepmd.tf.utils.update_sel.UpdateSel.get_sel")
     def test_update_sel_atten_int(self, sel_mock):
         sel_mock.return_value = [25]
         jdata = {
@@ -130,7 +133,7 @@ def test_update_sel_atten_int(self, sel_mock):
         jdata = update_sel(jdata)
         self.assertEqual(jdata, expected_out)
 
-    @patch("deepmd.entrypoints.train.get_sel")
+    @patch("deepmd.tf.utils.update_sel.UpdateSel.get_sel")
     def test_update_sel_atten_list(self, sel_mock):
         sel_mock.return_value = [25]
         jdata = {
@@ -200,7 +203,7 @@ def test_skip_linear_frozen(self):
         jdata = update_sel(jdata)
         self.assertEqual(jdata, expected_out)
 
-    @patch("deepmd.entrypoints.train.get_min_nbor_dist")
+    @patch("deepmd.tf.utils.update_sel.UpdateSel.get_min_nbor_dist")
     def test_pairwise_dprc(self, sel_mock):
         sel_mock.return_value = 0.5
         jdata = {
@@ -219,9 +222,9 @@ def test_pairwise_dprc(self, sel_mock):
         self.assertEqual(jdata, expected_out)
 
     def test_wrap_up_4(self):
-        self.assertEqual(wrap_up_4(12), 3 * 4)
-        self.assertEqual(wrap_up_4(13), 4 * 4)
-        self.assertEqual(wrap_up_4(14), 4 * 4)
-        self.assertEqual(wrap_up_4(15), 4 * 4)
-        self.assertEqual(wrap_up_4(16), 4 * 4)
-        self.assertEqual(wrap_up_4(17), 5 * 4)
+        self.assertEqual(self.update_sel.wrap_up_4(12), 3 * 4)
+        self.assertEqual(self.update_sel.wrap_up_4(13), 4 * 4)
+        self.assertEqual(self.update_sel.wrap_up_4(14), 4 * 4)
+        self.assertEqual(self.update_sel.wrap_up_4(15), 4 * 4)
+        self.assertEqual(self.update_sel.wrap_up_4(16), 4 * 4)
+        self.assertEqual(self.update_sel.wrap_up_4(17), 5 * 4)
diff --git a/source/tests/test_transfer.py b/source/tests/tf/test_transfer.py
similarity index 93%
rename from source/tests/test_transfer.py
rename to source/tests/tf/test_transfer.py
index 27b97571c9..48e9f78e0d 100644
--- a/source/tests/test_transfer.py
+++ b/source/tests/tf/test_transfer.py
@@ -1,24 +1,25 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import os
-import subprocess as sp
 import unittest
 
 import numpy as np
-from common import (
-    run_dp,
-    tests_path,
-)
 
-from deepmd.env import (
+from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPot,
 )
-from deepmd.utils.convert import (
+from deepmd.tf.utils.convert import (
     convert_pbtxt_to_pb,
 )
 
+from .common import (
+    infer_path,
+    run_dp,
+    tests_path,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -30,17 +31,6 @@ def _file_delete(file):
         os.remove(file)
 
 
-def _subprocess_run(command):
-    popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
-    for line in iter(popen.stdout.readline, b""):
-        if hasattr(line, "decode"):
-            line = line.decode("utf-8")
-        line = line.rstrip()
-        print(line)
-    popen.wait()
-    return popen.returncode
-
-
 class TestTransform(unittest.TestCase):
     @classmethod
     def setUpClass(self):
@@ -48,10 +38,10 @@ def setUpClass(self):
         self.raw_model = str(tests_path / "dp-raw.pb")
         self.new_model = str(tests_path / "dp-new.pb")
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deeppot.pbtxt")), self.old_model
+            str(infer_path / os.path.join("deeppot.pbtxt")), self.old_model
         )
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "deeppot-1.pbtxt")), self.raw_model
+            str(infer_path / os.path.join("deeppot-1.pbtxt")), self.raw_model
         )
         ret = run_dp(
             "dp transfer -O "
diff --git a/source/tests/test_type_embed.py b/source/tests/tf/test_type_embed.py
similarity index 92%
rename from source/tests/test_type_embed.py
rename to source/tests/tf/test_type_embed.py
index 3e79bad70b..c3f5077943 100644
--- a/source/tests/test_type_embed.py
+++ b/source/tests/tf/test_type_embed.py
@@ -1,10 +1,10 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
 
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
-from deepmd.utils.type_embed import (
+from deepmd.tf.utils.type_embed import (
     TypeEmbedNet,
     embed_atom_type,
 )
@@ -28,7 +28,7 @@ def test_embed_atom_type(self):
         np.testing.assert_almost_equal(atom_embed, expected_out, 10)
 
     def test_type_embed_net(self):
-        ten = TypeEmbedNet([2, 4, 8], seed=1, uniform_seed=True)
+        ten = TypeEmbedNet(ntypes=2, neuron=[2, 4, 8], seed=1, uniform_seed=True)
         type_embedding = ten.build(2)
         sess = self.cached_session().__enter__()
         sess.run(tf.global_variables_initializer())
diff --git a/source/tests/test_type_one_side.py b/source/tests/tf/test_type_one_side.py
similarity index 98%
rename from source/tests/test_type_one_side.py
rename to source/tests/tf/test_type_one_side.py
index 8e7c173912..5c71a41739 100644
--- a/source/tests/test_type_one_side.py
+++ b/source/tests/tf/test_type_one_side.py
@@ -1,21 +1,22 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
-from common import (
-    DataSystem,
-    gen_data,
-    j_loader,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.descriptor import (
+from deepmd.tf.descriptor import (
     Descriptor,
 )
-from deepmd.env import (
+from deepmd.tf.env import (
     tf,
 )
 
+from .common import (
+    DataSystem,
+    gen_data,
+    j_loader,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
diff --git a/source/tests/test_virtual_type.py b/source/tests/tf/test_virtual_type.py
similarity index 90%
rename from source/tests/test_virtual_type.py
rename to source/tests/tf/test_virtual_type.py
index f7fc3c0127..a3e87a35ed 100644
--- a/source/tests/test_virtual_type.py
+++ b/source/tests/tf/test_virtual_type.py
@@ -1,37 +1,39 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 """Test virtual atomic type."""
+
 import os
 import unittest
 
 import numpy as np
-from common import (
-    gen_data,
-    j_loader,
-    tests_path,
-)
 
-from deepmd.common import (
+from deepmd.tf.common import (
     j_must_have,
 )
-from deepmd.infer import (
+from deepmd.tf.infer import (
     DeepPot,
 )
-from deepmd.utils.convert import (
+from deepmd.tf.utils.convert import (
     convert_pbtxt_to_pb,
 )
-from deepmd.utils.data_system import (
+from deepmd.tf.utils.data_system import (
     DeepmdDataSystem,
 )
-from deepmd.utils.neighbor_stat import (
+from deepmd.tf.utils.neighbor_stat import (
     NeighborStat,
 )
 
+from .common import (
+    gen_data,
+    infer_path,
+    j_loader,
+)
+
 
 class TestVirtualType(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         convert_pbtxt_to_pb(
-            str(tests_path / os.path.join("infer", "virtual_type.pbtxt")),
+            str(infer_path / os.path.join("virtual_type.pbtxt")),
             "virtual_type.pb",
         )
         cls.dp = DeepPot("virtual_type.pb")
@@ -137,5 +139,5 @@ def test_data_mixed_type(self):
         data = DeepmdDataSystem(systems, batch_size, test_size, rcut, type_map=type_map)
         data.get_batch()
         # neighbor stat
-        nei_stat = NeighborStat(len(type_map), rcut, one_type=True)
+        nei_stat = NeighborStat(len(type_map), rcut, mixed_type=True)
         min_nbor_dist, max_nbor_size = nei_stat.get_stat(data)
diff --git a/source/tests/train_dos.json b/source/tests/tf/train_dos.json
similarity index 100%
rename from source/tests/train_dos.json
rename to source/tests/tf/train_dos.json
diff --git a/source/tests/water.json b/source/tests/tf/water.json
similarity index 100%
rename from source/tests/water.json
rename to source/tests/tf/water.json
diff --git a/source/tests/water_hybrid.json b/source/tests/tf/water_hybrid.json
similarity index 100%
rename from source/tests/water_hybrid.json
rename to source/tests/tf/water_hybrid.json
diff --git a/source/tests/water_layer_name.json b/source/tests/tf/water_layer_name.json
similarity index 100%
rename from source/tests/water_layer_name.json
rename to source/tests/tf/water_layer_name.json
diff --git a/source/tests/water_multi.json b/source/tests/tf/water_multi.json
similarity index 100%
rename from source/tests/water_multi.json
rename to source/tests/tf/water_multi.json
diff --git a/source/tests/water_se_a.json b/source/tests/tf/water_se_a.json
similarity index 100%
rename from source/tests/water_se_a.json
rename to source/tests/tf/water_se_a.json
diff --git a/source/tests/water_se_a_afparam.json b/source/tests/tf/water_se_a_afparam.json
similarity index 100%
rename from source/tests/water_se_a_afparam.json
rename to source/tests/tf/water_se_a_afparam.json
diff --git a/source/tests/water_se_a_aparam.json b/source/tests/tf/water_se_a_aparam.json
similarity index 100%
rename from source/tests/water_se_a_aparam.json
rename to source/tests/tf/water_se_a_aparam.json
diff --git a/source/tests/water_se_a_ebd.json b/source/tests/tf/water_se_a_ebd.json
similarity index 100%
rename from source/tests/water_se_a_ebd.json
rename to source/tests/tf/water_se_a_ebd.json
diff --git a/source/tests/water_se_a_fparam.json b/source/tests/tf/water_se_a_fparam.json
similarity index 100%
rename from source/tests/water_se_a_fparam.json
rename to source/tests/tf/water_se_a_fparam.json
diff --git a/source/tests/water_se_a_srtab.json b/source/tests/tf/water_se_a_srtab.json
similarity index 100%
rename from source/tests/water_se_a_srtab.json
rename to source/tests/tf/water_se_a_srtab.json
diff --git a/source/tests/water_se_a_type.json b/source/tests/tf/water_se_a_type.json
similarity index 100%
rename from source/tests/water_se_a_type.json
rename to source/tests/tf/water_se_a_type.json
diff --git a/source/tests/water_se_atten.json b/source/tests/tf/water_se_atten.json
similarity index 100%
rename from source/tests/water_se_atten.json
rename to source/tests/tf/water_se_atten.json
diff --git a/source/tests/water_se_atten_compressible_mixed_type.json b/source/tests/tf/water_se_atten_compressible_mixed_type.json
similarity index 100%
rename from source/tests/water_se_atten_compressible_mixed_type.json
rename to source/tests/tf/water_se_atten_compressible_mixed_type.json
diff --git a/source/tests/water_se_atten_mixed_type.json b/source/tests/tf/water_se_atten_mixed_type.json
similarity index 100%
rename from source/tests/water_se_atten_mixed_type.json
rename to source/tests/tf/water_se_atten_mixed_type.json
diff --git a/source/tests/water_se_r.json b/source/tests/tf/water_se_r.json
similarity index 100%
rename from source/tests/water_se_r.json
rename to source/tests/tf/water_se_r.json
diff --git a/source/tests/water_se_t.json b/source/tests/tf/water_se_t.json
similarity index 100%
rename from source/tests/water_se_t.json
rename to source/tests/tf/water_se_t.json
diff --git a/source/tests/wfc.json b/source/tests/tf/wfc.json
similarity index 100%
rename from source/tests/wfc.json
rename to source/tests/tf/wfc.json
diff --git a/source/tests/yaml_inputs/water_se_a_v1.json b/source/tests/tf/yaml_inputs/water_se_a_v1.json
similarity index 100%
rename from source/tests/yaml_inputs/water_se_a_v1.json
rename to source/tests/tf/yaml_inputs/water_se_a_v1.json
diff --git a/source/tests/yaml_inputs/water_se_a_v1.yaml b/source/tests/tf/yaml_inputs/water_se_a_v1.yaml
similarity index 84%
rename from source/tests/yaml_inputs/water_se_a_v1.yaml
rename to source/tests/tf/yaml_inputs/water_se_a_v1.yaml
index 55580daf1e..f0a1e32766 100644
--- a/source/tests/yaml_inputs/water_se_a_v1.yaml
+++ b/source/tests/tf/yaml_inputs/water_se_a_v1.yaml
@@ -2,22 +2,22 @@ model:
   descriptor:
     type: se_a
     sel:
-    - 46
-    - 92
+      - 46
+      - 92
     rcut_smth: 5.8
     rcut: 6.0
     neuron:
-    - 25
-    - 50
-    - 100
+      - 25
+      - 50
+      - 100
     axis_neuron: 16
     resnet_dt: false
     seed: 1
   fitting_net:
     neuron:
-    - 240
-    - 240
-    - 240
+      - 240
+      - 240
+      - 240
     resnet_dt: true
     seed: 1
 learning_rate:
@@ -33,11 +33,11 @@ loss:
   start_pref_v: 0
   limit_pref_v: 0
 training:
-  systems: ['../data/']
+  systems: ["../data/"]
   set_prefix: set
   stop_batch: 1000000
   batch_size:
-  - 1
+    - 1
   seed: 1
   disp_file: lcurve.out
   disp_freq: 100
diff --git a/source/tests/yaml_inputs/water_v1.json b/source/tests/tf/yaml_inputs/water_v1.json
similarity index 100%
rename from source/tests/yaml_inputs/water_v1.json
rename to source/tests/tf/yaml_inputs/water_v1.json
diff --git a/source/tests/yaml_inputs/water_v1.yaml b/source/tests/tf/yaml_inputs/water_v1.yaml
similarity index 83%
rename from source/tests/yaml_inputs/water_v1.yaml
rename to source/tests/tf/yaml_inputs/water_v1.yaml
index 9ddbb89f9c..95000631f6 100644
--- a/source/tests/yaml_inputs/water_v1.yaml
+++ b/source/tests/tf/yaml_inputs/water_v1.yaml
@@ -2,20 +2,20 @@ model:
   descriptor:
     type: loc_frame
     sel_a:
-    - 16
-    - 32
+      - 16
+      - 32
     sel_r:
-    - 30
-    - 60
+      - 30
+      - 60
     rcut: 6.0
     axis_rule: [0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0]
   fitting_net:
     neuron:
-    - 240
-    - 120
-    - 60
-    - 30
-    - 10
+      - 240
+      - 120
+      - 60
+      - 30
+      - 10
     resnet_dt: true
     seed: 1
 learning_rate:
@@ -32,11 +32,11 @@ loss:
   limit_pref_v: 0
 training:
   systems:
-  - ../data/
+    - ../data/
   set_prefix: set
   stop_batch: 1000000
   batch_size:
-  - 4
+    - 4
   seed: 1
   disp_file: lcurve.out
   disp_freq: 100
diff --git a/source/tests/zinc_se_a_mask.json b/source/tests/tf/zinc_se_a_mask.json
similarity index 100%
rename from source/tests/zinc_se_a_mask.json
rename to source/tests/tf/zinc_se_a_mask.json