From 68b86b17280483d42ce3b7b7dbf497dcd5e5922a Mon Sep 17 00:00:00 2001
From: wqzhou <33364058+WHUweiqingzhou@users.noreply.github.com>
Date: Mon, 29 Jul 2024 13:21:10 +0800
Subject: [PATCH 1/6] Revert "Fix incomplete atom map due to the subtle
 symmetry_prec (#4752)" (#4817)

This reverts commit 541f6b3dbd268bf630e491dfb10b819ea46d85da.
---
 .../module_cell/module_symmetry/symmetry.cpp  | 43 +------------------
 source/module_cell/module_symmetry/symmetry.h |  2 -
 2 files changed, 1 insertion(+), 44 deletions(-)
diff --git a/source/module_cell/module_symmetry/symmetry.cpp b/source/module_cell/module_symmetry/symmetry.cpp
index 3cd0b2872c..6c19ef1e35 100644
--- a/source/module_cell/module_symmetry/symmetry.cpp
+++ b/source/module_cell/module_symmetry/symmetry.cpp
@@ -1807,54 +1807,13 @@ void Symmetry::set_atom_map(const Atom* atoms)
                     if (equal(diff1, 0.0) && equal(diff2, 0.0) && equal(diff3, 0.0))
                     {
                         this->isym_rotiat_[k][ia] = ja;
+
                         break;
                     }
                 }
             }
         }
     }
-    this->reset_atom_map(atoms);
-}
-
-inline bool check_full_atom_map(const std::vector<std::vector<int>>& m)
-{
-    if (m.empty()) { return false;
-}
-    for (const auto& v1 : m) { for (const auto& v2 : v1) { if (v2 == -1) { return false; } } }
-    return true;
-}
-
-void Symmetry::reset_atom_map(const Atom* atoms)
-{
-    const double eps_start = this->epsilon;
-    while (!check_full_atom_map(this->isym_rotiat_) && this->epsilon < 100 * eps_start)
-    {
-        this->epsilon *= 2;
-        this->isym_rotiat_.clear();
-        this->set_atom_map(atoms);
-
-        std::cout << "try eps=" << this->epsilon << std::endl;
-        std::cout << check_full_atom_map(this->isym_rotiat_) << std::endl;
-    }
-    if (this->epsilon > 100 * eps_start)
-    {
-        ModuleBase::WARNING("Symmetry::reset_atom_map", "Cannot find all the equivalent atoms even at 100*symmetry_prec.");
-        if (ModuleSymmetry::Symmetry::symm_autoclose)
-        {
-            ModuleBase::WARNING("K_Vectors::ibz_kpoint", "Automatically set symmetry to 0 and continue ...");
-            std::cout << "Automatically set symmetry to 0 and continue ..." << std::endl;
-            ModuleSymmetry::Symmetry::symm_flag = 0;
-        }
-        else {
-            ModuleBase::WARNING_QUIT("Symmetry::reset_atom_map",
-                "Possible solutions: \n \
-1. Refine the lattice parameters in STRU;\n \
-2. Use a different`symmetry_prec`.  \n \
-3. Close symemtry: set `symmetry` to 0 in INPUT. \n \
-4. Set `symmetry_autoclose` to 1 in INPUT to automatically close symmetry when this error occurs.");
-        }
-    }
-    this->epsilon = eps_start;  //recover symmetry_prec after a successful reset
 }
 
 /// @brief return a map that is inequivalent atom index to its symmetry multiplicity
diff --git a/source/module_cell/module_symmetry/symmetry.h b/source/module_cell/module_symmetry/symmetry.h
index d17c8a9386..3626cddc52 100644
--- a/source/module_cell/module_symmetry/symmetry.h
+++ b/source/module_cell/module_symmetry/symmetry.h
@@ -130,8 +130,6 @@ class Symmetry : public Symmetry_Basic
 
     /// @brief  set atom map for each symmetry operation
     void set_atom_map(const Atom* atoms);
-    /// @brief deal with the rarely happen incomplete atom map due to a subtle symmetry_prec
-    void reset_atom_map(const Atom* atoms);
     /// @brief check if all the atoms are movable
     ///  delta_pos symmetrization in relax is only meaningful when all the atoms are movable in all the directions.
     bool is_all_movable(const Atom* atoms, const Statistics& st)const;

From a0d709f38ceaca879fbce048e9d33049463d29a8 Mon Sep 17 00:00:00 2001
From: wqzhou <33364058+WHUweiqingzhou@users.noreply.github.com>
Date: Mon, 29 Jul 2024 14:23:22 +0800
Subject: [PATCH 2/6] v3.7.2 (#4820)

---
 source/version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/version.h b/source/version.h
index 7dada15c72..412836cf84 100644
--- a/source/version.h
+++ b/source/version.h
@@ -1,3 +1,3 @@
 #ifndef VERSION
-#define VERSION "v3.7.1"
+#define VERSION "v3.7.2"
 #endif

From 2acef61a0978155a8bb17533314bf3312c668773 Mon Sep 17 00:00:00 2001
From: GRYS <57103981+grysgreat@users.noreply.github.com>
Date: Mon, 29 Jul 2024 16:34:29 +0800
Subject: [PATCH 3/6] Refactor: Heterogeneous parallel acceleration hsolver-pw
 force_scc. (#4759)

* Init PR.

* fix some vector and init value comments.

* clean GlobalC

* use doxygen style notation

* computes some parameters in once.

* [pre-commit.ci lite] apply automatic fixes

* use std::vector to replace new and delete?

---------

Co-authored-by: Mohan Chen <mohanchen@pku.edu.cn>
Co-authored-by: pre-commit-ci-lite[bot] <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
---
 source/Makefile.Objects                       |   1 +
 .../hamilt_lcaodft/FORCE_STRESS.cpp           |   2 +-
 .../hamilt_pwdft/CMakeLists.txt               |   1 +
 .../module_hamilt_pw/hamilt_pwdft/forces.cpp  | 140 +----------
 source/module_hamilt_pw/hamilt_pwdft/forces.h |  11 +-
 .../hamilt_pwdft/forces_scc.cpp               | 231 ++++++++++++++++++
 .../hamilt_pwdft/kernels/cuda/stress_op.cu    |  73 +++++-
 .../hamilt_stodft/sto_forces.cpp              |  50 ++--
 8 files changed, 336 insertions(+), 173 deletions(-)
 create mode 100644 source/module_hamilt_pw/hamilt_pwdft/forces_scc.cpp

diff --git a/source/Makefile.Objects b/source/Makefile.Objects
index 2c726c05e0..e78fb55c9b 100644
--- a/source/Makefile.Objects
+++ b/source/Makefile.Objects
@@ -627,6 +627,7 @@ OBJS_SRCPW=H_Ewald_pw.o\
     forces_us.o\
     forces_nl.o\
     forces_cc.o\
+    forces_scc.o\
     fs_nonlocal_tools.o\
     force_op.o\
     stress_op.o\
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp
index 338a315ad6..9efc766902 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp
@@ -764,7 +764,7 @@ void Force_Stress_LCAO<T>::calForcePwPart(ModuleBase::matrix& fvl_dvl,
     //--------------------------------------------------------
     // force due to self-consistent charge.
     //--------------------------------------------------------
-    f_pw.cal_force_scc(fscc, rhopw, vnew, vnew_exist);
+    f_pw.cal_force_scc(fscc, rhopw, vnew, vnew_exist,GlobalC::ucell);
     return;
 }
 
diff --git a/source/module_hamilt_pw/hamilt_pwdft/CMakeLists.txt b/source/module_hamilt_pw/hamilt_pwdft/CMakeLists.txt
index 44eca9406e..c2523f872f 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/CMakeLists.txt
+++ b/source/module_hamilt_pw/hamilt_pwdft/CMakeLists.txt
@@ -10,6 +10,7 @@ list(APPEND objects
     operator_pw/operator_pw.cpp
     forces_nl.cpp
     forces_cc.cpp
+    forces_scc.cpp
     forces.cpp
     forces_us.cpp
     stress_func_cc.cpp
diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces.cpp
index 2159e87ebe..cb84387efa 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/forces.cpp
+++ b/source/module_hamilt_pw/hamilt_pwdft/forces.cpp
@@ -168,7 +168,7 @@ void Forces<FPTYPE, Device>::cal_force(ModuleBase::matrix& force,
     // For PAW, calculated together in paw_cell.calculate_force
     if (!GlobalV::use_paw)
     {
-        this->cal_force_scc(forcescc, rho_basis, elec.vnew, elec.vnew_exist);
+        this->cal_force_scc(forcescc, rho_basis, elec.vnew, elec.vnew_exist,GlobalC::ucell);
     }
     else
     {
@@ -804,144 +804,6 @@ void Forces<FPTYPE, Device>::cal_force_ew(ModuleBase::matrix& forceion,
 }
 
 
-template <typename FPTYPE, typename Device>
-void Forces<FPTYPE, Device>::cal_force_scc(ModuleBase::matrix& forcescc,
-                                           ModulePW::PW_Basis* rho_basis,
-                                           const ModuleBase::matrix& vnew,
-                                           const bool vnew_exist)
-{
-    ModuleBase::TITLE("Forces", "cal_force_scc");
-    ModuleBase::timer::tick("Forces", "cal_force_scc");
-
-    // for orbital free case
-    if (!vnew_exist)
-    {
-        ModuleBase::timer::tick("Forces", "cal_force_scc");
-        return;
-    }
-
-    std::complex<double>* psic = new std::complex<double>[rho_basis->nmaxgr];
-
-    const int nrxx = vnew.nc;
-    const int nspin = vnew.nr;
-
-    if (nspin == 1 || nspin == 4)
-    {
-#ifdef _OPENMP
-#pragma omp parallel for schedule(static, 1024)
-#endif
-        for (int ir = 0; ir < nrxx; ir++)
-        {
-            psic[ir] = vnew(0, ir);
-        }
-    }
-    else
-    {
-        int isup = 0;
-        int isdw = 1;
-#ifdef _OPENMP
-#pragma omp parallel for schedule(static, 1024)
-#endif
-        for (int ir = 0; ir < nrxx; ir++)
-        {
-            psic[ir] = (vnew(isup, ir) + vnew(isdw, ir)) * 0.5;
-        }
-    }
-
-    int ndm = 0;
-
-    for (int it = 0; it < GlobalC::ucell.ntype; it++)
-    {
-        if (ndm < GlobalC::ucell.atoms[it].ncpp.msh)
-        {
-            ndm = GlobalC::ucell.atoms[it].ncpp.msh;
-        }
-    }
-
-    // work space
-    double* rhocgnt = new double[rho_basis->ngg];
-    ModuleBase::GlobalFunc::ZEROS(rhocgnt, rho_basis->ngg);
-
-    rho_basis->real2recip(psic, psic);
-
-    int igg0 = 0;
-    const int ig0 = rho_basis->ig_gge0;
-    if (rho_basis->gg_uniq[0] < 1.0e-8) {
-        igg0 = 1;
-}
-
-    double fact = 2.0;
-    for (int nt = 0; nt < GlobalC::ucell.ntype; nt++)
-    {
-        //		Here we compute the G.ne.0 term
-        const int mesh = GlobalC::ucell.atoms[nt].ncpp.msh;
-#ifdef _OPENMP
-#pragma omp parallel for
-#endif
-        for (int ig = igg0; ig < rho_basis->ngg; ++ig)
-        {
-            double* aux = new double[mesh];
-            const double gx = sqrt(rho_basis->gg_uniq[ig]) * GlobalC::ucell.tpiba;
-            for (int ir = 0; ir < mesh; ir++)
-            {
-                if (GlobalC::ucell.atoms[nt].ncpp.r[ir] < 1.0e-8)
-                {
-                    aux[ir] = GlobalC::ucell.atoms[nt].ncpp.rho_at[ir];
-                }
-                else
-                {
-                    const double gxx = gx * GlobalC::ucell.atoms[nt].ncpp.r[ir];
-                    aux[ir] = GlobalC::ucell.atoms[nt].ncpp.rho_at[ir] * ModuleBase::libm::sin(gxx) / gxx;
-                }
-            }
-            ModuleBase::Integral::Simpson_Integral(mesh, aux, GlobalC::ucell.atoms[nt].ncpp.rab, rhocgnt[ig]);
-            delete[] aux; // mohan fix bug 2012-03-22
-        }
-
-        int iat = 0;
-        for (int it = 0; it < GlobalC::ucell.ntype; it++)
-        {
-            for (int ia = 0; ia < GlobalC::ucell.atoms[it].na; ia++)
-            {
-                if (nt == it)
-                {
-                    const ModuleBase::Vector3<double> pos = GlobalC::ucell.atoms[it].tau[ia];
-                    double &force0 = forcescc(iat, 0), &force1 = forcescc(iat, 1), &force2 = forcescc(iat, 2);
-#ifdef _OPENMP
-#pragma omp parallel for reduction(+ : force0) reduction(+ : force1) reduction(+ : force2)
-#endif
-                    for (int ig = 0; ig < rho_basis->npw; ++ig)
-                    {
-                        if (ig == ig0) {
-                            continue;
-}
-                        const ModuleBase::Vector3<double> gv = rho_basis->gcar[ig];
-                        const double rhocgntigg = rhocgnt[rho_basis->ig2igg[ig]];
-                        const double arg = ModuleBase::TWO_PI * (gv * pos);
-                        double sinp, cosp;
-                        ModuleBase::libm::sincos(arg, &sinp, &cosp);
-                        const std::complex<double> cpm = std::complex<double>(sinp, cosp) * conj(psic[ig]);
-
-                        force0 += fact * rhocgntigg * GlobalC::ucell.tpiba * gv.x * cpm.real();
-                        force1 += fact * rhocgntigg * GlobalC::ucell.tpiba * gv.y * cpm.real();
-                        force2 += fact * rhocgntigg * GlobalC::ucell.tpiba * gv.z * cpm.real();
-                    }
-                    // std::cout << " forcescc = " << forcescc(iat,0) << " " << forcescc(iat,1) << " " <<
-                    // forcescc(iat,2) << std::endl;
-                }
-                iat++;
-            }
-        }
-    }
-
-    Parallel_Reduce::reduce_pool(forcescc.c, forcescc.nr * forcescc.nc);
-
-    delete[] psic;    // mohan fix bug 2012-03-22
-    delete[] rhocgnt; // mohan fix bug 2012-03-22
-
-    ModuleBase::timer::tick("Forces", "cal_force_scc");
-    return;
-}
 
 template class Forces<double, base_device::DEVICE_CPU>;
 #if ((defined __CUDA) || (defined __ROCM))
diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces.h b/source/module_hamilt_pw/hamilt_pwdft/forces.h
index d1cb1942af..8c734932a1 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/forces.h
+++ b/source/module_hamilt_pw/hamilt_pwdft/forces.h
@@ -71,7 +71,8 @@ class Forces
     void cal_force_scc(ModuleBase::matrix& forcescc,
                        ModulePW::PW_Basis* rho_basis,
                        const ModuleBase::matrix& v_current,
-                       const bool vnew_exist);
+                       const bool vnew_exist,
+                       const UnitCell& ucell_in);
     void cal_force_us(ModuleBase::matrix& forcenl,
                       ModulePW::PW_Basis* rho_basis,
                       pseudopot_cell_vnl* ppcell_in,
@@ -86,6 +87,14 @@ class Forces
                      FPTYPE* drhocg,
                      ModulePW::PW_Basis* rho_basis,
                      int type); // used in nonlinear core correction stress
+    void deriv_drhoc_scc(const bool& numeric,
+                     const int mesh,
+                     const FPTYPE* r,
+                     const FPTYPE* rab,
+                     const FPTYPE* rhoc,
+                     FPTYPE* drhocg,
+                     ModulePW::PW_Basis* rho_basis,
+                     const UnitCell& ucell_in); // used in nonlinear core correction stress
   private:
     Device* ctx = {};
     base_device::DEVICE_CPU* cpu_ctx = {};
diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces_scc.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces_scc.cpp
new file mode 100644
index 0000000000..33b953e5ea
--- /dev/null
+++ b/source/module_hamilt_pw/hamilt_pwdft/forces_scc.cpp
@@ -0,0 +1,231 @@
+#include "forces.h"
+#include "module_hamilt_pw/hamilt_pwdft/global.h"
+#include "module_io/output_log.h"
+#include "stress_func.h"
+// new
+#include "module_base/complexmatrix.h"
+#include "module_base/libm/libm.h"
+#include "module_base/math_integral.h"
+#include "module_base/mathzone.h"
+#include "module_base/timer.h"
+#include "module_base/tool_threading.h"
+#include "module_elecstate/potentials/efield.h"
+#include "module_elecstate/potentials/gatefield.h"
+#include "module_hamilt_general/module_ewald/H_Ewald_pw.h"
+#include "module_hamilt_general/module_surchem/surchem.h"
+#include "module_hamilt_general/module_vdw/vdw.h"
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+#ifdef USE_PAW
+#include "module_cell/module_paw/paw_cell.h"
+#endif
+
+
+template <typename FPTYPE, typename Device>
+void Forces<FPTYPE, Device>::cal_force_scc(ModuleBase::matrix& forcescc,
+                                           ModulePW::PW_Basis* rho_basis,
+                                           const ModuleBase::matrix& vnew,
+                                           const bool vnew_exist,
+                                           const UnitCell& ucell_in) {
+    ModuleBase::TITLE("Forces", "cal_force_scc");
+    ModuleBase::timer::tick("Forces", "cal_force_scc");
+
+    // for orbital free case
+    if (!vnew_exist) {
+        ModuleBase::timer::tick("Forces", "cal_force_scc");
+        return;
+    }
+
+    std::vector<std::complex<double>> psic(rho_basis->nmaxgr);
+
+    const int nrxx = vnew.nc;
+    const int nspin = vnew.nr;
+
+    if (nspin == 1 || nspin == 4) {
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static, 1024)
+#endif
+        for (int ir = 0; ir < nrxx; ir++) {
+            psic[ir] = vnew(0, ir);
+        }
+    } else {
+        int isup = 0;
+        int isdw = 1;
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static, 1024)
+#endif
+        for (int ir = 0; ir < nrxx; ir++) {
+            psic[ir] = (vnew(isup, ir) + vnew(isdw, ir)) * 0.5;
+        }
+    }
+
+    int ndm = 0;
+
+    for (int it = 0; it < ucell_in.ntype; it++) {
+        if (ndm < ucell_in.atoms[it].ncpp.msh) {
+            ndm = ucell_in.atoms[it].ncpp.msh;
+        }
+    }
+
+    // work space
+	std::vector<double> rhocgnt(rho_basis->ngg);
+    ModuleBase::GlobalFunc::ZEROS(rhocgnt.data(), rho_basis->ngg);
+
+    rho_basis->real2recip(psic.data(), psic.data());
+
+    int igg0 = 0;
+    const int ig0 = rho_basis->ig_gge0;
+    if (rho_basis->gg_uniq[0] < 1.0e-8)
+        igg0 = 1;
+
+    double fact = 2.0;
+    for (int nt = 0; nt < ucell_in.ntype; nt++) {
+        //		Here we compute the G.ne.0 term
+        const int mesh = ucell_in.atoms[nt].ncpp.msh;
+        this->deriv_drhoc_scc(GlobalC::ppcell.numeric,
+                            mesh,
+                            ucell_in.atoms[nt].ncpp.r,
+                            ucell_in.atoms[nt].ncpp.rab,
+                            ucell_in.atoms[nt].ncpp.rho_at,
+                            rhocgnt.data(),
+                            rho_basis,
+                            ucell_in);        
+        int iat = 0;
+        for (int it = 0; it < ucell_in.ntype; it++) {
+            for (int ia = 0; ia < ucell_in.atoms[it].na; ia++) {
+                if (nt == it) {
+                    const ModuleBase::Vector3<double> pos
+                        = ucell_in.atoms[it].tau[ia];
+                    double &force0 = forcescc(iat, 0),
+                           &force1 = forcescc(iat, 1),
+                           &force2 = forcescc(iat, 2);
+#ifdef _OPENMP
+#pragma omp parallel for reduction(+ : force0) reduction(+ : force1) reduction(+ : force2)
+#endif
+                    for (int ig = 0; ig < rho_basis->npw; ++ig) {
+                        if (ig == ig0)
+                            continue;
+                        const ModuleBase::Vector3<double> gv
+                            = rho_basis->gcar[ig];
+                        const double rhocgntigg
+                            = rhocgnt[rho_basis->ig2igg[ig]];
+                        const double arg = ModuleBase::TWO_PI * (gv * pos);
+                        double sinp, cosp;
+                        ModuleBase::libm::sincos(arg, &sinp, &cosp);
+                        const std::complex<double> cpm
+                            = std::complex<double>(sinp, cosp) * conj(psic[ig]);
+
+                        force0 += fact * rhocgntigg * ucell_in.tpiba
+                                  * gv.x * cpm.real();
+                        force1 += fact * rhocgntigg * ucell_in.tpiba
+                                  * gv.y * cpm.real();
+                        force2 += fact * rhocgntigg * ucell_in.tpiba
+                                  * gv.z * cpm.real();
+                    }
+                }
+                iat++;
+            }
+        }
+    }
+
+
+    Parallel_Reduce::reduce_pool(forcescc.c, forcescc.nr * forcescc.nc);
+
+    ModuleBase::timer::tick("Forces", "cal_force_scc");
+    return;
+}
+
+
+template <typename FPTYPE, typename Device>
+void Forces<FPTYPE, Device>::deriv_drhoc_scc(const bool& numeric,
+                                              const int mesh,
+                                              const FPTYPE* r,
+                                              const FPTYPE* rab,
+                                              const FPTYPE* rhoc,
+                                              FPTYPE* drhocg,
+                                              ModulePW::PW_Basis* rho_basis,
+                                              const UnitCell& ucell_in) {
+    int igl0 = 0;
+    double gx = 0;
+    double rhocg1 = 0;
+    this->device = base_device::get_device_type<Device>(this->ctx);
+    /// the modulus of g for a given shell
+    /// the fourier transform
+    /// auxiliary memory for integration
+	std::vector<double> gx_arr(rho_basis->ngg);
+    double* gx_arr_d = nullptr;
+    /// counter on radial mesh points
+    /// counter on g shells
+    /// lower limit for loop on ngl
+
+    ///
+    /// G=0 term
+    ///
+    if (rho_basis->gg_uniq[0] < 1.0e-8) {
+        drhocg[0] = 0.0;
+        igl0 = 1;
+    } else {
+        igl0 = 0;
+    }
+    
+
+    ///
+    /// G <> 0 term
+    ///]
+
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+    for (int igl = igl0; igl < rho_basis->ngg; igl++) {
+        gx_arr[igl] = sqrt(rho_basis->gg_uniq[igl]) * ucell_in.tpiba;
+    }
+
+	double *r_d = nullptr;
+    double *rhoc_d = nullptr;
+    double *rab_d = nullptr;
+    double *aux_d = nullptr;
+    double *drhocg_d = nullptr;
+    if (this->device == base_device::GpuDevice) {
+        resmem_var_op()(this->ctx, r_d, mesh);
+        resmem_var_op()(this->ctx, rhoc_d, mesh);
+        resmem_var_op()(this->ctx, rab_d, mesh);
+
+        resmem_var_op()(this->ctx, aux_d, mesh);
+        resmem_var_op()(this->ctx, gx_arr_d, rho_basis->ngg);
+        resmem_var_op()(this->ctx, drhocg_d, rho_basis->ngg);
+
+        syncmem_var_h2d_op()(this->ctx,
+                             this->cpu_ctx,
+                             gx_arr_d,
+                             gx_arr.data(),
+                             rho_basis->ngg);
+        syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, r_d, r, mesh);
+        syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, rab_d, rab, mesh);
+        syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, rhoc_d, rhoc, mesh);
+    }
+
+	if(this->device == base_device::GpuDevice) {
+		hamilt::cal_stress_drhoc_aux_op<FPTYPE, Device>()(
+			r_d,rhoc_d,gx_arr_d+igl0,rab_d,drhocg_d+igl0,mesh,igl0,rho_basis->ngg-igl0,ucell_in.omega,2);
+		syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, drhocg+igl0, drhocg_d+igl0, rho_basis->ngg-igl0);	
+
+	} else {
+		hamilt::cal_stress_drhoc_aux_op<FPTYPE, Device>()(
+			r,rhoc,gx_arr.data()+igl0,rab,drhocg+igl0,mesh,igl0,rho_basis->ngg-igl0,ucell_in.omega,2);
+
+	}
+
+    delmem_var_op()(this->ctx, r_d);
+    delmem_var_op()(this->ctx, rhoc_d);
+    delmem_var_op()(this->ctx, rab_d);
+    delmem_var_op()(this->ctx, gx_arr_d);
+    delmem_var_op()(this->ctx, drhocg_d);
+    return;
+}
+
+template class Forces<double, base_device::DEVICE_CPU>;
+#if ((defined __CUDA) || (defined __ROCM))
+template class Forces<double, base_device::DEVICE_GPU>;
+#endif
\ No newline at end of file
diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/cuda/stress_op.cu b/source/module_hamilt_pw/hamilt_pwdft/kernels/cuda/stress_op.cu
index a507feefc7..57f24b9bd9 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/kernels/cuda/stress_op.cu
+++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/cuda/stress_op.cu
@@ -371,15 +371,17 @@ __global__ void cal_stress_drhoc_aux0(
     
     for( int ir = 0;ir< mesh; ir++)
     {
-        aux_d [ir%2]  = r [ir] * rhoc [ir] * (r [ir] * cos (gx_arr[idx] * r [ir] ) / gx_arr[idx] - sin (gx_arr[idx] * r [ir] ) / pow(gx_arr[idx],2));
+        const int ir_2 = ir%2;
+        const FPTYPE gx_r = gx_arr[idx] * r [ir];
+        aux_d [ir_2]  = r [ir] * rhoc [ir] * (r [ir] * cos (gx_r) / gx_arr[idx] - sin (gx_r) / pow(gx_arr[idx],2));
 
         if(ir==0){
-            f_0 = aux_d[ir%2]*rab[ir];
+            f_0 = aux_d[ir_2]*rab[ir];
         } else if(ir==mesh-2){
-            f_2 = aux_d[ir%2]*rab[ir];
+            f_2 = aux_d[ir_2]*rab[ir];
         } else if(ir==mesh-1) {
-            f_1 = aux_d[ir%2]*rab[ir];
-        } else if(ir%2==0){
+            f_1 = aux_d[ir_2]*rab[ir];
+        } else if(ir_2==0){
             const double f1 = aux_d[1]*rab[ir-1];
             rhocg1 += f1 + f1 + aux_d[0]*rab[ir];
         }
@@ -410,16 +412,19 @@ __global__ void cal_stress_drhoc_aux1(
     
     for( int ir = 0;ir< mesh; ir++)
     {
-        aux_d [ir%2] = ir!=0 ? sin(gx_arr[idx] * r[ir]) / (gx_arr[idx] * r[ir]) : 1.0;
-        aux_d [ir%2] = r[ir] * r[ir] * rhoc [ir] * aux_d [ir%2];
+        const int ir_2 = ir%2;
+        const FPTYPE gx_r = gx_arr[idx] * r [ir];
+
+        aux_d [ir_2] = ir!=0 ? sin(gx_r) / (gx_r) : 1.0;
+        aux_d [ir_2] = r[ir] * r[ir] * rhoc [ir] * aux_d [ir_2];
 
         if(ir==0){
-            f_0 = aux_d[ir%2]*rab[ir];
+            f_0 = aux_d[ir_2]*rab[ir];
         } else if(ir==mesh-2){
-            f_2 = aux_d[ir%2]*rab[ir];
+            f_2 = aux_d[ir_2]*rab[ir];
         } else if(ir==mesh-1) {
-            f_1 = aux_d[ir%2]*rab[ir];
-        } else if(ir%2==0){
+            f_1 = aux_d[ir_2]*rab[ir];
+        } else if(ir_2==0){
             const double f1 = aux_d[1]*rab[ir-1];
             rhocg1 += f1 + f1 + aux_d[0]*rab[ir];
         }
@@ -434,6 +439,47 @@ __global__ void cal_stress_drhoc_aux1(
 }
 
 
+template <typename FPTYPE>
+__global__ void cal_stress_drhoc_aux2(
+        const FPTYPE* r, const FPTYPE* rhoc, 
+        const FPTYPE *gx_arr, const FPTYPE *rab, FPTYPE *drhocg, 
+        const int mesh, const int igl0, const int ngg, const double omega
+){
+    const double FOUR_PI =  4.0 * 3.14159265358979323846;
+
+    int idx = threadIdx.x + blockIdx.x * blockDim.x;
+
+    FPTYPE aux_d[2];
+    FPTYPE rhocg1=0.0, f_0=0.0, f_2=0.0, f_1=0.0;
+
+    if (idx >= ngg) {return;}
+    
+    for( int ir = 0;ir< mesh; ir++)
+    {
+        const int ir_2 = ir%2;
+        const FPTYPE gx_r = gx_arr[idx] * r [ir];
+
+        aux_d [ir_2] = r[ir] < 1.0e-8 ? rhoc [ir] : rhoc [ir] * sin(gx_r) / (gx_r);
+        if(ir==0){
+            f_0 = aux_d[ir_2]*rab[ir];
+        } else if(ir==mesh-2){
+            f_2 = aux_d[ir_2]*rab[ir];
+        } else if(ir==mesh-1) {
+            f_1 = aux_d[ir_2]*rab[ir];
+        } else if(ir_2==0){
+            const double f1 = aux_d[1]*rab[ir-1];
+            rhocg1 += f1 + f1 + aux_d[0]*rab[ir];
+        }
+
+    }//ir
+    rhocg1 += f_2+f_2;
+    rhocg1 += rhocg1;
+    rhocg1 += f_0 + f_1;
+    rhocg1/=3.0;
+
+    drhocg [idx] = rhocg1;
+}
+
 
 template <typename FPTYPE>
 void cal_vkb_op<FPTYPE, base_device::DEVICE_GPU>::operator()(
@@ -548,8 +594,11 @@ void cal_stress_drhoc_aux_op<FPTYPE, base_device::DEVICE_GPU>::operator()(
         cal_stress_drhoc_aux1<FPTYPE><<<block,THREADS_PER_BLOCK>>>(
             r,rhoc,gx_arr,rab,drhocg,mesh,igl0,ngg,omega
         );        
+    } else if(type == 2 ){
+        cal_stress_drhoc_aux2<FPTYPE><<<block,THREADS_PER_BLOCK>>>(
+            r,rhoc,gx_arr,rab,drhocg,mesh,igl0,ngg,omega
+        );        
     }
-
     return ;
 }
 
diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp
index 6f48277a3d..5567f51ef9 100644
--- a/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp
+++ b/source/module_hamilt_pw/hamilt_stodft/sto_forces.cpp
@@ -40,7 +40,7 @@ void Sto_Forces::cal_stoforce(ModuleBase::matrix& force,
     this->cal_force_ew(forceion, rho_basis, p_sf);
     this->cal_sto_force_nl(forcenl, wg, pkv, wfc_basis, psi_in, stowf);
     this->cal_force_cc(forcecc, rho_basis, chr);
-    this->cal_force_scc(forcescc, rho_basis, elec.vnew, elec.vnew_exist);
+    this->cal_force_scc(forcescc, rho_basis, elec.vnew, elec.vnew_exist,GlobalC::ucell);
 
     //impose total force = 0
     int iat = 0;
@@ -139,10 +139,12 @@ void Sto_Forces::cal_stoforce(ModuleBase::matrix& force,
         ModuleIO::print_force(GlobalV::ofs_running, GlobalC::ucell, "NLCC     FORCE (Ry/Bohr)", forcecc);
         ModuleIO::print_force(GlobalV::ofs_running, GlobalC::ucell, "ION      FORCE (Ry/Bohr)", forceion);
         ModuleIO::print_force(GlobalV::ofs_running, GlobalC::ucell, "SCC      FORCE (Ry/Bohr)", forcescc);
-        if (GlobalV::EFIELD_FLAG)
+        if (GlobalV::EFIELD_FLAG) {
             ModuleIO::print_force(GlobalV::ofs_running, GlobalC::ucell, "EFIELD   FORCE (Ry/Bohr)", force_e);
-        if (GlobalV::GATE_FLAG)
+}
+        if (GlobalV::GATE_FLAG) {
             ModuleIO::print_force(GlobalV::ofs_running, GlobalC::ucell, "GATEFIELD   FORCE (Ry/Bohr)", force_gate);
+}
     }
 
     // output force in unit eV/Angstrom
@@ -150,21 +152,23 @@ void Sto_Forces::cal_stoforce(ModuleBase::matrix& force,
     
 	if(GlobalV::TEST_FORCE)
 	{
-        ModuleIO::print_force(GlobalV::ofs_running, GlobalC::ucell, "LOCAL    FORCE (eV/Angstrom)", forcelc, 0);
-        ModuleIO::print_force(GlobalV::ofs_running, GlobalC::ucell, "NONLOCAL FORCE (eV/Angstrom)", forcenl, 0);
-        ModuleIO::print_force(GlobalV::ofs_running, GlobalC::ucell, "NLCC     FORCE (eV/Angstrom)", forcecc, 0);
-        ModuleIO::print_force(GlobalV::ofs_running, GlobalC::ucell, "ION      FORCE (eV/Angstrom)", forceion, 0);
-        ModuleIO::print_force(GlobalV::ofs_running, GlobalC::ucell, "SCC      FORCE (eV/Angstrom)", forcescc, 0);
-        if (GlobalV::EFIELD_FLAG)
-            ModuleIO::print_force(GlobalV::ofs_running, GlobalC::ucell, "EFIELD   FORCE (eV/Angstrom)", force_e, 0);
-        if (GlobalV::GATE_FLAG)
+        ModuleIO::print_force(GlobalV::ofs_running, GlobalC::ucell, "LOCAL    FORCE (eV/Angstrom)", forcelc, false);
+        ModuleIO::print_force(GlobalV::ofs_running, GlobalC::ucell, "NONLOCAL FORCE (eV/Angstrom)", forcenl, false);
+        ModuleIO::print_force(GlobalV::ofs_running, GlobalC::ucell, "NLCC     FORCE (eV/Angstrom)", forcecc, false);
+        ModuleIO::print_force(GlobalV::ofs_running, GlobalC::ucell, "ION      FORCE (eV/Angstrom)", forceion, false);
+        ModuleIO::print_force(GlobalV::ofs_running, GlobalC::ucell, "SCC      FORCE (eV/Angstrom)", forcescc, false);
+        if (GlobalV::EFIELD_FLAG) {
+            ModuleIO::print_force(GlobalV::ofs_running, GlobalC::ucell, "EFIELD   FORCE (eV/Angstrom)", force_e, false);
+}
+        if (GlobalV::GATE_FLAG) {
             ModuleIO::print_force(GlobalV::ofs_running,
                                   GlobalC::ucell,
                                   "GATEFIELD   FORCE (eV/Angstrom)",
                                   force_gate,
-                                  0);
+                                  false);
+}
     }
-    ModuleIO::print_force(GlobalV::ofs_running, GlobalC::ucell, "TOTAL-FORCE (eV/Angstrom)", force, 0);
+    ModuleIO::print_force(GlobalV::ofs_running, GlobalC::ucell, "TOTAL-FORCE (eV/Angstrom)", force, false);
     ModuleBase::timer::tick("Sto_Force", "cal_force");
     return;
 }
@@ -181,13 +185,15 @@ void Sto_Forces::cal_sto_force_nl(ModuleBase::matrix& forcenl,
 
     const int nkb = GlobalC::ppcell.nkb;
     int* nchip = stowf.nchip;
-	if(nkb == 0) return; // mohan add 2010-07-25
+	if(nkb == 0) { return; // mohan add 2010-07-25
+}
 	
 	const int npwx = wfc_basis->npwk_max;
 	// vkb1: |Beta(nkb,npw)><Beta(nkb,npw)|psi(nbnd,npw)>
 	ModuleBase::ComplexMatrix vkb1( nkb, npwx );
 	int nksbands = psi_in->get_nbands();
-	if(GlobalV::MY_STOGROUP != 0) nksbands = 0;
+	if(GlobalV::MY_STOGROUP != 0) { nksbands = 0;
+}
     
 
     for (int ik = 0;ik < wfc_basis->nks;ik++)
@@ -243,18 +249,21 @@ void Sto_Forces::cal_sto_force_nl(ModuleBase::matrix& forcenl,
                 std::complex<double>* pvkb = &GlobalC::ppcell.vkb(i,0);
 				if (ipol==0)
 				{
-					for (int ig=0; ig<npw; ig++)
+					for (int ig=0; ig<npw; ig++) {
                         pvkb1[ig] = pvkb[ig] * ModuleBase::NEG_IMAG_UNIT * wfc_basis->getgcar(ik, ig)[0];
+}
                 }
 				if (ipol==1)
 				{
-					for (int ig=0; ig<npw; ig++)
+					for (int ig=0; ig<npw; ig++) {
                         pvkb1[ig] = pvkb[ig] * ModuleBase::NEG_IMAG_UNIT * wfc_basis->getgcar(ik,ig)[1];
+}
                 }
 				if (ipol==2)
 				{
-					for (int ig=0; ig<npw; ig++)
+					for (int ig=0; ig<npw; ig++) {
                         pvkb1[ig] = pvkb[ig] * ModuleBase::NEG_IMAG_UNIT * wfc_basis->getgcar(ik,ig)[2];
+}
                 }
 			}
             //KS orbitals
@@ -278,10 +287,11 @@ void Sto_Forces::cal_sto_force_nl(ModuleBase::matrix& forcenl,
 		for (int ib=0; ib<nbandstot; ib++)
 		{
 			double fac;
-			if(ib < nksbands)
+			if(ib < nksbands) {
 				fac = wg(ik, ib) * 2.0 * GlobalC::ucell.tpiba;
-			else
+			} else {
                 fac = p_kv->wk[ik] * 2.0 * GlobalC::ucell.tpiba;
+}
             int iat = 0;
         	int sum = 0;
 			for (int it=0; it< GlobalC::ucell.ntype; it++)

From 030ed38b61d3c9b71b67237a71e23e66f07b75ce Mon Sep 17 00:00:00 2001
From: GRYS <57103981+grysgreat@users.noreply.github.com>
Date: Mon, 29 Jul 2024 16:35:44 +0800
Subject: [PATCH 4/6] Fix: dcu error in pw_force calculation (#4786)

* fix dcu.

* avoid some repeat calculations.

* fix an error of multipul.
---
 .../kernels/rocm/stress_op.hip.cu             | 155 ++++++++++++++++++
 1 file changed, 155 insertions(+)

diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/rocm/stress_op.hip.cu b/source/module_hamilt_pw/hamilt_pwdft/kernels/rocm/stress_op.hip.cu
index 973a77225d..b55528baa8 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/kernels/rocm/stress_op.hip.cu
+++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/rocm/stress_op.hip.cu
@@ -352,6 +352,134 @@ __global__ void cal_vq_deri(
 }
 
 
+
+template <typename FPTYPE>
+__global__ void cal_stress_drhoc_aux0(
+        const FPTYPE* r, const FPTYPE* rhoc, 
+        const FPTYPE *gx_arr, const FPTYPE *rab, FPTYPE *drhocg, 
+        const int mesh, const int igl0, const int ngg, const double omega
+){
+    const double FOUR_PI =  4.0 * 3.14159265358979323846;
+
+    int idx = threadIdx.x + blockIdx.x * blockDim.x;
+
+    FPTYPE aux_d[2];
+    FPTYPE rhocg1=0.0, f_0=0.0, f_2=0.0, f_1=0.0;
+
+    if (idx >= ngg) {return;}
+    
+    for( int ir = 0;ir< mesh; ir++)
+    {
+        const int ir_2 = ir%2;
+        const FPTYPE gx_r = gx_arr[idx] * r [ir];
+        aux_d [ir_2]  = r [ir] * rhoc [ir] * (r [ir] * cos (gx_r) / gx_arr[idx] - sin (gx_r) / (gx_arr[idx] * gx_arr[idx]));
+
+        if(ir==0){
+            f_0 = aux_d[ir_2]*rab[ir];
+        } else if(ir==mesh-2){
+            f_2 = aux_d[ir_2]*rab[ir];
+        } else if(ir==mesh-1) {
+            f_1 = aux_d[ir_2]*rab[ir];
+        } else if(ir_2==0){
+            const double f1 = aux_d[1]*rab[ir-1];
+            rhocg1 += f1 + f1 + aux_d[0]*rab[ir];
+        }
+
+    }//ir
+    rhocg1 += f_2+f_2;
+    rhocg1 += rhocg1;
+    rhocg1 += f_0 + f_1;
+    rhocg1/=3.0;
+
+    drhocg [idx] = FOUR_PI / omega * rhocg1;
+}
+
+template <typename FPTYPE>
+__global__ void cal_stress_drhoc_aux1(
+        const FPTYPE* r, const FPTYPE* rhoc, 
+        const FPTYPE *gx_arr, const FPTYPE *rab, FPTYPE *drhocg, 
+        const int mesh, const int igl0, const int ngg, const double omega
+){
+    const double FOUR_PI =  4.0 * 3.14159265358979323846;
+
+    int idx = threadIdx.x + blockIdx.x * blockDim.x;
+
+    FPTYPE aux_d[2];
+    FPTYPE rhocg1=0.0, f_0=0.0, f_2=0.0, f_1=0.0;
+
+    if (idx >= ngg) {return;}
+    
+    for( int ir = 0;ir< mesh; ir++)
+    {
+        const int ir_2 = ir%2;
+        const FPTYPE gx_r = gx_arr[idx] * r [ir];
+
+        aux_d [ir_2] = ir!=0 ? sin(gx_r) / (gx_r) : 1.0;
+        aux_d [ir_2] = r[ir] * r[ir] * rhoc [ir] * aux_d [ir_2];
+
+        if(ir==0){
+            f_0 = aux_d[ir_2]*rab[ir];
+        } else if(ir==mesh-2){
+            f_2 = aux_d[ir_2]*rab[ir];
+        } else if(ir==mesh-1) {
+            f_1 = aux_d[ir_2]*rab[ir];
+        } else if(ir_2==0){
+            const double f1 = aux_d[1]*rab[ir-1];
+            rhocg1 += f1 + f1 + aux_d[0]*rab[ir];
+        }
+
+    }//ir
+    rhocg1 += f_2+f_2;
+    rhocg1 += rhocg1;
+    rhocg1 += f_0 + f_1;
+    rhocg1/=3.0;
+
+    drhocg [idx] = FOUR_PI * rhocg1 / omega;
+}
+
+
+template <typename FPTYPE>
+__global__ void cal_stress_drhoc_aux2(
+        const FPTYPE* r, const FPTYPE* rhoc, 
+        const FPTYPE *gx_arr, const FPTYPE *rab, FPTYPE *drhocg, 
+        const int mesh, const int igl0, const int ngg, const double omega
+){
+    const double FOUR_PI =  4.0 * 3.14159265358979323846;
+
+    int idx = threadIdx.x + blockIdx.x * blockDim.x;
+
+    FPTYPE aux_d[2];
+    FPTYPE rhocg1=0.0, f_0=0.0, f_2=0.0, f_1=0.0;
+
+    if (idx >= ngg) {return;}
+    
+    for( int ir = 0;ir< mesh; ir++)
+    {
+        const int ir_2 = ir%2;
+        const FPTYPE gx_r = gx_arr[idx] * r [ir];
+
+        aux_d [ir_2] = r[ir] < 1.0e-8 ? rhoc [ir] : rhoc [ir] * sin(gx_r) / (gx_r);
+        if(ir==0){
+            f_0 = aux_d[ir_2]*rab[ir];
+        } else if(ir==mesh-2){
+            f_2 = aux_d[ir_2]*rab[ir];
+        } else if(ir==mesh-1) {
+            f_1 = aux_d[ir_2]*rab[ir];
+        } else if(ir_2==0){
+            const double f1 = aux_d[1]*rab[ir-1];
+            rhocg1 += f1 + f1 + aux_d[0]*rab[ir];
+        }
+
+    }//ir
+    rhocg1 += f_2+f_2;
+    rhocg1 += rhocg1;
+    rhocg1 += f_0 + f_1;
+    rhocg1/=3.0;
+
+    drhocg [idx] = rhocg1;
+}
+
+
 template <typename FPTYPE>
 void cal_vkb_op<FPTYPE, base_device::DEVICE_GPU>::operator()(
         const base_device::DEVICE_GPU* ctx,
@@ -447,6 +575,31 @@ void cal_vq_deri_op<FPTYPE, base_device::DEVICE_GPU>::operator()(
     return ;
 }
 
+template <typename FPTYPE>
+void cal_stress_drhoc_aux_op<FPTYPE, base_device::DEVICE_GPU>::operator()(
+        const FPTYPE* r, const FPTYPE* rhoc,  
+        const FPTYPE *gx_arr, const FPTYPE *rab, FPTYPE *drhocg, 
+        const int mesh, const int igl0, const int ngg, const double omega,
+        int type
+    )
+{
+    const int block = (ngg + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
+
+
+
+    if(type == 0) {
+        hipLaunchKernelGGL(HIP_KERNEL_NAME(cal_stress_drhoc_aux0<FPTYPE>),block,THREADS_PER_BLOCK,0,0,
+            r,rhoc,gx_arr,rab,drhocg,mesh,igl0,ngg,omega
+        );
+    } else if(type == 1 ){
+        hipLaunchKernelGGL(HIP_KERNEL_NAME(cal_stress_drhoc_aux1<FPTYPE>),block,THREADS_PER_BLOCK,0,0,
+            r,rhoc,gx_arr,rab,drhocg,mesh,igl0,ngg,omega
+        );     
+    }
+
+    return ;
+}
+
 
 template struct cal_vq_op<double, base_device::DEVICE_GPU>;
 template struct cal_vq_op<float, base_device::DEVICE_GPU>;
@@ -460,6 +613,8 @@ template struct cal_vkb_op<float, base_device::DEVICE_GPU>;
 template struct cal_vkb_deri_op<double, base_device::DEVICE_GPU>;
 template struct cal_vkb_deri_op<float, base_device::DEVICE_GPU>;
 
+template struct cal_stress_drhoc_aux_op<double, base_device::DEVICE_GPU>;
+template struct cal_stress_drhoc_aux_op<float, base_device::DEVICE_GPU>;
 
 template <>
 void pointer_array_malloc<base_device::DEVICE_GPU>::operator()(

From 62046418c0b76fd6fe090b5f1677ff844881bdd6 Mon Sep 17 00:00:00 2001
From: dzzz2001 <153698752+dzzz2001@users.noreply.github.com>
Date: Mon, 29 Jul 2024 16:40:39 +0800
Subject: [PATCH 5/6] use openmp to accelerate fvnl_dbeta_gamma.cpp (#4814)

---
 .../hamilt_lcaodft/fvnl_dbeta_gamma.cpp       | 58 +++++++++++++------
 1 file changed, 39 insertions(+), 19 deletions(-)

diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/fvnl_dbeta_gamma.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/fvnl_dbeta_gamma.cpp
index 9a874a8bcc..2594879c22 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/fvnl_dbeta_gamma.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/fvnl_dbeta_gamma.cpp
@@ -20,33 +20,38 @@ void Force_LCAO<double>::cal_fvnl_dbeta(const elecstate::DensityMatrix<double, d
     ModuleBase::TITLE("Force_LCAO", "cal_fvnl_dbeta");
     ModuleBase::timer::tick("Force_LCAO", "cal_fvnl_dbeta");
 
-    double r0[3];
-    double r1[3];
-
+// use schedule(dynamic) for load balancing because adj_num is various
+#pragma omp parallel
+{ 
+    ModuleBase::matrix local_svnl_dbeta(3, 3);
+    #pragma omp for schedule(dynamic)
     for (int iat = 0; iat < ucell.nat; iat++)
     {
         const int it = ucell.iat2it[iat];
         const int ia = ucell.iat2ia[iat];
         const int T0 = it;
         const int I0 = ia;
+        double r0[3]{0};
+        double r1[3]{0};
 
         const ModuleBase::Vector3<double> tau0 = ucell.atoms[it].tau[ia];
         // find ajacent atom of atom ia
         // gd.Find_atom( ucell.atoms[it].tau[ia] );
-        gd.Find_atom(ucell, tau0, it, ia);
+        AdjacentAtomInfo adjs;
+        gd.Find_atom(ucell, tau0, it, ia, &adjs);
         const double Rcut_Beta = ucell.infoNL.Beta[it].get_rcut_max();
 
         std::vector<std::unordered_map<int, std::vector<std::vector<double>>>> nlm_tot;
-        nlm_tot.resize(gd.getAdjacentNum() + 1); // this saves <psi_i|beta> and <psi_i|\nabla|beta>
+        nlm_tot.resize(adjs.adj_num + 1); // this saves <psi_i|beta> and <psi_i|\nabla|beta>
 
         // Step 1 : Calculate and save <psi_i|beta> and <psi_i|\nabla|beta>
-        for (int ad1 = 0; ad1 < gd.getAdjacentNum() + 1; ad1++)
+        for (int ad1 = 0; ad1 < adjs.adj_num + 1; ad1++)
         {
-            const int T1 = gd.getType(ad1);
+            const int T1 = adjs.ntype[ad1];
             const Atom* atom1 = &ucell.atoms[T1];
-            const int I1 = gd.getNatom(ad1);
+            const int I1 = adjs.natom[ad1];
             const int start1 = ucell.itiaiw2iwt(T1, I1, 0);
-            const ModuleBase::Vector3<double> tau1 = gd.getAdjacentTau(ad1);
+            const ModuleBase::Vector3<double> tau1 = adjs.adjacent_tau[ad1];
             const double Rcut_AO1 = orb.Phi[T1].getRcut();
 
             nlm_tot[ad1].clear();
@@ -87,22 +92,22 @@ void Force_LCAO<double>::cal_fvnl_dbeta(const elecstate::DensityMatrix<double, d
         } // ad
 
         // Step 2 : sum to get beta<psi_i|beta><beta|\nabla|psi_j>
-        for (int ad1 = 0; ad1 < gd.getAdjacentNum() + 1; ++ad1)
+        for (int ad1 = 0; ad1 < adjs.adj_num + 1; ++ad1)
         {
-            const int T1 = gd.getType(ad1);
+            const int T1 = adjs.ntype[ad1];
             const Atom* atom1 = &ucell.atoms[T1];
-            const int I1 = gd.getNatom(ad1);
+            const int I1 = adjs.natom[ad1];
             const int start1 = ucell.itiaiw2iwt(T1, I1, 0);
-            const ModuleBase::Vector3<double> tau1 = gd.getAdjacentTau(ad1);
+            const ModuleBase::Vector3<double> tau1 = adjs.adjacent_tau[ad1];
             const double Rcut_AO1 = orb.Phi[T1].getRcut();
 
-            for (int ad2 = 0; ad2 < gd.getAdjacentNum() + 1; ad2++)
+            for (int ad2 = 0; ad2 < adjs.adj_num + 1; ad2++)
             {
-                const int T2 = gd.getType(ad2);
+                const int T2 = adjs.ntype[ad2];
                 const Atom* atom2 = &ucell.atoms[T2];
-                const int I2 = gd.getNatom(ad2);
+                const int I2 = adjs.natom[ad2];
                 const int start2 = ucell.itiaiw2iwt(T2, I2, 0);
-                const ModuleBase::Vector3<double> tau2 = gd.getAdjacentTau(ad2);
+                const ModuleBase::Vector3<double> tau2 = adjs.adjacent_tau[ad2];
                 const double Rcut_AO2 = orb.Phi[T2].getRcut();
 
                 const double dist1 = (tau1 - tau0).norm() * ucell.lat0;
@@ -207,8 +212,8 @@ void Force_LCAO<double>::cal_fvnl_dbeta(const elecstate::DensityMatrix<double, d
                             {
                                 for (int jpol = ipol; jpol < 3; jpol++)
                                 {
-                                    svnl_dbeta(ipol, jpol)
-                                        += sum / 2.0 * (nlm[ipol] * r0[jpol] + nlm_t[ipol] * r1[jpol]);
+                                    local_svnl_dbeta(ipol, jpol)
+                                    += sum / 2.0 * (nlm[ipol] * r0[jpol] + nlm_t[ipol] * r1[jpol]);
                                 }
                             }
                         }
@@ -218,6 +223,21 @@ void Force_LCAO<double>::cal_fvnl_dbeta(const elecstate::DensityMatrix<double, d
         }             // ad1
     }                 // iat
 
+    // sum up local_svnl_dbeta to svnl_dbeta
+    if (isstress)
+    {
+        #pragma omp critical
+        {
+            for (int ipol = 0; ipol < 3; ipol++)
+            {
+                for (int jpol = ipol; jpol < 3; jpol++)
+                {
+                    svnl_dbeta(ipol, jpol) += local_svnl_dbeta(ipol, jpol);
+                }
+            }
+        }
+    }
+}
     if (isstress)
     {
         StressTools::stress_fill(ucell.lat0, ucell.omega, svnl_dbeta);

From 3845b7658de156bf68e7d073cddd29c451274fa2 Mon Sep 17 00:00:00 2001
From: Qianrui Liu <76200646+Qianruipku@users.noreply.github.com>
Date: Mon, 29 Jul 2024 16:43:45 +0800
Subject: [PATCH 6/6] Refactor: Remove INPUT class (#4815)

* Refactor: update md parameters to apply const param_in

* Tests: update unittests

* Tests: update tests

* Tests: update other tests

* [pre-commit.ci lite] apply automatic fixes

* refactor: remove all INPUT

* [pre-commit.ci lite] apply automatic fixes

* delete input.h

* [pre-commit.ci lite] apply automatic fixes

* fix compile failure

* [pre-commit.ci lite] apply automatic fixes

---------

Co-authored-by: YuLiu98 <liuyu@stu.pku.edu.cn>
Co-authored-by: pre-commit-ci-lite[bot] <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
---
 source/Makefile.Objects                       |   4 +-
 source/driver.cpp                             |   2 -
 source/driver_run.cpp                         |   1 -
 .../potentials/H_TDDFT_pw.cpp                 |  20 +-
 .../module_elecstate/potentials/H_TDDFT_pw.h  |   1 -
 source/module_esolver/esolver.h               |   1 -
 source/module_esolver/esolver_fp.cpp          |   1 -
 source/module_esolver/esolver_ks.cpp          |   1 -
 source/module_esolver/esolver_sdft_pw.cpp     |   6 +-
 source/module_esolver/print_funcs.h           |   2 +-
 .../module_surchem/test/setcell.h             |   1 -
 .../module_vdw/test/vdw_test.cpp              |   1 -
 .../operator_lcao/test/test_dftu.cpp          |   8 +-
 source/module_hamilt_lcao/module_dftu/dftu.h  |   5 +-
 .../module_dftu/dftu_tools.cpp                |  50 ++-
 .../module_dftu/dftu_yukawa.cpp               |  61 +--
 .../hamilt_stodft/sto_elecond.cpp             | 377 ++++++++++++++----
 .../hamilt_stodft/sto_elecond.h               |  57 ++-
 .../hamilt_stodft/sto_iter.cpp                |   2 +-
 source/module_io/CMakeLists.txt               |   2 -
 source/module_io/input.cpp                    |   3 -
 source/module_io/input.h                      |  31 --
 source/module_io/input_conv.cpp               |  15 +-
 source/module_io/input_conv_tmp.cpp           |  27 --
 source/module_io/json_output/init_info.cpp    |   1 -
 source/module_io/json_output/output_info.cpp  |   1 -
 source/module_io/json_output/readin_info.cpp  |   1 -
 source/module_io/parse_args.cpp               |   1 -
 source/module_io/test/CMakeLists.txt          |   2 +-
 source/module_md/test/CMakeLists.txt          |   1 -
 30 files changed, 448 insertions(+), 238 deletions(-)
 mode change 100755 => 100644 source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_dftu.cpp
 delete mode 100644 source/module_io/input.cpp
 delete mode 100644 source/module_io/input.h
 delete mode 100644 source/module_io/input_conv_tmp.cpp

diff --git a/source/Makefile.Objects b/source/Makefile.Objects
index e78fb55c9b..6c9de9c849 100644
--- a/source/Makefile.Objects
+++ b/source/Makefile.Objects
@@ -460,9 +460,7 @@ OBJS_XC=xc_functional.o\
     xc_funct_corr_gga.o\
     xc_funct_hcth.o\
 
-OBJS_IO=input.o\
-    input_conv_tmp.o\
-    input_conv.o\
+OBJS_IO=input_conv.o\
     berryphase.o\
     bessel_basis.o\
     cal_test.o\
diff --git a/source/driver.cpp b/source/driver.cpp
index 96f5104404..e48880e181 100644
--- a/source/driver.cpp
+++ b/source/driver.cpp
@@ -6,7 +6,6 @@
 #include "module_esolver/esolver.h"
 #include "module_hamilt_pw/hamilt_pwdft/global.h"
 #include "module_io/cal_test.h"
-#include "module_io/input.h"
 #include "module_io/input_conv.h"
 #include "module_io/para_json.h"
 #include "module_io/print_info.h"
@@ -132,7 +131,6 @@ void Driver::reading()
     read_input.write_parameters(PARAM, ss1.str());
 
     // (*temp*) copy the variables from INPUT to each class
-    Input_Conv::tmp_convert();
     Input_Conv::Convert();
 
     // (4) define the 'DIAGONALIZATION' world in MPI
diff --git a/source/driver_run.cpp b/source/driver_run.cpp
index 68dfab2539..8ae1dc89b6 100644
--- a/source/driver_run.cpp
+++ b/source/driver_run.cpp
@@ -2,7 +2,6 @@
 #include "module_cell/check_atomic_stru.h"
 #include "module_cell/module_neighbor/sltk_atom_arrange.h"
 #include "module_hamilt_pw/hamilt_pwdft/global.h"
-#include "module_io/input.h"
 #include "module_parameter/parameter.h"
 #include "module_io/para_json.h"
 #include "module_io/print_info.h"
diff --git a/source/module_elecstate/potentials/H_TDDFT_pw.cpp b/source/module_elecstate/potentials/H_TDDFT_pw.cpp
index 104898432f..b0a33902d6 100644
--- a/source/module_elecstate/potentials/H_TDDFT_pw.cpp
+++ b/source/module_elecstate/potentials/H_TDDFT_pw.cpp
@@ -4,7 +4,6 @@
 #include "module_base/timer.h"
 #include "module_hamilt_lcao/module_tddft/evolve_elec.h"
 #include "module_hamilt_pw/hamilt_pwdft/global.h"
-#include "module_io/input.h"
 #include "module_io/input_conv.h"
 
 namespace elecstate
@@ -80,7 +79,8 @@ void H_TDDFT_pw::cal_fixed_v(double *vl_pseudo)
     ModuleBase::TITLE("H_TDDFT_pw", "cal_fixed_v");
 
     //skip if velocity_gague
-    if(stype==1)return;
+    if(stype==1) {return;
+}
 
     // time evolve
     H_TDDFT_pw::istep++;
@@ -233,7 +233,7 @@ int H_TDDFT_pw::check_ncut(int t_type)
     }
     return ncut;
 }
-void H_TDDFT_pw::update_At(void)
+void H_TDDFT_pw::update_At()
 {
     std::cout << "calculate electric potential" << std::endl;
     // time evolve
@@ -345,7 +345,8 @@ double H_TDDFT_pw::cal_v_time_Gauss(const bool last)
 
     double gauss_t = (istep_int - t0 * ncut) * dt_int;
     vext_time = cos(omega * gauss_t + phase) * exp(-gauss_t * gauss_t * 0.5 / (sigma * sigma)) * amp;
-    if(last)gauss_count++;
+    if(last) {gauss_count++;
+}
 
     return vext_time;
 }
@@ -375,7 +376,8 @@ double H_TDDFT_pw::cal_v_time_trapezoid(const bool last)
     }
 
     vext_time = vext_time * amp * cos(omega * istep_int * dt_int + phase);
-    if(last)trape_count++;
+    if(last) {trape_count++;
+}
 
     return vext_time;
 }
@@ -392,7 +394,8 @@ double H_TDDFT_pw::cal_v_time_trigonometric(const bool last)
     const double timenow = istep_int * dt_int;
 
     vext_time = amp * cos(omega1 * timenow + phase1) * sin(omega2 * timenow + phase2) * sin(omega2 * timenow + phase2);
-    if(last)trigo_count++;
+    if(last) {trigo_count++;
+}
 
     return vext_time;
 }
@@ -402,10 +405,11 @@ double H_TDDFT_pw::cal_v_time_heaviside()
     double t0 = *(heavi_t0.begin() + heavi_count);
     double amp = *(heavi_amp.begin() + heavi_count);
     double vext_time = 0.0;
-    if (istep < t0)
+    if (istep < t0) {
         vext_time = amp;
-    else if (istep >= t0)
+    } else if (istep >= t0) {
         vext_time = 0.0;
+}
     heavi_count++;
 
     return vext_time;
diff --git a/source/module_elecstate/potentials/H_TDDFT_pw.h b/source/module_elecstate/potentials/H_TDDFT_pw.h
index 8b3ffcaf42..4925da63bd 100644
--- a/source/module_elecstate/potentials/H_TDDFT_pw.h
+++ b/source/module_elecstate/potentials/H_TDDFT_pw.h
@@ -1,7 +1,6 @@
 #ifndef H_TDDFT_PW_H
 #define H_TDDFT_PW_H
 
-#include "module_io/input.h"
 #include "module_io/input_conv.h"
 #include "pot_base.h"
 
diff --git a/source/module_esolver/esolver.h b/source/module_esolver/esolver.h
index 214db6b13c..7e6f984a1e 100644
--- a/source/module_esolver/esolver.h
+++ b/source/module_esolver/esolver.h
@@ -3,7 +3,6 @@
 
 #include "module_base/matrix.h"
 #include "module_cell/unitcell.h"
-#include "module_io/input.h"
 #include "module_parameter/parameter.h"
 
 namespace ModuleESolver
diff --git a/source/module_esolver/esolver_fp.cpp b/source/module_esolver/esolver_fp.cpp
index 34c75a602e..cdbc7d2040 100644
--- a/source/module_esolver/esolver_fp.cpp
+++ b/source/module_esolver/esolver_fp.cpp
@@ -2,7 +2,6 @@
 
 #include "module_base/global_variable.h"
 #include "module_hamilt_pw/hamilt_pwdft/global.h"
-#include "module_io/input.h"
 #include "module_parameter/parameter.h"
 namespace ModuleESolver
 {
diff --git a/source/module_esolver/esolver_ks.cpp b/source/module_esolver/esolver_ks.cpp
index 736110f61d..c77e3a2cd3 100644
--- a/source/module_esolver/esolver_ks.cpp
+++ b/source/module_esolver/esolver_ks.cpp
@@ -9,7 +9,6 @@
 #include <iostream>
 
 #include "module_base/timer.h"
-#include "module_io/input.h"
 #include "module_io/json_output/init_info.h"
 #include "module_io/print_info.h"
 #include "module_parameter/parameter.h"
diff --git a/source/module_esolver/esolver_sdft_pw.cpp b/source/module_esolver/esolver_sdft_pw.cpp
index c286984685..5252de71e3 100644
--- a/source/module_esolver/esolver_sdft_pw.cpp
+++ b/source/module_esolver/esolver_sdft_pw.cpp
@@ -315,15 +315,13 @@ void ESolver_SDFT_PW::after_all_runners()
                                 this->p_hamilt,
                                 (hsolver::HSolverPW_SDFT*)phsol,
                                 &stowf);
-        sto_elecond
-            .decide_nche(PARAM.inp.cond_dt, INPUT.cond_dtbatch, 1e-8, this->nche_sto, PARAM.inp.emin_sto, PARAM.inp.emax_sto);
+        sto_elecond.decide_nche(PARAM.inp.cond_dt, 1e-8, this->nche_sto, PARAM.inp.emin_sto, PARAM.inp.emax_sto);
         sto_elecond.sKG(PARAM.inp.cond_smear,
                         PARAM.inp.cond_fwhm,
                         PARAM.inp.cond_wcut,
                         PARAM.inp.cond_dw,
                         PARAM.inp.cond_dt,
                         PARAM.inp.cond_nonlocal,
-                        INPUT.cond_dtbatch,
                         PARAM.inp.npart_sto);
     }
 }
@@ -353,7 +351,7 @@ void ESolver_SDFT_PW::nscf()
 
     const int iter = 1;
 
-    const double diag_thr = std::max(std::min(1e-5, 0.1 *PARAM.inp.scf_thr / std::max(1.0, GlobalV::nelec)), 1e-12);
+    const double diag_thr = std::max(std::min(1e-5, 0.1 * PARAM.inp.scf_thr / std::max(1.0, GlobalV::nelec)), 1e-12);
 
     std::cout << " DIGA_THR          : " << diag_thr << std::endl;
 
diff --git a/source/module_esolver/print_funcs.h b/source/module_esolver/print_funcs.h
index e39de71d1b..5a13df560a 100644
--- a/source/module_esolver/print_funcs.h
+++ b/source/module_esolver/print_funcs.h
@@ -1,7 +1,7 @@
 #ifndef PRINT_FUNCTIONS_H
 #define PRINT_FUNCTIONS_H
 
-#include "module_io/input.h"
+#include "module_parameter/input_parameter.h"
 #include "module_basis/module_pw/pw_basis_k.h"
 
 namespace Print_functions
diff --git a/source/module_hamilt_general/module_surchem/test/setcell.h b/source/module_hamilt_general/module_surchem/test/setcell.h
index 07594e164e..b758b62af7 100644
--- a/source/module_hamilt_general/module_surchem/test/setcell.h
+++ b/source/module_hamilt_general/module_surchem/test/setcell.h
@@ -7,7 +7,6 @@
 #include "module_cell/module_neighbor/sltk_atom_arrange.h"
 #include "module_cell/module_neighbor/sltk_grid_driver.h"
 #include "module_cell/unitcell.h"
-#include "module_io/input.h"
 #include "module_hamilt_pw/hamilt_pwdft/structure_factor.h"
 
 namespace GlobalC
diff --git a/source/module_hamilt_general/module_vdw/test/vdw_test.cpp b/source/module_hamilt_general/module_vdw/test/vdw_test.cpp
index 35c040f494..3eb7c1e5ec 100644
--- a/source/module_hamilt_general/module_vdw/test/vdw_test.cpp
+++ b/source/module_hamilt_general/module_vdw/test/vdw_test.cpp
@@ -1,6 +1,5 @@
 #include "module_cell/unitcell.h"
 #include "module_cell/setup_nonlocal.h"
-#include "module_io/input.h"
 #include "module_base/mathzone.h"
 #include "module_base/vector3.h"
 #include"gtest/gtest.h"
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_dftu.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_dftu.cpp
old mode 100755
new mode 100644
index 1b0fa6e23a..7a3dc9f902
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_dftu.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_dftu.cpp
@@ -95,8 +95,8 @@ class DFTUTest : public ::testing::Test
                 GlobalC::dftu.locale[iat][l][0][1].create(2 * l + 1, 2 * l + 1);
             }
         }
-        GlobalC::dftu.U = &U_test;
-        GlobalC::dftu.orbital_corr = &orbital_c_test;
+        GlobalC::dftu.U = {U_test};
+        GlobalC::dftu.orbital_corr = {orbital_c_test};
 
         PARAM.input.onsite_radius = 1.0;
     }
@@ -150,7 +150,7 @@ TEST_F(DFTUTest, constructHRd2d)
     // test for nspin=1
     GlobalV::NSPIN = 1;
     std::vector<ModuleBase::Vector3<double>> kvec_d_in(1, ModuleBase::Vector3<double>(0.0, 0.0, 0.0));
-    hamilt::HS_Matrix_K<double> hsk(paraV, 1);
+    hamilt::HS_Matrix_K<double> hsk(paraV, true);
     hsk.set_zero_hk();
     Grid_Driver gd(0, 0, 0);
     // check some input values
@@ -218,7 +218,7 @@ TEST_F(DFTUTest, constructHRd2cd)
     // test for nspin=2
     GlobalV::NSPIN = 2;
     std::vector<ModuleBase::Vector3<double>> kvec_d_in(2, ModuleBase::Vector3<double>(0.0, 0.0, 0.0));
-    hamilt::HS_Matrix_K<std::complex<double>> hsk(paraV, 1);
+    hamilt::HS_Matrix_K<std::complex<double>> hsk(paraV, true);
     hsk.set_zero_hk();
     Grid_Driver gd(0, 0, 0);
     EXPECT_EQ(LCAO_Orbitals::get_const_instance().Phi[0].getRcut(), 1.0);
diff --git a/source/module_hamilt_lcao/module_dftu/dftu.h b/source/module_hamilt_lcao/module_dftu/dftu.h
index c673b36eb9..6b3bbe2eff 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu.h
+++ b/source/module_hamilt_lcao/module_dftu/dftu.h
@@ -16,6 +16,7 @@
 #include "module_hamilt_lcao/hamilt_lcaodft/force_stress_arrays.h" // mohan add 2024-06-15
 
 #include <string>
+#include <vector>
 
 //==========================================================
 // CLASS :
@@ -47,9 +48,9 @@ class DFTU
     void uramping_update(); // update U by uramping
     bool u_converged(); // check if U is converged
 
-    double* U; // U (Hubbard parameter U)
+    std::vector<double> U = {}; // U (Hubbard parameter U)
     std::vector<double> U0; // U0 (target Hubbard parameter U0)
-    int* orbital_corr; //
+    std::vector<int> orbital_corr = {}; //
     double uramping; // increase U by uramping, default is -1.0
     int omc; // occupation matrix control
     int mixing_dftu; //whether to mix locale
diff --git a/source/module_hamilt_lcao/module_dftu/dftu_tools.cpp b/source/module_hamilt_lcao/module_dftu/dftu_tools.cpp
index 9efbdb224f..f2590db530 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu_tools.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu_tools.cpp
@@ -12,28 +12,32 @@ void DFTU::cal_VU_pot_mat_complex(const int spin, const bool newlocale, std::com
 
     for (int it = 0; it < GlobalC::ucell.ntype; ++it)
     {
-        if (INPUT.orbital_corr[it] == -1)
+        if (PARAM.inp.orbital_corr[it] == -1) {
             continue;
+}
         for (int ia = 0; ia < GlobalC::ucell.atoms[it].na; ia++)
         {
             const int iat = GlobalC::ucell.itia2iat(it, ia);
             for (int L = 0; L <= GlobalC::ucell.atoms[it].nwl; L++)
             {
-                if (L != INPUT.orbital_corr[it])
+                if (L != PARAM.inp.orbital_corr[it]) {
                     continue;
+}
 
                 for (int n = 0; n < GlobalC::ucell.atoms[it].l_nchi[L]; n++)
                 {
-                    if (n != 0)
+                    if (n != 0) {
                         continue;
+}
 
                     for (int m1 = 0; m1 < 2 * L + 1; m1++)
                     {
                         for (int ipol1 = 0; ipol1 < GlobalV::NPOL; ipol1++)
                         {
                             const int mu = this->paraV->global2local_row(this->iatlnmipol2iwt[iat][L][n][m1][ipol1]);
-                            if (mu < 0)
+                            if (mu < 0) {
                                 continue;
+}
 
                             for (int m2 = 0; m2 < 2 * L + 1; m2++)
                             {
@@ -41,8 +45,9 @@ void DFTU::cal_VU_pot_mat_complex(const int spin, const bool newlocale, std::com
                                 {
                                     const int nu
                                         = this->paraV->global2local_col(this->iatlnmipol2iwt[iat][L][n][m2][ipol2]);
-                                    if (nu < 0)
+                                    if (nu < 0) {
                                         continue;
+}
 
                                     int m1_all = m1 + (2 * L + 1) * ipol1;
                                     int m2_all = m2 + (2 * L + 1) * ipol2;
@@ -68,36 +73,41 @@ void DFTU::cal_VU_pot_mat_real(const int spin, const bool newlocale, double* VU)
 
     for (int it = 0; it < GlobalC::ucell.ntype; ++it)
     {
-        if (INPUT.orbital_corr[it] == -1)
+        if (PARAM.inp.orbital_corr[it] == -1) {
             continue;
+}
         for (int ia = 0; ia < GlobalC::ucell.atoms[it].na; ia++)
         {
             const int iat = GlobalC::ucell.itia2iat(it, ia);
             for (int L = 0; L <= GlobalC::ucell.atoms[it].nwl; L++)
             {
-                if (L != INPUT.orbital_corr[it])
+                if (L != PARAM.inp.orbital_corr[it]) {
                     continue;
+}
 
                 for (int n = 0; n < GlobalC::ucell.atoms[it].l_nchi[L]; n++)
                 {
-                    if (n != 0)
+                    if (n != 0) {
                         continue;
+}
 
                     for (int m1 = 0; m1 < 2 * L + 1; m1++)
                     {
                         for (int ipol1 = 0; ipol1 < GlobalV::NPOL; ipol1++)
                         {
                             const int mu = this->paraV->global2local_row(this->iatlnmipol2iwt[iat][L][n][m1][ipol1]);
-                            if (mu < 0)
+                            if (mu < 0) {
                                 continue;
+}
                             for (int m2 = 0; m2 < 2 * L + 1; m2++)
                             {
                                 for (int ipol2 = 0; ipol2 < GlobalV::NPOL; ipol2++)
                                 {
                                     const int nu
                                         = this->paraV->global2local_col(this->iatlnmipol2iwt[iat][L][n][m2][ipol2]);
-                                    if (nu < 0)
+                                    if (nu < 0) {
                                         continue;
+}
 
                                     int m1_all = m1 + (2 * L + 1) * ipol1;
                                     int m2_all = m2 + (2 * L + 1) * ipol2;
@@ -145,37 +155,41 @@ double DFTU::get_onebody_eff_pot(const int T,
         {
             if (Yukawa)
             {
-                if (m0 == m1)
+                if (m0 == m1) {
                     VU = (this->U_Yukawa[T][L][N] - this->J_Yukawa[T][L][N])
                          * (0.5 - this->locale[iat][L][N][spin](m0, m1));
-                else
+                } else {
                     VU = -(this->U_Yukawa[T][L][N] - this->J_Yukawa[T][L][N]) * this->locale[iat][L][N][spin](m0, m1);
+}
             }
             else
             {
-                if (m0 == m1)
+                if (m0 == m1) {
                     VU = (this->U[T]) * (0.5 - this->locale[iat][L][N][spin](m0, m1));
-                else
+                } else {
                     VU = -(this->U[T]) * this->locale[iat][L][N][spin](m0, m1);
+}
             }
         }
         else
         {
             if (Yukawa)
             {
-                if (m0 == m1)
+                if (m0 == m1) {
                     VU = (this->U_Yukawa[T][L][N] - this->J_Yukawa[T][L][N])
                          * (0.5 - this->locale_save[iat][L][N][spin](m0, m1));
-                else
+                } else {
                     VU = -(this->U_Yukawa[T][L][N] - this->J_Yukawa[T][L][N])
                          * this->locale_save[iat][L][N][spin](m0, m1);
+}
             }
             else
             {
-                if (m0 == m1)
+                if (m0 == m1) {
                     VU = (this->U[T]) * (0.5 - this->locale_save[iat][L][N][spin](m0, m1));
-                else
+                } else {
                     VU = -(this->U[T]) * this->locale_save[iat][L][N][spin](m0, m1);
+}
             }
         }
 
diff --git a/source/module_hamilt_lcao/module_dftu/dftu_yukawa.cpp b/source/module_hamilt_lcao/module_dftu/dftu_yukawa.cpp
index cff6efe8ee..e6ea99857f 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu_yukawa.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu_yukawa.cpp
@@ -13,8 +13,8 @@
 #include <iomanip>
 #include <iostream>
 #include <sstream>
-#include <stdio.h>
-#include <string.h>
+#include <cstdio>
+#include <cstring>
 
 namespace ModuleDFTU
 {
@@ -33,7 +33,8 @@ void DFTU::cal_yukawa_lambda(double** rho, const int& nrxx)
     double sum_rho_lambda = 0.0;
     for (int is = 0; is < GlobalV::NSPIN; is++)
     {
-        if(GlobalV::NSPIN == 4 && is > 0) continue;// for non-collinear spin case, first spin contains the charge density
+        if(GlobalV::NSPIN == 4 && is > 0) { continue;// for non-collinear spin case, first spin contains the charge density
+}
         for (int ir = 0; ir < nrxx; ir++)
         {
             double rho_ir = rho[is][ir];
@@ -111,8 +112,9 @@ void DFTU::cal_slater_Fk(const int L, const int T)
 void DFTU::cal_slater_UJ(double** rho, const int& nrxx)
 {
     ModuleBase::TITLE("DFTU", "cal_slater_UJ");
-    if (!Yukawa)
+    if (!Yukawa) {
         return;
+}
 
     this->cal_yukawa_lambda(rho, nrxx);
 
@@ -138,16 +140,18 @@ void DFTU::cal_slater_UJ(double** rho, const int& nrxx)
         {
             const int N = GlobalC::ucell.atoms[T].l_nchi[L];
 
-            if (L >= INPUT.orbital_corr[T] && INPUT.orbital_corr[T] != -1)
+            if (L >= PARAM.inp.orbital_corr[T] && PARAM.inp.orbital_corr[T] != -1)
             {
-                if (L != INPUT.orbital_corr[T])
+                if (L != PARAM.inp.orbital_corr[T]) {
                     continue;
+}
                 this->cal_slater_Fk(L, T);
 
                 for (int n = 0; n < N; n++)
                 {
-                    if (n != 0)
+                    if (n != 0) {
                         continue;
+}
 
                     switch (L)
                     {
@@ -162,8 +166,9 @@ void DFTU::cal_slater_UJ(double** rho, const int& nrxx)
                         break;
 
                     case 3: // f electrons
-                        if (Yukawa)
+                        if (Yukawa) {
                             this->U_Yukawa[T][L][n] = this->Fk[T][L][n][0];
+}
                         this->J_Yukawa[T][L][n] = (286.0 * this->Fk[T][L][n][1] + 195.0 * this->Fk[T][L][n][2]
                                                    + 250.0 * this->Fk[T][L][n][3])
                                                   / 6435.0;
@@ -191,33 +196,37 @@ double DFTU::spherical_Bessel(const int k, const double r, const double lambda)
     double x = r * lambda;
     if (k == 0)
     {
-        if (x < 1.0e-3)
+        if (x < 1.0e-3) {
             val = 1 + pow(x, 2) / 6.0;
-        else
+        } else {
             val = sinh(x) / x;
+}
     }
     else if (k == 2)
     {
-        if (x < 1.0e-2)
+        if (x < 1.0e-2) {
             val = -pow(x, 2) / 15.0 - pow(x, 4) / 210.0 - pow(x, 6) / 7560.0;
-        else
+        } else {
             val = 3 * cosh(x) / pow(x, 2) + (-3 - pow(x, 2)) * sinh(x) / pow(x, 3);
+}
     }
     else if (k == 4)
     {
-        if (x < 5.0e-1)
+        if (x < 5.0e-1) {
             val = pow(x, 4) / 945.0 + pow(x, 6) / 20790.0 + pow(x, 8) / 1081080.0 + pow(x, 10) / 97297200.0;
-        else
+        } else {
             val = -5 * (21 + 2 * pow(x, 2)) * cosh(x) / pow(x, 4)
                   + (105 + 45 * pow(x, 2) + pow(x, 4)) * sinh(x) / pow(x, 5);
+}
     }
     else if (k == 6)
     {
-        if (x < 9.0e-1)
+        if (x < 9.0e-1) {
             val = -pow(x, 6) / 135135.0 - pow(x, 8) / 4054050.0 - pow(x, 10) / 275675400.0;
-        else
+        } else {
             val = 21 * (495 + 60 * pow(x, 2) + pow(x, 4)) * cosh(x) / pow(x, 6)
                   + (-10395 - 4725 * pow(x, 2) - 210 * pow(x, 4) - pow(x, 6)) * sinh(x) / pow(x, 7);
+}
     }
     return val;
 }
@@ -230,36 +239,40 @@ double DFTU::spherical_Hankel(const int k, const double r, const double lambda)
     double x = r * lambda;
     if (k == 0)
     {
-        if (x < 1.0e-3)
+        if (x < 1.0e-3) {
             val = -1 / x + 1 - x / 2.0 + pow(x, 2) / 6.0;
-        else
+        } else {
             val = -exp(-x) / x;
+}
     }
     else if (k == 2)
     {
-        if (x < 1.0e-2)
+        if (x < 1.0e-2) {
             val = 3 / pow(x, 3) - 1 / (2 * x) + x / 8 - pow(x, 2) / 15.0 + pow(x, 3) / 48.0;
-        else
+        } else {
             val = exp(-x) * (3 + 3 * x + pow(x, 2)) / pow(x, 3);
+}
     }
     else if (k == 4)
     {
-        if (x < 5.0e-1)
+        if (x < 5.0e-1) {
             val = -105 / pow(x, 5) + 15 / (2 * pow(x, 3)) - 3 / (8 * x) + x / 48 - pow(x, 3) / 384.0
                   + pow(x, 4) / 945.0;
-        else
+        } else {
             val = -exp(-x) * (105 + 105 * x + 45 * pow(x, 2) + 10 * pow(x, 3) + pow(x, 4)) / pow(x, 5);
+}
     }
     else if (k == 6)
     {
-        if (x < 9.0e-1)
+        if (x < 9.0e-1) {
             val = 10395 / pow(x, 7) - 945 / (2 * pow(x, 5)) + 105 / (8 * pow(x, 3)) - 5 / (16 * x) + x / 128.0
                   - pow(x, 3) / 3840.0 + pow(x, 5) / 46080.0 - pow(x, 6) / 135135.0;
-        else
+        } else {
             val = exp(-x)
                   * (10395 + 10395 * x + 4725 * pow(x, 2) + 1260 * pow(x, 3) + 210 * pow(x, 4) + 21 * pow(x, 5)
                      + pow(x, 6))
                   / pow(x, 7);
+}
     }
     return val;
 }
diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_elecond.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_elecond.cpp
index aa4f15e252..a3c21f43e5 100644
--- a/source/module_hamilt_pw/hamilt_stodft/sto_elecond.cpp
+++ b/source/module_hamilt_pw/hamilt_stodft/sto_elecond.cpp
@@ -12,10 +12,15 @@
 #define TWOSQRT2LN2 2.354820045030949 // FWHM = 2sqrt(2ln2) * \sigma
 #define FACTOR 1.839939223835727e7
 
-Sto_EleCond::Sto_EleCond(UnitCell* p_ucell_in, K_Vectors* p_kv_in, elecstate::ElecState* p_elec_in,
-                         ModulePW::PW_Basis_K* p_wfcpw_in, psi::Psi<std::complex<double>>* p_psi_in,
-                         pseudopot_cell_vnl* p_ppcell_in, hamilt::Hamilt<std::complex<double>>* p_hamilt_in,
-                         hsolver::HSolverPW_SDFT* p_hsol_in, Stochastic_WF* p_stowf_in)
+Sto_EleCond::Sto_EleCond(UnitCell* p_ucell_in,
+                         K_Vectors* p_kv_in,
+                         elecstate::ElecState* p_elec_in,
+                         ModulePW::PW_Basis_K* p_wfcpw_in,
+                         psi::Psi<std::complex<double>>* p_psi_in,
+                         pseudopot_cell_vnl* p_ppcell_in,
+                         hamilt::Hamilt<std::complex<double>>* p_hamilt_in,
+                         hsolver::HSolverPW_SDFT* p_hsol_in,
+                         Stochastic_WF* p_stowf_in)
     : EleCond(p_ucell_in, p_kv_in, p_elec_in, p_wfcpw_in, p_psi_in, p_ppcell_in)
 {
     this->p_hamilt = p_hamilt_in;
@@ -25,7 +30,10 @@ Sto_EleCond::Sto_EleCond(UnitCell* p_ucell_in, K_Vectors* p_kv_in, elecstate::El
     this->nbands_sto = p_stowf_in->nchi;
 }
 
-void Sto_EleCond::decide_nche(const double dt, int& nbatch, const double cond_thr, const int& fd_nche, double try_emin,
+void Sto_EleCond::decide_nche(const double dt,
+                              const double cond_thr,
+                              const int& fd_nche,
+                              double try_emin,
                               double try_emax)
 {
     int nche_guess = 1000;
@@ -33,6 +41,7 @@ void Sto_EleCond::decide_nche(const double dt, int& nbatch, const double cond_th
     Stochastic_Iter& stoiter = p_hsol->stoiter;
     const double mu = this->p_elec->eferm.ef;
     stoiter.stofunc.mu = mu;
+    int& nbatch = this->cond_dtbatch;
     // try to find nbatch
     if (nbatch == 0)
     {
@@ -80,8 +89,14 @@ void Sto_EleCond::decide_nche(const double dt, int& nbatch, const double cond_th
     int nche_new = 0;
 loop:
     // re-set Emin & Emax both in stohchi & stofunc
-    check_che(std::max(nche_old * 2, fd_nche), try_emin, try_emax, this->nbands_sto, this->p_kv, this->p_stowf,
-              this->p_hamilt, this->p_hsol);
+    check_che(std::max(nche_old * 2, fd_nche),
+              try_emin,
+              try_emax,
+              this->nbands_sto,
+              this->p_kv,
+              this->p_stowf,
+              this->p_hamilt,
+              this->p_hsol);
 
     // second try to find nche with new Emin & Emax
     getnche(nche_new);
@@ -113,18 +128,29 @@ void Sto_EleCond::decide_nche(const double dt, int& nbatch, const double cond_th
 }
 
 void Sto_EleCond::cal_jmatrix(const psi::Psi<std::complex<float>>& kspsi_all,
-                              const psi::Psi<std::complex<float>>& vkspsi, const double* en, const double* en_all,
-                              std::complex<double>* leftfact, std::complex<double>* rightfact,
-                              const psi::Psi<std::complex<double>>& leftchi, psi::Psi<std::complex<double>>& rightchi,
-                              psi::Psi<std::complex<double>>& left_hchi, psi::Psi<std::complex<double>>& batch_vchi,
+                              const psi::Psi<std::complex<float>>& vkspsi,
+                              const double* en,
+                              const double* en_all,
+                              std::complex<double>* leftfact,
+                              std::complex<double>* rightfact,
+                              const psi::Psi<std::complex<double>>& leftchi,
+                              psi::Psi<std::complex<double>>& rightchi,
+                              psi::Psi<std::complex<double>>& left_hchi,
+                              psi::Psi<std::complex<double>>& batch_vchi,
                               psi::Psi<std::complex<double>>& batch_vhchi,
 #ifdef __MPI
-                              psi::Psi<std::complex<float>>& chi_all, psi::Psi<std::complex<float>>& hchi_all,
-                              void* gatherinfo_ks, void* gatherinfo_sto,
+                              psi::Psi<std::complex<float>>& chi_all,
+                              psi::Psi<std::complex<float>>& hchi_all,
+                              void* gatherinfo_ks,
+                              void* gatherinfo_sto,
 #endif
-                              const int& bsize_psi, std::vector<std::complex<float>>& j1,
-                              std::vector<std::complex<float>>& j2, hamilt::Velocity& velop, const int& ik,
-                              const std::complex<double>& factor, const int bandinfo[6])
+                              const int& bsize_psi,
+                              std::vector<std::complex<float>>& j1,
+                              std::vector<std::complex<float>>& j2,
+                              hamilt::Velocity& velop,
+                              const int& ik,
+                              const std::complex<double>& factor,
+                              const int bandinfo[6])
 {
     ModuleBase::timer::tick("Sto_EleCond", "cal_jmatrix");
     const char transa = 'C';
@@ -168,8 +194,14 @@ void Sto_EleCond::cal_jmatrix(const psi::Psi<std::complex<float>>& kspsi_all,
     {
         vec_rightf_all.resize(allbands_ks);
         rightf_all = vec_rightf_all.data();
-        MPI_Allgatherv(rightfact, perbands_ks, MPI_DOUBLE_COMPLEX, rightf_all, ks_fact->nrecv, ks_fact->displs,
-                       MPI_DOUBLE_COMPLEX, PARAPW_WORLD);
+        MPI_Allgatherv(rightfact,
+                       perbands_ks,
+                       MPI_DOUBLE_COMPLEX,
+                       rightf_all,
+                       ks_fact->nrecv,
+                       ks_fact->displs,
+                       MPI_DOUBLE_COMPLEX,
+                       PARAPW_WORLD);
     }
 #endif
 
@@ -188,8 +220,19 @@ void Sto_EleCond::cal_jmatrix(const psi::Psi<std::complex<float>>& kspsi_all,
             std::complex<float>* j1mat = &j1[id * dim_jmatrix];
             std::complex<float>* j2mat = &j2[id * dim_jmatrix];
             //(<\psi|v|\chi>)^T
-            cgemm_(&transa, &transb, &allbands_sto, &perbands_ks, &npw, &conjfactor, rightchi_all->get_pointer(), &npwx,
-                   &vkspsi(idnb, 0), &npwx, &zero, j1mat, &allbands_sto);
+            cgemm_(&transa,
+                   &transb,
+                   &allbands_sto,
+                   &perbands_ks,
+                   &npw,
+                   &conjfactor,
+                   rightchi_all->get_pointer(),
+                   &npwx,
+                   &vkspsi(idnb, 0),
+                   &npwx,
+                   &zero,
+                   j1mat,
+                   &allbands_sto);
 
             //(<\psi|Hv|\chi>)^T
             // for(int i = 0 ; i < perbands_ks ; ++i)
@@ -204,8 +247,19 @@ void Sto_EleCond::cal_jmatrix(const psi::Psi<std::complex<float>>& kspsi_all,
             // }
 
             //(<\psi|vH|\chi>)^T
-            cgemm_(&transa, &transb, &allbands_sto, &perbands_ks, &npw, &conjfactor, righthchi_all->get_pointer(),
-                   &npwx, &vkspsi(idnb, 0), &npwx, &zero, j2mat, &allbands_sto);
+            cgemm_(&transa,
+                   &transb,
+                   &allbands_sto,
+                   &perbands_ks,
+                   &npw,
+                   &conjfactor,
+                   righthchi_all->get_pointer(),
+                   &npwx,
+                   &vkspsi(idnb, 0),
+                   &npwx,
+                   &zero,
+                   j2mat,
+                   &allbands_sto);
         }
     }
 
@@ -231,8 +285,19 @@ void Sto_EleCond::cal_jmatrix(const psi::Psi<std::complex<float>>& kspsi_all,
                 std::complex<float>* j1mat = &j1[id * dim_jmatrix + jbais];
                 std::complex<float>* j2mat = &j2[id * dim_jmatrix + jbais];
                 //<\chi|v|\psi>
-                cgemm_(&transa, &transb, &tmpnb, &allbands_ks, &npw, &float_factor, &f_batch_vchi(idnb, 0), &npwx,
-                       kspsi_all.get_pointer(), &npwx, &zero, j1mat, &perbands_sto);
+                cgemm_(&transa,
+                       &transb,
+                       &tmpnb,
+                       &allbands_ks,
+                       &npw,
+                       &float_factor,
+                       &f_batch_vchi(idnb, 0),
+                       &npwx,
+                       kspsi_all.get_pointer(),
+                       &npwx,
+                       &zero,
+                       j1mat,
+                       &perbands_sto);
 
                 //<\chi|vH|\psi> = \epsilon * <\chi|v|\psi>
                 // for(int i = 0 ; i < allbands_ks ; ++i)
@@ -247,8 +312,19 @@ void Sto_EleCond::cal_jmatrix(const psi::Psi<std::complex<float>>& kspsi_all,
                 // }
 
                 //<\chi|Hv|\psi>
-                cgemm_(&transa, &transb, &tmpnb, &allbands_ks, &npw, &float_factor, &f_batch_vhchi(idnb, 0), &npwx,
-                       kspsi_all.get_pointer(), &npwx, &zero, j2mat, &perbands_sto);
+                cgemm_(&transa,
+                       &transb,
+                       &tmpnb,
+                       &allbands_ks,
+                       &npw,
+                       &float_factor,
+                       &f_batch_vhchi(idnb, 0),
+                       &npwx,
+                       kspsi_all.get_pointer(),
+                       &npwx,
+                       &zero,
+                       j2mat,
+                       &perbands_sto);
             }
         }
 
@@ -262,16 +338,49 @@ void Sto_EleCond::cal_jmatrix(const psi::Psi<std::complex<float>>& kspsi_all,
             std::complex<float>* j2mat = &j2[id * dim_jmatrix + jbais];
             std::complex<float>* tmpjmat = &tmpj[id * allbands_sto * perbands_sto + startnb];
             //<\chi|v|\chi>
-            cgemm_(&transa, &transb, &tmpnb, &allbands_sto, &npw, &float_factor, &f_batch_vchi(idnb, 0), &npwx,
-                   rightchi_all->get_pointer(), &npwx, &zero, j1mat, &perbands_sto);
+            cgemm_(&transa,
+                   &transb,
+                   &tmpnb,
+                   &allbands_sto,
+                   &npw,
+                   &float_factor,
+                   &f_batch_vchi(idnb, 0),
+                   &npwx,
+                   rightchi_all->get_pointer(),
+                   &npwx,
+                   &zero,
+                   j1mat,
+                   &perbands_sto);
 
             //<\chi|Hv|\chi>
-            cgemm_(&transa, &transb, &tmpnb, &allbands_sto, &npw, &float_factor, &f_batch_vhchi(idnb, 0), &npwx,
-                   rightchi_all->get_pointer(), &npwx, &zero, j2mat, &perbands_sto);
+            cgemm_(&transa,
+                   &transb,
+                   &tmpnb,
+                   &allbands_sto,
+                   &npw,
+                   &float_factor,
+                   &f_batch_vhchi(idnb, 0),
+                   &npwx,
+                   rightchi_all->get_pointer(),
+                   &npwx,
+                   &zero,
+                   j2mat,
+                   &perbands_sto);
 
             //<\chi|vH|\chi>
-            cgemm_(&transa, &transb, &tmpnb, &allbands_sto, &npw, &float_factor, &f_batch_vchi(idnb, 0), &npwx,
-                   righthchi_all->get_pointer(), &npwx, &zero, tmpjmat, &perbands_sto);
+            cgemm_(&transa,
+                   &transb,
+                   &tmpnb,
+                   &allbands_sto,
+                   &npw,
+                   &float_factor,
+                   &f_batch_vchi(idnb, 0),
+                   &npwx,
+                   righthchi_all->get_pointer(),
+                   &npwx,
+                   &zero,
+                   tmpjmat,
+                   &perbands_sto);
         }
 
         remain -= tmpnb;
@@ -352,8 +461,13 @@ void Sto_EleCond::cal_jmatrix(const psi::Psi<std::complex<float>>& kspsi_all,
     return;
 }
 
-void Sto_EleCond::sKG(const int& smear_type, const double& fwhmin, const double& wcut, const double& dw_in,
-                      const double& dt_in, const bool& nonlocal, const int& nbatch, const int& npart_sto)
+void Sto_EleCond::sKG(const int& smear_type,
+                      const double& fwhmin,
+                      const double& wcut,
+                      const double& dw_in,
+                      const double& dt_in,
+                      const bool& nonlocal,
+                      const int& npart_sto)
 {
     ModuleBase::TITLE("Sto_EleCond", "sKG");
     ModuleBase::timer::tick("Sto_EleCond", "sKG");
@@ -367,6 +481,7 @@ void Sto_EleCond::sKG(const int& smear_type, const double& fwhmin, const double&
     //                    Init
     //------------------------------------------------------------------
     // Parameters
+    const int nbatch = this->cond_dtbatch;
     int nw = ceil(wcut / dw_in);
     double dw = dw_in / ModuleBase::Ry_to_eV; // converge unit in eV to Ry
     double sigma = fwhmin / TWOSQRT2LN2 / ModuleBase::Ry_to_eV;
@@ -614,13 +729,23 @@ void Sto_EleCond::sKG(const int& smear_type, const double& fwhmin, const double&
         }
 
         che.calcoef_real(&stoiter.stofunc, &Sto_Func<double>::nroot_fd);
-        che.calfinalvec_real(&stohchi, &Stochastic_hchi::hchi_norm, stopsi->get_pointer(), sfchi.get_pointer(), npw,
-                             npwx, perbands_sto);
+        che.calfinalvec_real(&stohchi,
+                             &Stochastic_hchi::hchi_norm,
+                             stopsi->get_pointer(),
+                             sfchi.get_pointer(),
+                             npw,
+                             npwx,
+                             perbands_sto);
 
         che.calcoef_real(&stoiter.stofunc, &Sto_Func<double>::nroot_mfd);
 
-        che.calfinalvec_real(&stohchi, &Stochastic_hchi::hchi_norm, stopsi->get_pointer(), smfchi.get_pointer(), npw,
-                             npwx, perbands_sto);
+        che.calfinalvec_real(&stohchi,
+                             &Stochastic_hchi::hchi_norm,
+                             stopsi->get_pointer(),
+                             smfchi.get_pointer(),
+                             npw,
+                             npwx,
+                             perbands_sto);
 
         //------------------------  allocate ------------------------
         psi::Psi<std::complex<double>>& expmtsfchi = sfchi;
@@ -699,14 +824,34 @@ void Sto_EleCond::sKG(const int& smear_type, const double& fwhmin, const double&
             // Sto
             if (nbatch == 1)
             {
-                chemt.calfinalvec_complex(&stohchi, &Stochastic_hchi::hchi_norm, expmtsfchi.get_pointer(),
-                                          expmtsfchi.get_pointer(), npw, npwx, perbands_sto);
-                chemt.calfinalvec_complex(&stohchi, &Stochastic_hchi::hchi_norm, expmtsmfchi.get_pointer(),
-                                          expmtsmfchi.get_pointer(), npw, npwx, perbands_sto);
-                chet.calfinalvec_complex(&stohchi, &Stochastic_hchi::hchi_norm, exptsfchi.get_pointer(),
-                                         exptsfchi.get_pointer(), npw, npwx, perbands_sto);
-                chet.calfinalvec_complex(&stohchi, &Stochastic_hchi::hchi_norm, exptsmfchi.get_pointer(),
-                                         exptsmfchi.get_pointer(), npw, npwx, perbands_sto);
+                chemt.calfinalvec_complex(&stohchi,
+                                          &Stochastic_hchi::hchi_norm,
+                                          expmtsfchi.get_pointer(),
+                                          expmtsfchi.get_pointer(),
+                                          npw,
+                                          npwx,
+                                          perbands_sto);
+                chemt.calfinalvec_complex(&stohchi,
+                                          &Stochastic_hchi::hchi_norm,
+                                          expmtsmfchi.get_pointer(),
+                                          expmtsmfchi.get_pointer(),
+                                          npw,
+                                          npwx,
+                                          perbands_sto);
+                chet.calfinalvec_complex(&stohchi,
+                                         &Stochastic_hchi::hchi_norm,
+                                         exptsfchi.get_pointer(),
+                                         exptsfchi.get_pointer(),
+                                         npw,
+                                         npwx,
+                                         perbands_sto);
+                chet.calfinalvec_complex(&stohchi,
+                                         &Stochastic_hchi::hchi_norm,
+                                         exptsmfchi.get_pointer(),
+                                         exptsmfchi.get_pointer(),
+                                         npw,
+                                         npwx,
+                                         perbands_sto);
             }
             else
             {
@@ -720,14 +865,34 @@ void Sto_EleCond::sKG(const int& smear_type, const double& fwhmin, const double&
                 std::complex<double>* stoexptsmfchi = exptsmfchi.get_pointer();
                 if ((it - 1) % nbatch == 0)
                 {
-                    chet.calpolyvec_complex(&stohchi, &Stochastic_hchi::hchi_norm, stoexptsfchi, tmppolyexptsfchi, npw,
-                                            npwx, perbands_sto);
-                    chet.calpolyvec_complex(&stohchi, &Stochastic_hchi::hchi_norm, stoexptsmfchi, tmppolyexptsmfchi,
-                                            npw, npwx, perbands_sto);
-                    chemt.calpolyvec_complex(&stohchi, &Stochastic_hchi::hchi_norm, stoexpmtsfchi, tmppolyexpmtsfchi,
-                                             npw, npwx, perbands_sto);
-                    chemt.calpolyvec_complex(&stohchi, &Stochastic_hchi::hchi_norm, stoexpmtsmfchi, tmppolyexpmtsmfchi,
-                                             npw, npwx, perbands_sto);
+                    chet.calpolyvec_complex(&stohchi,
+                                            &Stochastic_hchi::hchi_norm,
+                                            stoexptsfchi,
+                                            tmppolyexptsfchi,
+                                            npw,
+                                            npwx,
+                                            perbands_sto);
+                    chet.calpolyvec_complex(&stohchi,
+                                            &Stochastic_hchi::hchi_norm,
+                                            stoexptsmfchi,
+                                            tmppolyexptsmfchi,
+                                            npw,
+                                            npwx,
+                                            perbands_sto);
+                    chemt.calpolyvec_complex(&stohchi,
+                                             &Stochastic_hchi::hchi_norm,
+                                             stoexpmtsfchi,
+                                             tmppolyexpmtsfchi,
+                                             npw,
+                                             npwx,
+                                             perbands_sto);
+                    chemt.calpolyvec_complex(&stohchi,
+                                             &Stochastic_hchi::hchi_norm,
+                                             stoexpmtsmfchi,
+                                             tmppolyexpmtsmfchi,
+                                             npw,
+                                             npwx,
+                                             perbands_sto);
                 }
 
                 std::complex<double>* tmpcoef = batchcoef.data() + (it - 1) % nbatch * cond_nche;
@@ -737,33 +902,105 @@ void Sto_EleCond::sKG(const int& smear_type, const double& fwhmin, const double&
                 const int M = perbands_sto * npwx;
                 const int N = cond_nche;
                 const int inc = 1;
-                zgemv_(&transa, &M, &N, &ModuleBase::ONE, tmppolyexptsfchi, &LDA, tmpcoef, &inc, &ModuleBase::ZERO,
-                       stoexptsfchi, &inc);
-                zgemv_(&transa, &M, &N, &ModuleBase::ONE, tmppolyexptsmfchi, &LDA, tmpcoef, &inc, &ModuleBase::ZERO,
-                       stoexptsmfchi, &inc);
-                zgemv_(&transa, &M, &N, &ModuleBase::ONE, tmppolyexpmtsfchi, &LDA, tmpmcoef, &inc, &ModuleBase::ZERO,
-                       stoexpmtsfchi, &inc);
-                zgemv_(&transa, &M, &N, &ModuleBase::ONE, tmppolyexpmtsmfchi, &LDA, tmpmcoef, &inc, &ModuleBase::ZERO,
-                       stoexpmtsmfchi, &inc);
+                zgemv_(&transa,
+                       &M,
+                       &N,
+                       &ModuleBase::ONE,
+                       tmppolyexptsfchi,
+                       &LDA,
+                       tmpcoef,
+                       &inc,
+                       &ModuleBase::ZERO,
+                       stoexptsfchi,
+                       &inc);
+                zgemv_(&transa,
+                       &M,
+                       &N,
+                       &ModuleBase::ONE,
+                       tmppolyexptsmfchi,
+                       &LDA,
+                       tmpcoef,
+                       &inc,
+                       &ModuleBase::ZERO,
+                       stoexptsmfchi,
+                       &inc);
+                zgemv_(&transa,
+                       &M,
+                       &N,
+                       &ModuleBase::ONE,
+                       tmppolyexpmtsfchi,
+                       &LDA,
+                       tmpmcoef,
+                       &inc,
+                       &ModuleBase::ZERO,
+                       stoexpmtsfchi,
+                       &inc);
+                zgemv_(&transa,
+                       &M,
+                       &N,
+                       &ModuleBase::ONE,
+                       tmppolyexpmtsmfchi,
+                       &LDA,
+                       tmpmcoef,
+                       &inc,
+                       &ModuleBase::ZERO,
+                       stoexpmtsmfchi,
+                       &inc);
             }
             ModuleBase::timer::tick("Sto_EleCond", "evolution");
 
             // calculate i<\psi|sqrt(f) exp(-iHt/2)*J*exp(iHt/2) sqrt(1-f)|\psi>^+
             //         = i<\psi|sqrt(1-f) exp(-iHt/2)*J*exp(iHt/2) sqrt(f)|\psi>
-            cal_jmatrix(*kspsi_all, f_vkspsi, en.data(), en_all, nullptr, nullptr, exptsmfchi, exptsfchi, tmphchil,
-                        batch_vchi, batch_vhchi,
+            cal_jmatrix(*kspsi_all,
+                        f_vkspsi,
+                        en.data(),
+                        en_all,
+                        nullptr,
+                        nullptr,
+                        exptsmfchi,
+                        exptsfchi,
+                        tmphchil,
+                        batch_vchi,
+                        batch_vhchi,
 #ifdef __MPI
-                        chi_all, hchi_all, (void*)&ks_fact, (void*)&sto_npwx,
+                        chi_all,
+                        hchi_all,
+                        (void*)&ks_fact,
+                        (void*)&sto_npwx,
 #endif
-                        bsize_psi, j1l, j2l, velop, ik, ModuleBase::IMAG_UNIT, bandsinfo);
+                        bsize_psi,
+                        j1l,
+                        j2l,
+                        velop,
+                        ik,
+                        ModuleBase::IMAG_UNIT,
+                        bandsinfo);
 
             // calculate <\psi|sqrt(1-f) exp(iHt/2)*J*exp(-iHt/2) sqrt(f)|\psi>
-            cal_jmatrix(*kspsi_all, f_vkspsi, en.data(), en_all, expmtmf_fact.data(), expmtf_fact.data(), expmtsmfchi,
-                        expmtsfchi, tmphchil, batch_vchi, batch_vhchi,
+            cal_jmatrix(*kspsi_all,
+                        f_vkspsi,
+                        en.data(),
+                        en_all,
+                        expmtmf_fact.data(),
+                        expmtf_fact.data(),
+                        expmtsmfchi,
+                        expmtsfchi,
+                        tmphchil,
+                        batch_vchi,
+                        batch_vhchi,
 #ifdef __MPI
-                        chi_all, hchi_all, (void*)&ks_fact, (void*)&sto_npwx,
+                        chi_all,
+                        hchi_all,
+                        (void*)&ks_fact,
+                        (void*)&sto_npwx,
 #endif
-                        bsize_psi, j1r, j2r, velop, ik, ModuleBase::ONE, bandsinfo);
+                        bsize_psi,
+                        j1r,
+                        j2r,
+                        velop,
+                        ik,
+                        ModuleBase::ONE,
+                        bandsinfo);
 
             // prepare for parallel
             int num_per = parajmat.num_per;
diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_elecond.h b/source/module_hamilt_pw/hamilt_stodft/sto_elecond.h
index 8a2bf518cb..6f06aca238 100644
--- a/source/module_hamilt_pw/hamilt_stodft/sto_elecond.h
+++ b/source/module_hamilt_pw/hamilt_stodft/sto_elecond.h
@@ -9,10 +9,15 @@
 class Sto_EleCond : protected EleCond
 {
   public:
-    Sto_EleCond(UnitCell* p_ucell_in, K_Vectors* p_kv_in, elecstate::ElecState* p_elec_in,
-                ModulePW::PW_Basis_K* p_wfcpw_in, psi::Psi<std::complex<double>>* p_psi_in,
-                pseudopot_cell_vnl* p_ppcell_in, hamilt::Hamilt<std::complex<double>>* p_hamilt_in,
-                hsolver::HSolverPW_SDFT* p_hsol_in, Stochastic_WF* p_stowf_in);
+    Sto_EleCond(UnitCell* p_ucell_in,
+                K_Vectors* p_kv_in,
+                elecstate::ElecState* p_elec_in,
+                ModulePW::PW_Basis_K* p_wfcpw_in,
+                psi::Psi<std::complex<double>>* p_psi_in,
+                pseudopot_cell_vnl* p_ppcell_in,
+                hamilt::Hamilt<std::complex<double>>* p_hamilt_in,
+                hsolver::HSolverPW_SDFT* p_hsol_in,
+                Stochastic_WF* p_stowf_in);
     ~Sto_EleCond(){};
     /**
      * @brief Set the N order of Chebyshev expansion for conductivities
@@ -26,8 +31,7 @@ class Sto_EleCond : protected EleCond
      * @param try_emax trial Emax
      *
      */
-    void decide_nche(const double dt, int& nbatch, const double cond_thr, const int& fd_nche, double try_emin,
-                     double try_emax);
+    void decide_nche(const double dt, const double cond_thr, const int& fd_nche, double try_emin, double try_emax);
     /**
      * @brief calculate Stochastic Kubo-Greenwood
      *
@@ -40,14 +44,20 @@ class Sto_EleCond : protected EleCond
      * @param nbatch t step batch
      * @param npart_sto number stochastic wavefunctions parts to evalution simultaneously
      */
-    void sKG(const int& smear_type, const double& fwhmin, const double& wcut, const double& dw_in, const double& dt_in,
-             const bool& nonlocal, const int& nbatch, const int& npart_sto);
+    void sKG(const int& smear_type,
+             const double& fwhmin,
+             const double& wcut,
+             const double& dw_in,
+             const double& dt_in,
+             const bool& nonlocal,
+             const int& npart_sto);
 
   protected:
     int nbands_ks = 0;                              ///< number of KS bands
     int nbands_sto = 0;                             ///< number of stochastic bands
     int cond_nche = 0;                              ///< number of Chebyshev orders for conductivities
     int fd_nche = 0;                                ///< number of Chebyshev orders for Fermi-Dirac function
+    int cond_dtbatch = 0;                           ///< number of time steps in a batch
     hamilt::Hamilt<std::complex<double>>* p_hamilt; ///< pointer to the Hamiltonian
     hsolver::HSolverPW_SDFT* p_hsol = nullptr;      ///< pointer to the Hamiltonian solver
     Stochastic_WF* p_stowf = nullptr;               ///< pointer to the stochastic wavefunctions
@@ -57,16 +67,29 @@ class Sto_EleCond : protected EleCond
      * @brief calculate Jmatrix  <leftv|J|rightv>
      *
      */
-    void cal_jmatrix(const psi::Psi<std::complex<float>>& kspsi_all, const psi::Psi<std::complex<float>>& vkspsi,
-                     const double* en, const double* en_all, std::complex<double>* leftfact,
-                     std::complex<double>* rightfact, const psi::Psi<std::complex<double>>& leftchi,
-                     psi::Psi<std::complex<double>>& rightchi, psi::Psi<std::complex<double>>& left_hchi,
-                     psi::Psi<std::complex<double>>& batch_vchi, psi::Psi<std::complex<double>>& batch_vhchi,
+    void cal_jmatrix(const psi::Psi<std::complex<float>>& kspsi_all,
+                     const psi::Psi<std::complex<float>>& vkspsi,
+                     const double* en,
+                     const double* en_all,
+                     std::complex<double>* leftfact,
+                     std::complex<double>* rightfact,
+                     const psi::Psi<std::complex<double>>& leftchi,
+                     psi::Psi<std::complex<double>>& rightchi,
+                     psi::Psi<std::complex<double>>& left_hchi,
+                     psi::Psi<std::complex<double>>& batch_vchi,
+                     psi::Psi<std::complex<double>>& batch_vhchi,
 #ifdef __MPI
-                     psi::Psi<std::complex<float>>& chi_all, psi::Psi<std::complex<float>>& hchi_all,
-                     void* gatherinfo_ks, void* gatherinfo_sto,
+                     psi::Psi<std::complex<float>>& chi_all,
+                     psi::Psi<std::complex<float>>& hchi_all,
+                     void* gatherinfo_ks,
+                     void* gatherinfo_sto,
 #endif
-                     const int& bsize_psi, std::vector<std::complex<float>>& j1, std::vector<std::complex<float>>& j2,
-                     hamilt::Velocity& velop, const int& ik, const std::complex<double>& factor, const int bandinfo[6]);
+                     const int& bsize_psi,
+                     std::vector<std::complex<float>>& j1,
+                     std::vector<std::complex<float>>& j2,
+                     hamilt::Velocity& velop,
+                     const int& ik,
+                     const std::complex<double>& factor,
+                     const int bandinfo[6]);
 };
 #endif // ELECOND_H
\ No newline at end of file
diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp
index 8a27902004..c4185181e2 100644
--- a/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp
+++ b/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp
@@ -38,7 +38,7 @@ Stochastic_Iter::~Stochastic_Iter()
 
 void Stochastic_Iter::init(const int method_in, K_Vectors* pkv_in, ModulePW::PW_Basis_K* wfc_basis, Stochastic_WF& stowf)
 {
-    p_che = new ModuleBase::Chebyshev<double>(INPUT.nche_sto);
+    p_che = new ModuleBase::Chebyshev<double>(PARAM.inp.nche_sto);
     nchip = stowf.nchip;
     targetne = GlobalV::nelec;
     this->pkv = pkv_in;
diff --git a/source/module_io/CMakeLists.txt b/source/module_io/CMakeLists.txt
index 9d8a66972a..401c87cb4e 100644
--- a/source/module_io/CMakeLists.txt
+++ b/source/module_io/CMakeLists.txt
@@ -1,6 +1,4 @@
 list(APPEND objects
-    input.cpp
-    input_conv_tmp.cpp
     input_conv.cpp
     bessel_basis.cpp
     cal_test.cpp
diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp
deleted file mode 100644
index bf5e804389..0000000000
--- a/source/module_io/input.cpp
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "module_io/input.h"
-#include "input_conv.h"
-Input INPUT;
\ No newline at end of file
diff --git a/source/module_io/input.h b/source/module_io/input.h
deleted file mode 100644
index c4b7eb6669..0000000000
--- a/source/module_io/input.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef INPUT_H
-#define INPUT_H
-
-#include <fstream>
-#include <string>
-#include <type_traits>
-#include <vector>
-
-#include "module_base/vector3.h"
-#include "module_parameter/md_parameter.h"
-
-class Input
-{
-  public:
-    ~Input()
-    {
-        delete[] hubbard_u;
-        delete[] orbital_corr;
-    }
-    
-    // They will be removed.
-    int cond_dtbatch;
-    int nche_sto;
-    int* orbital_corr = nullptr; ///< which correlated orbitals need corrected ;
-    double* hubbard_u = nullptr; ///< Hubbard Coulomb interaction parameter U(ev)
-    std::string stru_file;     // file contains atomic positions -- xiaohui modify
-
-};
-
-extern Input INPUT;
-#endif // INPUT
\ No newline at end of file
diff --git a/source/module_io/input_conv.cpp b/source/module_io/input_conv.cpp
index 471cae2007..55430e192d 100644
--- a/source/module_io/input_conv.cpp
+++ b/source/module_io/input_conv.cpp
@@ -8,7 +8,6 @@
 #include "module_hamilt_general/module_surchem/surchem.h"
 #include "module_hamilt_pw/hamilt_pwdft/global.h"
 #include "module_io/berryphase.h"
-#include "module_io/input.h"
 #include "module_parameter/parameter.h"
 #include "module_relax/relax_old/ions_move_basic.h"
 #include "module_relax/relax_old/lattice_change_basic.h"
@@ -187,10 +186,10 @@ void Input_Conv::Convert()
         MD_func::current_md_info(GlobalV::MY_RANK, GlobalV::global_readin_dir, istep, temperature);
         if (PARAM.inp.read_file_dir == "auto")
         {
-            GlobalV::stru_file = INPUT.stru_file = GlobalV::global_stru_dir + "STRU_MD_" + std::to_string(istep);
+            GlobalV::stru_file = GlobalV::global_stru_dir + "STRU_MD_" + std::to_string(istep);
         }
-    } else if (INPUT.stru_file != "") {
-        GlobalV::stru_file = INPUT.stru_file;
+    } else if (PARAM.inp.stru_file != "") {
+        GlobalV::stru_file = PARAM.inp.stru_file;
     }
     if (PARAM.inp.kpoint_file != "")
     {
@@ -326,14 +325,14 @@ void Input_Conv::Convert()
         GlobalV::dft_plus_u = PARAM.inp.dft_plus_u;
         GlobalC::dftu.Yukawa = PARAM.inp.yukawa_potential;
         GlobalC::dftu.omc = PARAM.inp.omc;
-        GlobalC::dftu.orbital_corr = INPUT.orbital_corr;
+        GlobalC::dftu.orbital_corr = PARAM.inp.orbital_corr;
         GlobalC::dftu.uramping = PARAM.globalv.uramping;
         GlobalC::dftu.mixing_dftu = PARAM.inp.mixing_dftu;
-        GlobalC::dftu.U = INPUT.hubbard_u;
-        GlobalC::dftu.U0 = std::vector<double>(INPUT.hubbard_u, INPUT.hubbard_u + GlobalC::ucell.ntype);
+        GlobalC::dftu.U = PARAM.globalv.hubbard_u;
+        GlobalC::dftu.U0 = PARAM.globalv.hubbard_u;
         if (PARAM.globalv.uramping > 0.01)
         {
-            ModuleBase::GlobalFunc::ZEROS(GlobalC::dftu.U, GlobalC::ucell.ntype);
+            ModuleBase::GlobalFunc::ZEROS(GlobalC::dftu.U.data(), GlobalC::ucell.ntype);
         }
     }
 #endif
diff --git a/source/module_io/input_conv_tmp.cpp b/source/module_io/input_conv_tmp.cpp
deleted file mode 100644
index 7049c91d71..0000000000
--- a/source/module_io/input_conv_tmp.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-#include <sstream>
-#include <string>
-
-#include "input.h"
-#include "input_conv.h"
-#include "module_parameter/parameter.h"
-
-void Input_Conv::tmp_convert()
-{
-    INPUT.stru_file = PARAM.inp.stru_file;
-    INPUT.cond_dtbatch = PARAM.inp.cond_dtbatch;
-    INPUT.nche_sto = PARAM.inp.nche_sto;
-
-    const int ntype = PARAM.inp.ntype;
-    delete[] INPUT.orbital_corr;
-    INPUT.orbital_corr = new int[ntype];
-    for (int i = 0; i < ntype; ++i)
-    {
-        INPUT.orbital_corr[i] = PARAM.inp.orbital_corr[i];
-    }
-    delete[] INPUT.hubbard_u;
-    INPUT.hubbard_u = new double[ntype];
-    for (int i = 0; i < ntype; ++i)
-    {
-        INPUT.hubbard_u[i] = PARAM.globalv.hubbard_u[i];
-    }
-}
diff --git a/source/module_io/json_output/init_info.cpp b/source/module_io/json_output/init_info.cpp
index 9cff3c5192..5905bdfed3 100644
--- a/source/module_io/json_output/init_info.cpp
+++ b/source/module_io/json_output/init_info.cpp
@@ -2,7 +2,6 @@
 
 #include "../para_json.h"
 #include "abacusjson.h"
-#include "module_io/input.h"
 
 // Add json objects to init
 namespace Json
diff --git a/source/module_io/json_output/output_info.cpp b/source/module_io/json_output/output_info.cpp
index 74bd3bd994..085bb93526 100644
--- a/source/module_io/json_output/output_info.cpp
+++ b/source/module_io/json_output/output_info.cpp
@@ -1,5 +1,4 @@
 #include "output_info.h"
-#include "module_io/input.h"
 #include "../para_json.h"
 #include "abacusjson.h"
 
diff --git a/source/module_io/json_output/readin_info.cpp b/source/module_io/json_output/readin_info.cpp
index d43a23a1c0..88e400026d 100644
--- a/source/module_io/json_output/readin_info.cpp
+++ b/source/module_io/json_output/readin_info.cpp
@@ -1,5 +1,4 @@
 #include "readin_info.h"
-#include "module_io/input.h"
 #include "../para_json.h"
 #include "abacusjson.h"
 
diff --git a/source/module_io/parse_args.cpp b/source/module_io/parse_args.cpp
index 7d784408dd..d762c19ce1 100644
--- a/source/module_io/parse_args.cpp
+++ b/source/module_io/parse_args.cpp
@@ -3,7 +3,6 @@
 #include <cstdlib>
 #include <iostream>
 
-#include "module_io/input.h"
 #include "module_io/read_input.h"
 #include "version.h"
 
diff --git a/source/module_io/test/CMakeLists.txt b/source/module_io/test/CMakeLists.txt
index c065e06f14..1d10a3a5bc 100644
--- a/source/module_io/test/CMakeLists.txt
+++ b/source/module_io/test/CMakeLists.txt
@@ -56,7 +56,7 @@ AddTest(
 AddTest(
   TARGET io_print_info
   LIBS ${math_libs} base device symmetry cell_info parameter
-  SOURCES print_info_test.cpp ../print_info.cpp ../input.cpp ../output.cpp ../../module_cell/klist.cpp ../../module_cell/parallel_kpoints.cpp
+  SOURCES print_info_test.cpp ../print_info.cpp ../output.cpp ../../module_cell/klist.cpp ../../module_cell/parallel_kpoints.cpp
 )
 
 AddTest(
diff --git a/source/module_md/test/CMakeLists.txt b/source/module_md/test/CMakeLists.txt
index c8de79f750..a74a67aced 100644
--- a/source/module_md/test/CMakeLists.txt
+++ b/source/module_md/test/CMakeLists.txt
@@ -4,7 +4,6 @@ remove_definitions(-DUSE_PAW)
 
 list(APPEND depend_files 
   ../md_func.cpp
-  ../../module_io/input.cpp
   ../../module_cell/unitcell.cpp
   ../../module_cell/atom_spec.cpp
   ../../module_cell/atom_pseudo.cpp