From cd87aebbd8435ce2115987df2cd2ef122adb0494 Mon Sep 17 00:00:00 2001
From: Matthew R Hermes <mrhermes@uchicago.edu>
Date: Thu, 28 Sep 2023 18:01:47 -0500
Subject: [PATCH] csf_solver memory reforms

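1. Make the pspace memory guardrail more conservative than in the last
commit: the h0 estimate now counts (npsp_det+npsp)**2 floats instead of
npsp_det**2.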
2. transform_opmat_det2csf_pspace: use integer ranges (slices) rather
than a bool array for indexing, since everything is supposed to be
contiguous anyway (an assert on reduced_csd_mask now checks this).
3. Demote the eigenvalue test to an even higher debug level
(verbose > DEBUG1), since diagonalizing h0 quickly becomes infeasible
as the pspace grows.
---
 my_pyscf/fci/csf.py       |  7 ++++---
 my_pyscf/fci/csfstring.py | 17 ++++++++++-------
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/my_pyscf/fci/csf.py b/my_pyscf/fci/csf.py
index f714d3f2..beda9e2e 100644
--- a/my_pyscf/fci/csf.py
+++ b/my_pyscf/fci/csf.py
@@ -268,7 +268,8 @@ def pspace (fci, h1e, eri, norb, nelec, transformer, hdiag_det=None, hdiag_csf=N
     strb = cistring.addrs2str(norb, nelecb, addrb)
     npsp_det = len(det_addr)
     safety_factor = 1.2
-    mem_h0 = safety_factor * (npsp_det**2 * np.dtype (float).itemsize) / 1e6
+    nfloats_h0 = (npsp_det+npsp)**2
+    mem_h0 = safety_factor * nfloats_h0 * np.dtype (float).itemsize / 1e6
     mem_remaining = max_memory - lib.current_memory ()[0]
     if mem_h0 > mem_remaining:
         raise MemoryError (("pspace_size of {} CSFs -> {} determinants requires {} MB > {} MB "
@@ -297,7 +298,7 @@ def pspace (fci, h1e, eri, norb, nelec, transformer, hdiag_det=None, hdiag_csf=N
     h0 = lib.hermi_triu(h0)
 
     try:
-        if fci.verbose >= lib.logger.DEBUG: evals_before = scipy.linalg.eigh (h0)[0]
+        if fci.verbose > lib.logger.DEBUG1: evals_before = scipy.linalg.eigh (h0)[0]
     except ValueError as e:
         lib.logger.debug1 (fci, ("ERROR: h0 has {} infs, {} nans; h1e_a has {} infs, {} nans; "
             "h1e_b has {} infs, {} nans; g2e has {} infs, {} nans, norb = {}, npsp_det = {}").format (
@@ -311,7 +312,7 @@ def pspace (fci, h1e, eri, norb, nelec, transformer, hdiag_det=None, hdiag_csf=N
     h0, csf_addr = transformer.mat_det2csf_confspace (h0, econf_addr)
     t0 = lib.logger.timer_debug1 (fci, "csf.pspace: transform pspace Hamiltonian into CSF basis", *t0)
 
-    if fci.verbose > lib.logger.DEBUG:
+    if fci.verbose > lib.logger.DEBUG1:
         lib.logger.debug1 (fci, "csf.pspace: eigenvalues of h0 before transformation %s", evals_before)
         evals_after = scipy.linalg.eigh (h0)[0]
         lib.logger.debug1 (fci, "csf.pspace: eigenvalues of h0 after transformation %s", evals_after)
diff --git a/my_pyscf/fci/csfstring.py b/my_pyscf/fci/csfstring.py
index c28c6c1b..8ed694ae 100644
--- a/my_pyscf/fci/csfstring.py
+++ b/my_pyscf/fci/csfstring.py
@@ -690,7 +690,7 @@ def transform_opmat_det2csf_pspace (op, econfs, norb, neleca, nelecb, smult, csd
     _, npair_csf_offset, _, _, npair_csf_size = get_csfvec_shape (norb, neleca, nelecb, smult)
     npair_econf_size = npair_dconf_size * npair_sconf_size
     max_npair = min (neleca, nelecb)
-    csf_idx = np.zeros (ncsf_all, dtype=np.bool_)
+    assert (np.count_nonzero (reduced_csd_mask[1:] - reduced_csd_mask[:-1] - 1)==0)
     def ax_b (mat):
         nrow = mat.shape[0]
         assert (mat.shape[1] == ndet_all)
@@ -710,15 +710,17 @@ def ax_b (mat):
             if nconf == 0 or ncsf == 0 or ndet == 0:
                 continue
 
-            csf_idx[:] = False
-            csf_idx[csf_offset:csf_offset+nconf*ncsf] = True
+            ci = csf_offset
+            cj = ci + nconf*ncsf
+
+            di = reduced_csd_mask[det_offset]
+            dj = di + nconf*ndet
+            mat_ij = mat[:,di:dj].reshape (nrow, nconf, ndet)
 
-            det_idx = reduced_csd_mask[det_offset:det_offset+nconf*ndet].reshape (nconf, ndet, order='C')
-    
             nspin = neleca + nelecb - 2*npair
             umat = np.asarray_chkfinite (get_spin_evecs (nspin, neleca, nelecb, smult))
 
-            outmat[:,csf_idx] = np.tensordot (mat[:,det_idx], umat, axes=1).reshape (nrow, ncsf*nconf, order='C')
+            outmat[:,ci:cj] = np.tensordot (mat_ij, umat, axes=1).reshape (nrow, ncsf*nconf, order='C')
 
             det_offset += nconf*ndet
             csf_offset += nconf*ncsf
@@ -727,7 +729,8 @@ def ax_b (mat):
         assert (csf_offset == ncsf_all), "{} {}".format (csf_offset, ncsf_all)
         return outmat
 
-    op = ax_b (ax_b (op).conj ().T).conj ().T
+    op = ax_b (op).conj ().T
+    op = ax_b (op).conj ().T
     return op, csf_addrs