diff --git a/recirq/qcqmc/data.py b/recirq/qcqmc/data.py index 19391336..d470291f 100644 --- a/recirq/qcqmc/data.py +++ b/recirq/qcqmc/data.py @@ -13,36 +13,43 @@ # limitations under the License. import abc -import dataclasses import hashlib import pathlib -from dataclasses import dataclass from pathlib import Path +import attrs import numpy as np DEFAULT_BASE_DATA_DIR = (Path(__file__).parent / "data").resolve() def get_integrals_path( - name: str, *, base_data_dir: str = DEFAULT_BASE_DATA_DIR + name: str, *, base_data_dir: pathlib.Path = DEFAULT_BASE_DATA_DIR ) -> Path: """Find integral data file by name. Args: name: The molecule name. + base_data_dir: The base directory where the data folder should live. """ - return Path(base_data_dir) / "integrals" / name / "hamiltonian.chk" + return base_data_dir / "integrals" / name / "hamiltonian.chk" _MOLECULE_INFO = {"fh_sto3g"} # should go in Dedicated module -@dataclass(frozen=True, repr=False) +@attrs.frozen class Params(abc.ABC): + """Store named parameters which will potentiall be read from a file. + + Args: + name: The name of the parameters + """ + name: str + # TODO: Is this necessary? def __eq__(self, other): """A helper method to compare two dataclasses which might contain arrays.""" if self is other: @@ -53,9 +60,7 @@ def __eq__(self, other): return all( array_compatible_eq(thing1, thing2) - for thing1, thing2 in zip( - dataclasses.astuple(self), dataclasses.astuple(other) - ) + for thing1, thing2 in zip(attrs.astuple(self), attrs.astuple(other)) ) @property @@ -78,7 +83,7 @@ def __repr__(self): # This custom repr lets us add fields with default values without changing # the repr. This, in turn, lets us use the hash_key reliably even when # we add new fields with a default value. - fields = dataclasses.fields(self) + fields = attrs.fields(self) # adjusted_fields = [f for f in fields if getattr(self, f.name) != f.default] adjusted_fields = [ f @@ -99,8 +104,14 @@ def _json_namespace_(cls): # Should go in dedicated module -@dataclass(frozen=True, eq=False) +@attrs.frozen class Data(abc.ABC): + """A container for different forms of data used in qcqmc (hamiltonian, trial wavefunction, ...). + + Args: + params: The name of the parameters + """ + params: Params def __eq__(self, other): @@ -113,9 +124,7 @@ def __eq__(self, other): return all( array_compatible_eq(thing1, thing2) - for thing1, thing2 in zip( - dataclasses.astuple(self), dataclasses.astuple(other) - ) + for thing1, thing2 in zip(attrs.astuple(self), attrs.astuple(other)) ) @classmethod diff --git a/recirq/qcqmc/hamiltonian.py b/recirq/qcqmc/hamiltonian.py new file mode 100644 index 00000000..cf5d8b3e --- /dev/null +++ b/recirq/qcqmc/hamiltonian.py @@ -0,0 +1,401 @@ +# Copyright 2024 Google +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +from pathlib import Path +from typing import Tuple, Union + +import attrs +import fqe +import h5py +import numpy as np +import openfermion as of +from fqe.hamiltonians.restricted_hamiltonian import RestrictedHamiltonian +from fqe.openfermion_utils import integrals_to_fqe_restricted +from pyscf import ao2mo, fci, gto, scf + +from recirq.qcqmc import config, data + +# xyz coordinates of an atom +PositionT = Tuple[float, float, float] +# [atom name, xyz] +GeomT = Tuple[str, PositionT] +HamiltonianParams = Union["HamiltonianFileParams", "PyscfHamiltonianParams"] + + +def compute_integrals( + pyscf_molecule: gto.Mole, pyscf_scf: Union[scf.RHF, scf.ROHF] +) -> Tuple[np.ndarray, np.ndarray]: + """Compute the (restricted) 1-electron and 2-electron integrals in the spatial orbital basis. + + From: openfermionpyscf _run_pyscf. + + Args: + pyscf_molecule: A pyscf Mole instance. + pyscf_scf: A PySCF "SCF" calculation object. + + Returns: + one_electron_integrals: An N by N array storing h_{pq} + two_electron_integrals: A 4-D tensor storing h_{pqrs} in using the openfermion convention. + """ + # Get one electrons integrals. + n_orbitals = pyscf_scf.mo_coeff.shape[1] + one_electron_compressed = functools.reduce( + np.dot, (pyscf_scf.mo_coeff.T, pyscf_scf.get_hcore(), pyscf_scf.mo_coeff) + ) + one_electron_integrals = one_electron_compressed.reshape( + n_orbitals, n_orbitals + ).astype(float) + + # Get two electron integrals in compressed format. + two_electron_compressed = ao2mo.kernel(pyscf_molecule, pyscf_scf.mo_coeff) + + two_electron_integrals = ao2mo.restore( + 1, two_electron_compressed, n_orbitals # no permutational symmetry + ) + # See PQRS convention in OpenFermion.hamiltonians._molecular_data + # h[p,q,r,s] = (ps|qr) + two_electron_integrals = np.asarray( + two_electron_integrals.transpose(0, 2, 3, 1), order="C" + ) + + return one_electron_integrals, two_electron_integrals + + +@attrs.frozen +class HamiltonianFileParams(data.Params): + """Class for storing the parameters that specify loading hamiltonian integrals from a file. + + Args: + name: A descriptive name for the hamiltonian. + integral_key: A string specifying the directory in data/integrals where + the integrals should live. + n_orb: The number of spatial orbitals. + n_elec: The total number of electrons. + do_eri_restore: Whether to restore 8-fold symmetry to the integrals. + path_prefix: An optional path string to prepend to the default + hamiltonian data directory (by default the integrals will be written / + read to ./data/hamiltonians.) + """ + + name: str + integral_key: str + n_orb: int + n_elec: int + do_eri_restore: bool = False + path_prefix: str = "" + + def _json_dict_(self): + return attrs.asdict(self) + + @property + def path_string(self) -> str: + return ( + self.path_prefix + config.OUTDIRS.DEFAULT_HAMILTONIAN_DIRECTORY + self.name + ) + + +@attrs.frozen +class PyscfHamiltonianParams(data.Params): + """Class for describing the parameters to get a Hamiltonian from Pyscf. + + Args: + name: A name for the Hamiltonian + n_orb: The number of orbitals (redundant with later parameters but used + for validation). + n_elec: The number of electrons (redundant with later parameters but + used for validation). + geometry: The coordinates of each atom. An example is [('H', (0, 0, 0)), + ('H', (0, 0, 0.7414))]. Distances are in angstrom. Use atomic + symbols to specify atoms. + basis: The basis set, e.g., 'cc-pvtz'. + multiplicity: The spin multiplicity + charge: The total molecular charge. + rhf: Whether to use RHF (True) or ROHF (False). + verbose_scf: Setting passed to pyscf's scf verbose attribute. + save_chkfile: If True, then pyscf will save a chk file for this Hamiltonian. + overwrite_chk_file: If save_chkfile and overwrite_chk_file are both true then Pyscf + will be allowed to overwrite the previously saved chk file. Otherwise, if save_chkfile + is True and overwrite_chk_file is False, then we raise a FileExistsError if the + chk file already exists. + path_prefix: A prefix to prepend to the path where the data will be saved (if any) + """ + + name: str + n_orb: int + n_elec: int + geometry: Tuple[GeomT, ...] = attrs.field( + converter=lambda x: tuple((at[0], tuple(at[1])) for at in x) + ) + basis: str + multiplicity: int + charge: int = 0 + rhf: bool = True + verbose_scf: int = 0 + save_chkfile: bool = False + overwrite_chk_file: bool = False + path_prefix: str = "" + + def _json_dict_(self): + return attrs.asdict(self) + + @property + def path_string(self) -> str: + """Output path string""" + return ( + self.path_prefix + config.OUTDIRS.DEFAULT_HAMILTONIAN_DIRECTORY + self.name + ) + + @property + def chk_path(self) -> Path: + """Path string to any checkpoint file is saved.""" + parent = self.base_path.parent + if not parent.is_dir(): + parent.mkdir(exist_ok=True, parents=True) + return self.base_path.with_suffix(".chk") + + @property + def pyscf_molecule(self) -> gto.Mole: + """Underlying pyscf.gto.Mole. instance.""" + molecule = gto.Mole() + + molecule.atom = self.geometry + molecule.basis = self.basis + molecule.spin = self.multiplicity - 1 + molecule.charge = self.charge + molecule.symmetry = False + + molecule.build() + + assert int(molecule.nao_nr()) == self.n_orb + assert molecule.nelectron == self.n_elec + + return molecule + + +@attrs.frozen +class HamiltonianData(data.Data): + """Data class that contains information about a Hamiltonian. + + Args: + params: parameters defining the hamiltonian. + e_core: The nuclear-nuclear repulsion energy (or any constant term from the Hamiltonian.) + one_body_integrals: The one-electron integrals as a matrix. + two_body_integrals: The two-electron integrals using the openfermion convention. + e_hf: The restricted hartree fock energy of the molecule. + e_fci: The FCI energy for the molecule / hamiltonian. + """ + + params: HamiltonianParams + e_core: float + one_body_integrals: np.ndarray = attrs.field(converter=np.asarray) + two_body_integrals_pqrs: np.ndarray = attrs.field(converter=np.asarray) + e_hf: float + e_fci: float + + def _json_dict_(self): + return attrs.asdict(self) + + def get_molecular_hamiltonian(self) -> of.InteractionOperator: + """Gets the OpenFermion Hamiltonian from a HamiltonianData object.""" + one_body_coefficients, two_body_coefficients = spinorb_from_spatial( + one_body_integrals=self.one_body_integrals, + two_body_integrals=self.two_body_integrals_pqrs, + ) + + molecular_hamiltonian = of.InteractionOperator( + constant=self.e_core, + one_body_tensor=one_body_coefficients, + two_body_tensor=1 / 2 * two_body_coefficients, + ) + + return molecular_hamiltonian + + def get_restricted_fqe_hamiltonian(self) -> RestrictedHamiltonian: + """Gets an Fqe RestrictedHamiltonian from a HamiltonianData object.""" + + return integrals_to_fqe_restricted( + h1e=self.one_body_integrals, h2e=self.two_body_integrals_pqrs + ) + + +def load_integrals( + *, filepath: Union[str, Path] +) -> Tuple[float, np.ndarray, np.ndarray, float]: + """Load integrals from a checkpoint file. + + Args: + filepath: The path of the .h5 file containing the integrals. + """ + with h5py.File(filepath, "r") as f: + ecore = float(f["ecore"][()]) + one_body_integrals = np.asarray(f["h1"][:, :]) + efci = float(f["efci"][()]) + two_body_integrals = np.asarray(f["h2"][:, :]) + + return ecore, one_body_integrals, two_body_integrals, efci + + +def spinorb_from_spatial( + one_body_integrals: np.ndarray, + two_body_integrals: np.ndarray, + truncation_tol: float = 1e-9, +) -> Tuple[np.ndarray, np.ndarray]: + """Convert integrals from spatial to spin-orbital form. + + Args: + one_body_integrals: The one-electron integrals in spatial orbital format. + two_body_integrals: The two-electron integrals in spatial oribtal format. + truncation_tol: The value below which integrals are considered zero. + + Returns: + one_body_coefficients: The one-body coefficients in spin-orbital format. + two_body_coefficients: The two-body coefficients in spin-orbital format. + """ + + n_qubits = 2 * one_body_integrals.shape[0] + # Initialize Hamiltonian coefficients. + one_body_coefficients = np.zeros((n_qubits, n_qubits)) + two_body_coefficients = np.zeros((n_qubits, n_qubits, n_qubits, n_qubits)) + # Loop through integrals. + for p in range(n_qubits // 2): + for q in range(n_qubits // 2): + # Populate 1-body coefficients. Require p and q have same spin. + one_body_coefficients[2 * p, 2 * q] = one_body_integrals[p, q] + one_body_coefficients[2 * p + 1, 2 * q + 1] = one_body_integrals[p, q] + # Continue looping to prepare 2-body coefficients. + for r in range(n_qubits // 2): + for s in range(n_qubits // 2): + # Mixed spin + two_body_coefficients[2 * p, 2 * q + 1, 2 * r + 1, 2 * s] = ( + two_body_integrals[p, q, r, s] + ) + two_body_coefficients[2 * p + 1, 2 * q, 2 * r, 2 * s + 1] = ( + two_body_integrals[p, q, r, s] + ) + + # Same spin + two_body_coefficients[2 * p, 2 * q, 2 * r, 2 * s] = ( + two_body_integrals[p, q, r, s] + ) + two_body_coefficients[ + 2 * p + 1, 2 * q + 1, 2 * r + 1, 2 * s + 1 + ] = two_body_integrals[p, q, r, s] + + # Truncate. + one_body_coefficients[np.absolute(one_body_coefficients) < truncation_tol] = 0.0 + two_body_coefficients[np.absolute(two_body_coefficients) < truncation_tol] = 0.0 + + return one_body_coefficients, two_body_coefficients + + +def _cast_to_float(x: np.complex_, tol=1e-8) -> float: + assert np.abs(x.imag) < tol, "Large imaginary component found." + return float(x.real) + + +def build_hamiltonian_from_file( + params: HamiltonianFileParams, +) -> HamiltonianData: + """Function for loading a Hamiltonian from a file. + + Args: + params: parameters defining the hamiltonian. + + Returns: + properly constructed HamiltonianFileParams object. + """ + filepath = data.get_integrals_path(name=params.integral_key) + e_core, one_body_integrals, two_body_integrals_psqr, e_fci = load_integrals( + filepath=filepath + ) + + n_orb = params.n_orb + + if params.do_eri_restore: + two_body_integrals_psqr = ao2mo.restore(1, two_body_integrals_psqr, n_orb) + # This step may be necessary depending on the format in which the integrals were stored. + + # We have to reshape to a four index tensor and reorder the indices from + # chemist's ordering to physicist's. + two_body_integrals_psqr = two_body_integrals_psqr.reshape( + (n_orb, n_orb, n_orb, n_orb) + ) + two_body_integrals_pqrs = np.einsum("psqr->pqrs", two_body_integrals_psqr) + + fqe_ham = integrals_to_fqe_restricted( + h1e=one_body_integrals, h2e=two_body_integrals_pqrs + ) + + initial_wf = fqe.Wavefunction([[params.n_elec, 0, params.n_orb]]) + initial_wf.set_wfn(strategy="hartree-fock") + e_hf = _cast_to_float(initial_wf.expectationValue(fqe_ham) + e_core) + + return HamiltonianData( + params=params, + e_core=e_core, + one_body_integrals=one_body_integrals, + two_body_integrals_pqrs=two_body_integrals_pqrs, + e_hf=e_hf, + e_fci=e_fci, + ) + + +def build_hamiltonian_from_pyscf(params: PyscfHamiltonianParams) -> HamiltonianData: + """Construct a HamiltonianData object from pyscf molecule. + + Runs a RHF or ROHF simulation, performs an integral transformation and computes the FCI energy. + + Args: + params: A PyscfHamiltonianParams object specifying the molecule. pyscf_molecule is required. + + Returns: + A HamiltonianData object. + """ + molecule = params.pyscf_molecule + + if params.rhf: + pyscf_scf = scf.RHF(molecule) + else: + pyscf_scf = scf.ROHF(molecule) + + pyscf_scf.verbose = params.verbose_scf + if params.save_chkfile: + if not params.overwrite_chk_file: + if params.chk_path.exists(): + raise FileExistsError(f"A chk file already exists at {params.chk_path}") + pyscf_scf.chkfile = str(params.chk_path.resolve()) + pyscf_scf = pyscf_scf.newton() + pyscf_scf.run() + + e_hf = float(pyscf_scf.e_tot) + + one_body_integrals, two_body_integrals = compute_integrals( + pyscf_molecule=molecule, pyscf_scf=pyscf_scf + ) + + e_core = float(molecule.energy_nuc()) + + pyscf_fci = fci.FCI(molecule, pyscf_scf.mo_coeff) + pyscf_fci.verbose = 0 + e_fci = pyscf_fci.kernel()[0] + + return HamiltonianData( + params=params, + e_core=e_core, + one_body_integrals=one_body_integrals, + two_body_integrals_pqrs=two_body_integrals, + e_hf=e_hf, + e_fci=e_fci, + ) diff --git a/recirq/qcqmc/hamiltonian_test.py b/recirq/qcqmc/hamiltonian_test.py new file mode 100644 index 00000000..17215659 --- /dev/null +++ b/recirq/qcqmc/hamiltonian_test.py @@ -0,0 +1,168 @@ +# Copyright 2024 Google +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pytest +from openfermion import ( + get_fermion_operator, + get_ground_state, + get_number_preserving_sparse_operator, +) + +from recirq.qcqmc.hamiltonian import ( + HamiltonianFileParams, + PyscfHamiltonianParams, + build_hamiltonian_from_file, + build_hamiltonian_from_pyscf, +) + + +def test_load_from_file_hamiltonian_runs(): + params = HamiltonianFileParams( + name="test hamiltonian", integral_key="fh_sto3g", n_orb=2, n_elec=2 + ) + + hamiltonian_data = build_hamiltonian_from_file(params) + + assert hamiltonian_data.one_body_integrals.shape == (2, 2) + assert hamiltonian_data.two_body_integrals_pqrs.shape == (2, 2, 2, 2) + + +@pytest.mark.parametrize( + "integral_key, n_orb, n_elec, do_eri_restore", + [ + ("fh_sto3g", 2, 2, False), + ("pabi_def2svp", 2, 2, False), + ("h4_sto3g", 4, 4, False), + ("diamond_dzvp/cas66", 6, 6, True), + ], +) +def test_hamiltonian_energy_consistent( + integral_key: str, n_orb: int, n_elec: int, do_eri_restore: bool +): + params = HamiltonianFileParams( + name="test hamiltonian", + integral_key=integral_key, + n_orb=n_orb, + n_elec=n_elec, + do_eri_restore=do_eri_restore, + ) + + hamiltonian_data = build_hamiltonian_from_file(params) + + molecular_hamiltonian = hamiltonian_data.get_molecular_hamiltonian() + + ham_matrix = get_number_preserving_sparse_operator( + get_fermion_operator(molecular_hamiltonian), n_orb * 2, n_elec, True + ) + energy, _ = get_ground_state(ham_matrix) + + np.testing.assert_almost_equal(energy, hamiltonian_data.e_fci) + + +def test_pyscf_h4_consistent_with_file(): + pyscf_params = PyscfHamiltonianParams( + name="TEST_Square H4", + n_orb=4, + n_elec=4, + geometry=( + ("H", (0, 0, 0)), + ("H", (0, 0, 1.23)), + ("H", (1.23, 0, 0)), + ("H", (1.23, 0, 1.23)), + ), + basis="sto3g", + multiplicity=1, + charge=0, + ) + + pyscf_hamiltonian = build_hamiltonian_from_pyscf(pyscf_params) + + from_file_params = HamiltonianFileParams( + name="test hamiltonian", + integral_key="h4_sto3g", + n_orb=4, + n_elec=4, + do_eri_restore=False, + ) + + from_file_hamiltonian = build_hamiltonian_from_file(from_file_params) + + np.testing.assert_almost_equal( + pyscf_hamiltonian.e_core, from_file_hamiltonian.e_core, decimal=10 + ) + np.testing.assert_almost_equal( + pyscf_hamiltonian.e_hf, from_file_hamiltonian.e_hf, decimal=10 + ) + np.testing.assert_almost_equal( + pyscf_hamiltonian.e_fci, from_file_hamiltonian.e_fci, decimal=10 + ) + + +def test_pyscf_saves_chk_without_overwrite(tmp_path): + pyscf_params = PyscfHamiltonianParams( + name="TEST Square H4 chk save test", + n_orb=4, + n_elec=4, + geometry=( + ("H", (0, 0, 0)), + ("H", (0, 0, 1.23)), + ("H", (1.23, 0, 0)), + ("H", (1.23, 0, 1.23)), + ), + basis="sto3g", + multiplicity=1, + charge=0, + save_chkfile=True, + path_prefix=str(tmp_path), + ) + + chk_path = pyscf_params.base_path.with_suffix(".chk") + chk_path.unlink(missing_ok=True) + + build_hamiltonian_from_pyscf(pyscf_params) + assert chk_path.exists() + + with pytest.raises(FileExistsError): + pyscf_hamiltonian = build_hamiltonian_from_pyscf(pyscf_params) + + chk_path.unlink() + + +def test_pyscf_saves_chk_with_overwrite(tmp_path): + pyscf_params = PyscfHamiltonianParams( + name="TEST Square H4 chk save test", + n_orb=4, + n_elec=4, + geometry=( + ("H", (0, 0, 0)), + ("H", (0, 0, 1.23)), + ("H", (1.23, 0, 0)), + ("H", (1.23, 0, 1.23)), + ), + basis="sto3g", + multiplicity=1, + charge=0, + save_chkfile=True, + overwrite_chk_file=True, + path_prefix=str(tmp_path), + ) + + chk_path = pyscf_params.base_path.with_suffix(".chk") + chk_path.unlink(missing_ok=True) + + build_hamiltonian_from_pyscf(pyscf_params) + + assert chk_path.exists() + + chk_path.unlink() diff --git a/recirq/qcqmc/shadow_tomography.py b/recirq/qcqmc/shadow_tomography.py index 9524570b..7a2eb940 100644 --- a/recirq/qcqmc/shadow_tomography.py +++ b/recirq/qcqmc/shadow_tomography.py @@ -1,3 +1,16 @@ +# Copyright 2024 Google +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from math import gcd from typing import Dict, Iterable, List, Sequence, Type, cast diff --git a/recirq/qcqmc/shadow_tomography_test.py b/recirq/qcqmc/shadow_tomography_test.py index 67516903..1db41ba1 100644 --- a/recirq/qcqmc/shadow_tomography_test.py +++ b/recirq/qcqmc/shadow_tomography_test.py @@ -1,3 +1,17 @@ +# Copyright 2024 Google +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import cirq import numpy as np import quaff