From 85f10024e56dedae2ea35ae4fb0e380b1e4a014a Mon Sep 17 00:00:00 2001
From: jamesETsmith
Date: Fri, 1 Nov 2024 15:57:27 -0400
Subject: [PATCH] [feature/SPO_apply] First stab at C++ impl of optimized
 squaring of SummedPauliOp

---
 fast_pauli/cpp/include/__summed_pauli_op.hpp  | 78 +++++++++++++++++++
 fast_pauli/cpp/src/fast_pauli.cpp             |  8 ++
 fast_pauli/cpp/tests/test_summed_pauli_op.cpp | 61 +++++++++++++++
 3 files changed, 147 insertions(+)

diff --git a/fast_pauli/cpp/include/__summed_pauli_op.hpp b/fast_pauli/cpp/include/__summed_pauli_op.hpp
index bb3230c..49a32ea 100644
--- a/fast_pauli/cpp/include/__summed_pauli_op.hpp
+++ b/fast_pauli/cpp/include/__summed_pauli_op.hpp
@@ -20,6 +20,7 @@
 #include
 #include
+#include <unordered_map>
 
 #include "__pauli_string.hpp"
 #include "__type_traits.hpp"
 
@@ -45,6 +46,7 @@ template <std::floating_point T> struct SummedPauliOp
     // TODO dangerous
     size_t _dim;
     size_t _n_operators;
+    size_t _n_qubits;
 
    void __check_ctor_inputs(std::vector<fast_pauli::PauliString> const &pauli_strings, Tensor<2> const coeffs)
    {
@@ -88,6 +90,7 @@
         // TODO add more checks
         size_t const n_pauli_strings = pauli_strings.size();
         _dim = pauli_strings[0].dim();
+        _n_qubits = pauli_strings[0].n_qubits();
         _n_operators = coeffs_raw.size() / n_pauli_strings;
 
         coeffs = Tensor<2>(this->coeffs_raw.data(), n_pauli_strings, _n_operators);
@@ -111,6 +114,7 @@
 
         //
         _dim = pauli_strings[0].dim();
+        _n_qubits = pauli_strings[0].n_qubits();
         _n_operators = coeffs.extent(1);
 
         // Copy over the coeffs so our std::mdspan points at the memory owned by
@@ -145,6 +149,7 @@
 
         //
         _dim = this->pauli_strings.front().dim();
+        _n_qubits = this->pauli_strings.front().n_qubits();
         _n_operators = coeffs.extent(1);
 
         // Copy over the coeffs so our std::mdspan points at the memory owned by
@@ -168,6 +173,16 @@
         return _dim;
     }
 
+    /**
+     * @brief Return the number of qubits in the SummedPauliOp
+     *
+     * @return size_t
+     */
+    size_t n_qubits() const noexcept
+    {
+        return _n_qubits;
+    }
+
     /**
      * @brief Return the number of Pauli operators in the SummedPauliOp
     *
@@ -188,6 +203,69 @@
         return pauli_strings.size();
     }
 
+    fast_pauli::SummedPauliOp<T> square() const
+    {
+        // Get the squared pauli strings
+        size_t weight = std::reduce(
+            pauli_strings.begin(), pauli_strings.end(), size_t(0),
+            [](size_t a, fast_pauli::PauliString const &ps) { return std::max(a, static_cast<size_t>(ps.weight)); });
+        fmt::println("weight: {}", weight);
+        std::vector<fast_pauli::PauliString> pauli_strings_sq =
+            fast_pauli::calculate_pauli_strings_max_weight(_n_qubits, std::min(_n_qubits, 2UL * weight));
+
+        std::unordered_map<fast_pauli::PauliString, size_t> sq_idx_map;
+        for (size_t i = 0; i < pauli_strings_sq.size(); ++i)
+        {
+            sq_idx_map[pauli_strings_sq[i]] = i;
+        }
+
+        // Create the T_aij tensor that maps the coefficients from h_i * h_j to h'_a
+        // these are the same for all k operators
+        std::vector<std::complex<T>> t_aij_raw(pauli_strings_sq.size() * pauli_strings.size() * pauli_strings.size());
+        Tensor<3> t_aij(t_aij_raw.data(), pauli_strings_sq.size(), pauli_strings.size(), pauli_strings.size());
+
+        // #pragma omp parallel for collapse(2)
+        for (size_t i = 0; i < pauli_strings.size(); ++i)
+        {
+            for (size_t j = 0; j < pauli_strings.size(); ++j)
+            {
+                std::complex<double> phase;
+                fast_pauli::PauliString prod;
+                std::tie(phase, prod) = pauli_strings[i] * pauli_strings[j];
+                size_t a = sq_idx_map[prod];
+                t_aij(a, i, j) = phase;
+            }
+        }
+
+        // Contract the T_aij tensor with the coeffs to get the new coeffs
+        std::vector<std::complex<T>> coeffs_sq_raw(pauli_strings_sq.size() * _n_operators);
+        Tensor<2> coeffs_sq(coeffs_sq_raw.data(), pauli_strings_sq.size(), _n_operators);
+
+        // We want to do the following einsum:
+        // coeffs_sq(a, k) = sum_{i,j} t_aij(a, i, j) * coeffs(i, k) * coeffs(j, k)
+        // If we break it up into two steps, the intermediates will be too big, so we're
+        // just going to do it all at once
+
+        // #pragma omp parallel for collapse(2)
+        fmt::println("{} {} {} {}", pauli_strings_sq.size(),
+                     _n_operators, pauli_strings.size(), pauli_strings.size());
+        for (size_t a = 0; a < pauli_strings_sq.size(); ++a)
+        {
+            for (size_t k = 0; k < _n_operators; ++k)
+            {
+                for (size_t i = 0; i < pauli_strings.size(); ++i)
+                {
+                    for (size_t j = 0; j < pauli_strings.size(); ++j)
+                    {
+                        // fmt::println("{} {} {} {}", a, k, i, j);
+                        coeffs_sq(a, k) += t_aij(a, i, j) * coeffs(i, k) * coeffs(j, k);
+                    }
+                }
+            }
+        }
+
+        return SummedPauliOp<T>(pauli_strings_sq, coeffs_sq);
+    }
+
     /**
      * @brief Apply the SummedPauliOp to a set of states, mathematically
      * \f$ \big(\sum_k \sum_i h_{ik} \mathcal{\hat{P}}_i \big) \ket{\psi_t} \f$
diff --git a/fast_pauli/cpp/src/fast_pauli.cpp b/fast_pauli/cpp/src/fast_pauli.cpp
index 7b02abc..f5a82d0 100644
--- a/fast_pauli/cpp/src/fast_pauli.cpp
+++ b/fast_pauli/cpp/src/fast_pauli.cpp
@@ -1119,6 +1119,14 @@ Returns
 -------
 np.ndarray
     3D numpy array of complex numbers with a shape of (n_operators, 2^n_qubits, 2^n_qubits)
+)%")
+        //
+        .def("square", &fp::SummedPauliOp<float_type>::square, R"%(Square the SummedPauliOp.
+
+Returns
+-------
+SummedPauliOp
+    New SummedPauliOp instance
 )%");
     //
     ;
diff --git a/fast_pauli/cpp/tests/test_summed_pauli_op.cpp b/fast_pauli/cpp/tests/test_summed_pauli_op.cpp
index d34f089..c354334 100644
--- a/fast_pauli/cpp/tests/test_summed_pauli_op.cpp
+++ b/fast_pauli/cpp/tests/test_summed_pauli_op.cpp
@@ -407,3 +407,64 @@ TEST_CASE("expectation values multiple operators and states")
     elapsed = end - start;
     fmt::println("Time taken for parallel execution: {} seconds", elapsed.count());
 }
+
+void __check_square(size_t const n_qubits, size_t const n_operators)
+{
+    // Check that the square function works by using op.to_tensor() to create a dense tensor and then squaring the
+    // operators manually
+    size_t const dim = 1UL << n_qubits;
+    std::vector<fast_pauli::PauliString> pauli_strings = fast_pauli::calculate_pauli_strings_max_weight(n_qubits, 2);
+    std::vector<std::complex<double>> coeff_raw;
+    std::mdspan coeff = fast_pauli::rand<std::complex<double>, 2>(coeff_raw, {pauli_strings.size(), n_operators});
+    SummedPauliOp<double> op(pauli_strings, coeff);
+
+    fmt::println("Squaring operator");
+    SummedPauliOp<double> op2 = op.square();
+
+    fmt::println("Converting to dense tensor");
+    std::vector<std::complex<double>> op1_dense_raw(n_operators * dim * dim);
+    std::mdspan op1_dense(op1_dense_raw.data(), n_operators, dim, dim);
+    op.to_tensor(op1_dense);
+
+    std::vector<std::complex<double>> op2_dense_raw(n_operators * dim * dim);
+    std::mdspan op2_dense(op2_dense_raw.data(), n_operators, dim, dim);
+    op2.to_tensor(op2_dense);
+
+    std::vector<std::complex<double>> op1_sq_raw(n_operators * dim * dim);
+    std::mdspan op1_sq(op1_sq_raw.data(), n_operators, dim, dim);
+
+    fmt::println("Squaring dense tensor");
+    // Doing the einsum
+    // op1_sq(k,a,c) = sum_b op1_dense(k,a,b) * op1_dense(k,b,c)
+#pragma omp parallel for collapse(3)
+    for (size_t k = 0; k < n_operators; ++k)
+    {
+        for (size_t i = 0; i < dim; ++i)
+        {
+            for (size_t j = 0; j < dim; ++j)
+            {
+                for (size_t b = 0; b < dim; ++b)
+                {
+                    op1_sq(k, i, j) += op1_dense(k, i, b) * op1_dense(k, b, j);
+                }
+            }
+        }
+    }
+
+    fmt::println("Checking results");
+    // Check the results
+    for (size_t k = 0; k < n_operators; ++k)
+    {
+        for (size_t i = 0; i < dim; ++i)
+        {
+            for (size_t j = 0; j < dim; ++j)
+            {
+                CHECK(abs(op1_sq(k, i, j) - op2_dense(k, i, j)) < 1e-6);
+            }
+        }
+    }
+}
+TEST_CASE("square")
+{
+    __check_square(4, 10);
+}