diff --git a/fast_pauli/cpp/examples/03_summed_pauli_op.cpp b/fast_pauli/cpp/examples/03_summed_pauli_op.cpp index 2885e65..32a0bb6 100644 --- a/fast_pauli/cpp/examples/03_summed_pauli_op.cpp +++ b/fast_pauli/cpp/examples/03_summed_pauli_op.cpp @@ -28,8 +28,7 @@ int main() // User settings // size_t const n_operators = 1000; - // size_t const n_paulis_per_operator = 631; - size_t const n_qubits = 14; + size_t const n_qubits = 12; size_t const weight = 2; size_t const n_states = 1000; using fp_type = double; diff --git a/fast_pauli/cpp/include/__summed_pauli_op.hpp b/fast_pauli/cpp/include/__summed_pauli_op.hpp index 011ef10..bb3230c 100644 --- a/fast_pauli/cpp/include/__summed_pauli_op.hpp +++ b/fast_pauli/cpp/include/__summed_pauli_op.hpp @@ -336,8 +336,9 @@ template struct SummedPauliOp #pragma omp parallel { + // Contract the coeffs with the data since we can reuse this below -#pragma omp for schedule(static) collapse(2) +#pragma omp for schedule(dynamic) collapse(2) for (size_t j = 0; j < n_ps; ++j) { for (size_t t = 0; t < n_data; ++t) @@ -345,6 +346,7 @@ template struct SummedPauliOp for (size_t k = 0; k < n_ops; ++k) { // TODO we should transpose the data here for better memory access + // initial tests didn't show much improvement, but something to try later weighted_coeffs(j, t) += coeffs(j, k) * data(k, t); } } @@ -354,13 +356,10 @@ template struct SummedPauliOp std::vector> new_states_j_raw(n_data * n_dim); Tensor<2> new_states_j(new_states_j_raw.data(), n_dim, n_data); - // std::vector> states_j_T_raw(n_data * n_dim); - // Tensor<2> states_j_T(states_j_T_raw.data(), n_data, n_dim); - std::mdspan new_states_th_local = std::submdspan(new_states_th, omp_get_thread_num(), std::full_extent, std::full_extent); -#pragma omp for schedule(static) +#pragma omp for schedule(dynamic) for (size_t j = 0; j < n_ps; ++j) { // new psi_prime @@ -375,9 +374,8 @@ template struct SummedPauliOp } } } - -// Reduce -#pragma omp for schedule(static) collapse(2) + // Reduce +#pragma omp for schedule(dynamic) collapse(2) for (size_t l = 0; l < n_dim; ++l) { for (size_t t = 0; t < n_data; ++t) @@ -490,22 +488,37 @@ template struct SummedPauliOp if constexpr (is_parallel_execution_policy_v) { -#pragma omp parallel for schedule(static) - for (size_t j = 0; j < n_pauli_strings(); ++j) +#pragma omp parallel { - std::mdspan expectation_vals_j = std::submdspan(expectation_vals, j, std::full_extent); - pauli_strings[j].expectation_value(expectation_vals_j, states); - } +#pragma omp for schedule(static) + for (size_t j = 0; j < n_pauli_strings(); ++j) + { + std::mdspan expectation_vals_j = std::submdspan(expectation_vals, j, std::full_extent); + pauli_strings[j].expectation_value(expectation_vals_j, states); + } - // einsum("jk,jt->kt", coeffs, expectation_vals) -#pragma omp parallel for collapse(2) - for (size_t t = 0; t < n_data; ++t) - { + // einsum("jk,jt->kt", coeffs, expectation_vals) + std::vector> coeffs_T_raw(n_ops * n_pauli_strings()); + std::mdspan coeffs_T(coeffs_T_raw.data(), n_ops, n_pauli_strings()); + +#pragma omp for schedule(static) collapse(2) for (size_t k = 0; k < n_ops; ++k) { for (size_t j = 0; j < n_pauli_strings(); ++j) { - expectation_vals_out(k, t) += coeffs(j, k) * expectation_vals(j, t); + coeffs_T(k, j) = coeffs(j, k); + } + } + +#pragma omp for collapse(2) schedule(static) + for (size_t k = 0; k < n_ops; ++k) + { + for (size_t t = 0; t < n_data; ++t) + { + for (size_t j = 0; j < n_pauli_strings(); ++j) + { + expectation_vals_out(k, t) += coeffs(j, k) * expectation_vals(j, t); + } } } } diff --git a/fast_pauli/cpp/src/fast_pauli.cpp b/fast_pauli/cpp/src/fast_pauli.cpp index dd27bbf..7b02abc 100644 --- a/fast_pauli/cpp/src/fast_pauli.cpp +++ b/fast_pauli/cpp/src/fast_pauli.cpp @@ -1068,7 +1068,7 @@ np.ndarray { auto states_mdspan = fp::__detail::ndarray_to_mdspan(states); auto states_mdspan_2d = std::mdspan(states_mdspan.data_handle(), states_mdspan.size(), 1); - std::array out_shape = {1}; + std::array out_shape = {self.n_operators()}; auto expected_vals_out = fp::__detail::owning_ndarray_from_shape(out_shape); auto expected_vals_out_mdspan = std::mdspan(expected_vals_out.data(), expected_vals_out.size(), 1); self.expectation_value(std::execution::par, expected_vals_out_mdspan, states_mdspan_2d); diff --git a/fast_pauli/cpp/tests/test_summed_pauli_op.cpp b/fast_pauli/cpp/tests/test_summed_pauli_op.cpp index a8c9670..df4a82f 100644 --- a/fast_pauli/cpp/tests/test_summed_pauli_op.cpp +++ b/fast_pauli/cpp/tests/test_summed_pauli_op.cpp @@ -287,10 +287,10 @@ TEST_CASE("apply weighted many operators many PauliString") { fmt::print("\n\napply many operators many PauliString\n"); // Setup operator - std::vector pauli_strings = fast_pauli::calculate_pauli_strings_max_weight(8, 2); + std::vector pauli_strings = fast_pauli::calculate_pauli_strings_max_weight(6, 2); std::vector> coeff_raw; - std::mdspan coeff = fast_pauli::rand, 2>(coeff_raw, {pauli_strings.size(), 100}); + std::mdspan coeff = fast_pauli::rand, 2>(coeff_raw, {pauli_strings.size(), 1000}); auto start_seq = std::chrono::high_resolution_clock::now(); __check_apply_weighted(std::execution::seq, pauli_strings, coeff, 100); diff --git a/tests/fast_pauli/test_summed_pauli_op.py b/tests/fast_pauli/test_summed_pauli_op.py index 8faed20..a1a0981 100644 --- a/tests/fast_pauli/test_summed_pauli_op.py +++ b/tests/fast_pauli/test_summed_pauli_op.py @@ -150,28 +150,29 @@ def test_expectation_values( n_strings = len(pauli_strings) coeffs_2d = np.random.rand(n_strings, n_operators).astype(np.complex128) - psi = np.random.rand(2**n_qubits, n_states).astype(np.complex128) - - # For a single state, test that a 1D array works - if n_states == 1: - psi = psi[:, 0] - - op = summed_pauli_op(pauli_strings, coeffs_2d) - - # The expectation values we want to test - expectation_vals = op.expectation_value(psi) - - # The "trusted" expectation_values - expectation_vals_naive = np.zeros((n_operators, n_states), dtype=np.complex128) - # For a single state, test that a 1D array works if n_states == 1: - expectation_vals_naive = expectation_vals_naive[:, 0] - - # Calculate using brute force - for k in range(n_operators): - A_k = pp.helpers.naive_pauli_operator(coeffs_2d[:, k], pauli_strings_str) - expectation_vals_naive[k] = np.einsum("it,ij,jt->t", psi.conj(), A_k, psi) + psi = np.random.rand(2**n_qubits).astype(np.complex128) + op = summed_pauli_op(pauli_strings, coeffs_2d) + # The expectation values we want to test + expectation_vals = op.expectation_value(psi) + # The "trusted" expectation_values + expectation_vals_naive = np.zeros((n_operators), dtype=np.complex128) + for k in range(n_operators): + A_k = pp.helpers.naive_pauli_operator(coeffs_2d[:, k], pauli_strings_str) + expectation_vals_naive[k] = np.einsum("i,ij,j->", psi.conj(), A_k, psi) + + else: + psi = np.random.rand(2**n_qubits, n_states).astype(np.complex128) + op = summed_pauli_op(pauli_strings, coeffs_2d) + # The expectation values we want to test + expectation_vals = op.expectation_value(psi) + # The "trusted" expectation_values + expectation_vals_naive = np.zeros((n_operators, n_states), dtype=np.complex128) + + for k in range(n_operators): + A_k = pp.helpers.naive_pauli_operator(coeffs_2d[:, k], pauli_strings_str) + expectation_vals_naive[k] = np.einsum("it,ij,jt->t", psi.conj(), A_k, psi) # Check np.testing.assert_allclose(expectation_vals, expectation_vals_naive, atol=1e-13)