diff --git a/include/ad.hpp b/include/ad.hpp
index 5275e8b..6ed88a4 100644
--- a/include/ad.hpp
+++ b/include/ad.hpp
@@ -835,7 +835,8 @@ void propagateAD(pVector& m0, pMatrix& cv0, System& sys,
   drivers.integrate(m0);
 
   if (degree > 1) {
-    for (size_t p = 0; p < dim; ++p) {
+    for (size_t p = start; p < end; ++p) {
+    // for (size_t p = 0; p < dim; ++p) {
       for (size_t i = 0; i < dim; ++i) {
         for (size_t j = 0; j < dim; ++j) {
           m0[p] += (1.0/2.0)*H[p][i][j]*cv0[i][j];
@@ -843,6 +844,7 @@ void propagateAD(pVector& m0, pMatrix& cv0, System& sys,
       }
     }
   }
+  paduprop_gather(m0);
   global_prof.end("propagateMU");
 
   if(paduprop_getrank() == 0) {
@@ -852,8 +854,8 @@ void propagateAD(pVector& m0, pMatrix& cv0, System& sys,
 
   // Propagate covariance
   global_prof.begin("propagateCOV");
-  for (size_t pn = 0; pn < dim; ++pn) {
-    for (size_t pm = 0; pm < dim; ++pm) {
+  for (size_t pm = start; pm < end; ++pm) {
+    for (size_t pn = 0; pn < dim; ++pn) {
       for (size_t i = 0; i < dim; ++i) {
         for (size_t j = 0; j < dim; ++j) {
           cv_temp[pn][pm] += 0.5*((J[pn][j]*J[pm][i] +
@@ -862,6 +864,7 @@ void propagateAD(pVector& m0, pMatrix& cv0, System& sys,
       }
     }
   }
+  paduprop_gather(cv_temp);
 
   if (degree > 2) {
     if(paduprop_getrank() == 0) {
diff --git a/include/parallel.hpp b/include/parallel.hpp
index d1a0e32..385c96b 100644
--- a/include/parallel.hpp
+++ b/include/parallel.hpp
@@ -14,6 +14,9 @@ size_t paduprop_getend(size_t dim);
 int paduprop_getrank();
 int paduprop_getcommsize();
 int paduprop_sum(pMatrix&);
+int paduprop_sum(pVector &vec);
 int paduprop_gather(pTensor3 &ten);
+int paduprop_gather(pMatrix &mat);
+int paduprop_gather(pVector &vec);
 
 #endif // ADUPROP_PARALLEL_HPP_
diff --git a/src/parallel.cpp b/src/parallel.cpp
index 1c0e27c..201c705 100644
--- a/src/parallel.cpp
+++ b/src/parallel.cpp
@@ -87,6 +87,12 @@ int paduprop_sum(pMatrix &mat) {
   return MPI_Allreduce(MPI_IN_PLACE, ptr, size, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
 }
+int paduprop_sum(pVector &vec) {
+  size_t size = vec.dim();
+  double *ptr = vec.get_datap();
+  return MPI_Allreduce(MPI_IN_PLACE, ptr, size, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+}
+
 int paduprop_gather(pTensor3 &ten) {
   size_t dim = ten.get_d1();
   size_t start = paduprop_getstart(dim);
   size_t end = paduprop_getend(dim);
@@ -110,3 +116,51 @@ int paduprop_gather(pTensor3 &ten) {
   delete [] sendbuf;
   return ierr;
 }
+
+int paduprop_gather(pMatrix &mat) {
+  size_t dim = mat.nrows();
+  size_t start = paduprop_getstart(dim);
+  size_t end = paduprop_getend(dim);
+  size_t size = (end-start)*dim;
+  double *sendbuf = new double[size];
+  for(size_t i = 0; i < size; ++i) sendbuf[i] = *(&mat[0][start] + i);
+  int *recvcounts = new int[paduprop_getcommsize()];
+  int *displs = new int[paduprop_getcommsize()];
+  int count = 0;
+  for(int i = 0; i < paduprop_getcommsize(); ++i) {
+    displs[i] = count;
+    recvcounts[i] = (paduprop_getend(dim,i) - paduprop_getstart(dim,i))*dim;
+    count += recvcounts[i];
+  }
+  int ierr = MPI_Allgatherv(sendbuf, size, MPI_DOUBLE,
+                            &mat[0][0], recvcounts, displs, MPI_DOUBLE,
+                            MPI_COMM_WORLD);
+  delete [] recvcounts;
+  delete [] displs;
+  delete [] sendbuf;
+  return ierr;
+}
+
+int paduprop_gather(pVector &vec) {
+  size_t dim = vec.dim();
+  size_t start = paduprop_getstart(dim);
+  size_t end = paduprop_getend(dim);
+  size_t size = end-start;
+  double *sendbuf = new double[size];
+  for(size_t i = 0; i < size; ++i) sendbuf[i] = vec[start+i];
+  int *recvcounts = new int[paduprop_getcommsize()];
+  int *displs = new int[paduprop_getcommsize()];
+  int count = 0;
+  for(int i = 0; i < paduprop_getcommsize(); ++i) {
+    displs[i] = count;
+    recvcounts[i] = (paduprop_getend(dim,i) - paduprop_getstart(dim,i));
+    count += recvcounts[i];
+  }
+  int ierr = MPI_Allgatherv(sendbuf, size, MPI_DOUBLE,
+                            &vec[0], recvcounts, displs, MPI_DOUBLE,
+                            MPI_COMM_WORLD);
+  delete [] recvcounts;
+  delete [] displs;
+  delete [] sendbuf;
+  return ierr;
+}
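
For context, the pattern this patch introduces in propagateAD is: each rank updates only its contiguous index block [start, end), and a collective gather then reassembles the full result on every rank. Below is a minimal, self-contained sketch of that partition-compute-gather pattern for the vector case. It is illustrative only: it uses a plain std::vector and an even block partition that is assumed to mirror paduprop_getstart/paduprop_getend, rather than the library's pVector and paduprop_gather.

// sketch.cpp -- compile with mpic++, run with mpiexec -n <ranks> ./a.out
#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);
  int rank = 0, nprocs = 1;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  const int dim = 12;  // arbitrary problem size for the sketch
  // Contiguous block partition of [0, dim); assumed stand-in for
  // paduprop_getstart/paduprop_getend.
  auto blk_start = [&](int r) { return dim * r / nprocs; };
  auto blk_end   = [&](int r) { return dim * (r + 1) / nprocs; };
  const int start = blk_start(rank), end = blk_end(rank);

  // Each rank computes only its own slice (cf. the p = start..end loop
  // the patch introduces for the mean update).
  std::vector<double> vec(dim, 0.0);
  for (int p = start; p < end; ++p) vec[p] = 2.0 * p;

  // Pack the owned slice and gather all slices onto every rank,
  // analogous to the new paduprop_gather(pVector&).
  std::vector<double> sendbuf(vec.begin() + start, vec.begin() + end);
  std::vector<int> recvcounts(nprocs), displs(nprocs);
  for (int r = 0, ofs = 0; r < nprocs; ++r) {
    recvcounts[r] = blk_end(r) - blk_start(r);
    displs[r] = ofs;
    ofs += recvcounts[r];
  }
  MPI_Allgatherv(sendbuf.data(), end - start, MPI_DOUBLE,
                 vec.data(), recvcounts.data(), displs.data(), MPI_DOUBLE,
                 MPI_COMM_WORLD);

  if (rank == 0)
    for (int p = 0; p < dim; ++p) std::printf("vec[%d] = %g\n", p, vec[p]);

  MPI_Finalize();
  return 0;
}

After the MPI_Allgatherv, every rank holds the complete vector. A possible alternative is to pass MPI_IN_PLACE as the send buffer to MPI_Allgatherv, which avoids the explicit pack step when each rank's slice already sits at its final offset in the receive buffer.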