Skip to content

Commit

Permalink
Parallelize the Hessian computation; change the pTensor3 memory layout
Browse files Browse the repository at this point in the history
  • Loading branch information
michel2323 committed Mar 20, 2018
1 parent 8de004a commit 491e0ea
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 17 deletions.
27 changes: 16 additions & 11 deletions include/ad.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,11 +247,14 @@ void t1s_driver(const pVector<double> &xic, pMatrix<double> &J) {
\post "Hessian"
*/
void t2s_t1s_driver(const pVector<double> &xic,
pMatrix<double> &J, pTensor3<double> &H) {
pMatrix<double> &J, pTensor3<double> &H, size_t start = 0, size_t end = 0) {
prof.begin("t2s_t1s_driver");
size_t dim = xic.dim();

// no end argument is set, hence pick dim as end
if(end == 0) end = dim;
pVector<t2s> axic(dim);
for (size_t i = 0; i < dim; ++i) {
for (size_t i = start; i < end; ++i) {
for (size_t j = 0; j < dim; ++j) {
for (size_t k = 0; k < dim; ++k) {
axic[k].value().value() = xic[k];
Expand All @@ -271,7 +274,7 @@ void t2s_t1s_driver(const pVector<double> &xic,
// }
}
for (size_t j = 0; j < dim; ++j) {
J[j][i] = axic[j].value().gradient();
// J[j][i] = axic[j].value().gradient();
}
}
prof.end("t2s_t1s_driver");
Expand Down Expand Up @@ -787,14 +790,16 @@ void propagateAD(pVector<double>& m0, pMatrix<double>& cv0, System& sys,
switch(degree) {
case 3:
// drivers.t3s_t2s_t1s_driver(m0, J, H, T);
drivers.t2s_t1s_driver(m0, J, H);
drivers.t2s_t1s_driver(m0, J, H, start, end);
paduprop_gather(H);
// H.cutoff(0.7);
std::cout << "H nz: " << H.nz() << std::endl;
H_tmp=H;
std::cout << "Cutrate: " << cutrate << std::endl;
H_tmp.cutoff(cutrate);
std::cout << "H_tmp nz: " << H_tmp.nz() << std::endl;
drivers.t3s_t2s_t1s_driver(m0, J, H_tmp, T, start, end);
drivers.t1s_driver(m0, J);
// T.zeros();
break;
case 2:
Expand Down Expand Up @@ -881,12 +886,12 @@ void propagateAD(pVector<double>& m0, pMatrix<double>& cv0, System& sys,
aux += ptr_J[pn+i*dim]*ptr_T[pm*dim*dim*chunk+j*dim*chunk+k*chunk+l-start];
aux += ptr_J[pn+j*dim]*ptr_T[pm*dim*dim*chunk+i*dim*chunk+k*chunk+l-start];
aux += ptr_J[pn+k*dim]*ptr_T[pm*dim*dim*chunk+i*dim*chunk+j*chunk+l-start];
aux += ptr_H[pn*dim*dim+i*dim+j]*ptr_H[pm*dim*dim+k*dim+l];
aux += ptr_H[pn*dim*dim+i*dim+k]*ptr_H[pm*dim*dim+j*dim+l];
aux += ptr_H[pn*dim*dim+i*dim+l]*ptr_H[pm*dim*dim+j*dim+k];
aux += ptr_H[pn*dim*dim+j*dim+k]*ptr_H[pm*dim*dim+i*dim+l];
aux += ptr_H[pn*dim*dim+j*dim+l]*ptr_H[pm*dim*dim+i*dim+k];
aux += ptr_H[pn*dim*dim+k*dim+l]*ptr_H[pm*dim*dim+i*dim+j];
aux += ptr_H[pn+i*dim+j*dim*dim]*ptr_H[pm+k*dim+l*dim*dim];
aux += ptr_H[pn+i*dim+k*dim*dim]*ptr_H[pm+j*dim+l*dim*dim];
aux += ptr_H[pn+i*dim+l*dim*dim]*ptr_H[pm+j*dim+k*dim*dim];
aux += ptr_H[pn+j*dim+k*dim*dim]*ptr_H[pm+i*dim+l*dim*dim];
aux += ptr_H[pn+j*dim+l*dim*dim]*ptr_H[pm+i*dim+k*dim*dim];
aux += ptr_H[pn+k*dim+l*dim*dim]*ptr_H[pm+i*dim+j*dim*dim];
aux += ptr_T[pn*dim*dim*chunk+j*dim*chunk+k*chunk+l-start]*ptr_J[pm+i*dim];
aux += ptr_T[pn*dim*dim*chunk+i*dim*chunk+k*chunk+l-start]*ptr_J[pm+j*dim];
aux += ptr_T[pn*dim*dim*chunk+i*dim*chunk+j*chunk+l-start]*ptr_J[pm+k*dim];
Expand Down Expand Up @@ -915,7 +920,7 @@ void propagateAD(pVector<double>& m0, pMatrix<double>& cv0, System& sys,
aux += ptr_T[pn*dim*dim*chunk+i*dim*chunk+j*chunk+k-start]*ptr_J[pm+dim*l];
aux += ptr_J[pn+dim*l]*ptr_T[pm*dim*dim*chunk+i*dim*chunk+j*chunk+k-start];
aux *= (1.0/(24.0))*kurt;
aux -= (1.0/2.0)*(ptr_H[pn*dim*dim+i*dim+j]*ptr_H[pm*dim*dim+k*dim+l])*ptr_cv0[i+dim*j]*ptr_cv0[k+dim*l];
aux -= (1.0/2.0)*(ptr_H[pn+i*dim+j*dim*dim]*ptr_H[pm+k*dim+l*dim*dim])*ptr_cv0[i+dim*j]*ptr_cv0[k+dim*l];
}
ptr_cv_temp2[pn+dim*pm] += aux;
}
Expand Down
1 change: 1 addition & 0 deletions include/parallel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,6 @@ size_t paduprop_getend(size_t dim);
int paduprop_getrank();
int paduprop_getcommsize();
int paduprop_sum(pMatrix<double>&);
int paduprop_gather(pTensor3<double> &ten);

#endif // ADUPROP_PARALLEL_HPP_
19 changes: 13 additions & 6 deletions include/tensor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,22 +29,29 @@ template <typename T> class pTensor3 {
class cd2 {
public:
T *ptr;
cd2(T *ptr_) : ptr(ptr_) {};
size_t &d2;
size_t &d3;
// cd2(T *ptr_) : ptr(ptr_) {};
cd2(T *ptr_, size_t &d2_, size_t &d3_) : ptr(ptr_), d2(d2_), d3(d3_) {};
T& operator[] (int i) {
return *(ptr+i);
return *(ptr+i*d2*d3);
}

};
T *ptr;
size_t &d1;
cd1(T *ptr_, size_t &d1_) : ptr(ptr_), d1(d1_) {};
// size_t &d1;
size_t &d2;
size_t &d3;
// cd1(T *ptr_, size_t &d1_) : ptr(ptr_), d1(d1_) {};
cd1(T *ptr_, size_t &d2_, size_t &d3_) : ptr(ptr_), d2(d2_), d3(d3_) {};
cd2 operator[] (int i) {
return cd2(ptr+i*d1);
return cd2(ptr+i*d2, d2, d3);
}

};
cd1 operator[] (int i) {
return cd1(data+i*d2*d3, d3);
// return cd1(data+i*d2*d3, d3);
return cd1(data+i, d2, d3);
}
friend std::ostream& operator<< <> ( std::ostream&, pTensor3<T>& );

Expand Down
50 changes: 50 additions & 0 deletions src/parallel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@ size_t paduprop_getstart(size_t dim) {
return start;
}

// Return the first (inclusive) index of the slice of [0, dim) assigned to
// `rank` when the range is split as evenly as possible over all MPI ranks.
// The first dim % commsize ranks each own one extra element.
size_t paduprop_getstart(size_t dim, int rank) {
  int commsize = 0;
  MPI_Comm_size(MPI_COMM_WORLD, &commsize);
  const size_t chunk = dim / commsize;
  const size_t remain = dim % commsize;
  // Every rank below `remain` holds chunk+1 elements, shifting this start by one each.
  const size_t extra = (rank > (int) remain) ? remain : (size_t) rank;
  return (rank == 0) ? 0 : (size_t) rank * chunk + extra;
}

size_t paduprop_getend(size_t dim) {
int rank; int commsize;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
Expand All @@ -43,6 +56,19 @@ size_t paduprop_getend(size_t dim) {
return end;
}

// Return the one-past-the-end index of the slice of [0, dim) assigned to
// `rank`; the counterpart of paduprop_getstart(dim, rank).
size_t paduprop_getend(size_t dim, int rank) {
  int commsize = 0;
  MPI_Comm_size(MPI_COMM_WORLD, &commsize);
  const size_t chunk = dim / commsize;
  const size_t remain = dim % commsize;
  // Ranks below `remain` own chunk+1 elements, so their end is pushed right.
  const size_t extra = (rank < (int) remain) ? (size_t) (rank + 1) : remain;
  size_t end = (size_t) (rank + 1) * chunk + extra;
  // The last rank always closes the range.
  if (rank == commsize - 1) end = dim;
  return end;
}

int paduprop_getrank() {
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
Expand All @@ -60,3 +86,27 @@ int paduprop_sum(pMatrix<double> &mat) {
double *ptr = mat.get_datap();
return MPI_Allreduce(MPI_IN_PLACE, ptr, size, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
}

// All-gather the rank-local slab of a distributed pTensor3 so that every
// rank ends up with the full dim x dim x dim tensor.
//
// Each rank owns slices [start, end) of the THIRD index (the partition
// produced by paduprop_getstart/getend). The slab is copied into a staging
// buffer and exchanged with MPI_Allgatherv using per-rank counts/offsets.
//
// Returns the MPI error code from MPI_Allgatherv.
//
// NOTE(review): the flat copy below assumes the slab ten[:][:][start..end)
// occupies a contiguous run of memory beginning at &ten[0][0][start], i.e.
// that the third index is the slowest-varying one in pTensor3's storage —
// confirm against the indexing operators in tensor.hpp.
int paduprop_gather(pTensor3<double> &ten) {
size_t dim = ten.get_d1();
// This rank's share of the third-index range.
size_t start = paduprop_getstart(dim);
size_t end = paduprop_getend(dim);
// Number of doubles in the local slab: (end-start) planes of dim*dim each.
size_t size = (end-start)*dim*dim;
double *sendbuf = new double[size];
// Stage the local slab contiguously for the collective (see NOTE above).
for(size_t i = 0; i < size; ++i) sendbuf[i] = *(&ten[0][0][start] + i);
int *recvcounts = new int[paduprop_getcommsize()];
int *displs = new int[paduprop_getcommsize()];
int count = 0;
// Recompute every rank's slab size/offset so all ranks agree on the
// Allgatherv layout without extra communication.
for(int i = 0; i < paduprop_getcommsize(); ++i) {
displs[i] = count;
recvcounts[i] = (paduprop_getend(dim,i) - paduprop_getstart(dim,i))*dim*dim;
count += recvcounts[i];
}
// Gather all slabs directly into the tensor's own storage; afterwards the
// full tensor is replicated on every rank.
int ierr = MPI_Allgatherv(sendbuf, size, MPI_DOUBLE,
&ten[0][0][0], recvcounts, displs, MPI_DOUBLE,
MPI_COMM_WORLD);
delete [] recvcounts;
delete [] displs;
delete [] sendbuf;
return ierr;
}

0 comments on commit 491e0ea

Please sign in to comment.