From 50a2cecdbd5203633c15c47fe4ef76fd1cfd93c3 Mon Sep 17 00:00:00 2001 From: Andrew Benson Date: Mon, 5 May 2014 12:05:09 -0700 Subject: [PATCH] Add OpenMP parallelism to "harmonize" - each OpenMP thread works on a different polygon. --- src/configure | 20 +++++++------- src/fframe.s.f | 1 + src/gspher.s.f | 2 ++ src/gsphera.s.f | 1 + src/gsubs.s.f | 3 +++ src/harmonize_polys.c | 61 +++++++++++++++++++++++++++++++++---------- src/ikrand.c | 6 +++++ 7 files changed, 70 insertions(+), 24 deletions(-) diff --git a/src/configure b/src/configure index 13aaf65..e3a0f3a 100755 --- a/src/configure +++ b/src/configure @@ -96,10 +96,10 @@ cat <> Makefile # Gnu CC = gcc -CFLAGS = -g -O3 -Wall -DLINUX -DGCC $MFLAG -D_FILE_OFFSET_BITS=64 +CFLAGS = -g -O3 -Wall -DLINUX -DGCC $MFLAG -D_FILE_OFFSET_BITS=64 -fopenmp F77 = gfortran -FFLAGS:= -Wall -g -O3 -DGFORTRAN -ff2c $MFLAG -D_FILE_OFFSET_BITS=64 +FFLAGS:= -Wall -g -O3 -DGFORTRAN -ff2c $MFLAG -D_FILE_OFFSET_BITS=64 -fopenmp STATICFLAGS:= -static #MAKE=gmake @@ -115,10 +115,10 @@ cat <> Makefile # Gnu CC = gcc -CFLAGS = -g -O3 -Wall -DMACOSX -DGCC $MFLAG -D_FILE_OFFSET_BITS=64 +CFLAGS = -g -O3 -Wall -DMACOSX -DGCC $MFLAG -D_FILE_OFFSET_BITS=64 -fopenmp F77 = gfortran -FFLAGS:= -Wall -g -O3 -DGFORTRAN -ff2c $MFLAG -D_FILE_OFFSET_BITS=64 +FFLAGS:= -Wall -g -O3 -DGFORTRAN -ff2c $MFLAG -D_FILE_OFFSET_BITS=64 -fopenmp #STATICFLAGS:= -static-libgfortran STATICFLAGS:= -nodefaultlibs -lSystem -lgcc -lm -lgfortran_static # static linking of gfortran library to compile for distribution. @@ -147,10 +147,10 @@ cat <> Makefile # Gnu CC = gcc -CFLAGS = -g -O3 -Wall -DMACOSX -DGCC -D_FILE_OFFSET_BITS=64 +CFLAGS = -g -O3 -Wall -DMACOSX -DGCC -D_FILE_OFFSET_BITS=64 -fopenmp F77 = gfortran -FFLAGS:= -Wall -g -O3 -DGFORTRAN -ff2c -D_FILE_OFFSET_BITS=64 +FFLAGS:= -Wall -g -O3 -DGFORTRAN -ff2c -D_FILE_OFFSET_BITS=64 -fopenmp #STATICFLAGS:= -static-libgfortran STATICFLAGS:= -nodefaultlibs -lSystem -lgcc -lm -lgfortran_static # static linking of gfortran library to compile for distribution. @@ -205,7 +205,7 @@ cat <> Makefile # Gnu CC = gcc -CFLAGS = -g -O3 -Wall -DLINUX -DGCC $MFLAG -D_FILE_OFFSET_BITS=64 +CFLAGS = -g -O3 -Wall -DLINUX -DGCC $MFLAG -D_FILE_OFFSET_BITS=64 -fopenmp ## f2c #F77 = fort77 @@ -215,7 +215,7 @@ CFLAGS = -g -O3 -Wall -DLINUX -DGCC $MFLAG -D_FILE_OFFSET_BITS=64 # gnu g77 (which assumes no SAVE) F77 = g77 -FFLAGS = -Wimplicit -fbounds-check -g -O3 -DG77 $MFLAG -D_FILE_OFFSET_BITS=64 +FFLAGS = -Wimplicit -fbounds-check -g -O3 -DG77 $MFLAG -D_FILE_OFFSET_BITS=64 -fopenmp #MAKE=gmake @@ -250,11 +250,11 @@ cat <> Makefile # Gnu CC = gcc -CFLAGS = -g -O3 -Wall -DMACOSX -DGCC -D_FILE_OFFSET_BITS=64 +CFLAGS = -g -O3 -Wall -DMACOSX -DGCC -D_FILE_OFFSET_BITS=64 -fopenmp # gnu g77 (which assumes no SAVE) F77 = g77 -FFLAGS = -Wimplicit -fbounds-check -g -O3 -DG77 -D_FILE_OFFSET_BITS=64 +FFLAGS = -Wimplicit -fbounds-check -g -O3 -DG77 -D_FILE_OFFSET_BITS=64 -fopenmp #MAKE=gmake diff --git a/src/fframe.s.f b/src/fframe.s.f index 260289a..8cea6e2 100644 --- a/src/fframe.s.f +++ b/src/fframe.s.f @@ -17,6 +17,7 @@ subroutine fframe(framei,azi,eli,framef,azf,elf) c saved local variables real*10 azg,elg,elp,l2z save azg,elg,elp,l2z +!$omp threadprivate(azg,elg,elp,l2z) c local (automatic) variables integer iaz real*10 date,dd,dec2k,dr,ra2k diff --git a/src/gspher.s.f b/src/gspher.s.f index ce656c6..1ea4cde 100644 --- a/src/gspher.s.f +++ b/src/gspher.s.f @@ -202,6 +202,8 @@ subroutine gspher(area,bound,vert,w,lmax1,im,nw,rp,cm,np,npc,ibv, c number of non-intersecting circles bounding area nbd0m=0 nbd0p=0 +c number of multiple intersections + nmult=0 c error check on evaluation of vertex terms ikchk=0._10 c area=sqrt(4pi)*monopole diff --git a/src/gsphera.s.f b/src/gsphera.s.f index 9c89541..e895014 100644 --- a/src/gsphera.s.f +++ b/src/gsphera.s.f @@ -18,6 +18,7 @@ subroutine gsphera(area,bound,vert,w,lmax1,im,nw,ibv, c saved variables real*10 cl,cu,dth,sl,su save cl,cu,dth,sl,su +!$omp threadprivate(cl,cu,dth,sl,su) c local (automatic) variables integer i,l,m,lm,lmax,mmax real*10 azmx,cmph,d,dph,ph,smph,thmin,thmax diff --git a/src/gsubs.s.f b/src/gsubs.s.f index 0c98ad3..0beb66a 100644 --- a/src/gsubs.s.f +++ b/src/gsubs.s.f @@ -336,7 +336,10 @@ integer function gsegij(rp,cm,np,npb,npc,i,rpi,scmi,cmi,tol,ni, real*10 bik,cmik,cmk,d,psi,psim(2),dphc,si c local variables to be saved integer jl,ju + data jl /0/ + data ju /0/ save jl,ju +!$omp threadprivate(jl,ju) c * c * Determine whether next segment of i circle c * is an edge of the polygon. diff --git a/src/harmonize_polys.c b/src/harmonize_polys.c index b5c91a5..a774b50 100644 --- a/src/harmonize_polys.c +++ b/src/harmonize_polys.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "manglefn.h" #include "pi.h" @@ -24,20 +25,15 @@ */ int harmonize_polys(int npoly, polygon *poly[/*npoly*/], long double mtol, int lmax, harmonic w[/*NW*/]) { - int accelerate, i, ier, ip, ipoly, iq, ir, isrect, iw, naccelerate, ndone, ner, nrect; - long double azmin, azmax, elmin, elmax, azmn, azmx, elmn, elmx, tol; - /* work array contains harmonics of single polygon */ + int accelerate, i, ier, ger, ip, ipoly, iq, ir, isrect, iw, naccelerate, ndone, ner, nrect; + long double azmin, azmax, elmin, elmax, azmn, azmx, elmn, elmx, tol; + /* work array contains harmonics of single polygon */ harmonic *dw; /* work arrays to deal with possible acceleration */ int *iord, *ir_to_ip; long double *elord; /* work arrays */ - dw = (harmonic *) malloc(sizeof(harmonic) * NW); - if (!dw) { - fprintf(stderr, "harmonize_polys: failed to allocate memory for %d harmonics\n", NW); - return(-1); - } iord = (int *) malloc(sizeof(int) * npoly); if (!iord) { fprintf(stderr, "harmonize_polys: failed to allocate memory for %d ints\n", npoly); @@ -85,14 +81,29 @@ int harmonize_polys(int npoly, polygon *poly[/*npoly*/], long double mtol, int l ndone = 0; naccelerate = 0; ner = 0; - if (lmax >= LMAX_ADVICE) msg("doing polygon number (of %d):\n", npoly); - for (ip = 0; ip < npoly; ip++) { + ger = 0; + if (lmax >= LMAX_ADVICE) msg("doing polygon number (of %d):\n", npoly); +#pragma omp parallel private(dw,i,ip,iq,ir,iw,ier,ipoly,azmin,azmax,elmin,elmax,azmn,azmx,elmn,elmx,accelerate,tol) + { + /* work arrays */ + dw = (harmonic *) malloc(sizeof(harmonic) * NW); + if (!dw) { + fprintf(stderr, "harmonize_polys: failed to allocate memory for %d harmonics\n", NW); +#pragma omp critical(globalError) + ger = -1; + } +#pragma omp barrier + if ( ger == 0 ) { +#pragma omp for + for (ip = 0; ip < npoly; ip++) { + if (ger != -1) { if (lmax >= LMAX_ADVICE) msg(" %d", ip); accelerate = 0; /* rectangle */ if (ip < nrect) { ir = iord[ip]; ipoly = ir_to_ip[ir]; + if ( !omp_in_parallel() ) { poly_to_rect(poly[ipoly], &azmin, &azmax, &elmin, &elmax); /* does previous rectangle have same elevation limits? */ if (ip > 0) { @@ -110,46 +121,69 @@ int harmonize_polys(int npoly, polygon *poly[/*npoly*/], long double mtol, int l /* if so, worth accelerating */ if (elmn == elmin && elmx == elmax) accelerate = 1; } + } /* accelerated computation */ if (accelerate) { ier = gsphra(azmin, azmax, elmin, elmax, lmax, dw); - if (ier == -1) return(-1); + if (ier == -1) { +#pragma omp critical(globalError) + ger = -1; + continue; + } /* standard computation */ } else { tol = mtol; ier = gsphr(poly[ipoly], lmax, &tol, dw); - if (ier == -1) return(-1); + if (ier == -1) { +#pragma omp critical(globalError) + ger = -1; + continue; + } } /* non-rectangle */ } else { ipoly = ir_to_ip[ip]; /* zero weight polygon requires no computation */ if (poly[ipoly]->weight == 0.) { +#pragma omp atomic ndone++; continue; } else { tol = mtol; ier = gsphr(poly[ipoly], lmax, &tol, dw); - if (ier == -1) return(-1); + if (ier == -1) { +#pragma omp critical(globalError) + ger = -1; + continue; + } } } /* computation failed */ if (ier) { +#pragma omp atomic ner++; if (lmax >= LMAX_ADVICE) msg("\n"); fprintf(stderr, "harmonize_polys: computation failed for polygon %d; discard it\n", ipoly); /* success */ } else { +#pragma omp atomic naccelerate += accelerate; +#pragma omp atomic ndone++; /* increment harmonics of region */ for (iw = 0; iw < NW; iw++) { for (i = 0; i < IM; i++) { +#pragma omp atomic w[iw][i] += dw[iw][i] * poly[ipoly]->weight; } } } + } + } + } + free(dw); } + if (ger == -1) return(-1); if (lmax >= LMAX_ADVICE) msg("\n"); /* number of computations that were accelerated */ @@ -161,7 +195,6 @@ int harmonize_polys(int npoly, polygon *poly[/*npoly*/], long double mtol, int l msg("spherical harmonics of %d weighted polygons accumulated\n", ndone); /* free work arrays */ - free(dw); free(iord); free(ir_to_ip); free(elord); diff --git a/src/ikrand.c b/src/ikrand.c index 8f0f7b7..3018028 100644 --- a/src/ikrand.c +++ b/src/ikrand.c @@ -17,6 +17,9 @@ void ikrand_(int *ik, double *ikran) unsigned long *likran; unsigned long long *llikran; +#pragma omp critical(ikrand) + { + /* seed random number generator with *ik */ srandom((unsigned int) *ik); @@ -31,6 +34,9 @@ void ikrand_(int *ik, double *ikran) *llikran <<= (8 * sizeof(unsigned int)); *llikran |= (unsigned int)random(); } + + } + } /*------------------------------------------------------------------------------