diff --git a/src/specfem3D/compute_add_sources_acoustic.f90 b/src/specfem3D/compute_add_sources_acoustic.f90 index 85866872d..676f57bc0 100644 --- a/src/specfem3D/compute_add_sources_acoustic.f90 +++ b/src/specfem3D/compute_add_sources_acoustic.f90 @@ -31,7 +31,9 @@ subroutine compute_add_sources_acoustic(potential_dot_dot_acoustic) use constants use specfem_par, only: station_name,network_name, & - nsources_local,tshift_src,DT,t0,SU_FORMAT,USE_LDDRK,istage, & + nsources_local,tshift_src,DT,t0, & + SU_FORMAT,READ_ADJSRC_ASDF, & + USE_LDDRK,istage, & hxir_adjstore,hetar_adjstore,hgammar_adjstore,source_adjoint,number_adjsources_global,nadj_rec_local, & USE_BINARY_FOR_SEISMOGRAMS, & ibool,NSOURCES,myrank,it,ispec_selected_source,islice_selected_source, & @@ -62,20 +64,9 @@ subroutine compute_add_sources_acoustic(potential_dot_dot_acoustic) character(len=MAX_STRING_LEN) :: adj_source_file - ! sets current initial time - if (USE_LDDRK) then - ! LDDRK - ! note: the LDDRK scheme updates displacement after the stiffness computations and - ! after adding boundary/coupling/source terms. - ! thus, at each time loop step it, displ(:) is still at (n) and not (n+1) like for the Newmark scheme - ! when entering this routine. we therefore at an additional -DT to have the corresponding timing for the source. - time_t = dble(it-1-1)*DT + dble(C_LDDRK(istage))*DT - t0 - else - time_t = dble(it-1)*DT - t0 - endif - -! forward simulations + ! forward simulations if (SIMULATION_TYPE == 1 .and. nsources_local > 0) then + ! ignore pressure sources for fault rupture simulations if (FAULT_SIMULATION) return @@ -83,6 +74,18 @@ subroutine compute_add_sources_acoustic(potential_dot_dot_acoustic) ! because the source is precisely the wavefield coming from the DSM traction file if (COUPLE_WITH_INJECTION_TECHNIQUE) return + ! sets current initial time + if (USE_LDDRK) then + ! LDDRK + ! note: the LDDRK scheme updates displacement after the stiffness computations and + ! after adding boundary/coupling/source terms. + ! thus, at each time loop step it, displ(:) is still at (n) and not (n+1) like for the Newmark scheme + ! when entering this routine. we therefore at an additional -DT to have the corresponding timing for the source. + time_t = dble(it-1-1)*DT + dble(C_LDDRK(istage))*DT - t0 + else + time_t = dble(it-1)*DT - t0 + endif + ! openmp solver !$OMP PARALLEL if (NSOURCES > 100) & !$OMP DEFAULT(SHARED) & @@ -179,25 +182,34 @@ subroutine compute_add_sources_acoustic(potential_dot_dot_acoustic) ! with other partitions while we calculate for the inner part ! this must be done carefully, otherwise the adjoint sources may be added twice if (ibool_read_adj_arrays .and. .not. INVERSE_FWI_FULL_PROBLEM) then - - if (.not. SU_FORMAT) then - ! ASCII format + ! reads adjoint source files + if (SU_FORMAT) then + ! SU format + call compute_arrays_adjoint_source_SU(IDOMAIN_ACOUSTIC) + else if (READ_ADJSRC_ASDF) then + ! ASDF format + do irec_local = 1, nadj_rec_local + ! reads in **net**.**sta**.**BH**.adj files + irec = number_adjsources_global(irec_local) + adj_source_file = trim(network_name(irec))//'_'//trim(station_name(irec)) ! format: "net_sta" + ! compute source arrays + call compute_arrays_adjoint_source(adj_source_file,irec_local) + enddo + else + ! default ASCII format if (USE_BINARY_FOR_SEISMOGRAMS) stop 'Adjoint simulations not supported with .bin format, please use SU format instead' - !!! read ascii adjoint sources do irec_local = 1, nadj_rec_local - irec = number_adjsources_global(irec_local) ! reads in **net**.**sta**.**BH**.adj files - adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) - call compute_arrays_adjoint_source(adj_source_file,irec) + irec = number_adjsources_global(irec_local) + adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) ! format: "net.sta" + ! compute source arrays + call compute_arrays_adjoint_source(adj_source_file,irec_local) enddo - else - ! SU format - call compute_arrays_adjoint_source_SU() - endif !if (.not. SU_FORMAT) - + endif endif ! if (ibool_read_adj_arrays) + ! adds source term if (it < NSTEP) then ! receivers act as sources do irec_local = 1, nadj_rec_local @@ -251,7 +263,8 @@ end subroutine compute_add_sources_acoustic subroutine compute_add_sources_acoustic_backward(b_potential_dot_dot_acoustic) use constants - use specfem_par, only: nsources_local,tshift_src,DT,t0,USE_LDDRK,istage, & + use specfem_par, only: nsources_local,tshift_src,DT,t0, & + USE_LDDRK,istage, & ibool,NSOURCES,myrank,it,islice_selected_source,ispec_selected_source, & sourcearrays,kappastore,SIMULATION_TYPE,NSTEP,NGLOB_AB @@ -408,13 +421,14 @@ subroutine compute_add_sources_acoustic_GPU() use constants use specfem_par, only: station_name,network_name, & - nsources_local,tshift_src,DT,t0,SU_FORMAT,USE_LDDRK,istage, & + nsources_local,tshift_src,DT,t0, & + SU_FORMAT,READ_ADJSRC_ASDF, & + USE_LDDRK,istage, & source_adjoint,nadj_rec_local,number_adjsources_global, & USE_BINARY_FOR_SEISMOGRAMS, & NSOURCES,it,SIMULATION_TYPE,NSTEP,nrec, & NTSTEP_BETWEEN_READ_ADJSRC,Mesh_pointer, & - INVERSE_FWI_FULL_PROBLEM,run_number_of_the_source, & - GPU_MODE + INVERSE_FWI_FULL_PROBLEM,run_number_of_the_source ! coupling use shared_parameters, only: COUPLE_WITH_INJECTION_TECHNIQUE @@ -438,11 +452,9 @@ subroutine compute_add_sources_acoustic_GPU() character(len=MAX_STRING_LEN) :: adj_source_file - ! checks if anything to do - if (.not. GPU_MODE) return - ! forward simulations if (SIMULATION_TYPE == 1 .and. nsources_local > 0) then + ! ignore pressure sources for fault rupture simulations if (FAULT_SIMULATION) return @@ -525,24 +537,34 @@ subroutine compute_add_sources_acoustic_GPU() ! with other partitions while we calculate for the inner part ! this must be done carefully, otherwise the adjoint sources may be added twice if (ibool_read_adj_arrays .and. .not. INVERSE_FWI_FULL_PROBLEM) then - - if (.not. SU_FORMAT) then - ! ASCII format + ! reads adjoint source files + if (SU_FORMAT) then + ! SU format + call compute_arrays_adjoint_source_SU(IDOMAIN_ACOUSTIC) + else if (READ_ADJSRC_ASDF) then + ! ASDF format + do irec_local = 1, nadj_rec_local + ! reads in **net**.**sta**.**BH**.adj files + irec = number_adjsources_global(irec_local) + adj_source_file = trim(network_name(irec))//'_'//trim(station_name(irec)) ! format: "net_sta" + ! compute source arrays + call compute_arrays_adjoint_source(adj_source_file,irec_local) + enddo + else + ! default ASCII format if (USE_BINARY_FOR_SEISMOGRAMS) stop 'Adjoint simulations not supported with .bin format, please use SU format instead' !!! read ascii adjoint sources do irec_local = 1, nadj_rec_local - irec = number_adjsources_global(irec_local) ! reads in **net**.**sta**.**BH**.adj files - adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) - call compute_arrays_adjoint_source(adj_source_file,irec) + irec = number_adjsources_global(irec_local) + adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) ! format: "net.sta" + ! compute source arrays + call compute_arrays_adjoint_source(adj_source_file,irec_local) enddo - else - ! SU format - call compute_arrays_adjoint_source_SU() - endif !if (.not. SU_FORMAT) - + endif endif ! if (ibool_read_adj_arrays) + ! adds source term if (it < NSTEP) then ! receivers act as sources ! on GPU @@ -560,10 +582,11 @@ end subroutine compute_add_sources_acoustic_GPU subroutine compute_add_sources_acoustic_backward_GPU() use constants - use specfem_par, only: nsources_local,tshift_src,DT,t0,USE_LDDRK,istage, & + use specfem_par, only: nsources_local,tshift_src,DT,t0, & + USE_LDDRK,istage, & NSOURCES,myrank,it, & SIMULATION_TYPE,NSTEP, & - GPU_MODE,Mesh_pointer,run_number_of_the_source + Mesh_pointer,run_number_of_the_source ! undo_att use specfem_par, only: UNDO_ATTENUATION_AND_OR_PML,NSUBSET_ITERATIONS,NT_DUMP_ATTENUATION, & iteration_on_subset,it_of_this_subset @@ -587,8 +610,6 @@ subroutine compute_add_sources_acoustic_backward_GPU() ! checks if anything to do if (SIMULATION_TYPE /= 3) return - if (.not. GPU_MODE) return - ! checks if this slice has sources to add if (nsources_local == 0) return diff --git a/src/specfem3D/compute_add_sources_poroelastic.f90 b/src/specfem3D/compute_add_sources_poroelastic.f90 index d230c321d..f8def09bc 100644 --- a/src/specfem3D/compute_add_sources_poroelastic.f90 +++ b/src/specfem3D/compute_add_sources_poroelastic.f90 @@ -36,6 +36,7 @@ subroutine compute_add_sources_poroelastic() UNDO_ATTENUATION_AND_OR_PML, & NSOURCES,myrank,it,islice_selected_source,ispec_selected_source, & sourcearrays,SIMULATION_TYPE,NSTEP, & + SU_FORMAT,READ_ADJSRC_ASDF, & ispec_selected_rec, & nadj_rec_local,NTSTEP_BETWEEN_READ_ADJSRC, & hxir_adjstore,hetar_adjstore,hgammar_adjstore,source_adjoint,number_adjsources_global,nadj_rec_local @@ -67,6 +68,7 @@ subroutine compute_add_sources_poroelastic() ! forward simulations if (SIMULATION_TYPE == 1 .and. nsources_local > 0) then + ! ignore CMT sources for fault rupture simulations if (FAULT_SIMULATION) return @@ -217,16 +219,31 @@ subroutine compute_add_sources_poroelastic() ! this must be done carefully, otherwise the adjoint sources may be added ! twice if (ibool_read_adj_arrays) then - if (USE_BINARY_FOR_SEISMOGRAMS) stop 'Adjoint simulations not supported with .bin format, please use ASCII instead' - ! ASCII format - !!! read ascii adjoint sources - do irec_local = 1, nadj_rec_local - irec = number_adjsources_global(irec_local) - ! compute source arrays - ! reads in **net**.**sta**.**BH**.adj files - adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) - call compute_arrays_adjoint_source(adj_source_file,irec_local) - enddo + ! reads adjoint source files + if (SU_FORMAT) then + ! SU format + call compute_arrays_adjoint_source_SU(IDOMAIN_ELASTIC) + else if (READ_ADJSRC_ASDF) then + ! ASDF format + do irec_local = 1, nadj_rec_local + ! reads in **net**.**sta**.**BH**.adj files + irec = number_adjsources_global(irec_local) + adj_source_file = trim(network_name(irec))//'_'//trim(station_name(irec)) ! format: "net_sta" + ! compute source arrays + call compute_arrays_adjoint_source(adj_source_file,irec_local) + enddo + else + ! default ASCII format + if (USE_BINARY_FOR_SEISMOGRAMS) stop 'Adjoint simulations not supported with .bin format, please use SU format instead' + !!! read ascii adjoint sources + do irec_local = 1, nadj_rec_local + ! reads in **net**.**sta**.**BH**.adj files + irec = number_adjsources_global(irec_local) + adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) ! format: "net.sta" + ! compute source arrays + call compute_arrays_adjoint_source(adj_source_file,irec_local) + enddo + endif endif ! if (ibool_read_adj_arrays) if (it < NSTEP) then @@ -277,6 +294,7 @@ subroutine compute_add_sources_poroelastic() ! adjoint/backward simulations if (SIMULATION_TYPE == 3 .and. nsources_local > 0) then + ! ignore CMT sources for fault rupture simulations if (FAULT_SIMULATION) return diff --git a/src/specfem3D/compute_add_sources_viscoelastic.F90 b/src/specfem3D/compute_add_sources_viscoelastic.F90 index ac1c15bf4..745fff9b2 100644 --- a/src/specfem3D/compute_add_sources_viscoelastic.F90 +++ b/src/specfem3D/compute_add_sources_viscoelastic.F90 @@ -75,23 +75,9 @@ subroutine compute_add_sources_viscoelastic(accel) character(len=MAX_STRING_LEN) :: adj_source_file - ! sets current initial time - if (USE_LDDRK) then - ! LDDRK - ! note: the LDDRK scheme updates displacement after the stiffness computations and - ! after adding boundary/coupling/source terms. - ! thus, at each time loop step it, displ(:) is still at (n) and not (n+1) like for the Newmark scheme - ! when entering this routine. we therefore at an additional -DT to have the corresponding timing for the source. - time_t = dble(it-1-1)*DT + dble(C_LDDRK(istage))*DT - t0 - else if (LTS_MODE) then - ! current local time - time_t = current_lts_time - else - time_t = dble(it-1)*DT - t0 - endif - ! forward simulations if (SIMULATION_TYPE == 1 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) then + ! ignore CMT sources for fault rupture simulations if (FAULT_SIMULATION) return @@ -99,6 +85,21 @@ subroutine compute_add_sources_viscoelastic(accel) ! because the source is precisely the wavefield coming from the DSM traction file if (COUPLE_WITH_INJECTION_TECHNIQUE) return + ! sets current initial time + if (USE_LDDRK) then + ! LDDRK + ! note: the LDDRK scheme updates displacement after the stiffness computations and + ! after adding boundary/coupling/source terms. + ! thus, at each time loop step it, displ(:) is still at (n) and not (n+1) like for the Newmark scheme + ! when entering this routine. we therefore at an additional -DT to have the corresponding timing for the source. + time_t = dble(it-1-1)*DT + dble(C_LDDRK(istage))*DT - t0 + else if (LTS_MODE) then + ! current local time + time_t = current_lts_time + else + time_t = dble(it-1)*DT - t0 + endif + ! openmp solver !$OMP PARALLEL if (NSOURCES > 100) & !$OMP DEFAULT(SHARED) & @@ -201,29 +202,30 @@ subroutine compute_add_sources_viscoelastic(accel) ! this must be done carefully, otherwise the adjoint sources may be added twice if (ibool_read_adj_arrays .and. .not. INVERSE_FWI_FULL_PROBLEM) then ! reads adjoint source files - if (.not. (SU_FORMAT .or. READ_ADJSRC_ASDF)) then - ! ASCII formant - if (USE_BINARY_FOR_SEISMOGRAMS) stop 'Adjoint simulations not supported with .bin format, please use SU format instead' - !!! read ascii adjoint sources + if (SU_FORMAT) then + ! SU format + call compute_arrays_adjoint_source_SU(IDOMAIN_ELASTIC) + else if (READ_ADJSRC_ASDF) then + ! ASDF format do irec_local = 1, nadj_rec_local ! reads in **net**.**sta**.**BH**.adj files irec = number_adjsources_global(irec_local) - adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) + adj_source_file = trim(network_name(irec))//'_'//trim(station_name(irec)) ! format: "net_sta" + ! compute source arrays call compute_arrays_adjoint_source(adj_source_file,irec_local) enddo - else if (READ_ADJSRC_ASDF) then - ! ASDF format + else + ! default ASCII format + if (USE_BINARY_FOR_SEISMOGRAMS) stop 'Adjoint simulations not supported with .bin format, please use SU format instead' + !!! read ascii adjoint sources do irec_local = 1, nadj_rec_local ! reads in **net**.**sta**.**BH**.adj files irec = number_adjsources_global(irec_local) - adj_source_file = trim(network_name(irec))//'_'//trim(station_name(irec)) + adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) ! format: "net.sta" + ! compute source arrays call compute_arrays_adjoint_source(adj_source_file,irec_local) enddo - call compute_arrays_adjoint_source(adj_source_file, irec_local) - else - ! SU format - call compute_arrays_adjoint_source_SU() - endif !if (.not. SU_FORMAT) + endif endif ! if (ibool_read_adj_arrays) ! adds source term @@ -284,6 +286,7 @@ subroutine compute_add_sources_viscoelastic(accel) ! that's to say, the ensemble forward source is kind of a surface force density, not a body force density ! therefore, we must add it here, before applying the inverse of mass matrix endif + ! note: NOISE_TOMOGRAPHY == 3 step is done in backward routine endif end subroutine compute_add_sources_viscoelastic @@ -340,28 +343,6 @@ subroutine compute_add_sources_viscoelastic_backward(b_accel) ! because the source is precisely the wavefield coming from the DSM traction file if (COUPLE_WITH_INJECTION_TECHNIQUE) return - ! iteration step - if (UNDO_ATTENUATION_AND_OR_PML) then - ! example: NSTEP is a multiple of NT_DUMP_ATTENUATION - ! NT_DUMP_ATTENUATION = 301, NSTEP = 1204, NSUBSET_ITERATIONS = 4, iteration_on_subset = 1 -> 4, - ! 1. subset, it_temp goes from 301 down to 1 - ! 2. subset, it_temp goes from 602 down to 302 - ! 3. subset, it_temp goes from 903 down to 603 - ! 4. subset, it_temp goes from 1204 down to 904 - !valid for multiples only: - !it_tmp = iteration_on_subset * NT_DUMP_ATTENUATION - it_of_this_subset + 1 - ! - ! example: NSTEP is **NOT** a multiple of NT_DUMP_ATTENUATION - ! NT_DUMP_ATTENUATION = 301, NSTEP = 900, NSUBSET_ITERATIONS = 3, iteration_on_subset = 1 -> 3 - ! 1. subset, it_temp goes from (900 - 602) = 298 down to 1 - ! 2. subset, it_temp goes from (900 - 301) = 599 down to 299 - ! 3. subset, it_temp goes from (900 - 0) = 900 down to 600 - !works always: - it_tmp = NSTEP - (NSUBSET_ITERATIONS - iteration_on_subset)*NT_DUMP_ATTENUATION - it_of_this_subset + 1 - else - it_tmp = it - endif - ! NOTE: adjoint sources and backward wavefield timing: ! idea is to start with the backward field b_displ,.. at time (T) ! and convolve with the adjoint field at time (T-t) @@ -386,25 +367,51 @@ subroutine compute_add_sources_viscoelastic_backward(b_accel) ! adjoint source traces which start at -t0 and end at time (NSTEP-1)*DT - t0 ! for step it=1: (NSTEP -it + 1)*DT - t0 for backward wavefields corresponds to time T - ! sets current initial time - if (USE_LDDRK) then - ! LDDRK - ! note: the LDDRK scheme updates displacement after the stiffness computations and - ! after adding boundary/coupling/source terms. - ! thus, at each time loop step it, displ(:) is still at (n) and not (n+1) like for the Newmark scheme - ! when entering this routine. we therefore at an additional -DT to have the corresponding timing for the source. + ! adjoint simulations + if (NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) then + + ! iteration step if (UNDO_ATTENUATION_AND_OR_PML) then - ! stepping moves forward from snapshot position - time_t = dble(NSTEP-it_tmp-1)*DT + dble(C_LDDRK(istage))*DT - t0 + ! example: NSTEP is a multiple of NT_DUMP_ATTENUATION + ! NT_DUMP_ATTENUATION = 301, NSTEP = 1204, NSUBSET_ITERATIONS = 4, iteration_on_subset = 1 -> 4, + ! 1. subset, it_temp goes from 301 down to 1 + ! 2. subset, it_temp goes from 602 down to 302 + ! 3. subset, it_temp goes from 903 down to 603 + ! 4. subset, it_temp goes from 1204 down to 904 + !valid for multiples only: + !it_tmp = iteration_on_subset * NT_DUMP_ATTENUATION - it_of_this_subset + 1 + ! + ! example: NSTEP is **NOT** a multiple of NT_DUMP_ATTENUATION + ! NT_DUMP_ATTENUATION = 301, NSTEP = 900, NSUBSET_ITERATIONS = 3, iteration_on_subset = 1 -> 3 + ! 1. subset, it_temp goes from (900 - 602) = 298 down to 1 + ! 2. subset, it_temp goes from (900 - 301) = 599 down to 299 + ! 3. subset, it_temp goes from (900 - 0) = 900 down to 600 + !works always: + it_tmp = NSTEP - (NSUBSET_ITERATIONS - iteration_on_subset)*NT_DUMP_ATTENUATION - it_of_this_subset + 1 else - time_t = dble(NSTEP-it_tmp-1)*DT - dble(C_LDDRK(istage))*DT - t0 + it_tmp = it endif - else - time_t = dble(NSTEP-it_tmp)*DT - t0 - endif -! adjoint simulations - if (NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) then + ! sets current initial time + if (USE_LDDRK) then + ! LDDRK + ! note: the LDDRK scheme updates displacement after the stiffness computations and + ! after adding boundary/coupling/source terms. + ! thus, at each time loop step it, displ(:) is still at (n) and not (n+1) like for the Newmark scheme + ! when entering this routine. we therefore at an additional -DT to have the corresponding timing for the source. + if (UNDO_ATTENUATION_AND_OR_PML) then + ! stepping moves forward from snapshot position + time_t = dble(NSTEP-it_tmp-1)*DT + dble(C_LDDRK(istage))*DT - t0 + else + time_t = dble(NSTEP-it_tmp-1)*DT - dble(C_LDDRK(istage))*DT - t0 + endif + else + ! Newmark + ! note: b_displ() is read in after Newmark time scheme, thus + ! b_displ(it=1) corresponds to -t0 + (NSTEP-1)*DT. + ! thus indexing is NSTEP - it , instead of NSTEP - it - 1 + time_t = dble(NSTEP-it_tmp)*DT - t0 + endif ! backward source reconstruction do isource = 1,NSOURCES @@ -429,7 +436,9 @@ subroutine compute_add_sources_viscoelastic_backward(b_accel) do j = 1,NGLLY do i = 1,NGLLX iglob = ibool(i,j,k,ispec) - b_accel(:,iglob) = b_accel(:,iglob) + sourcearrays(isource,:,i,j,k) * stf_used + b_accel(1,iglob) = b_accel(1,iglob) + sourcearrays(isource,1,i,j,k) * stf_used + b_accel(2,iglob) = b_accel(2,iglob) + sourcearrays(isource,2,i,j,k) * stf_used + b_accel(3,iglob) = b_accel(3,iglob) + sourcearrays(isource,3,i,j,k) * stf_used enddo enddo enddo @@ -472,8 +481,8 @@ subroutine compute_add_sources_viscoelastic_GPU() use shared_parameters, only: DT, & SIMULATION_TYPE,NOISE_TOMOGRAPHY,INVERSE_FWI_FULL_PROBLEM, & - USE_LDDRK,LTS_MODE,GPU_MODE,UNDO_ATTENUATION_AND_OR_PML, & - SU_FORMAT,USE_BINARY_FOR_SEISMOGRAMS, & + USE_LDDRK,LTS_MODE, & + SU_FORMAT,READ_ADJSRC_ASDF,USE_BINARY_FOR_SEISMOGRAMS, & NSTEP,NTSTEP_BETWEEN_READ_ADJSRC use specfem_par, only: station_name,network_name, & @@ -510,11 +519,20 @@ subroutine compute_add_sources_viscoelastic_GPU() character(len=MAX_STRING_LEN) :: adj_source_file - ! checks if anything to do - if (.not. GPU_MODE) return + ! note: this routine will only take care of adding contributions to accel() wavefield array. + ! it mimicks exactly what the routine compute_add_sources_viscoelastic() is doing. + ! + ! thus, it deals with the CMT/force/noise source contributions for forward simulations, + ! and the adjoint source contributions for pure adjoint or kernel simulations. + ! + ! we will not consider the backward b_accel() wavefield contributions. + ! that is, the re-injection of the CMT/force/noise source contribution into + ! the backward wavefield b_accel() is not done here. + ! those will be done in the routine compute_add_sources_viscoelastic_backward_GPU(). ! forward simulations if (SIMULATION_TYPE == 1 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) then + ! ignore CMT sources for fault rupture simulations if (FAULT_SIMULATION) return @@ -600,6 +618,7 @@ subroutine compute_add_sources_viscoelastic_GPU() ! adjoint simulations if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then + ! adds adjoint source in this partitions if (nadj_rec_local > 0) then @@ -619,24 +638,34 @@ subroutine compute_add_sources_viscoelastic_GPU() ! with other partitions while calculate for the inner part ! this must be done carefully, otherwise the adjoint sources may be added twice if (ibool_read_adj_arrays .and. .not. INVERSE_FWI_FULL_PROBLEM) then - - if (.not. SU_FORMAT) then - ! ASCII format + ! reads adjoint source files + if (SU_FORMAT) then + ! SU format + call compute_arrays_adjoint_source_SU(IDOMAIN_ELASTIC) + else if (READ_ADJSRC_ASDF) then + ! ASDF format + do irec_local = 1, nadj_rec_local + ! reads in **net**.**sta**.**BH**.adj files + irec = number_adjsources_global(irec_local) + adj_source_file = trim(network_name(irec))//'_'//trim(station_name(irec)) ! format: "net_sta" + ! compute source arrays + call compute_arrays_adjoint_source(adj_source_file,irec_local) + enddo + else + ! default ASCII format if (USE_BINARY_FOR_SEISMOGRAMS) stop 'Adjoint simulations not supported with .bin format, please use SU format instead' !!! read ascii adjoint sources do irec_local = 1, nadj_rec_local ! reads in **net**.**sta**.**BH**.adj files irec = number_adjsources_global(irec_local) - adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) + adj_source_file = trim(network_name(irec))//'.'//trim(station_name(irec)) ! format: "net.sta" + ! compute source arrays call compute_arrays_adjoint_source(adj_source_file,irec_local) enddo - else - ! SU format - call compute_arrays_adjoint_source_SU() - endif !if (.not. SU_FORMAT) - + endif endif ! if (ibool_read_adj_arrays) + ! adds source term if (it < NSTEP) then call add_sources_el_sim_type_2_or_3(Mesh_pointer, & source_adjoint, & @@ -648,50 +677,6 @@ subroutine compute_add_sources_viscoelastic_GPU() endif ! nadj_rec_local endif !adjoint -! note: b_displ() is read in after Newmark time scheme, thus -! b_displ(it=1) corresponds to -t0 + (NSTEP-1)*DT. -! thus indexing is NSTEP - it , instead of NSTEP - it - 1 - - ! adjoint/backward wavefield - if (SIMULATION_TYPE == 3 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) then - ! ignore CMT sources for fault rupture simulations - if (FAULT_SIMULATION) return - - ! no source inside the mesh if we are coupling with DSM - ! nothing left to do, can exit routine... - if (COUPLE_WITH_INJECTION_TECHNIQUE) return - - if (NSOURCES > 0) then - do isource = 1,NSOURCES - ! current time - if (USE_LDDRK) then - ! LDDRK - ! note: the LDDRK scheme updates displacement after the stiffness computations and - ! after adding boundary/coupling/source terms. - ! thus, at each time loop step it, displ(:) is still at (n) and not (n+1) like for the Newmark scheme - ! when entering this routine. we therefore at an additional -DT to have the corresponding timing for the source. - if (UNDO_ATTENUATION_AND_OR_PML) then - ! stepping moves forward from snapshot position - time_source_dble = dble(NSTEP-it-1)*DT + dble(C_LDDRK(istage))*DT - t0 - tshift_src(isource) - else - time_source_dble = dble(NSTEP-it-1)*DT - dble(C_LDDRK(istage))*DT - t0 - tshift_src(isource) - endif - else - time_source_dble = dble(NSTEP-it)*DT - t0 - tshift_src(isource) - endif - - ! determines source time function value - stf = get_stf_viscoelastic(time_source_dble,isource,NSTEP-it+1) - - ! stores precomputed source time function factor - stf_pre_compute(isource) = stf - enddo - - ! only implements SIMTYPE=3 - call compute_add_sources_el_s3_cuda(Mesh_pointer,stf_pre_compute,NSOURCES) - endif - endif ! adjoint - ! for noise simulations if (NOISE_TOMOGRAPHY > 0) then ! we have two loops indicated by iphase ("inner elements/points" or "boundary elements/points") @@ -713,14 +698,8 @@ subroutine compute_add_sources_viscoelastic_GPU() ! note the ensemble forward sources are generally distributed on the surface of the earth ! that's to say, the ensemble forward source is kind of a surface force density, not a body force density ! therefore, we must add it here, before applying the inverse of mass matrix - else if (NOISE_TOMOGRAPHY == 3) then - ! third step of noise tomography, i.e., read the surface movie saved at every timestep - ! use the movie to reconstruct the ensemble forward wavefield - ! the ensemble adjoint wavefield is done as usual - ! note instead of "NSTEP-it+1", now we us "it", since reconstruction is a reversal of reversal - call noise_read_add_surface_movie_GPU(noise_surface_movie,it,num_free_surface_faces, & - Mesh_pointer,NOISE_TOMOGRAPHY) endif + ! note: NOISE_TOMOGRAPHY == 3 step is done in backward routine endif end subroutine compute_add_sources_viscoelastic_GPU @@ -731,10 +710,11 @@ subroutine compute_add_sources_viscoelastic_backward_GPU() use constants use specfem_par, only: nsources_local,tshift_src,dt,t0, & - USE_LDDRK,istage, & - NSOURCES,it,SIMULATION_TYPE,NSTEP, & - NOISE_TOMOGRAPHY, & - Mesh_pointer,GPU_MODE + num_free_surface_faces, & + USE_LDDRK,istage, & + NSOURCES,it,SIMULATION_TYPE,NSTEP, & + NOISE_TOMOGRAPHY, & + Mesh_pointer ! coupling use shared_parameters, only: COUPLE_WITH_INJECTION_TECHNIQUE @@ -743,6 +723,8 @@ subroutine compute_add_sources_viscoelastic_backward_GPU() use specfem_par, only: UNDO_ATTENUATION_AND_OR_PML,NSUBSET_ITERATIONS,NT_DUMP_ATTENUATION, & iteration_on_subset,it_of_this_subset + use specfem_par_noise, only: noise_surface_movie + ! faults use specfem_par, only: FAULT_SIMULATION @@ -758,7 +740,6 @@ subroutine compute_add_sources_viscoelastic_backward_GPU() ! checks if anything to do if (SIMULATION_TYPE /= 3) return - if (.not. GPU_MODE) return ! ignore CMT sources for fault rupture simulations if (FAULT_SIMULATION) return @@ -767,30 +748,31 @@ subroutine compute_add_sources_viscoelastic_backward_GPU() ! because the source is precisely the wavefield coming from the DSM traction file if (COUPLE_WITH_INJECTION_TECHNIQUE) return - ! iteration step - if (UNDO_ATTENUATION_AND_OR_PML) then - ! example: NSTEP is a multiple of NT_DUMP_ATTENUATION - ! NT_DUMP_ATTENUATION = 301, NSTEP = 1204, NSUBSET_ITERATIONS = 4, iteration_on_subset = 1 -> 4, - ! 1. subset, it_temp goes from 301 down to 1 - ! 2. subset, it_temp goes from 602 down to 302 - ! 3. subset, it_temp goes from 903 down to 603 - ! 4. subset, it_temp goes from 1204 down to 904 - !valid for multiples only: - !it_tmp = iteration_on_subset * NT_DUMP_ATTENUATION - it_of_this_subset + 1 - ! - ! example: NSTEP is **NOT** a multiple of NT_DUMP_ATTENUATION - ! NT_DUMP_ATTENUATION = 301, NSTEP = 900, NSUBSET_ITERATIONS = 3, iteration_on_subset = 1 -> 3 - ! 1. subset, it_temp goes from (900 - 602) = 298 down to 1 - ! 2. subset, it_temp goes from (900 - 301) = 599 down to 299 - ! 3. subset, it_temp goes from (900 - 0) = 900 down to 600 - !works always: - it_tmp = NSTEP - (NSUBSET_ITERATIONS - iteration_on_subset)*NT_DUMP_ATTENUATION - it_of_this_subset + 1 - else - it_tmp = it - endif - ! forward simulations if (NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) then + + ! iteration step + if (UNDO_ATTENUATION_AND_OR_PML) then + ! example: NSTEP is a multiple of NT_DUMP_ATTENUATION + ! NT_DUMP_ATTENUATION = 301, NSTEP = 1204, NSUBSET_ITERATIONS = 4, iteration_on_subset = 1 -> 4, + ! 1. subset, it_temp goes from 301 down to 1 + ! 2. subset, it_temp goes from 602 down to 302 + ! 3. subset, it_temp goes from 903 down to 603 + ! 4. subset, it_temp goes from 1204 down to 904 + !valid for multiples only: + !it_tmp = iteration_on_subset * NT_DUMP_ATTENUATION - it_of_this_subset + 1 + ! + ! example: NSTEP is **NOT** a multiple of NT_DUMP_ATTENUATION + ! NT_DUMP_ATTENUATION = 301, NSTEP = 900, NSUBSET_ITERATIONS = 3, iteration_on_subset = 1 -> 3 + ! 1. subset, it_temp goes from (900 - 602) = 298 down to 1 + ! 2. subset, it_temp goes from (900 - 301) = 599 down to 299 + ! 3. subset, it_temp goes from (900 - 0) = 900 down to 600 + !works always: + it_tmp = NSTEP - (NSUBSET_ITERATIONS - iteration_on_subset)*NT_DUMP_ATTENUATION - it_of_this_subset + 1 + else + it_tmp = it + endif + ! sets current initial time if (USE_LDDRK) then ! LDDRK @@ -805,6 +787,10 @@ subroutine compute_add_sources_viscoelastic_backward_GPU() time_t = dble(NSTEP-it_tmp-1)*DT - dble(C_LDDRK(istage))*DT - t0 endif else + ! Newmark + ! note: b_displ() is read in after Newmark time scheme, thus + ! b_displ(it=1) corresponds to -t0 + (NSTEP-1)*DT. + ! thus indexing is NSTEP - it , instead of NSTEP - it - 1 time_t = dble(NSTEP-it_tmp)*DT - t0 endif @@ -818,13 +804,24 @@ subroutine compute_add_sources_viscoelastic_backward_GPU() ! stores precomputed source time function factor stf_pre_compute(isource) = stf enddo + ! only implements SIMTYPE=3 call compute_add_sources_el_s3_cuda(Mesh_pointer,stf_pre_compute,NSOURCES) endif ! for noise simulations if (NOISE_TOMOGRAPHY > 0) then - stop 'for NOISE simulations, backward GPU routine is not implemented yet' + ! we have two loops indicated by iphase ("inner elements/points" or "boundary elements/points") + ! here, we add all noise sources once, when we are calculating for boundary points (iphase==1), + ! because boundary points are calculated first! + if (NOISE_TOMOGRAPHY == 3) then + ! third step of noise tomography, i.e., read the surface movie saved at every timestep + ! use the movie to reconstruct the ensemble forward wavefield + ! the ensemble adjoint wavefield is done as usual + ! note instead of "NSTEP-it+1", now we use "it", since reconstruction is a reversal of reversal + call noise_read_add_surface_movie_GPU(noise_surface_movie,it,num_free_surface_faces, & + Mesh_pointer,NOISE_TOMOGRAPHY) + endif endif end subroutine compute_add_sources_viscoelastic_backward_GPU @@ -895,13 +892,13 @@ double precision function get_stf_viscoelastic(time_source_dble,isource,it_tmp_e ! Brune source time function ! hdur is the source duration or the rise time ! Frequency parameter: - f0=1.d0/hdur(isource) + f0 = 1.d0/hdur(isource) stf = comp_source_time_function_brune(time_source_dble,f0) case (6) ! Smoothed Brune source time function ! hdur is the source duration or the rise time ! Frequency parameter: - f0=1.d0/hdur(isource) + f0 = 1.d0/hdur(isource) stf = comp_source_time_function_smooth_brune(time_source_dble,f0) case default stop 'unsupported force_stf value!' diff --git a/src/specfem3D/compute_forces_acoustic_calling_routine.f90 b/src/specfem3D/compute_forces_acoustic_calling_routine.f90 index 9cf371905..a51cc2f9f 100644 --- a/src/specfem3D/compute_forces_acoustic_calling_routine.f90 +++ b/src/specfem3D/compute_forces_acoustic_calling_routine.f90 @@ -606,14 +606,25 @@ subroutine compute_forces_acoustic_GPU_calling() ! local parameters integer:: iphase - ! safety check + ! runs with the additionally optimized GPU routine + ! (combines forward/backward fields in main compute_kernel_acoustic) if (.not. GPU_MODE) return - ! check + + ! safety check + if (SIMULATION_TYPE /= 3) & + call exit_MPI(myrank,'routine compute_forces_acoustic_GPU_calling() works only for SIMULATION_TYPE == 3') + + ! checks if for kernel simulation with both, forward & backward fields + if (UNDO_ATTENUATION_AND_OR_PML) return ! pure elastic / acoustic simulation w/out attenuation + if (ELASTIC_SIMULATION .and. ACOUSTIC_SIMULATION) return ! single domain only - coupling requires switching ordering + + ! safety check if (PML_CONDITIONS) call exit_MPI(myrank,'PML conditions for acoustic domains not yet implemented on GPUs') ! enforces free surface (zeroes potentials at free surface) - call acoustic_enforce_free_surf_cuda(Mesh_pointer,STACEY_INSTEAD_OF_FREE_SURFACE,1) - if (SIMULATION_TYPE == 3) call acoustic_enforce_free_surf_cuda(Mesh_pointer,STACEY_INSTEAD_OF_FREE_SURFACE,3) + ! assumes SIMULATION_TYPE == 3 + call acoustic_enforce_free_surf_cuda(Mesh_pointer,STACEY_INSTEAD_OF_FREE_SURFACE,1) ! 1 == forward + call acoustic_enforce_free_surf_cuda(Mesh_pointer,STACEY_INSTEAD_OF_FREE_SURFACE,3) ! 3 == backward ! distinguishes two runs: for elements on MPI interfaces, and elements within the partitions do iphase = 1,2 @@ -634,8 +645,9 @@ subroutine compute_forces_acoustic_GPU_calling() ! elastic coupling if (ELASTIC_SIMULATION) then - call compute_coupling_ac_el_cuda(Mesh_pointer,iphase,num_coupling_ac_el_faces,1) ! 1 == forward - if (SIMULATION_TYPE == 3) call compute_coupling_ac_el_cuda(Mesh_pointer,iphase,num_coupling_ac_el_faces,3) + ! assumes SIMULATION_TYPE == 3 + call compute_coupling_ac_el_cuda(Mesh_pointer,iphase,num_coupling_ac_el_faces,1) ! 1 == forward + call compute_coupling_ac_el_cuda(Mesh_pointer,iphase,num_coupling_ac_el_faces,3) ! 3 == backward endif ! poroelastic coupling @@ -660,7 +672,7 @@ subroutine compute_forces_acoustic_GPU_calling() ! adds sources ! note: we will add all source contributions in the first pass, when iphase == 1 ! to avoid calling the same routine twice and to check if the source element is an inner/outer element - ! + ! assumes SIMULATION_TYPE == 3 call compute_add_sources_acoustic_GPU() ! forward/adjoint sources call compute_add_sources_acoustic_backward_GPU() ! backward sources endif ! iphase @@ -680,43 +692,37 @@ subroutine compute_forces_acoustic_GPU_calling() request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh) ! adjoint simulations - if (SIMULATION_TYPE == 3) then - call transfer_boun_pot_from_device(Mesh_pointer, & - b_buffer_send_scalar_ext_mesh, & - 3) ! -- 3 == adjoint b_accel - - call assemble_MPI_scalar_send_cuda(NPROC, & - b_buffer_send_scalar_ext_mesh,b_buffer_recv_scalar_ext_mesh, & - num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, & - nibool_interfaces_ext_mesh, & - my_neighbors_ext_mesh, & - b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh) + ! assumes SIMULATION_TYPE == 3 + call transfer_boun_pot_from_device(Mesh_pointer, & + b_buffer_send_scalar_ext_mesh, & + 3) ! -- 3 == adjoint b_accel - endif + call assemble_MPI_scalar_send_cuda(NPROC, & + b_buffer_send_scalar_ext_mesh,b_buffer_recv_scalar_ext_mesh, & + num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, & + nibool_interfaces_ext_mesh, & + my_neighbors_ext_mesh, & + b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh) else - ! waits for send/receive requests to be completed and assembles values call assemble_MPI_scalar_write_cuda(NPROC,NGLOB_AB,potential_dot_dot_acoustic, & Mesh_pointer, & buffer_recv_scalar_ext_mesh, & num_interfaces_ext_mesh, & max_nibool_interfaces_ext_mesh, & - ! nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, & request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh, & - 1) + 1) ! 1 == forward ! adjoint simulations - if (SIMULATION_TYPE == 3) then - call assemble_MPI_scalar_write_cuda(NPROC,NGLOB_AB,b_potential_dot_dot_acoustic, & - Mesh_pointer, & - b_buffer_recv_scalar_ext_mesh, & - num_interfaces_ext_mesh, & - max_nibool_interfaces_ext_mesh, & - ! nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, & - b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh, & - 3) - endif + ! assumes SIMULATION_TYPE == 3 + call assemble_MPI_scalar_write_cuda(NPROC,NGLOB_AB,b_potential_dot_dot_acoustic, & + Mesh_pointer, & + b_buffer_recv_scalar_ext_mesh, & + num_interfaces_ext_mesh, & + max_nibool_interfaces_ext_mesh, & + b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh, & + 3) ! 3 == backward endif enddo @@ -741,9 +747,10 @@ subroutine compute_forces_acoustic_GPU_calling() call kernel_3_acoustic_cuda(Mesh_pointer,deltatover2,b_deltatover2,0) ! 0 == both -! enforces free surface (zeroes potentials at free surface) - call acoustic_enforce_free_surf_cuda(Mesh_pointer,STACEY_INSTEAD_OF_FREE_SURFACE,1) - if (SIMULATION_TYPE == 3) call acoustic_enforce_free_surf_cuda(Mesh_pointer,STACEY_INSTEAD_OF_FREE_SURFACE,3) + ! enforces free surface (zeroes potentials at free surface) + ! assumes SIMULATION_TYPE == 3 + call acoustic_enforce_free_surf_cuda(Mesh_pointer,STACEY_INSTEAD_OF_FREE_SURFACE,1) ! 1 == forward + call acoustic_enforce_free_surf_cuda(Mesh_pointer,STACEY_INSTEAD_OF_FREE_SURFACE,3) ! 3 == backward end subroutine compute_forces_acoustic_GPU_calling diff --git a/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 b/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 index 873cb01fb..54bac90a7 100644 --- a/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 +++ b/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 @@ -66,7 +66,7 @@ subroutine compute_forces_viscoelastic_calling() .and. .not. UNDO_ATTENUATION_AND_OR_PML & .and. .not. (ELASTIC_SIMULATION .and. ACOUSTIC_SIMULATION)) then ! runs with the additionally optimized GPU routine - ! (combines forward/backward fields in main compute_kernel_acoustic) + ! (combines forward/backward fields in main compute kernels) call compute_forces_viscoelastic_GPU_calling() ! all done return @@ -484,7 +484,7 @@ subroutine compute_forces_viscoelastic_backward_calling() .and. .not. (ELASTIC_SIMULATION .and. ACOUSTIC_SIMULATION)) then ! runs with the additionally optimized GPU routine ! (combines forward/backward fields in main compute_kernel_acoustic) - ! all done in compute_forces_acoustic_GPU_calling() + ! all done in compute_forces_viscoelastic_GPU_calling() return endif endif @@ -726,10 +726,18 @@ subroutine compute_forces_viscoelastic_GPU_calling() integer:: iphase + ! runs with the additionally optimized GPU routine + ! (combines forward/backward fields in main compute kernels) + if (.not. GPU_MODE) return + ! safety check if (SIMULATION_TYPE /= 3) & call exit_MPI(myrank,'routine compute_forces_viscoelastic_GPU_calling() works only for SIMULATION_TYPE == 3') + ! checks if for kernel simulation with both, forward & backward fields + if (UNDO_ATTENUATION_AND_OR_PML) return ! pure elastic / acoustic simulation w/out attenuation + if (ELASTIC_SIMULATION .and. ACOUSTIC_SIMULATION) return ! single domain only - coupling requires switching ordering + ! distinguishes two runs: for elements in contact with MPI interfaces, and elements within the partitions do iphase = 1,2 @@ -738,7 +746,7 @@ subroutine compute_forces_viscoelastic_GPU_calling() call compute_forces_viscoelastic_cuda(Mesh_pointer, iphase, deltat, & nspec_outer_elastic, & nspec_inner_elastic, & - COMPUTE_AND_STORE_STRAIN,ATTENUATION,0) ! 0 == both + COMPUTE_AND_STORE_STRAIN,ATTENUATION,0) ! 0 == both combined ! while inner elements compute "Kernel_2", we wait for MPI to ! finish and transfer the boundary terms to the device asynchronously @@ -768,15 +776,15 @@ subroutine compute_forces_viscoelastic_GPU_calling() call compute_stacey_viscoelastic_GPU(iphase,num_abs_boundary_faces, & NSTEP,it, & b_num_abs_boundary_faces,b_reclen_field,b_absorb_field, & - Mesh_pointer,0) + Mesh_pointer,0) ! 0 == both combined endif ! acoustic coupling if (ACOUSTIC_SIMULATION) then if (num_coupling_ac_el_faces > 0) then + ! assumes SIMULATION_TYPE == 3 call compute_coupling_el_ac_cuda(Mesh_pointer,iphase,num_coupling_ac_el_faces,1) ! 1 == forward - if (SIMULATION_TYPE == 3) & - call compute_coupling_el_ac_cuda(Mesh_pointer,iphase,num_coupling_ac_el_faces,3) ! 3 == backward + call compute_coupling_el_ac_cuda(Mesh_pointer,iphase,num_coupling_ac_el_faces,3) ! 3 == backward endif endif @@ -799,8 +807,9 @@ subroutine compute_forces_viscoelastic_GPU_calling() ! adds source term (single-force/moment-tensor solution) ! note: we will add all source contributions in the first pass, when iphase == 1 ! to avoid calling the same routine twice and to check if the source element is an inner/outer element - call compute_add_sources_viscoelastic_GPU() - + ! assumes SIMULATION_TYPE == 3 + call compute_add_sources_viscoelastic_GPU() ! forward/adjoint sources (into accel) + call compute_add_sources_viscoelastic_backward_GPU() ! backward sources (into b_accel) endif ! iphase ! assemble all the contributions between slices using MPI @@ -814,36 +823,36 @@ subroutine compute_forces_viscoelastic_GPU_calling() call transfer_boundary_from_device_a(Mesh_pointer,nspec_outer_elastic) ! adjoint simulations - if (SIMULATION_TYPE == 3) then - call transfer_boun_accel_from_device(Mesh_pointer, & - b_buffer_send_vector_ext_mesh, & - 3) ! 3 == adjoint b_accel + ! assumes SIMULATION_TYPE == 3 + call transfer_boun_accel_from_device(Mesh_pointer, & + b_buffer_send_vector_ext_mesh, & + 3) ! 3 == adjoint b_accel - call assemble_MPI_vector_send_cuda(NPROC, & - b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, & - num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, & - nibool_interfaces_ext_mesh, & - my_neighbors_ext_mesh, & - b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh) - endif !adjoint + call assemble_MPI_vector_send_cuda(NPROC, & + b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, & + num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, & + nibool_interfaces_ext_mesh, & + my_neighbors_ext_mesh, & + b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh) else ! waits for send/receive requests to be completed and assembles values - call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,accel, Mesh_pointer, & + call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,accel, & + Mesh_pointer, & buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh, & max_nibool_interfaces_ext_mesh, & nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, & request_send_vector_ext_mesh,request_recv_vector_ext_mesh, & - 1) + 1) ! 1 == forward ! adjoint simulations - if (SIMULATION_TYPE == 3) then - call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,b_accel, Mesh_pointer, & - b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh, & - max_nibool_interfaces_ext_mesh, & - nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, & - b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh, & - 3) - endif !adjoint + ! assumes SIMULATION_TYPE == 3 + call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,b_accel, & + Mesh_pointer, & + b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh, & + max_nibool_interfaces_ext_mesh, & + nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, & + b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh, & + 3) ! 3 == backward endif enddo @@ -858,13 +867,15 @@ subroutine compute_forces_viscoelastic_GPU_calling() endif ! multiplies with inverse of mass matrix (note: rmass has been inverted already) + ! assumes SIMULATION_TYPE == 3 call kernel_3_a_cuda(Mesh_pointer,deltatover2,b_deltatover2,APPROXIMATE_OCEAN_LOAD,1) ! 1 == forward - if (SIMULATION_TYPE == 3) call kernel_3_a_cuda(Mesh_pointer,deltatover2,b_deltatover2,APPROXIMATE_OCEAN_LOAD,3) ! 3 == backward + call kernel_3_a_cuda(Mesh_pointer,deltatover2,b_deltatover2,APPROXIMATE_OCEAN_LOAD,3) ! 3 == backward ! updates acceleration with ocean load term if (APPROXIMATE_OCEAN_LOAD) then + ! assumes SIMULATION_TYPE == 3 call compute_coupling_ocean_cuda(Mesh_pointer,1) ! 1 == forward - if (SIMULATION_TYPE == 3) call compute_coupling_ocean_cuda(Mesh_pointer,3) ! 3 == backward + call compute_coupling_ocean_cuda(Mesh_pointer,3) ! 3 == backward ! updates velocities ! Newmark finite-difference time scheme with elastic domains: @@ -883,8 +894,9 @@ subroutine compute_forces_viscoelastic_GPU_calling() ! corrector: ! updates the velocity term which requires a(t+delta) ! GPU_MODE: this is handled in 'kernel_3' at the same time as accel*rmass + ! assumes SIMULATION_TYPE == 3 call kernel_3_b_cuda(Mesh_pointer,deltatover2,b_deltatover2,1) ! 1 == forward - if (SIMULATION_TYPE == 3) call kernel_3_b_cuda(Mesh_pointer,deltatover2,b_deltatover2,3) ! 3 == backward + call kernel_3_b_cuda(Mesh_pointer,deltatover2,b_deltatover2,3) ! 3 == backward endif end subroutine compute_forces_viscoelastic_GPU_calling diff --git a/src/specfem3D/noise_tomography.f90 b/src/specfem3D/noise_tomography.f90 index 97a595560..d6fc7d2ac 100644 --- a/src/specfem3D/noise_tomography.f90 +++ b/src/specfem3D/noise_tomography.f90 @@ -894,7 +894,7 @@ end subroutine noise_read_add_surface_movie ! step 2/3: calculate/reconstruct the "ensemble forward wavefield" ! read surface movie (displacement) at every time steps, injected as the source of "ensemble forward wavefield" ! in step 2, call noise_read_add_surface_movie_GPU(..., NSTEP-it+1 ,...) -! in step 3, call noise_read_add_surface_movie(..., it ,...) +! in step 3, call noise_read_add_surface_movie_GPU(..., it ,...) subroutine noise_read_add_surface_movie_GPU(noise_surface_movie,it,num_free_surface_faces, & Mesh_pointer,NOISE_TOMOGRAPHY) @@ -913,12 +913,11 @@ subroutine noise_read_add_surface_movie_GPU(noise_surface_movie,it,num_free_surf ! reads in ensemble noise sources at surface if (num_free_surface_faces > 0) then - ! read surface movie call read_abs(2,noise_surface_movie,CUSTOM_REAL*NDIM*NGLLSQUARE*num_free_surface_faces,it) + ! adds noise movie field on GPU call noise_read_add_surface_movie_cu(Mesh_pointer,noise_surface_movie,NOISE_TOMOGRAPHY) - endif end subroutine noise_read_add_surface_movie_GPU