Skip to content

Commit

Permalink
Improved efficiency of writing (serial) restart files:
Browse files Browse the repository at this point in the history
 1. allow only master_task to define a serial netcdf restart file 
 2. remove unnecessary restart_format conditionals
 3. fuse master_task conditionals
 4. add MPI barriers to global gathers and scatters, enabled by the new gather_scatter_barrier cpp flag

The gather/scatter barriers do not affect timings in my tests, but Neil Barton reports significant performance improvements in his configuration. Thanks to Alan Wallcraft for this fix.

M       mpi/ice_gather_scatter.F90
 - add barriers to gathers and scatters

M       io_netcdf/ice_restart.F90
M       io_binary/ice_restart.F90
 - remove unnecessary restart_format conditionals
 - fuse master_task conditionals

M       source/ice_restart_driver.F90
 - turn off min/max diagnostics for writes

M       bld/Macros.Linux.LANL.conejo
M       doc/cicedoc.pdf
M       comp_ice
 - add the BARRIERS build setting, which defines the gather_scatter_barrier cpp flag
  • Loading branch information
eclare108213 committed Nov 22, 2013
1 parent 41f9197 commit 740b14d
Show file tree
Hide file tree
Showing 7 changed files with 69 additions and 147 deletions.
5 changes: 4 additions & 1 deletion bld/Macros.Linux.LANL.conejo
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ endif
FIXEDFLAGS := -132
FREEFLAGS :=
FFLAGS := -r8 -i4 -O2 -align all -w -ftz -convert big_endian -assume byterecl -fp-model precise
#FFLAGS := -r8 -i4 -align all -w -ftz -convert big_endian -assume byterecl -g
#FFLAGS := -r8 -i4 -align all -w -ftz -convert big_endian -assume byterecl -fp-model precise -g
#FFLAGS := -r8 -i4 -align all -w -ftz -convert big_endian -assume byterecl -fpe0 -CB -traceback
#FFLAGS := -r8 -i4 -align all -w -ftz -convert big_endian -assume byterecl -fpe0 -CB -g

Expand All @@ -41,6 +41,9 @@ LDFLAGS := $(FFLAGS) -v
ifeq ($(DITTO), yes)
CPPDEFS := $(CPPDEFS) -DREPRODUCIBLE
endif
ifeq ($(BARRIERS), yes)
CPPDEFS := $(CPPDEFS) -Dgather_scatter_barrier
endif

ifeq ($(IO_TYPE), netcdf)
CPPDEFS := $(CPPDEFS) -Dncdf
Expand Down
1 change: 1 addition & 0 deletions comp_ice
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ setenv SHRDIR csm_share # location of CCSM shared code
setenv IO_TYPE netcdf # set to none if netcdf library is unavailable
# set to pio for parallel netcdf
setenv DITTO no # reproducible diagnostics
setenv BARRIERS no # prevent MPI buffer overflow during gather/scatter
setenv THRD no # set to yes for OpenMP threading

if ( $THRD == 'yes') setenv OMP_NUM_THREADS 2 # positive integer
Expand Down
Binary file modified doc/cicedoc.pdf
Binary file not shown.
145 changes: 24 additions & 121 deletions io_binary/ice_restart.F90
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,7 @@ subroutine init_restart_read(ice_ic)
string1(1:lenstr(string1)), &
restart_file(1:lenstr(restart_file)),'.eap', &
string2(1:lenstr(string2))
endif

if (restart_format == 'bin') &
call ice_open(nu_restart_eap,filename,0)

if (my_task == master_task) then
read (nu_restart_eap) iignore,rignore,rignore
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
endif
Expand All @@ -124,12 +119,7 @@ subroutine init_restart_read(ice_ic)
string1(1:lenstr(string1)), &
restart_file(1:lenstr(restart_file)),'.iage', &
string2(1:lenstr(string2))
endif

if (restart_format == 'bin') &
call ice_open(nu_restart_age,filename,0)

if (my_task == master_task) then
read (nu_restart_age) iignore,rignore,rignore
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
endif
Expand All @@ -145,12 +135,7 @@ subroutine init_restart_read(ice_ic)
string1(1:lenstr(string1)), &
restart_file(1:lenstr(restart_file)),'.FY', &
string2(1:lenstr(string2))
endif

if (restart_format == 'bin') &
call ice_open(nu_restart_FY,filename,0)

if (my_task == master_task) then
read (nu_restart_FY) iignore,rignore,rignore
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
endif
Expand All @@ -166,12 +151,7 @@ subroutine init_restart_read(ice_ic)
string1(1:lenstr(string1)), &
restart_file(1:lenstr(restart_file)),'.lvl', &
string2(1:lenstr(string2))
endif

if (restart_format == 'bin') &
call ice_open(nu_restart_lvl,filename,0)

if (my_task == master_task) then
read (nu_restart_lvl) iignore,rignore,rignore
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
endif
Expand All @@ -187,12 +167,7 @@ subroutine init_restart_read(ice_ic)
string1(1:lenstr(string1)), &
restart_file(1:lenstr(restart_file)),'.pond_cesm', &
string2(1:lenstr(string2))
endif

if (restart_format == 'bin') &
call ice_open(nu_restart_pond,filename,0)

if (my_task == master_task) then
read (nu_restart_pond) iignore,rignore,rignore
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
endif
Expand All @@ -208,12 +183,7 @@ subroutine init_restart_read(ice_ic)
string1(1:lenstr(string1)), &
restart_file(1:lenstr(restart_file)),'.pond_lvl', &
string2(1:lenstr(string2))
endif

if (restart_format == 'bin') &
call ice_open(nu_restart_pond,filename,0)

if (my_task == master_task) then
read (nu_restart_pond) iignore,rignore,rignore
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
endif
Expand All @@ -229,12 +199,7 @@ subroutine init_restart_read(ice_ic)
string1(1:lenstr(string1)), &
restart_file(1:lenstr(restart_file)),'.pond_topo', &
string2(1:lenstr(string2))
endif

if (restart_format == 'bin') &
call ice_open(nu_restart_pond,filename,0)

if (my_task == master_task) then
read (nu_restart_pond) iignore,rignore,rignore
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
endif
Expand All @@ -250,12 +215,7 @@ subroutine init_restart_read(ice_ic)
string1(1:lenstr(string1)), &
restart_file(1:lenstr(restart_file)),'.brine', &
string2(1:lenstr(string2))
endif

if (restart_format == 'bin') &
call ice_open(nu_restart_hbrine,filename,0)

if (my_task == master_task) then
read (nu_restart_hbrine) iignore,rignore,rignore
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
endif
Expand All @@ -271,12 +231,7 @@ subroutine init_restart_read(ice_ic)
string1(1:lenstr(string1)), &
restart_file(1:lenstr(restart_file)),'.bgc', &
string2(1:lenstr(string2))
endif

if (restart_format == 'bin') &
call ice_open(nu_restart_bgc,filename,0)

if (my_task == master_task) then
read (nu_restart_bgc) iignore,rignore,rignore
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
endif
Expand All @@ -292,12 +247,7 @@ subroutine init_restart_read(ice_ic)
string1(1:lenstr(string1)), &
restart_file(1:lenstr(restart_file)),'.aero', &
string2(1:lenstr(string2))
endif

if (restart_format == 'bin') &
call ice_open(nu_restart_aero,filename,0)

if (my_task == master_task) then
read (nu_restart_aero) iignore,rignore,rignore
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
endif
Expand Down Expand Up @@ -351,33 +301,21 @@ subroutine init_restart_write(filename_spec)
open(nu_rst_pointer,file=pointer_file)
write(nu_rst_pointer,'(a)') filename
close(nu_rst_pointer)
endif

if (restart_format == 'bin') then

call ice_open(nu_dump,filename,0)

if (my_task == master_task) then
write(nu_dump) istep1,time,time_forc
endif

endif

if (my_task == master_task) then
write(nu_dump) istep1,time,time_forc
write(nu_diag,*) 'Writing ',filename(1:lenstr(filename))
endif

! begin writing restart data

if (kdyn == 2) then

write(filename,'(a,a,a,i4.4,a,i2.2,a,i2.2,a,i5.5)') &
restart_dir(1:lenstr(restart_dir)), &
restart_file(1:lenstr(restart_file)),'.eap.', &
iyear,'-',month,'-',mday,'-',sec

! begin writing restart data

if (restart_format == 'bin') &
call ice_open(nu_dump_eap,filename,0)
call ice_open(nu_dump_eap,filename,0)

if (my_task == master_task) then
write(nu_dump_eap) istep1,time,time_forc
Expand All @@ -393,10 +331,7 @@ subroutine init_restart_write(filename_spec)
restart_file(1:lenstr(restart_file)),'.FY.', &
iyear,'-',month,'-',mday,'-',sec

! begin writing restart data

if (restart_format == 'bin') &
call ice_open(nu_dump_FY,filename,0)
call ice_open(nu_dump_FY,filename,0)

if (my_task == master_task) then
write(nu_dump_FY) istep1,time,time_forc
Expand All @@ -412,10 +347,7 @@ subroutine init_restart_write(filename_spec)
restart_file(1:lenstr(restart_file)),'.iage.', &
iyear,'-',month,'-',mday,'-',sec

! begin writing restart data

if (restart_format == 'bin') &
call ice_open(nu_dump_age,filename,0)
call ice_open(nu_dump_age,filename,0)

if (my_task == master_task) then
write(nu_dump_age) istep1,time,time_forc
Expand All @@ -431,10 +363,7 @@ subroutine init_restart_write(filename_spec)
restart_file(1:lenstr(restart_file)),'.lvl.', &
iyear,'-',month,'-',mday,'-',sec

! begin writing restart data

if (restart_format == 'bin') &
call ice_open(nu_dump_lvl,filename,0)
call ice_open(nu_dump_lvl,filename,0)

if (my_task == master_task) then
write(nu_dump_lvl) istep1,time,time_forc
Expand All @@ -450,10 +379,7 @@ subroutine init_restart_write(filename_spec)
restart_file(1:lenstr(restart_file)),'.pond_cesm.', &
iyear,'-',month,'-',mday,'-',sec

! begin writing restart data

if (restart_format == 'bin') &
call ice_open(nu_dump_pond,filename,0)
call ice_open(nu_dump_pond,filename,0)

if (my_task == master_task) then
write(nu_dump_pond) istep1,time,time_forc
Expand All @@ -469,10 +395,7 @@ subroutine init_restart_write(filename_spec)
restart_file(1:lenstr(restart_file)),'.pond_lvl.', &
iyear,'-',month,'-',mday,'-',sec

! begin writing restart data

if (restart_format == 'bin') &
call ice_open(nu_dump_pond,filename,0)
call ice_open(nu_dump_pond,filename,0)

if (my_task == master_task) then
write(nu_dump_pond) istep1,time,time_forc
Expand All @@ -488,10 +411,7 @@ subroutine init_restart_write(filename_spec)
restart_file(1:lenstr(restart_file)),'.pond_topo.', &
iyear,'-',month,'-',mday,'-',sec

! begin writing restart data

if (restart_format == 'bin') &
call ice_open(nu_dump_pond,filename,0)
call ice_open(nu_dump_pond,filename,0)

if (my_task == master_task) then
write(nu_dump_pond) istep1,time,time_forc
Expand All @@ -507,10 +427,7 @@ subroutine init_restart_write(filename_spec)
restart_file(1:lenstr(restart_file)),'.brine.', &
iyear,'-',month,'-',mday,'-',sec

! begin writing restart data

if (restart_format == 'bin') &
call ice_open(nu_dump_hbrine,filename,0)
call ice_open(nu_dump_hbrine,filename,0)

if (my_task == master_task) then
write(nu_dump_hbrine) istep1,time,time_forc
Expand All @@ -526,10 +443,7 @@ subroutine init_restart_write(filename_spec)
restart_file(1:lenstr(restart_file)),'.bgc.', &
iyear,'-',month,'-',mday,'-',sec

! begin writing restart data

if (restart_format == 'bin') &
call ice_open(nu_dump_bgc,filename,0)
call ice_open(nu_dump_bgc,filename,0)

if (my_task == master_task) then
write(nu_dump_bgc) istep1,time,time_forc
Expand All @@ -545,10 +459,7 @@ subroutine init_restart_write(filename_spec)
restart_file(1:lenstr(restart_file)),'.aero.', &
iyear,'-',month,'-',mday,'-',sec

! begin writing restart data

if (restart_format == 'bin') &
call ice_open(nu_dump_aero,filename,0)
call ice_open(nu_dump_aero,filename,0)

if (my_task == master_task) then
write(nu_dump_aero) istep1,time,time_forc
Expand Down Expand Up @@ -603,7 +514,6 @@ subroutine read_restart_field(nu,nrec,work,atype,vname,ndim3, &
real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks) :: &
work2 ! input array (real, 8-byte)

if (restart_format == 'bin') then
write(nu_diag,*) 'vname ',trim(vname)
if (present(field_loc)) then
do n=1,ndim3
Expand All @@ -624,9 +534,6 @@ subroutine read_restart_field(nu,nrec,work,atype,vname,ndim3, &
work(:,:,n,:) = work2(:,:,:)
enddo
endif
else
call abort_ice('Invalid restart_format: '//restart_format)
endif

end subroutine read_restart_field

Expand Down Expand Up @@ -669,7 +576,6 @@ subroutine write_restart_field(nu,nrec,work,atype,vname,ndim3,diag)
real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks) :: &
work2 ! input array (real, 8-byte)

if (restart_format == 'bin') then
do n=1,ndim3
work2(:,:,:) = work(:,:,n,:)
if (restart_ext) then
Expand All @@ -678,9 +584,6 @@ subroutine write_restart_field(nu,nrec,work,atype,vname,ndim3,diag)
call ice_write(nu,nrec,work2,atype,diag)
endif
enddo
else
call abort_ice('Invalid restart_format: '//restart_format)
endif

end subroutine write_restart_field

Expand All @@ -696,19 +599,19 @@ subroutine final_restart()

integer (kind=int_kind) :: status

if (restart_format == 'bin') then
if (my_task == master_task) close(nu_dump)
if (my_task == master_task .and. tr_aero) close(nu_dump_aero)
if (my_task == master_task .and. tr_iage) close(nu_dump_age)
if (my_task == master_task .and. tr_FY) close(nu_dump_FY)
if (my_task == master_task .and. tr_lvl) close(nu_dump_lvl)
if (my_task == master_task .and. tr_pond_cesm) close(nu_dump_pond)
if (my_task == master_task .and. tr_pond_lvl) close(nu_dump_pond)
if (my_task == master_task .and. tr_pond_topo) close(nu_dump_pond)
endif
if (my_task == master_task) then
close(nu_dump)

if (tr_aero) close(nu_dump_aero)
if (tr_iage) close(nu_dump_age)
if (tr_FY) close(nu_dump_FY)
if (tr_lvl) close(nu_dump_lvl)
if (tr_pond_cesm) close(nu_dump_pond)
if (tr_pond_lvl) close(nu_dump_pond)
if (tr_pond_topo) close(nu_dump_pond)

if (my_task == master_task) &
write(nu_diag,*) 'Restart read/written ',istep1,time,time_forc
endif

end subroutine final_restart

Expand Down
Loading

0 comments on commit 740b14d

Please sign in to comment.