diff --git a/generic/milc_to_quda_utilities.c b/generic/milc_to_quda_utilities.c index 29cbca7e..da9473f6 100644 --- a/generic/milc_to_quda_utilities.c +++ b/generic/milc_to_quda_utilities.c @@ -50,6 +50,7 @@ int initialize_quda(void){ void finalize_quda(void){ #ifdef USE_CG_GPU + qudaCleanUpDeflationSpace(); #ifdef MULTIGRID mat_invert_mg_cleanup(); #endif diff --git a/generic_ks/d_congrad5_fn_quda.c b/generic_ks/d_congrad5_fn_quda.c index defbdea3..eb97dc74 100644 --- a/generic_ks/d_congrad5_fn_quda.c +++ b/generic_ks/d_congrad5_fn_quda.c @@ -133,6 +133,56 @@ int ks_congrad_parity_gpu(su3_vector *t_src, su3_vector *t_dest, inv_args.tadpole = u0; #endif + // Setup for deflation (and eigensolve) on GPU + int parity = qic->parity; + int blockSize = param.eigen_param.blockSize; + static Real previous_mass = -1.0; + static bool first_solve=true; + + QudaEigParam qep = newQudaEigParam(); + qep.block_size = blockSize; + qep.eig_type = ( blockSize > 1 ) ? QUDA_EIG_BLK_TR_LANCZOS : QUDA_EIG_TR_LANCZOS; /* or QUDA_EIG_IR_ARNOLDI, QUDA_EIG_BLK_IR_ARNOLDI */ + qep.spectrum = QUDA_SPECTRUM_SR_EIG; /* Smallest Real. Other options: LM, SM, LR, SR, LI, SI */ + qep.n_conv = (param.eigen_param.Nvecs_in > param.eigen_param.Nvecs) ? param.eigen_param.Nvecs_in : param.eigen_param.Nvecs; + qep.n_ev_deflate = param.eigen_param.Nvecs; + qep.n_ev = qep.n_conv; + qep.n_kr = (param.eigen_param.Nkr < qep.n_ev ) ? 2*qep.n_ev : param.eigen_param.Nkr; + qep.tol = param.eigen_param.tol; + qep.qr_tol = qep.tol; + qep.max_restarts = param.eigen_param.MaxIter; + qep.require_convergence = QUDA_BOOLEAN_TRUE; + qep.check_interval = 10; + qep.use_norm_op = ( parity == EVENANDODD ) ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + qep.use_pc = ( parity != EVENANDODD) ? 
QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + qep.use_dagger = QUDA_BOOLEAN_FALSE; + qep.compute_gamma5 = QUDA_BOOLEAN_FALSE; + qep.compute_svd = QUDA_BOOLEAN_FALSE; + qep.use_eigen_qr = QUDA_BOOLEAN_TRUE; + qep.use_poly_acc = QUDA_BOOLEAN_TRUE; + qep.poly_deg = param.eigen_param.poly.norder; + qep.a_min = param.eigen_param.poly.minE; + qep.a_max = param.eigen_param.poly.maxE; + qep.arpack_check = QUDA_BOOLEAN_FALSE; + strcpy( qep.vec_infile, param.ks_eigen_startfile ); + strcpy( qep.vec_outfile, param.ks_eigen_savefile ); + qep.io_parity_inflate = QUDA_BOOLEAN_FALSE; + qep.compute_evals_batch_size = 16; + qep.preserve_deflation = QUDA_BOOLEAN_TRUE; + qep.preserve_evals = ( first_solve || fabs(mass - previous_mass) < 1e-6 ) ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + qep.batched_rotate = param.eigen_param.batchedRotate; + qep.save_prec = QUDA_SINGLE_PRECISION; // add to input parameters? + qep.partfile = param.eigen_param.partfile ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + + inv_args.eig_param = qep; + if(param.eigen_param.eigPrec == 2)inv_args.prec_eigensolver = QUDA_DOUBLE_PRECISION; + else if(param.eigen_param.eigPrec == 1)inv_args.prec_eigensolver = QUDA_SINGLE_PRECISION; + else inv_args.prec_eigensolver = QUDA_HALF_PRECISION; + + inv_args.tol_restart = param.eigen_param.tol_restart; + + previous_mass = mass; + first_solve = false; + qudaInvert(MILC_PRECISION, quda_precision, mass, @@ -280,6 +330,56 @@ int ks_congrad_block_parity_gpu(int nsrc, su3_vector **t_src, su3_vector **t_des inv_args.tadpole = u0; #endif + // Setup for deflation (and eigensolve) on GPU + int parity = qic->parity; + int blockSize = param.eigen_param.blockSize; + static Real previous_mass = -1.0; + static bool first_solve=true; + + QudaEigParam qep = newQudaEigParam(); + qep.block_size = blockSize; + qep.eig_type = ( blockSize > 1 ) ? QUDA_EIG_BLK_TR_LANCZOS : QUDA_EIG_TR_LANCZOS; /* or QUDA_EIG_IR_ARNOLDI, QUDA_EIG_BLK_IR_ARNOLDI */ + qep.spectrum = QUDA_SPECTRUM_SR_EIG; /* Smallest Real. 
Other options: LM, SM, LR, SR, LI, SI */ + qep.n_conv = (param.eigen_param.Nvecs_in > param.eigen_param.Nvecs) ? param.eigen_param.Nvecs_in : param.eigen_param.Nvecs; + qep.n_ev_deflate = param.eigen_param.Nvecs; + qep.n_ev = qep.n_conv; + qep.n_kr = (param.eigen_param.Nkr < qep.n_ev ) ? 2*qep.n_ev : param.eigen_param.Nkr; + qep.tol = param.eigen_param.tol; + qep.qr_tol = qep.tol; + qep.max_restarts = param.eigen_param.MaxIter; + qep.require_convergence = QUDA_BOOLEAN_TRUE; + qep.check_interval = 10; + qep.use_norm_op = ( parity == EVENANDODD ) ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + qep.use_pc = ( parity != EVENANDODD) ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + qep.use_dagger = QUDA_BOOLEAN_FALSE; + qep.compute_gamma5 = QUDA_BOOLEAN_FALSE; + qep.compute_svd = QUDA_BOOLEAN_FALSE; + qep.use_eigen_qr = QUDA_BOOLEAN_TRUE; + qep.use_poly_acc = QUDA_BOOLEAN_TRUE; + qep.poly_deg = param.eigen_param.poly.norder; + qep.a_min = param.eigen_param.poly.minE; + qep.a_max = param.eigen_param.poly.maxE; + qep.arpack_check = QUDA_BOOLEAN_FALSE; + strcpy( qep.vec_infile, param.ks_eigen_startfile ); + strcpy( qep.vec_outfile, param.ks_eigen_savefile ); + qep.io_parity_inflate = QUDA_BOOLEAN_FALSE; + qep.compute_evals_batch_size = 16; + qep.preserve_deflation = QUDA_BOOLEAN_TRUE; + qep.preserve_evals = ( first_solve || fabs(mass - previous_mass) < 1e-6 ) ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + qep.batched_rotate = param.eigen_param.batchedRotate; + qep.save_prec = QUDA_SINGLE_PRECISION; // add to input parameters? + qep.partfile = param.eigen_param.partfile ? 
QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + + inv_args.eig_param = qep; + if(param.eigen_param.eigPrec == 2)inv_args.prec_eigensolver = QUDA_DOUBLE_PRECISION; + else if(param.eigen_param.eigPrec == 1)inv_args.prec_eigensolver = QUDA_SINGLE_PRECISION; + else inv_args.prec_eigensolver = QUDA_HALF_PRECISION; + + inv_args.tol_restart = param.eigen_param.tol_restart; + + previous_mass = mass; + first_solve = false; + qudaInvertMsrc(MILC_PRECISION, quda_precision, mass, diff --git a/generic_ks/mat_invert.c b/generic_ks/mat_invert.c index 3130a249..0638bc6a 100644 --- a/generic_ks/mat_invert.c +++ b/generic_ks/mat_invert.c @@ -205,7 +205,8 @@ int mat_invert_cg_field(su3_vector *src, su3_vector *dst, ks_dirac_adj_op( src, tmp, mass, EVENANDODD, fn); /* Do deflation if we have eigenvectors and the deflate parameter is true */ - + /* Skip MILC CPU deflation if using QUDA deflation */ +#if !( defined(USE_CG_GPU) && defined(HAVE_QUDA) ) if(param.eigen_param.Nvecs > 0 && qic->deflate){ dtime = - dclock(); @@ -222,12 +223,15 @@ int mat_invert_cg_field(su3_vector *src, su3_vector *dst, node0_printf("Time to deflate %d modes %g\n", param.eigen_param.Nvecs, dtime); #endif } +#endif /* dst_e <- (M_adj M)^-1 tmp_e (even sites only) */ qic->parity = EVEN; int cgn = ks_congrad_field( tmp, dst, qic, mass, fn ); int even_iters = qic->final_iters; + /* Skip MILC CPU deflation if using QUDA deflation */ +#if !( defined(USE_CG_GPU) && defined(HAVE_QUDA) ) if(param.eigen_param.Nvecs > 0 && qic->deflate){ dtime = - dclock(); @@ -242,6 +246,7 @@ int mat_invert_cg_field(su3_vector *src, su3_vector *dst, node0_printf("Time to deflate %d modes %g\n", param.eigen_param.Nvecs, dtime); #endif } +#endif /* dst_o <- (M_adj M)^-1 tmp_o (odd sites only) */ qic->parity = ODD; @@ -277,6 +282,8 @@ int mat_invert_cgz_field(su3_vector *src, su3_vector *dst, /* Put "exact" low-mode even-site solution in tmp if deflate parameter is true */ + /* Skip MILC CPU deflation if using QUDA deflation */ +#if !( 
defined(USE_CG_GPU) && defined(HAVE_QUDA) ) if(param.eigen_param.Nvecs > 0 && qic->deflate){ dtime = - dclock(); @@ -292,6 +299,7 @@ int mat_invert_cgz_field(su3_vector *src, su3_vector *dst, node0_printf("Time to deflate %d modes %g\n", param.eigen_param.Nvecs, dtime); #endif } +#endif /* Solve for all modes using tmp as an initial guess */ /* tmp_e <- (M_adj M)^-1 src_e (even sites only) */ @@ -301,6 +309,8 @@ int mat_invert_cgz_field(su3_vector *src, su3_vector *dst, /* Put "exact" low-mode odd-site solution in tmp if deflate parameter is true */ + /* Skip MILC CPU deflation if using QUDA deflation */ +#if !( defined(USE_CG_GPU) && defined(HAVE_QUDA) ) if(param.eigen_param.Nvecs > 0 && qic->deflate){ dtime = - dclock(); @@ -316,6 +326,7 @@ int mat_invert_cgz_field(su3_vector *src, su3_vector *dst, node0_printf("Time to deflate %d modes %g\n", param.eigen_param.Nvecs, dtime); #endif } +#endif /* Solve for all modes using tmp as an initial guess */ /* tmp_o <- (M_adj M)^-1 src_o (odd sites only) */ @@ -427,6 +438,8 @@ int mat_invert_uml_field(su3_vector *src, su3_vector *dst, ks_dirac_adj_op( src, tmp, mass, EVENANDODD, fn ); #if EIGMODE != EIGCG + /* Skip MILC CPU deflation if using QUDA deflation */ +#if !( defined(USE_CG_GPU) && defined(HAVE_QUDA) ) if(param.eigen_param.Nvecs > 0 && qic->deflate){ dtime = - dclock(); @@ -439,6 +452,7 @@ int mat_invert_uml_field(su3_vector *src, su3_vector *dst, node0_printf("Time to deflate %d modes %g\n", param.eigen_param.Nvecs, dtime); #endif } +#endif #endif /* dst_e <- (M_adj M)^-1 tmp_e (even sites only) */ @@ -460,6 +474,8 @@ int mat_invert_uml_field(su3_vector *src, su3_vector *dst, } END_LOOP_OMP; #if EIGMODE != EIGCG + /* Skip MILC CPU deflation if using QUDA deflation */ +#if !( defined(USE_CG_GPU) && defined(HAVE_QUDA) ) if(param.eigen_param.Nvecs > 0 && qic->deflate){ dtime = - dclock(); @@ -470,6 +486,7 @@ int mat_invert_uml_field(su3_vector *src, su3_vector *dst, dtime += dclock(); node0_printf("Time to deflate 
%d modes %g\n", param.eigen_param.Nvecs, dtime); } +#endif #endif /* Polish off odd sites to correct for possible roundoff error */ @@ -711,7 +728,8 @@ int mat_invert_block_cg(su3_vector **src, su3_vector **dst, } /* Put "exact" low-mode even-site solution in tmp if deflate parameter is true */ - + /* Skip MILC CPU deflation if using QUDA deflation */ +#if !( defined(USE_CG_GPU) && defined(HAVE_QUDA) ) if(param.eigen_param.Nvecs > 0 && qic->deflate){ dtime = -dclock(); @@ -728,6 +746,7 @@ int mat_invert_block_cg(su3_vector **src, su3_vector **dst, node0_printf("Time to deflate %d modes %g\n", param.eigen_param.Nvecs, dtime); #endif } +#endif /* dst_e <- (M_adj M)^-1 tmp_e (even sites only) */ qic->parity = EVEN; @@ -735,6 +754,8 @@ int mat_invert_block_cg(su3_vector **src, su3_vector **dst, int even_iters = qic->final_iters; /* Deflation on odd sites */ + /* Skip MILC CPU deflation if using QUDA deflation */ +#if !( defined(USE_CG_GPU) && defined(HAVE_QUDA) ) if(param.eigen_param.Nvecs > 0 && qic->deflate){ dtime = -dclock(); @@ -750,6 +771,7 @@ int mat_invert_block_cg(su3_vector **src, su3_vector **dst, node0_printf("Time to deflate %d modes %g\n", param.eigen_param.Nvecs, dtime); #endif } +#endif /* dst_o <- (M_adj M)^-1 tmp_o (odd sites only) */ qic->parity = ODD; @@ -780,7 +802,8 @@ int mat_invert_block_cgz(su3_vector **src, su3_vector **dst, tmp[is] = create_v_field(); /* Put "exact" low-mode even-site solution in tmp if deflate parameter is true */ - + /* Skip MILC CPU deflation if using QUDA deflation */ +#if !( defined(USE_CG_GPU) && defined(HAVE_QUDA) ) if(param.eigen_param.Nvecs > 0 && qic->deflate){ for(int is = 0; is < nsrc; is++){ @@ -799,6 +822,7 @@ int mat_invert_block_cgz(su3_vector **src, su3_vector **dst, #endif } } +#endif /* Solve for all modes on even sites using tmp as an initial guess */ /* tmp_e <- (M_adj M)^-1 src_e (even sites only) */ @@ -807,7 +831,8 @@ int mat_invert_block_cgz(su3_vector **src, su3_vector **dst, int even_iters = 
qic->final_iters; /* Put "exact" low-mode odd-site solution in tmp if deflate parameter is true */ - + /* Skip MILC CPU deflation if using QUDA deflation */ +#if !( defined(USE_CG_GPU) && defined(HAVE_QUDA) ) if(param.eigen_param.Nvecs > 0 && qic->deflate){ for(int is = 0; is < nsrc; is++){ @@ -825,6 +850,7 @@ int mat_invert_block_cgz(su3_vector **src, su3_vector **dst, #endif } } +#endif /* Solve for all modes on odd sites using tmp as an initial guess */ /* dst_o <- (M_adj M)^-1 tmp_o (odd sites only) */ @@ -870,7 +896,9 @@ int mat_invert_block_uml(su3_vector **src, su3_vector **dst, for(int is = 0; is < nsrc; is++){ ks_dirac_adj_op( src[is], tmp[is], mass, EVENANDODD, fn ); - + + /* Skip MILC CPU deflation if using QUDA deflation */ +#if !( defined(USE_CG_GPU) && defined(HAVE_QUDA) ) if(param.eigen_param.Nvecs > 0 && qic->deflate){ dtime = - dclock(); #ifdef CGTIME @@ -884,6 +912,7 @@ int mat_invert_block_uml(su3_vector **src, su3_vector **dst, dtime += dclock(); node0_printf("Time to deflate %d modes %g\n", param.eigen_param.Nvecs, dtime); } +#endif } /* dst_e <- (M_adj M)^-1 tmp_e (even sites only) */ @@ -900,6 +929,8 @@ int mat_invert_block_uml(su3_vector **src, su3_vector **dst, scalar_mult_su3_vector( dst[is]+i, 1.0/(2.0*mass), dst[is]+i ); } END_LOOP_OMP; + /* Skip MILC CPU deflation if using QUDA deflation */ +#if !( defined(USE_CG_GPU) && defined(HAVE_QUDA) ) if(param.eigen_param.Nvecs > 0 && qic->deflate){ dtime = - dclock(); node0_printf("deflating on odd sites for mass %g with %d eigenvec\n", @@ -910,6 +941,7 @@ int mat_invert_block_uml(su3_vector **src, su3_vector **dst, dtime += dclock(); node0_printf("Time to deflate %d modes %g\n", param.eigen_param.Nvecs, dtime); } +#endif } /* Polish off odd sites to correct for possible roundoff error */ diff --git a/generic_ks/read_eigen_param.c b/generic_ks/read_eigen_param.c index 94093e9d..5c39da8e 100644 --- a/generic_ks/read_eigen_param.c +++ b/generic_ks/read_eigen_param.c @@ -31,13 +31,20 @@ int 
read_ks_eigen_param(ks_eigen_param *eigen_param, int status, int prompt){ IF_OK status += get_f(stdin, prompt, "Chebyshev_beta", &eigen_param->poly.maxE ); IF_OK status += get_i(stdin, prompt, "Chebyshev_order", &eigen_param->poly.norder ); IF_OK status += get_s(stdin, prompt, "diag_algorithm", param.eigen_param.diagAlg ); - + +#elif defined(HAVE_QUDA) && defined(USE_CG_GPU) && !defined(USE_EIG_GPU) + node0_printf("ERROR: When using QUDA for CG and wanting FRESH eigenvectors, only the QUDA eigensolver is allowed!\n"); + node0_printf("ERROR: Recompile with WANT_QUDA=true, WANT_CG_GPU=true, and WANT_EIG_GPU=true\n"); + terminate(1); + #elif defined(HAVE_QUDA) && defined(USE_EIG_GPU) IF_OK status += get_i(stdin, prompt, "Max_Lanczos_restart_iters", &eigen_param->MaxIter ); IF_OK status += get_f(stdin, prompt, "eigenval_tolerance", &eigen_param->tol ); IF_OK status += get_i(stdin, prompt, "Lanczos_max", &eigen_param->Nkr ); IF_OK status += get_i(stdin, prompt, "Lanczos_restart", &eigen_param->Nrestart ); + IF_OK status += get_i(stdin, prompt, "eigensolver_prec", &eigen_param->eigPrec ); + IF_OK status += get_i(stdin, prompt, "batched_rotate", &eigen_param->batchedRotate ); IF_OK status += get_f(stdin, prompt, "Chebyshev_alpha", &eigen_param->poly.minE ); IF_OK status += get_f(stdin, prompt, "Chebyshev_beta", &eigen_param->poly.maxE ); IF_OK status += get_i(stdin, prompt, "Chebyshev_order", &eigen_param->poly.norder ); diff --git a/include/imp_ferm_links.h b/include/imp_ferm_links.h index 06ca95cd..03ac7c73 100644 --- a/include/imp_ferm_links.h +++ b/include/imp_ferm_links.h @@ -346,7 +346,11 @@ typedef struct { int Nkr; /* size of the Krylov subspace */ ks_eigen_poly poly; /* Preconditioning polynomial */ int blockSize; /* block size for block variant eigensolvers */ - int parity; + int partfile; /* Whether to save in partfile or not */ + int eigPrec; /* Run the eigensolver in this precision */ + int batchedRotate; /* Size of the rotation space to use for solver */ + 
int parity; + double tol_restart; } ks_eigen_param; #elif defined(HAVE_QDP) #define ks_eigensolve Kalkreuter @@ -371,6 +375,7 @@ typedef struct { int Restart ; /* Restart Rayleigh every so many iterations */ int Kiters ; /* Kalkreuter iterations */ int parity; + double tol_restart; } ks_eigen_param; #endif diff --git a/ks_spectrum/control.c b/ks_spectrum/control.c index ff5be2e0..9399a4ce 100644 --- a/ks_spectrum/control.c +++ b/ks_spectrum/control.c @@ -318,6 +318,8 @@ int main(int argc, char *argv[]) #endif #if EIGMODE != EIGCG + /* If using QUDA for deflation, then eigenvectors are loaded directly by QUDA and not MILC */ +#if !( defined(USE_CG_GPU) && defined(HAVE_QUDA) ) if(param.eigen_param.Nvecs > 0){ /* malloc for eigenpairs */ eigVal = (Real *)malloc(param.eigen_param.Nvecs*sizeof(double)); @@ -338,6 +340,7 @@ int main(int argc, char *argv[]) node0_printf("WARNING: Gauge fixing does not readjust the eigenvectors"); } } +#endif #endif /**************************************************************/ @@ -359,7 +362,10 @@ int main(int argc, char *argv[]) set_boundary_twist_fn(fn, bdry_phase, param.coord_origin); /* Apply the operation */ boundary_twist_fn(fn, ON); - + + // If using QUDA deflated CG + asking for QUDA to do the eigensolve, then + // the eigensolver is called from within QUDA's CG solver...not from MILC +#if !( defined(USE_CG_GPU) && defined(HAVE_QUDA) && defined(USE_EIG_GPU) ) /* compute eigenpairs if requested */ if(param.ks_eigen_startflag == FRESH){ int total_R_iters; @@ -375,9 +381,11 @@ int main(int argc, char *argv[]) initialize_site_prn_from_seed(iseed); #endif } - +#endif /* Check the eigenvectors */ + /* If using QUDA for deflation, then eigenvectors are loaded directly by QUDA and not checked by MILC */ +#if !( defined(USE_CG_GPU) && defined(HAVE_QUDA) ) /* Calculate and print the residues and norms of the eigenvectors */ resid = (Real *)malloc(Nvecs_curr*sizeof(double)); node0_printf("Even site residuals\n"); @@ -385,12 +393,14 @@ int 
main(int argc, char *argv[]) construct_eigen_other_parity(eigVec, eigVal, &param.eigen_param, fn); node0_printf("Odd site residuals\n"); check_eigres( resid, eigVec, eigVal, Nvecs_curr, ODD, fn ); - +#endif /* Unapply twisted boundary conditions on the fermion links and restore conventional KS phases and antiperiodic BC, if changed. */ boundary_twist_fn(fn, OFF); - + + /* If using QUDA for deflation, then eigenvalues are printed by QUDA */ +#if !( defined(USE_CG_GPU) && defined(HAVE_QUDA) ) /* print eigenvalues of iDslash */ node0_printf("The above were eigenvalues of -Dslash^2 in MILC normalization\n"); node0_printf("Here we also list eigenvalues of iDslash in continuum normalization\n"); @@ -403,6 +413,7 @@ int main(int argc, char *argv[]) node0_printf("eigenval(%i): %10g\n", i, 0.0); } } +#endif #endif } @@ -947,6 +958,9 @@ int main(int argc, char *argv[]) #endif if(param.eigen_param.Nvecs > 0){ + + /* If using QUDA for deflation, then eigenvectors are loaded and saved directly by QUDA and not MILC */ +#if !( defined(USE_CG_GPU) && defined(HAVE_QUDA) ) STARTTIME; /* save eigenvectors if requested */ @@ -961,6 +975,8 @@ int main(int argc, char *argv[]) free(eigVal); free(eigVec); free(resid); ENDTIME("save eigenvectors (if requested)"); + +#endif } /* Clean up quark sources, both base and modified */ diff --git a/ks_spectrum/setup.c b/ks_spectrum/setup.c index 7f4f8116..21e195c0 100644 --- a/ks_spectrum/setup.c +++ b/ks_spectrum/setup.c @@ -256,14 +256,38 @@ int readin(int prompt) { IF_OK if(param.eigen_param.Nvecs > 0){ + /* Additional parameters for QUDA deflation */ +#if ( defined(USE_CG_GPU) && defined(HAVE_QUDA) ) + /* controls how often redeflation occurs during deflated inversions */ + IF_OK status += get_f(stdin, prompt,"tol_restart", &param.eigen_param.tol_restart); +#endif + /* eigenvector input */ IF_OK status += ask_starting_ks_eigen(stdin, prompt, &param.ks_eigen_startflag, param.ks_eigen_startfile); + /* Additional parameters for QUDA deflation */ +#if ( 
defined(USE_CG_GPU) && defined(HAVE_QUDA) ) + if(param.ks_eigen_startflag == RELOAD_ASCII || + param.ks_eigen_startflag == RELOAD_SERIAL || + param.ks_eigen_startflag == RELOAD_PARALLEL ){ + /* allow file to have more eigenpairs than will be used for deflation */ + IF_OK status += get_i(stdin, prompt,"file_number_of_eigenpairs", &param.eigen_param.Nvecs_in); + } +#endif + /* eigenvector output */ IF_OK status += ask_ending_ks_eigen(stdin, prompt, &param.ks_eigen_saveflag, param.ks_eigen_savefile); +#if ( defined(USE_CG_GPU) && defined(HAVE_QUDA) ) + if(param.ks_eigen_saveflag == SAVE_PARTFILE_SCIDAC){ + param.eigen_param.partfile = 1; + } else { + param.eigen_param.partfile = 0; + } +#endif + /* If we are reading in eigenpairs, we don't regenerate them */ #if EIGMODE != EIGCG @@ -685,7 +709,6 @@ int readin(int prompt) { } IF_OK param.ksp[nprop].mass = atof(param.mass_label[nprop]); - IF_OK { int dir; FORALLUPDIR(dir)param.bdry_phase[nprop][dir] = bdry_phase[dir]; @@ -717,9 +740,10 @@ int readin(int prompt) { param.qic[nprop].deflate = 0; IF_OK { if(param.eigen_param.Nvecs > 0){ /* Need eigenvectors to deflate */ - IF_OK status += get_s(stdin, prompt,"deflate", savebuf); + char savebuf2[128]; + IF_OK status += get_s(stdin, prompt,"deflate", savebuf2); IF_OK { - if(strcmp(savebuf,"yes") == 0)param.qic[nprop].deflate = 1; + if(strcmp(savebuf2,"yes") == 0)param.qic[nprop].deflate = 1; } } }