diff --git a/examples/ex_eigs_dparTests.c b/examples/ex_eigs_dparTests.c index 96dea66a..90d36909 100644 --- a/examples/ex_eigs_dparTests.c +++ b/examples/ex_eigs_dparTests.c @@ -245,7 +245,9 @@ int main (int argc, char *argv[]) { fprintf(primme.outputFile, "GlobalSum Time : %-22.10E\n", primme.stats.timeGlobalSum); fprintf(primme.outputFile, "Broadcast Time : %-22.10E\n", primme.stats.timeBroadcast); fprintf(primme.outputFile, "SketchedMatvec Time : %-22.10E\n", primme.stats.timeSketchMatvec); - fprintf(primme.outputFile, "Sketching Time : %-22.10E\n", primme.stats.timeSketching); + fprintf(primme.outputFile, "Rayleigh-Ritz Time : %-22.10E\n", primme.stats.timeRR); + fprintf(primme.outputFile, "Stabilization Time : %-22.10E\n", primme.stats.timeStabilization); + fprintf(primme.outputFile, "Residual Time : %-22.10E\n", primme.stats.timeResiduals); if (primme.stats.lockingIssue) { fprintf(primme.outputFile, "\nA locking problem has occurred.\n"); diff --git a/include/primme_eigs.h b/include/primme_eigs.h index eef97e4b..8aa2c2f6 100644 --- a/include/primme_eigs.h +++ b/include/primme_eigs.h @@ -142,7 +142,8 @@ typedef struct primme_stats { double timeBroadcast; /* time expend by broadcastReal */ double timeDense; /* time expend by Num_update_VWXR_Sprimme */ double timeSketchMatvec; /* time expend by sketched MatVec */ - double timeSketching; /* time expend by sketching */ + double timeStabilization; /* time expend by stabilization in Sketched RR */ + double timeRR; /* time expend by Rayleigh-Ritz (sketching or nonsketching) */ double timeResiduals; /* time expend by computing residuals */ double timeKrylov; /* time expend by building the Krylov basis */ double estimateMinEVal; /* the leftmost Ritz value seen */ diff --git a/src/eigs/auxiliary_eigs.c b/src/eigs/auxiliary_eigs.c index 2a9c1060..a5b902c3 100644 --- a/src/eigs/auxiliary_eigs.c +++ b/src/eigs/auxiliary_eigs.c @@ -179,7 +179,6 @@ void primme_free_context(primme_context ctx) { * W A*V ******************************************************************************/ -// XXX: Eloy - possible change below? /******************************************************************************* * Subroutine matrixMatvec_ - Computes A*V * for column indices basisSize through basisSize+blockSize-1 diff --git a/src/eigs/lanczos.c b/src/eigs/lanczos.c index 2007c79e..ac1be92a 100644 --- a/src/eigs/lanczos.c +++ b/src/eigs/lanczos.c @@ -58,68 +58,12 @@ #ifdef SUPPORTED_TYPE -STATIC void compute_residuals_RR(HSCALAR *evecs, PRIMME_INT ldevecs, HEVAL *evals, PRIMME_INT numEvals, HREAL *resNorms, primme_context ctx) -{ - primme_params *primme = ctx.primme; - - HSCALAR *evecs_ortho; - SCALAR *Aevecs_ortho; - SCALAR *H_ortho; - SCALAR *new_hVecs; - SCALAR *new_evecs; - SCALAR *new_Aevecs; - SCALAR *resVecs; - HREAL *new_hVals; - PRIMME_INT i; - - Num_malloc_SHprimme(primme->nLocal*numEvals, &evecs_ortho, ctx); - Num_malloc_Sprimme(primme->nLocal*numEvals, &Aevecs_ortho, ctx); - Num_malloc_Sprimme(numEvals*numEvals, &H_ortho, ctx); - Num_malloc_Sprimme(numEvals*numEvals, &new_hVecs, ctx); - Num_malloc_Sprimme(primme->nLocal*numEvals, &new_evecs, ctx); - Num_malloc_Sprimme(primme->nLocal*numEvals, &new_Aevecs, ctx); - Num_malloc_Sprimme(primme->nLocal*numEvals, &resVecs, ctx); - Num_malloc_RHprimme(numEvals, &new_hVals, ctx); - - Num_copy_matrix_Sprimme(evecs, primme->nLocal, numEvals, ldevecs, evecs_ortho, primme->nLocal, ctx); - ortho_Sprimme(evecs_ortho, primme->nLocal, NULL, 0, 0, numEvals-1, NULL, 0, 0, primme->nLocal, primme->iseed, ctx); // Ortho the Ritz vectors - matrixMatvec_Sprimme(evecs_ortho, primme->nLocal, primme->nLocal, Aevecs_ortho, primme->nLocal, 0, numEvals, ctx); // Projected orthogonal eigenvectors - - // Our "new" banded matrix H - Num_gemm_Sprimme("C", "N", numEvals, numEvals, primme->nLocal, 1.0, evecs_ortho, primme->nLocal, Aevecs_ortho, primme->nLocal, 0.0, H_ortho, numEvals, ctx); - globalSum_Sprimme(H_ortho, numEvals*numEvals, ctx); - - solve_H_Sprimme(H_ortho, numEvals, numEvals, NULL, 0, NULL, 0, NULL, 0, NULL, 0, NULL, 0, new_hVecs, numEvals, new_hVals, NULL, 0, ctx); - - // Compute residuals - Num_gemm_Sprimme("N", "N", primme->nLocal, numEvals, numEvals, 1.0, evecs_ortho, primme->nLocal, new_hVecs, numEvals, 0.0, new_evecs, primme->nLocal, ctx); - for(i = 0; i < numEvals; i++) evals[i] = new_hVals[i]; - matrixMatvec_Sprimme(new_evecs, primme->nLocal, primme->nLocal, new_Aevecs, primme->nLocal, 0, numEvals, ctx); // Projected new eigenvectors - Num_compute_residuals_Sprimme(primme->nLocal, numEvals, evals, new_evecs, primme->nLocal, new_Aevecs, primme->nLocal, resVecs, primme->nLocal, ctx); - - for(i = 0; i < numEvals; i++) - resNorms[i] = Num_dot_Sprimme(primme->nLocal, &resVecs[i*primme->nLocal], 1, &resVecs[i*primme->nLocal], 1, ctx); - globalSum_Rprimme(resNorms, numEvals, ctx); - for(i = 0; i < numEvals; i++) - resNorms[i] = sqrt(resNorms[i]); - - Num_free_SHprimme(evecs_ortho, ctx); - Num_free_Sprimme(Aevecs_ortho, ctx); - Num_free_Sprimme(H_ortho, ctx); - Num_free_Sprimme(new_hVecs, ctx); - Num_free_Sprimme(new_evecs, ctx); - Num_free_Sprimme(new_Aevecs, ctx); - Num_free_Sprimme(resVecs, ctx); - Num_free_RHprimme(new_hVals, ctx); - - return; -} - TEMPLATE_PLEASE -int print_lanczos_timings_Sprimme(primme_context ctx) { +int print_lanczos_timings_Sprimme(PRIMME_INT basisSize, primme_context ctx) { primme_params *primme = ctx.primme; + printf("Basis Size: %-" PRIMME_INT_P "\n", basisSize); printf("Iterations: %-" PRIMME_INT_P "\n", primme->stats.numOuterIterations); printf("Restarts : %-" PRIMME_INT_P "\n", primme->stats.numRestarts); printf("Matvecs : %-" PRIMME_INT_P "\n", primme->stats.numMatvecs); @@ -131,7 +75,8 @@ int print_lanczos_timings_Sprimme(primme_context ctx) { printf("GlobalSum Time : %-22.10E\n", primme->stats.timeGlobalSum); printf("Broadcast Time : %-22.10E\n", primme->stats.timeBroadcast); printf("SketchedMatvec Time : %-22.10E\n", primme->stats.timeSketchMatvec); - printf("Sketching Time : %-22.10E\n", primme->stats.timeSketching); + printf("Rayleigh-Ritz Time : %-22.10E\n", primme->stats.timeRR); + printf("Stabilization Time : %-22.10E\n", primme->stats.timeStabilization); printf("Residual Time : %-22.10E\n", primme->stats.timeResiduals); return 0; @@ -177,6 +122,8 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, primme_params *primme = ctx.primme; double elapsed_time = primme_wTimer(); + double solve_timer; + double resid_timer; primme->stats.elapsedTime = 0.0; // XXX: Added this for debugging purposes - Heather /* primme parameters */ @@ -185,7 +132,6 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, SCALAR *rwork; /* temporary work vector */ HSCALAR *H; /* Upper triangular portion of V'*A*V */ -// HSCALAR *H_fullOrtho; /* If fullOrtho = 1, used to reduce error */ HSCALAR *hVecs; /* Eigenvectors of H */ HREAL *hVals; /* The Ritz values */ @@ -209,10 +155,11 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, /* Sketching Variables -----------------------------------------------------*/ SCALAR *V_temp; /* Copy of basis vectors for sketching */ SCALAR *SV; /* The sketched basis */ + SCALAR *T; /* The "R" factor in the QR decomposition of SV */ SCALAR *SW; /* The projected sketched basis */ - SCALAR *S_vals; /* CSC Formatted Values */ - PRIMME_INT *S_rows; /* CSC Formatted Rows */ - PRIMME_INT nnzPerCol, ldSV, ldSW; /* Size and nnz of the sketching matrix */ + SCALAR *S_vals; /* CSC Formatted Values */ + PRIMME_INT *S_rows; /* CSC Formatted Rows */ + PRIMME_INT nnzPerCol, ldSV, ldSW, ldT; /* Size and nnz of the sketching matrix */ REAL *normalize_evecs; /* -------------------------------------------------------------- */ @@ -235,11 +182,6 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, CHKERR(Num_malloc_SHprimme(ldH*ldH, &H, ctx)); CHKERR(Num_zero_matrix_SHprimme(H, ldH, ldH, ldH, ctx)); } -/* if(fullOrtho) - { - CHKERR(Num_malloc_SHprimme(maxBlockSize*maxBlockSize, &H_fullOrtho, ctx)); - CHKERR(Num_zero_matrix_SHprimme(H_fullOrtho, maxBlockSize, maxBlockSize, maxBlockSize, ctx)); - }*/ CHKERR(Num_malloc_Sprimme(ldV*(primme->maxBasisSize+maxBlockSize), &V, ctx)); CHKERR(Num_malloc_Sprimme(ldAVhVecs*primme->numEvals, &AVhVecs, ctx)); @@ -273,8 +215,9 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, primme->stats.timeGlobalSum = 0.0; primme->stats.timeBroadcast = 0.0; primme->stats.timeDense = 0.0; - primme->stats.timeSketching = 0.0; + primme->stats.timeRR = 0.0; primme->stats.timeSketchMatvec = 0.0; + primme->stats.timeStabilization = 0.0; primme->stats.timeResiduals = 0.0; primme->stats.estimateMinEVal = HUGE_VAL; primme->stats.estimateMaxEVal = -HUGE_VAL; @@ -304,12 +247,14 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, { /* Default settings for sketch size and nnz per column. Based on Yuji and Tropp's manuscript */ ldSV = ldSW = primme->sketchingParams.sketchSize; + ldT = primme->maxBasisSize+maxBlockSize; nnzPerCol = primme->sketchingParams.nnzPerCol; S_rows = (PRIMME_INT*)malloc(nnzPerCol*primme->nLocal*sizeof(PRIMME_INT)); CHKERR(Num_malloc_Sprimme(nnzPerCol*primme->nLocal, &S_vals, ctx)); CHKERR(Num_malloc_Sprimme(ldSV*(primme->maxBasisSize+maxBlockSize), &SV, ctx)); + CHKERR(Num_malloc_Sprimme(ldT*ldT, &T, ctx)); CHKERR(Num_malloc_Sprimme(ldSW*primme->maxBasisSize, &SW, ctx)); CHKERR(Num_malloc_Sprimme(ldV*(primme->maxBasisSize+maxBlockSize), &V_temp, ctx)); @@ -330,7 +275,7 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, /* Update sketched basis if sketching is turned on */ if(primme->projectionParams.projection == primme_proj_sketched) - CHKERR(sketch_basis_Sprimme(V, ldV, SV, ldSV, 0, blockSize, S_rows, S_vals, ctx)); + CHKERR(sketch_basis_Sprimme(V, ldV, SV, ldSV, T, ldT, 0, blockSize, S_rows, S_vals, ctx)); primme->stats.numOuterIterations++; @@ -344,11 +289,7 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, CHKERR(ortho_Sprimme(&V[i*ldV], ldV, &H[(i-blockSize)*ldH + i], ldH, 0, blockSize-1, NULL, 0, 0, primme->nLocal, primme->iseed, ctx)); /* [V_i, b_i] = qr(V_i) */ if(fullOrtho) - { CHKERR(ortho_Sprimme(V, ldV, NULL, 0, i, i+blockSize-1, NULL, 0, 0, primme->nLocal, primme->iseed, ctx)); /* V_i = cgs(V(:, 0:i-1), V_i) */ - //CHKERR(ortho_Sprimme(V, ldV, H_fullOrtho, ldH, i, i+blockSize-1, NULL, 0, 0, primme->nLocal, primme->iseed, ctx)); /* V_i = cgs(V(:, 0:i-1), V_i) */ - //for(j = 0; j < blockSize; j++) CHKERR(Num_axpy_Sprimme(blockSize, 1.0, &H_fullOrtho[j*blockSize], 1, &H[(i-blockSize+j)*ldH + i], 1, ctx)); - } /* Symmetrize H */ CHKERR(Num_copy_matrix_conj_Sprimme(&H[(i-blockSize)*ldH + i], blockSize, blockSize, ldH, &H[i*ldH + (i-blockSize)], ldH, ctx)); @@ -366,9 +307,9 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, /* Update sketched basis if sketching is turned on */ if(primme->projectionParams.projection == primme_proj_sketched) - CHKERR(sketch_basis_Sprimme(V, ldV, SV, ldSV, i, blockSize, S_rows, S_vals, ctx)); + CHKERR(sketch_basis_Sprimme(V, ldV, SV, ldSV, T, ldT, i, blockSize, S_rows, S_vals, ctx)); - if(primme->printLevel >= 2 && (PRIMME_INT)(i+blockSize) % 10 == 0) + if(primme->printLevel >= 2 && (PRIMME_INT)(i+blockSize) % 100 == 0) { /* Moving on to the eigenvalue problem */ primme->initSize = 0; @@ -382,17 +323,20 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, CHKERR(ortho_Sprimme(&V_temp[ldV*(i+blockSize)], ldV, &H[i*ldH + (i+blockSize)], ldH, 0, blockSize-1, NULL, 0, 0, primme->nLocal, primme->iseed, ctx)); /* [V_i, b_i] = qr(V_i) */ /* SW = SV*H */ - CHKERR(sketch_basis_Sprimme(V_temp, ldV, SV, ldSV, i+blockSize, blockSize, S_rows, S_vals, ctx)); + CHKERR(sketch_basis_Sprimme(V_temp, ldV, SV, ldSV, T, ldT, i+blockSize, blockSize, S_rows, S_vals, ctx)); CHKERR(Num_gemm_Sprimme("N", "N", ldSV, i+blockSize, i+2*blockSize, 1.0, SV, ldSV, H, ldH, 0.0, SW, ldSW, ctx)); /* Getting our sketched basis and projected sketched basis */ - CHKERR(sketched_RR_Sprimme(SV, ldSV, SW, ldSW, hVecs, i+blockSize, hVals, i+blockSize, ctx)); + CHKERR(sketched_RR_Sprimme(SV, ldSV, T, ldT, SW, ldSW, hVecs, i+blockSize, hVals, i+blockSize, ctx)); } else { /* End sketching */ + solve_timer = primme_wTimer(); CHKERR(solve_H_Sprimme(H, i+blockSize, ldH, NULL, 0, NULL, 0, NULL, 0, NULL, 0, NULL, 0, hVecs, i+blockSize, hVals, NULL, 0, ctx)); + primme->stats.timeRR += primme_wTimer() - solve_timer; } /* End non-sketching */ /* Check how many pairs have converged basis in approximate residual */ + resid_timer = primme_wTimer(); if(primme->procID == 0){ for(j = 0; j < primme->numEvals; j++) { if(j < numEvals) { @@ -402,6 +346,7 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, } } } + primme->stats.timeResiduals += primme_wTimer() - resid_timer; numConverged = 0; CHKERR(broadcast_Rprimme(resNorms, numEvals, ctx)); for(j = 0; j < primme->numEvals; j++) if(resNorms[j] < primme->aNorm*primme->eps) numConverged++; @@ -411,9 +356,9 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, printf("BasisSize %ld: NumConverged = %d, Convergence Tolerance = %.6E (%.6E x %.6E)\n", i+blockSize, numConverged, primme->aNorm*primme->eps, primme->aNorm, primme->eps); for(j = 0; j < primme->numEvals; j++){ if(j < numEvals) { - printf("BasisSize %ld Eval[%ld] = %lf, ResNorm[%ld] = %.6E\n", i+blockSize, j, hVals[j], j, resNorms[j]); + printf("BasisSize %ld Eval[%ld] = %.10E, ResNorm[%ld] = %.10E\n", i+blockSize, j, hVals[j], j, resNorms[j]); } else { - printf("BasisSize %ld Eval[%ld] = %lf, ResNorm[%ld] = %.6E\n", i+blockSize, j, 0.0, j, 0.0); + printf("BasisSize %ld Eval[%ld] = NaN, ResNorm[%ld] = NaN\n", i+blockSize, j, j); } } } @@ -423,23 +368,6 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, CHKERR(Num_gemm_Sprimme("N", "N", primme->nLocal, numEvals, i+blockSize, 1.0, V, ldV, hVecs, i+blockSize, 0.0, evecs, ldevecs, ctx)); /* evecs = V*hVecs */ for(j = 0; j < primme->numEvals; j++) evals[j] = hVals[j]; -// /* If sketching, scale the eigenvectors to ensure they are normal before computing their actual residual norms */ -// if(primme->projectionParams.projection == primme_proj_sketched) { -// for(j = 0; j < primme->numEvals; j++) normalize_evecs[j] = Num_dot_Sprimme(primme->nLocal, &evecs[j*ldevecs], 1, &evecs[j*ldevecs], 1, ctx); -// CHKERR(globalSum_Rprimme(normalize_evecs, numEvals, ctx)); -// for(j = 0; j < numEvals; j++) CHKERR(Num_scal_Sprimme(primme->nLocal, 1/sqrt(normalize_evecs[j]), &evecs[j*ldevecs], 1, ctx)); -// -// compute_residuals_RR(evecs, ldevecs, evals, numEvals, resNorms, ctx); -// } else { /* Nonsketching - Compute actual residual norms */ -// CHKERR(matrixMatvec_Sprimme(evecs, primme->nLocal, ldevecs, AVhVecs, ldAVhVecs, 0, numEvals, ctx)); /* AVhVecs = A*V*hVecs */ -// CHKERR(Num_compute_residuals_Sprimme(primme->nLocal, numEvals, evals, evecs, ldevecs, AVhVecs, ldAVhVecs, rwork, ldrwork, ctx)); /* Compute residual vectors */ -// -// for(j = 0; j < numEvals; j++) /* Compute residual norms */ -// resNorms[j] = Num_dot_Sprimme(ldrwork, &rwork[j*ldrwork], 1, &rwork[j*ldrwork], 1, ctx); -// CHKERR(globalSum_Rprimme(resNorms, numEvals, ctx)); -// for(j = 0; j < numEvals; j++) resNorms[j] = sqrt(resNorms[j]); -// } - /* If sketching, scale the eigenvectors to ensure they are normal before computing their actual residual norms */ if(primme->projectionParams.projection == primme_proj_sketched) { for(j = 0; j < primme->numEvals; j++) normalize_evecs[j] = Num_dot_Sprimme(primme->nLocal, &evecs[j*ldevecs], 1, &evecs[j*ldevecs], 1, ctx); @@ -447,6 +375,7 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, for(j = 0; j < numEvals; j++) CHKERR(Num_scal_Sprimme(primme->nLocal, 1/sqrt(normalize_evecs[j]), &evecs[j*ldevecs], 1, ctx)); } + resid_timer = primme_wTimer(); /* Compute residual vectors */ CHKERR(matrixMatvec_Sprimme(evecs, primme->nLocal, ldevecs, AVhVecs, ldAVhVecs, 0, numEvals, ctx)); /* AVhVecs = A*V*hVecs */ CHKERR(Num_compute_residuals_Sprimme(primme->nLocal, numEvals, evals, evecs, ldevecs, AVhVecs, ldAVhVecs, rwork, ldrwork, ctx)); /* Compute residual vectors */ @@ -455,7 +384,8 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, for(j = 0; j < numEvals; j++) resNorms[j] = Num_dot_Sprimme(ldrwork, &rwork[j*ldrwork], 1, &rwork[j*ldrwork], 1, ctx); CHKERR(globalSum_Rprimme(resNorms, numEvals, ctx)); for(j = 0; j < numEvals; j++) resNorms[j] = sqrt(resNorms[j]); - + primme->stats.timeResiduals += primme_wTimer() - resid_timer; + /* Check the convergence of the Ritz vectors */ CHKERR(check_convergence_Sprimme(evecs, ldevecs, 1 /* given X */, NULL, 0, 0 /* not given R */, NULL, 0, 0, NULL, 0, NULL, 0, 0, numEvals, flags, resNorms, hVals, &reset, -1, ctx)); /* Find number of converged eigenpairs */ @@ -480,7 +410,7 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, //Report timings if(primme->procID == 0 && (primme->stats.numOuterIterations % 100 == 0 || i % 100 == 0)){ primme->stats.elapsedTime = primme_wTimer() - elapsed_time; // XXX: Added this for debugging purposes - Heather - CHKERR(print_lanczos_timings_Sprimme(ctx)); + CHKERR(print_lanczos_timings_Sprimme(i, ctx)); } } /* End basis build */ @@ -499,38 +429,21 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, CHKERR(ortho_Sprimme(V, ldV, NULL, 0, primme->maxBasisSize, primme->maxBasisSize+blockSize-1, NULL, 0, 0, primme->nLocal, primme->iseed, ctx)); /* Orthogonalized the last block of V against the rest of the basis */ /* SW = SV*H */ - CHKERR(sketch_basis_Sprimme(V, ldV, SV, ldSV, primme->maxBasisSize, blockSize, S_rows, S_vals, ctx)); + CHKERR(sketch_basis_Sprimme(V, ldV, SV, ldSV, T, ldT, primme->maxBasisSize, blockSize, S_rows, S_vals, ctx)); CHKERR(Num_gemm_Sprimme("N", "N", ldSV, primme->maxBasisSize, primme->maxBasisSize+blockSize, 1.0, SV, ldSV, H, ldH, 0.0, SW, ldSW, ctx)); /* Performing sketched Rayleigh-Ritz */ - CHKERR(sketched_RR_Sprimme(SV, ldSV, SW, ldSW, hVecs, ldhVecs, hVals, primme->maxBasisSize, ctx)); + CHKERR(sketched_RR_Sprimme(SV, ldSV, T, ldT, SW, ldSW, hVecs, ldhVecs, hVals, primme->maxBasisSize, ctx)); } else { /* End sketched */ - + solve_timer = primme_wTimer(); CHKERR(solve_H_Sprimme(H, primme->maxBasisSize, ldH, NULL, 0, NULL, 0, NULL, 0, NULL, 0, NULL, 0, hVecs, ldhVecs, hVals, NULL, 0, ctx)); - + primme->stats.timeRR += primme_wTimer() - solve_timer; } /* End nonsketched */ CHKERR(Num_gemm_Sprimme("N", "N", primme->nLocal, primme->numEvals, primme->maxBasisSize, 1.0, V, ldV, hVecs, ldhVecs, 0.0, evecs, ldevecs, ctx)); /* evecs = V*hVecs */ for(i = 0; i < primme->numEvals; i++) evals[i] = hVals[i]; - /* Find residual norms and Ritz pairs being returned to the user */ - /* If sketching, make sure eigenvectors are at least normal */ -// if(primme->projectionParams.projection == primme_proj_sketched) { -// for(j = 0; j < primme->numEvals; j++) normalize_evecs[j] = Num_dot_Sprimme(primme->nLocal, &evecs[j*ldevecs], 1, &evecs[j*ldevecs], 1, ctx); -// CHKERR(globalSum_Rprimme(normalize_evecs, primme->numEvals, ctx)); -// for(j = 0; j < primme->numEvals; j++) CHKERR(Num_scal_Sprimme(primme->nLocal, 1/sqrt(normalize_evecs[j]), &evecs[j*ldevecs], 1, ctx)); -// -// compute_residuals_RR(evecs, ldevecs, evals, primme->numEvals, resNorms, ctx); /* Rayleigh Ritz residuals */ -// } else { -// /* Find residual norms */ -// CHKERR(matrixMatvec_Sprimme(evecs, primme->nLocal, ldevecs, AVhVecs, ldAVhVecs, 0, primme->numEvals, ctx)); /* AVhVecs = A*V*hVecs */ -// CHKERR(Num_compute_residuals_Sprimme(primme->nLocal, primme->numEvals, evals, evecs, ldevecs, AVhVecs, ldAVhVecs, rwork, ldrwork, ctx)); -// -// for(j = 0; j < primme->numEvals; j++) resNorms[j] = sqrt(Num_dot_Sprimme(ldrwork, &rwork[j*ldrwork], 1, &rwork[j*ldrwork], 1, ctx))/primme->numProcs; -// CHKERR(globalSum_Rprimme(resNorms, primme->numEvals, ctx)); -// } /* End residual computation */ - /* Ensure evecs are normal if sketching */ if(primme->projectionParams.projection == primme_proj_sketched) { for(j = 0; j < primme->numEvals; j++) normalize_evecs[j] = Num_dot_Sprimme(primme->nLocal, &evecs[j*ldevecs], 1, &evecs[j*ldevecs], 1, ctx); @@ -538,6 +451,7 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, for(j = 0; j < primme->numEvals; j++) CHKERR(Num_scal_Sprimme(primme->nLocal, 1/sqrt(normalize_evecs[j]), &evecs[j*ldevecs], 1, ctx)); } + resid_timer = primme_wTimer(); /* Compute residual vectors */ CHKERR(matrixMatvec_Sprimme(evecs, primme->nLocal, ldevecs, AVhVecs, ldAVhVecs, 0, primme->numEvals, ctx)); /* AVhVecs = A*V*hVecs */ CHKERR(Num_compute_residuals_Sprimme(primme->nLocal, primme->numEvals, evals, evecs, ldevecs, AVhVecs, ldAVhVecs, rwork, ldrwork, ctx)); @@ -545,6 +459,7 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, /* Compute residual norms */ for(j = 0; j < primme->numEvals; j++) resNorms[j] = sqrt(Num_dot_Sprimme(ldrwork, &rwork[j*ldrwork], 1, &rwork[j*ldrwork], 1, ctx))/primme->numProcs; CHKERR(globalSum_Rprimme(resNorms, primme->numEvals, ctx)); + primme->stats.timeResiduals += primme_wTimer() - resid_timer; /* Check the convergence of the Ritz vectors */ CHKERR(check_convergence_Sprimme(evecs, ldevecs, 1 /* given X */, NULL, 0, 0 /* not given R */, NULL, 0, 0, NULL, 0, NULL, 0, 0, primme->numEvals, flags, resNorms, hVals, &reset, -1, ctx)); @@ -574,12 +489,11 @@ int lanczos_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, CHKERR(Num_free_iprimme(global_start, ctx)); CHKERR(Num_free_iprimme(eval_perm, ctx)); - //if(fullOrtho) CHKERR(Num_free_SHprimme(H_fullOrtho, ctx)); - if(primme->projectionParams.projection == primme_proj_sketched) { CHKERR(Num_free_Sprimme(S_vals, ctx)); CHKERR(Num_free_Sprimme(SV, ctx)); + CHKERR(Num_free_Sprimme(T, ctx)); CHKERR(Num_free_Sprimme(SW, ctx)); CHKERR(Num_free_Rprimme(normalize_evecs, ctx)); CHKERR(Num_free_Sprimme(V_temp, ctx)); diff --git a/src/eigs/main_iter.c b/src/eigs/main_iter.c index 33318510..101949ea 100644 --- a/src/eigs/main_iter.c +++ b/src/eigs/main_iter.c @@ -117,10 +117,11 @@ STATIC void displayModel(primme_CostModel *model); #endif TEMPLATE_PLEASE -int print_timings_Sprimme(primme_context ctx) { +int print_timings_Sprimme(PRIMME_INT basisSize, primme_context ctx) { primme_params *primme = ctx.primme; + printf("Basis size: %-" PRIMME_INT_P "\n", basisSize); printf("Iterations: %-" PRIMME_INT_P "\n", primme->stats.numOuterIterations); printf("Restarts : %-" PRIMME_INT_P "\n", primme->stats.numRestarts); printf("Matvecs : %-" PRIMME_INT_P "\n", primme->stats.numMatvecs); @@ -132,7 +133,9 @@ int print_timings_Sprimme(primme_context ctx) { printf("GlobalSum Time : %-22.10E\n", primme->stats.timeGlobalSum); printf("Broadcast Time : %-22.10E\n", primme->stats.timeBroadcast); printf("SketchedMatvec Time : %-22.10E\n", primme->stats.timeSketchMatvec); - printf("Sketching Time : %-22.10E\n", primme->stats.timeSketching); + printf("Rayleigh-Ritz Time : %-22.10E\n", primme->stats.timeRR); + printf("Stabilization Time : %-22.10E\n", primme->stats.timeStabilization); + printf("Residual Time : %-22.10E\n", primme->stats.timeResiduals); return 0; } @@ -206,6 +209,7 @@ int main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, primme_params *primme = ctx.primme; double elapsed_time = primme_wTimer(); + double solve_timer; primme->stats.elapsedTime = 0.0; // XXX: Added this for debugging purposes - Heather /* primme parameters */ @@ -406,8 +410,9 @@ int main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, primme->stats.timeOrtho = 0.0; primme->stats.timeGlobalSum = 0.0; primme->stats.timeBroadcast = 0.0; - primme->stats.timeSketching = 0.0; + primme->stats.timeRR = 0.0; primme->stats.timeSketchMatvec = 0.0; + primme->stats.timeStabilization = 0.0; primme->stats.timeResiduals = 0.0; primme->stats.timeDense = 0.0; primme->stats.estimateMinEVal = HUGE_VAL; @@ -506,6 +511,8 @@ int main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, basisSize, 0 /*unsymmetric*/, ctx)); } + + solve_timer = primme_wTimer(); // XXX: Debugging CHKERR(solve_H_SHprimme(H, basisSize, primme->maxBasisSize, VtBV ? &VtBV[(primme->numOrthoConst + numLocked) * ldVtBV + @@ -514,6 +521,7 @@ int main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, ldVtBV, R, primme->maxBasisSize, QtV, primme->maxBasisSize, QtQ, ldQtQ, hU, basisSize, hVecs, basisSize, hVals, hSVals, numConverged, ctx)); + primme->stats.timeRR += primme_wTimer() - solve_timer; numArbitraryVecs = 0; maxRecentlyConverged = availableBlockSize = blockSize = 0; @@ -552,7 +560,7 @@ int main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, // XXX: FOR TESTING if(primme->procID == 0 && primme->stats.numOuterIterations % 100 == 0){ primme->stats.elapsedTime = primme_wTimer() - elapsed_time; - CHKERR(print_timings_Sprimme(ctx)); + CHKERR(print_timings_Sprimme(basisSize, ctx)); } /* When QR are computed and there are more than one target shift, */ @@ -874,6 +882,7 @@ int main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, basisSize += blockSize; blockSize = 0; + solve_timer = primme_wTimer(); CHKERR(solve_H_SHprimme(H, basisSize, primme->maxBasisSize, VtBV ? &VtBV[(primme->numOrthoConst + numLocked) * ldVtBV + @@ -882,6 +891,7 @@ int main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, ldVtBV, R, primme->maxBasisSize, QtV, primme->maxBasisSize, QtQ, ldQtQ, hU, basisSize, hVecs, basisSize, hVals, hSVals, numConverged, ctx)); + primme->stats.timeRR += primme_wTimer() - solve_timer; numArbitraryVecs = 0; candidates_prepared = 0; @@ -1193,6 +1203,8 @@ int main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, primme->maxBasisSize, primme->nLocal, basisSize, numNew, 0/*unsymmetric*/, ctx)); basisSize += numNew; + + solve_timer = primme_wTimer(); CHKERR(solve_H_SHprimme(H, basisSize, primme->maxBasisSize, VtBV ? &VtBV[(primme->numOrthoConst + numLocked) * ldVtBV + @@ -1201,6 +1213,7 @@ int main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, ldVtBV, R, primme->maxBasisSize, QtV, primme->maxBasisSize, QtQ, ldQtQ, hU, basisSize, hVecs, basisSize, hVals, hSVals, numConverged, ctx)); + primme->stats.timeRR += primme_wTimer() - solve_timer; } primme->stats.numRestarts++; @@ -1581,13 +1594,14 @@ int sketched_main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, /* Variables for sketching */ SCALAR *SV; /* Sketched Basis vectors */ + SCALAR *T; /* The "R" factor in the QR decomposition of SV */ SCALAR *SW; /* Projected sketched Basis vectors */ SCALAR *V_temp; /* Work array for sketching */ SCALAR *S_vals; /* Holds the nonzero entries of the sketching matrix */ REAL *normalize_evecs; /* For normalizing the sketched Ritz vectors */ PRIMME_INT *S_rows; /* Holds the row numbers of the nonzero entries */ PRIMME_INT nnzPerCol; /* NNZ per column in the sketching matrix */ - PRIMME_INT ldSV, ldSW; /* Leading dimensions of SV and SW */ + PRIMME_INT ldSV, ldT, ldSW; /* Leading dimensions of SV, T, and SW */ /* -------------------------------------------------------------- */ @@ -1641,12 +1655,14 @@ int sketched_main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, /* Allocate space for the variables needed for sketching (if needed) */ ldSV = ldSW = primme->sketchingParams.sketchSize; + ldT = primme->maxBasisSize; nnzPerCol = primme->sketchingParams.nnzPerCol; S_rows = (PRIMME_INT*)malloc(nnzPerCol*primme->nLocal*sizeof(PRIMME_INT)); CHKERR(Num_malloc_Sprimme(primme->nLocal*nnzPerCol, &S_vals, ctx)); CHKERR(Num_malloc_Sprimme(ldSV*primme->maxBasisSize, &SV, ctx)); + CHKERR(Num_malloc_Sprimme(ldT*ldT, &T, ctx)); CHKERR(Num_malloc_Sprimme(ldSW*primme->maxBasisSize, &SW, ctx)); CHKERR(Num_malloc_Sprimme(ldV*primme->maxBasisSize, &V_temp, ctx)); @@ -1671,8 +1687,10 @@ int sketched_main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, primme->stats.timeMatvec = 0.0; primme->stats.timePrecond = 0.0; primme->stats.timeOrtho = 0.0; - primme->stats.timeSketching = 0.0; + primme->stats.timeRR = 0.0; primme->stats.timeSketchMatvec = 0.0; + primme->stats.timeStabilization = 0.0; + primme->stats.timeResiduals = 0.0; primme->stats.timeGlobalSum = 0.0; primme->stats.timeBroadcast = 0.0; primme->stats.timeDense = 0.0; @@ -1737,10 +1755,10 @@ int sketched_main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, targetShiftIndex = 0; - CHKERR(sketch_basis_Sprimme(V, ldV, SV, ldSV, 0, basisSize, S_rows, S_vals, ctx)); - CHKERR(sketch_basis_Sprimme(W, ldW, SW, ldSW, 0, basisSize, S_rows, S_vals, ctx)); + CHKERR(sketch_basis_Sprimme(V, ldV, SV, ldSV, T, ldT, 0, basisSize, S_rows, S_vals, ctx)); + CHKERR(sketch_basis_Sprimme(W, ldW, SW, ldSW, NULL, 0, 0, basisSize, S_rows, S_vals, ctx)); - CHKERR(sketched_RR_Sprimme(SV, ldSV, SW, ldSW, hVecs, basisSize, hVals, basisSize, ctx)); + CHKERR(sketched_RR_Sprimme(SV, ldSV, T, ldT, SW, ldSW, hVecs, basisSize, hVals, basisSize, ctx)); numArbitraryVecs = 0; maxRecentlyConverged = availableBlockSize = blockSize = 0; @@ -1780,7 +1798,7 @@ int sketched_main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, // XXX: FOR TESTING if(primme->procID == 0 && primme->stats.numOuterIterations % 100 == 0){ primme->stats.elapsedTime = primme_wTimer() - elapsed_time; // XXX: Added this for debugging purposes - Heather - CHKERR(print_timings_Sprimme(ctx)); + CHKERR(print_timings_Sprimme(basisSize, ctx)); } /* When QR are computed and there are more than one target shift, */ @@ -1891,8 +1909,8 @@ int sketched_main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, basisSize, blockSize, ctx)); /* Copy hVecs into prevhVecs */ - CHKERR(sketch_basis_Sprimme(V, ldV, SV, ldSV, basisSize, blockSize, S_rows, S_vals, ctx)); - CHKERR(sketch_basis_Sprimme(W, ldW, SW, ldSW, basisSize, blockSize, S_rows, S_vals, ctx)); + CHKERR(sketch_basis_Sprimme(V, ldV, SV, ldSV, T, ldT, basisSize, blockSize, S_rows, S_vals, ctx)); + CHKERR(sketch_basis_Sprimme(W, ldW, SW, ldSW, NULL, 0, basisSize, blockSize, S_rows, S_vals, ctx)); CHKERR(Num_copy_matrix_SHprimme(hVecs, basisSize, basisSize, basisSize, prevhVecs, primme->maxBasisSize, ctx)); @@ -1904,7 +1922,7 @@ int sketched_main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, basisSize += blockSize; blockSize = 0; - CHKERR(sketched_RR_Sprimme(SV, ldSV, SW, ldSW, hVecs, basisSize, hVals, basisSize, ctx)); + CHKERR(sketched_RR_Sprimme(SV, ldSV, T, ldT, SW, ldSW, hVecs, basisSize, hVals, basisSize, ctx)); numArbitraryVecs = 0; candidates_prepared = 0; @@ -2104,7 +2122,7 @@ int sketched_main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, assert(ldV == ldW); /* this function assumes ldV == ldW */ - CHKERR(restart_sketched(V, ldV, W, ldW, SV, ldSV, SW, ldSW, hVecs, basisSize, hVals, min(basisSize, primme->minRestartSize), &basisSize, ctx)); + CHKERR(restart_sketched(V, ldV, W, ldW, SV, ldSV, T, ldT, SW, ldSW, hVecs, basisSize, hVals, min(basisSize, primme->minRestartSize), &basisSize, S_rows, S_vals, ctx)); restartsSinceReset++; @@ -2144,11 +2162,11 @@ int sketched_main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, CHKERR(matrixMatvec_Sprimme(V, primme->nLocal, ldV, W, ldW, basisSize, numNew, ctx)); - CHKERR(sketch_basis_Sprimme(V, ldV, SV, ldSV, basisSize, numNew, S_rows, S_vals, ctx)); - CHKERR(sketch_basis_Sprimme(W, ldW, SW, ldSW, basisSize, numNew, S_rows, S_vals, ctx)); + CHKERR(sketch_basis_Sprimme(V, ldV, SV, ldSV, T, ldT, basisSize, numNew, S_rows, S_vals, ctx)); + CHKERR(sketch_basis_Sprimme(W, ldW, SW, ldSW, NULL, 0, basisSize, numNew, S_rows, S_vals, ctx)); basisSize += numNew; - CHKERR(sketched_RR_Sprimme(SV, ldSV, SW, ldSW, hVecs, basisSize, hVals, basisSize, ctx)); + CHKERR(sketched_RR_Sprimme(SV, ldSV, T, ldT, SW, ldSW, hVecs, basisSize, hVals, basisSize, ctx)); } primme->stats.numRestarts++; @@ -2188,9 +2206,9 @@ int sketched_main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, CHKERR(ortho_Sprimme(V, ldV, NULL, 0, 0, basisSize-1, NULL, 0, 0, primme->nLocal, primme->iseed, ctx)); CHKERR(matrixMatvec_Sprimme(V, primme->nLocal, ldV, W, ldW, 0, basisSize, ctx)); - CHKERR(sketch_basis_Sprimme(V, ldV, SV, ldSV, 0, basisSize, S_rows, S_vals, ctx)); - CHKERR(sketch_basis_Sprimme(W, ldW, SW, ldSW, 0, basisSize, S_rows, S_vals, ctx)); - CHKERR(sketched_RR_Sprimme(SV, ldSV, SW, ldSW, hVecs, basisSize, hVals, basisSize, ctx)); + CHKERR(sketch_basis_Sprimme(V, ldV, SV, ldSV, T, ldT, 0, basisSize, S_rows, S_vals, ctx)); + CHKERR(sketch_basis_Sprimme(W, ldW, SW, ldSW, NULL, 0, 0, basisSize, S_rows, S_vals, ctx)); + CHKERR(sketched_RR_Sprimme(SV, ldSV, T, ldT, SW, ldSW, hVecs, basisSize, hVals, basisSize, ctx)); CHKERR(verify_norms(V, ldV, W, ldW, NULL, 0, hVals, restartLimitReached ? primme->numEvals : numConverged, resNorms, @@ -2308,6 +2326,7 @@ int sketched_main_iter_Sprimme(HEVAL *evals, SCALAR *evecs, PRIMME_INT ldevecs, CHKERR(Num_free_Rprimme(normalize_evecs, ctx)); CHKERR(Num_free_Sprimme(SV, ctx)); + CHKERR(Num_free_Sprimme(T, ctx)); CHKERR(Num_free_Sprimme(SW, ctx)); CHKERR(Num_free_Sprimme(S_vals, ctx)); CHKERR(Num_free_Sprimme(V_temp, ctx)); @@ -2515,6 +2534,7 @@ STATIC int prepare_candidates(SCALAR *V, PRIMME_INT ldV, SCALAR *W, blockNorms[*blockSize] = blockNorms[blki]; iev[*blockSize] = iev[blki]; if (computeXR) { + double resid_timer = primme_wTimer(); // XXX: For monitoring CHKERR(Num_copy_matrix_Sprimme(&X[blki * ldV], nLocal, 1, ldV, &X[(*blockSize) * ldV], ldV, ctx)); CHKERR(Num_copy_matrix_Sprimme(&R[blki * ldW], nLocal, 1, ldW, @@ -2523,6 +2543,7 @@ STATIC int prepare_candidates(SCALAR *V, PRIMME_INT ldV, SCALAR *W, CHKERR(Num_copy_matrix_Sprimme(&BX[blki * ldBV], nLocal, 1, ldBV, &BX[(*blockSize) * ldBV], ldBV, ctx)); } + primme->stats.timeResiduals += primme_wTimer() - resid_timer; } (*blockSize)++; } @@ -2565,6 +2586,7 @@ STATIC int prepare_candidates(SCALAR *V, PRIMME_INT ldV, SCALAR *W, /* blockNorms(basisSize:) = norms(R(basisSize:)) */ assert(ldV == ldW); /* This functions only works in this way */ + double resid_timer = primme_wTimer(); CHKERR(Num_update_VWXR_Sprimme(V, W, BV, nLocal, basisSize, ldV, hVecsBlock, basisSize, ldhVecs, hValsBlock, X?&X[(*blockSize)*ldV]:NULL, 0, computeXR?blockNormsSize:0, ldV, @@ -2580,6 +2602,7 @@ STATIC int prepare_candidates(SCALAR *V, PRIMME_INT ldV, SCALAR *W, NULL, 0, 0, XNorms?&XNorms[*blockSize]:NULL, 0, primme->massMatrixMatvec?blockNormsSize:0, ctx)); + primme->stats.timeResiduals += primme_wTimer() - resid_timer; /* Don't trust residual norm smaller than the error in the residual norm */ @@ -2668,8 +2691,10 @@ STATIC int copy_back_candidates(SCALAR *V, PRIMME_INT ldV, SCALAR *W, if (primme->projectionParams.projection != primme_proj_RR && primme->projectionParams.projection != primme_proj_sketched && primme->numTargetShifts > numConverged) { + double solve_timer = primme_wTimer(); CHKERR(solve_H_RR_SHprimme(H, ldH, VtBV, ldVtBV, hVecs, ldhVecs, hVals, basisSize, numConverged, ctx)); + primme->stats.timeRR += primme_wTimer() - solve_timer; } int i = 0; diff --git a/src/eigs/restart.c b/src/eigs/restart.c index e242bc0a..1797d526 100644 --- a/src/eigs/restart.c +++ b/src/eigs/restart.c @@ -1773,21 +1773,22 @@ STATIC int restart_RR(HSCALAR *H, int ldH, HSCALAR *VtBV, int ldVtBV, ******************************************************************************/ STATIC int restart_sketched(SCALAR *V, int ldV, SCALAR *W, int ldW, SCALAR *SV, - int ldSV, SCALAR *SW, int ldSW, HSCALAR *hVecs, int ldhVecs, HEVAL *hVals, int restartSize, - int *basisSize, primme_context ctx) { + int ldSV, SCALAR *T, int ldT, SCALAR *SW, int ldSW, HSCALAR *hVecs, + int ldhVecs, HEVAL *hVals, int restartSize, int *basisSize, + PRIMME_INT *S_rows, SCALAR *S_vals, primme_context ctx) { primme_params *primme = ctx.primme; int i; /* Loop variable */ SCALAR *V_temp; - SCALAR *SV_temp; + //SCALAR *SV_temp; SCALAR *VecNorms; SCALAR *hVecs_temp; int old_basisSize = *basisSize; CHKERR(Num_malloc_Sprimme(ldV*restartSize, &V_temp, ctx)); - CHKERR(Num_malloc_Sprimme(ldSV*restartSize, &SV_temp, ctx)); + //CHKERR(Num_malloc_Sprimme(ldSV*restartSize, &SV_temp, ctx)); CHKERR(Num_malloc_Sprimme(restartSize, &VecNorms, ctx)); CHKERR(Num_malloc_Sprimme(old_basisSize*restartSize, &hVecs_temp, ctx)); @@ -1819,14 +1820,14 @@ STATIC int restart_sketched(SCALAR *V, int ldV, SCALAR *W, int ldW, SCALAR *SV, CHKERR(Num_free_Sprimme(V_temp, ctx)); // SV = SV * hVecs - CHKERR(Num_gemm_SHprimme("N", "N", ldSV, restartSize, old_basisSize, 1.0, SV, ldSV, hVecs, ldhVecs, 0.0, SV_temp, ldSV, ctx)); - CHKERR(Num_copy_matrix_Sprimme(SV_temp, ldSV, restartSize, ldSV, SV, ldSV, ctx)); // Copy the temporary matrix back into SV + //CHKERR(Num_gemm_SHprimme("N", "N", ldSV, restartSize, old_basisSize, 1.0, SV, ldSV, hVecs, ldhVecs, 0.0, SV_temp, ldSV, ctx)); + //CHKERR(Num_copy_matrix_Sprimme(SV_temp, ldSV, restartSize, ldSV, SV, ldSV, ctx)); // Copy the temporary matrix back into SV // SW = SW * hVecs assert(ldSV == ldSW); - CHKERR(Num_gemm_SHprimme("N", "N", ldSV, restartSize, old_basisSize, 1.0, SW, ldSW, hVecs, ldhVecs, 0.0, SV_temp, ldSW, ctx)); - CHKERR(Num_copy_matrix_Sprimme(SV_temp, ldSV, restartSize, ldSW, SW, ldSW, ctx)); // Copy the temporary matrix back into SW - CHKERR(Num_free_Sprimme(SV_temp, ctx)); + //CHKERR(Num_gemm_SHprimme("N", "N", ldSV, restartSize, old_basisSize, 1.0, SW, ldSW, hVecs, ldhVecs, 0.0, SV_temp, ldSW, ctx)); + //CHKERR(Num_copy_matrix_Sprimme(SV_temp, ldSV, restartSize, ldSW, SW, ldSW, ctx)); // Copy the temporary matrix back into SW + //CHKERR(Num_free_Sprimme(SV_temp, ctx)); // Normalize the matrices for(i = 0; i < restartSize; i++) VecNorms[i] = sqrt(Num_dot_Sprimme(primme->nLocal, &V[i*ldV], 1, &V[i*ldV], 1, ctx)); @@ -1835,14 +1836,17 @@ STATIC int restart_sketched(SCALAR *V, int ldV, SCALAR *W, int ldW, SCALAR *SV, for (i = 0; i < restartSize; i++) { CHKERR(Num_scal_Sprimme(ldV, 1.0/VecNorms[i], &V[i*ldV], 1, ctx)); CHKERR(Num_scal_Sprimme(ldW, 1.0/VecNorms[i], &W[i*ldW], 1, ctx)); - CHKERR(Num_scal_Sprimme(ldSV, 1.0/VecNorms[i], &SV[i*ldSV], 1, ctx)); - CHKERR(Num_scal_Sprimme(ldSW, 1.0/VecNorms[i], &SW[i*ldSW], 1, ctx)); + //CHKERR(Num_scal_Sprimme(ldSV, 1.0/VecNorms[i], &SV[i*ldSV], 1, ctx)); + //CHKERR(Num_scal_Sprimme(ldSW, 1.0/VecNorms[i], &SW[i*ldSW], 1, ctx)); } CHKERR(Num_free_Sprimme(VecNorms, ctx)); - + + // Update eiganpairs and residuals - CHKERR(sketched_RR_Sprimme(SV, ldSV, SW, ldSW, hVecs, restartSize, hVals, restartSize, ctx)); + CHKERR(sketch_basis_Sprimme(V, ldV, SV, ldSV, T, ldT, 0, restartSize, S_rows, S_vals, ctx)); + CHKERR(sketch_basis_Sprimme(W, ldW, SW, ldSW, NULL, 0, 0, restartSize, S_rows, S_vals, ctx)); + CHKERR(sketched_RR_Sprimme(SV, ldSV, T, ldT, SW, ldSW, hVecs, restartSize, hVals, restartSize, ctx)); (*basisSize) = restartSize; diff --git a/src/eigs/sketch.c b/src/eigs/sketch.c index 0932b487..09a6246c 100644 --- a/src/eigs/sketch.c +++ b/src/eigs/sketch.c @@ -133,9 +133,13 @@ int build_sketch_Sprimme(PRIMME_INT *S_rows, SCALAR *S_vals, primme_context ctx) /****************************************************************************** * Subroutine sketch_basis - This routine multiplies the basis by the random * subspace embedding and returns the result in SV + * + * Input/Output + * T - The "R" factor in the QR decomposition of SV + * - If "T" != NULL, then SV will be the "Q" factor ******************************************************************************/ TEMPLATE_PLEASE -int sketch_basis_Sprimme(SCALAR *V, PRIMME_INT ldV, SCALAR *SV, PRIMME_INT ldSV, PRIMME_INT basisSize, PRIMME_INT blockSize, PRIMME_INT *S_rows, SCALAR *S_vals, primme_context ctx) { +int sketch_basis_Sprimme(SCALAR *V, PRIMME_INT ldV, SCALAR *SV, PRIMME_INT ldSV, SCALAR *T, PRIMME_INT ldT, PRIMME_INT basisSize, PRIMME_INT blockSize, PRIMME_INT *S_rows, SCALAR *S_vals, primme_context ctx) { primme_params *primme = ctx.primme; @@ -159,6 +163,8 @@ int sketch_basis_Sprimme(SCALAR *V, PRIMME_INT ldV, SCALAR *SV, PRIMME_INT ldSV, /* Find the sketched basis */ CHKERR(globalSum_Sprimme(&SV[basisSize*ldSV], blockSize*ldSV, ctx)); + if (T) CHKERR(ortho_Sprimme(SV, ldSV, T, ldT, basisSize, basisSize+blockSize-1, NULL, 0, 0, primme->maxBasisSize, primme->iseed, ctx)); + if (primme) primme->stats.timeSketchMatvec += primme_wTimer() - t0; return 0; @@ -175,6 +181,8 @@ int sketched_residuals_Sprimme(SCALAR *SV, PRIMME_INT ldSV, SCALAR *SW, PRIMME_I primme_params *primme = ctx.primme; + double resid_timer = primme_wTimer(); + SCALAR *SVhVecs, *SWhVecs; /* Temporary arrays used to compute residuals */ PRIMME_INT numEvals = min(primme->numEvals, basisSize); PRIMME_INT i; /* Loop variable */ @@ -194,6 +202,8 @@ int sketched_residuals_Sprimme(SCALAR *SV, PRIMME_INT ldSV, SCALAR *SW, PRIMME_I Num_free_Sprimme(SVhVecs, ctx); Num_free_Sprimme(SWhVecs, ctx); + + primme->stats.timeResiduals += primme_wTimer() - resid_timer; return 0; } @@ -233,7 +243,7 @@ return 0; ******************************************************************************/ TEMPLATE_PLEASE -int sketched_RR_Sprimme(SCALAR *SV, PRIMME_INT ldSV, SCALAR *SW, PRIMME_INT ldSW, HSCALAR *hVecs, PRIMME_INT ldhVecs, HREAL *hVals, PRIMME_INT basisSize, primme_context ctx) { +int sketched_RR_Sprimme(SCALAR *SV, PRIMME_INT ldSV, SCALAR *T, PRIMME_INT ldT, SCALAR *SW, PRIMME_INT ldSW, HSCALAR *hVecs, PRIMME_INT ldhVecs, HREAL *hVals, PRIMME_INT basisSize, primme_context ctx) { #ifdef USE_HERMITIAN primme_params *primme = ctx.primme; @@ -244,74 +254,106 @@ int sketched_RR_Sprimme(SCALAR *SV, PRIMME_INT ldSV, SCALAR *SW, PRIMME_INT ldSW if(primme->procID == 0) { - SCALAR *UtSW; /* Temporary matrix to compute left hand side of the generalized eigenvalue problem */ - SCALAR *UtSWV; /* Left hand side of the generalized eigenvalue problem */ - SCALAR *UVecs; /* Left singular vectors of the SVD */ - SCALAR *VVecst; /* Right singular vectors of the SVD (transposed) */ - SCALAR *ShVals; /* The "alpha" eigenvalues returned from LaPack's GGEV */ - SCALAR *hVals_b; /* The "beta" eigenvalues returned from LaPack's GGEV */ - SCALAR *trunc_hVecs; /* The stabilized eigenvectors of H */ - SCALAR *Sigma; /* Right hand side of the generalized eigenvalue problem */ - SCALAR *temp_SV; /* Temporarily store SV while performing LaPack's GESVD */ + SCALAR *hVals_a; /* The "alpha" eigenvalues returned from LaPack's GGEV */ + SCALAR *hVals_b; /* The "beta" eigenvalues returned from LaPack's GGEV */ + SCALAR normalize_hvecs; /* For normalizing the returned eigenvectors from LaPack's GGEV*/ + SCALAR *UtSW; /* For solving the generalized eigenvalue problem */ + REAL *sing_vals; /* Returned singular values from the SVD */ int *eval_perm; /* To sort the eigenpairs */ - PRIMME_INT ldVVecst, ldUVecs, ldUtSW, ldUtSWV, ldSigma, ldtrunc_hVecs; /* Leading dimension of matrices */ - PRIMME_INT trunc_basisSize; /* The basis size after stabilization */ - ldUVecs = ldSV; - ldVVecst = basisSize; + REAL cond_est; /* Estimation of the condition number of V - used to determine whether stabilization is needed */ + PRIMME_INT tbasisSize = basisSize; /* Truncated basis size (for Stabilization) */ + + CHKERR(Num_malloc_Sprimme(basisSize*basisSize, &UtSW, ctx)); + CHKERR(Num_malloc_Sprimme(basisSize, &hVals_a, ctx)); + CHKERR(Num_malloc_Sprimme(basisSize, &hVals_b, ctx)); - CHKERR(Num_malloc_Sprimme(ldUVecs*basisSize, &UVecs, ctx)); - CHKERR(Num_malloc_Sprimme(ldVVecst*basisSize, &VVecst, ctx)); - CHKERR(Num_malloc_Sprimme(ldSV*basisSize, &temp_SV, ctx)); CHKERR(Num_malloc_Rprimme(basisSize, &sing_vals, ctx)); + CHKERR(Num_malloc_iprimme(basisSize, &eval_perm, ctx)); - /* Take the SVD decomposition of SV */ - CHKERR(Num_copy_matrix_Sprimme(&SV[0], ldSV, basisSize, ldSV, &temp_SV[0], ldSV, ctx)); - CHKERR(Num_gesvd_Sprimme("S", "S", ldSV, basisSize, SV, ldSV, sing_vals, UVecs, ldUVecs, VVecst, ldVVecst, ctx)); - CHKERR(Num_copy_matrix_Sprimme(&temp_SV[0], ldSV, basisSize, ldSV, &SV[0], ldSV, ctx)); - CHKERR(Num_free_Sprimme(temp_SV, ctx)); + /* Check condition number to see if stabilization is needed */ + CHKERR(Num_gesvd_Sprimme("N", "N", basisSize, basisSize, T, ldT, sing_vals, NULL, basisSize, NULL, basisSize, ctx)); + cond_est = sing_vals[0]/sing_vals[basisSize-1]; + primme->stats.estimateLargestSVal = sing_vals[0]; + printf("Estimated Cond Num: %E\n", cond_est); - /* Truncate the basis for stabilization if needed */ - trunc_basisSize = basisSize; - for(i = basisSize-1; i > primme->numEvals; i--) - { - if(sing_vals[0]/sing_vals[i] >= 1/MACHINE_EPSILON) + /* XXX: Stabilization needed */ + //if(cond_est > 1/MACHINE_EPSILON) { + if(1) { + + double stab_time = primme_wTimer(); // For monitoring + + /* Truncate the basis for stabilization */ + for(i = basisSize-1; i > primme->numEvals; i--) { - trunc_basisSize--; - } else { - break; + if(sing_vals[0]/sing_vals[i] >= 1/MACHINE_EPSILON) + { + tbasisSize--; + } else { + break; + } } - } - primme->stats.estimateLargestSVal = REAL_PART(sing_vals[0]); - - /* Build each side of the generalized eigenvalue problem after stabilization */ - ldSigma = ldUtSW = ldUtSWV = ldtrunc_hVecs = trunc_basisSize; - CHKERR(Num_malloc_Sprimme(ldUtSW*basisSize, &UtSW, ctx)); - CHKERR(Num_malloc_Sprimme(ldUtSWV*trunc_basisSize, &UtSWV, ctx)); - CHKERR(Num_malloc_Sprimme(ldtrunc_hVecs*trunc_basisSize, &trunc_hVecs, ctx)); - CHKERR(Num_malloc_Sprimme(trunc_basisSize, &ShVals, ctx)); - CHKERR(Num_malloc_Sprimme(trunc_basisSize, &hVals_b, ctx)); - CHKERR(Num_malloc_Sprimme(ldSigma*trunc_basisSize, &Sigma, ctx)); - CHKERR(Num_malloc_iprimme(trunc_basisSize, &eval_perm, ctx)); - - /* Construct Mass Matrix (Diagonal matrix with singular values as entries) */ - CHKERR(Num_zero_matrix_Sprimme(Sigma, trunc_basisSize, trunc_basisSize, ldSigma, ctx)); - for(i = 0; i < trunc_basisSize; i++) Sigma[i*ldSigma+i] = sing_vals[i]; - - /* Left hand side of the Eigenvalue problem (U'(SW)Vt'), where Vt is the transpose of the right singular vectors */ - CHKERR(Num_gemm_Sprimme("C", "N", trunc_basisSize, basisSize, ldSV, 1.0, UVecs, ldUVecs, SW, ldSW, 0.0, UtSW, ldUtSW, ctx)); - CHKERR(Num_gemm_Sprimme("N", "C", trunc_basisSize, trunc_basisSize, basisSize, 1.0, UtSW, ldUtSW, VVecst, ldVVecst, 0.0, UtSWV, ldUtSWV, ctx)); + SCALAR *UtSWV; /* Left hand side of the generalized eigenvalue problem */ + SCALAR *UVecs; /* Left singular vectors of the SVD */ + SCALAR *VVecst; /* Right singular vectors of the SVD (transposed) */ + SCALAR *trunc_hVecs; /* The stabilized eigenvectors of H */ + SCALAR *Sigma; /* Right hand side of the generalized eigenvalue problem */ + SCALAR *temp_SV; /* Temporarily store SV while performing LaPack's GESVD */ + PRIMME_INT ldVVecst, ldUVecs, ldUtSW, ldUtSWV, ldSigma, ldtrunc_hVecs; /* Leading dimension of matrices */ + + ldUVecs = ldSV; + ldVVecst = basisSize; + ldSigma = ldUtSW = ldUtSWV = ldtrunc_hVecs = tbasisSize; + + CHKERR(Num_malloc_Sprimme(ldUVecs*basisSize, &UVecs, ctx)); + CHKERR(Num_malloc_Sprimme(ldVVecst*basisSize, &VVecst, ctx)); + CHKERR(Num_malloc_Sprimme(ldSV*basisSize, &temp_SV, ctx)); + CHKERR(Num_malloc_Sprimme(ldUtSWV*tbasisSize, &UtSWV, ctx)); + CHKERR(Num_malloc_Sprimme(ldtrunc_hVecs*tbasisSize, &trunc_hVecs, ctx)); + CHKERR(Num_malloc_Sprimme(ldSigma*tbasisSize, &Sigma, ctx)); + + /* Take the SVD decomposition of SV */ + CHKERR(Num_copy_matrix_Sprimme(&SV[0], ldSV, basisSize, ldSV, &temp_SV[0], ldSV, ctx)); + CHKERR(Num_gesvd_Sprimme("S", "S", ldSV, basisSize, temp_SV, ldSV, sing_vals, UVecs, ldUVecs, VVecst, ldVVecst, ctx)); + CHKERR(Num_free_Sprimme(temp_SV, ctx)); + + /* Construct Mass Matrix (Diagonal matrix with singular values as entries) */ + CHKERR(Num_zero_matrix_Sprimme(Sigma, tbasisSize, tbasisSize, ldSigma, ctx)); + for(i = 0; i < tbasisSize; i++) Sigma[i*ldSigma+i] = sing_vals[i]; + + /* Left hand side of the Eigenvalue problem (U'(SW)Vt'), where Vt is the transpose of the right singular vectors */ + CHKERR(Num_gemm_Sprimme("C", "N", tbasisSize, basisSize, ldSV, 1.0, UVecs, ldUVecs, SW, ldSW, 0.0, UtSW, ldUtSW, ctx)); + CHKERR(Num_gemm_Sprimme("N", "C", tbasisSize, tbasisSize, basisSize, 1.0, UtSW, ldUtSW, VVecst, ldVVecst, 0.0, UtSWV, ldUtSWV, ctx)); - /* Eigenvalue problem */ - CHKERR(Num_ggev_Sprimme("N", "V", trunc_basisSize, UtSWV, ldUtSWV, Sigma, ldSigma, ShVals, NULL, hVals_b, NULL, trunc_basisSize, trunc_hVecs, ldtrunc_hVecs, ctx)); - CHKERR(Num_gemm_Sprimme("C", "N", basisSize, trunc_basisSize, trunc_basisSize, 1.0, VVecst, ldVVecst, trunc_hVecs, ldtrunc_hVecs, 0.0, hVecs, ldhVecs, ctx)); + /* Eigenvalue problem */ + primme->stats.timeStabilization += primme_wTimer() - stab_time; // For monitoring + CHKERR(Num_ggev_Sprimme("N", "V", tbasisSize, UtSWV, ldUtSWV, Sigma, ldSigma, hVals_a, NULL, hVals_b, NULL, tbasisSize, trunc_hVecs, ldtrunc_hVecs, ctx)); + CHKERR(Num_gemm_Sprimme("C", "N", basisSize, tbasisSize, tbasisSize, 1.0, VVecst, ldVVecst, trunc_hVecs, ldtrunc_hVecs, 0.0, hVecs, ldhVecs, ctx)); - SCALAR normalize_hvecs; - for(i = 0; i < trunc_basisSize; i++) + CHKERR(Num_free_Sprimme(UVecs, ctx)); + CHKERR(Num_free_Sprimme(VVecst, ctx)); + CHKERR(Num_free_Sprimme(UtSWV, ctx)); + CHKERR(Num_free_Sprimme(trunc_hVecs, ctx)); + CHKERR(Num_free_Sprimme(Sigma, ctx)); + + /* XXX: Stabilization NOT needed */ + } else { + + /* Compute left-hand-side of the eigenproblem */ + CHKERR(Num_gemm_Sprimme("C", "N", basisSize, basisSize, ldSV, 1.0, SV, ldSV, SW, ldSW, 0.0, UtSW, basisSize, ctx)); + + /* Solve the eigenproblem */ + CHKERR(Num_ggev_Sprimme("N", "V", basisSize, UtSW, basisSize, T, ldT, hVals_a, NULL, hVals_b, NULL, basisSize, hVecs, ldhVecs, ctx)); + + } /* End eigenproblem (with or without stabilization) */ + + + for(i = 0; i < tbasisSize; i++) { - hVals[i] = REAL_PART(ShVals[i]/hVals_b[i]); + // Compute the returned hVals + hVals[i] = REAL_PART(hVals_a[i]/hVals_b[i]); eval_perm[i] = i; // Normalize the eigenvectors @@ -320,26 +362,22 @@ int sketched_RR_Sprimme(SCALAR *SV, PRIMME_INT ldSV, SCALAR *SW, PRIMME_INT ldSW } /* Sort the eigenpairs */ - for(i = 0; i < trunc_basisSize; i++) CHKERR(insertionSort_Rprimme(hVals[i], hVals, 0.0, NULL, 0, NULL, eval_perm, i, 0, ctx.primme)); + for(i = 0; i < tbasisSize; i++) CHKERR(insertionSort_Rprimme(hVals[i], hVals, 0.0, NULL, 0, NULL, eval_perm, i, 0, ctx.primme)); + CHKERR(permute_vecs_Sprimme(hVecs, basisSize, tbasisSize, ldhVecs, eval_perm, ctx)); - CHKERR(permute_vecs_Sprimme(hVecs, basisSize, trunc_basisSize, ldhVecs, eval_perm, ctx)); - - CHKERR(Num_free_Sprimme(UVecs, ctx)); - CHKERR(Num_free_Sprimme(VVecst, ctx)); + /* Free temporary storage */ CHKERR(Num_free_Sprimme(UtSW, ctx)); - CHKERR(Num_free_Sprimme(UtSWV, ctx)); - CHKERR(Num_free_Sprimme(trunc_hVecs, ctx)); - CHKERR(Num_free_Sprimme(ShVals, ctx)); + CHKERR(Num_free_Rprimme(sing_vals, ctx)); + CHKERR(Num_free_Sprimme(hVals_a, ctx)); CHKERR(Num_free_Sprimme(hVals_b, ctx)); - CHKERR(Num_free_Sprimme(Sigma, ctx)); CHKERR(Num_free_iprimme(eval_perm, ctx)); - CHKERR(Num_free_Rprimme(sing_vals, ctx)); - } + + } /* End if procID == 0 */ CHKERR(broadcast_SHprimme(hVecs, basisSize*ldhVecs, ctx)); CHKERR(broadcast_RHprimme(hVals, basisSize, ctx)); - if (primme) primme->stats.timeSketching += primme_wTimer() - t0; + if (primme) primme->stats.timeRR += primme_wTimer() - t0; return 0; #else