From 68502efcdecc427889d1aff7639fe432062b4069 Mon Sep 17 00:00:00 2001 From: jeremylt Date: Fri, 9 Apr 2021 16:58:44 -0600 Subject: [PATCH] wip - bddc --- examples/petsc/Makefile | 7 +- examples/petsc/bddc.c | 578 ++++++++++++++++++++++++++ examples/petsc/bddc.h | 55 +++ examples/petsc/include/libceedsetup.h | 4 + examples/petsc/include/matops.h | 1 + examples/petsc/include/petscutils.h | 2 + examples/petsc/include/structs.h | 20 + examples/petsc/src/libceedsetup.c | 114 ++++- examples/petsc/src/matops.c | 43 ++ examples/petsc/src/petscutils.c | 14 + 10 files changed, 831 insertions(+), 7 deletions(-) create mode 100644 examples/petsc/bddc.c create mode 100644 examples/petsc/bddc.h diff --git a/examples/petsc/Makefile b/examples/petsc/Makefile index 8912b2f593..430b7a2509 100644 --- a/examples/petsc/Makefile +++ b/examples/petsc/Makefile @@ -46,6 +46,11 @@ area.o = $(area.c:%.c=$(OBJDIR)/%.o) area: $(area.o) | $(PETSc.pc) $(ceed.pc) $(call quiet,LINK.o) $(CEED_LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@ +bddc.c := bddc.c $(utils.c) +bddc.o = $(bddc.c:%.c=$(OBJDIR)/%.o) +bddc: $(bddc.o) | $(PETSc.pc) $(ceed.pc) + $(call quiet,LINK.o) $(CEED_LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@ + bps.c := bps.c $(utils.c) bps.o = $(bps.c:%.c=$(OBJDIR)/%.o) bps: $(bps.o) | $(PETSc.pc) $(ceed.pc) @@ -89,7 +94,7 @@ print: $(PETSc.pc) $(ceed.pc) @true clean: - $(RM) -r $(OBJDIR) *.vtu area bps bpsraw bpssphere multigrid + $(RM) -r $(OBJDIR) *.vtu area bddc bps bpsraw bpssphere multigrid $(PETSc.pc): $(if $(wildcard $@),,$(error \ diff --git a/examples/petsc/bddc.c b/examples/petsc/bddc.c new file mode 100644 index 0000000000..762b3b8198 --- /dev/null +++ b/examples/petsc/bddc.c @@ -0,0 +1,578 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +// reserved. See files LICENSE and NOTICE for details. 
+// +// This file is part of CEED, a collection of benchmarks, miniapps, software +// libraries and APIs for efficient high-order finite element and spectral +// element discretizations for exascale applications. For more information and +// source code availability see http://github.com/ceed. +// +// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, +// a collaborative effort of two U.S. Department of Energy organizations (Office +// of Science and the National Nuclear Security Administration) responsible for +// the planning and preparation of a capable exascale ecosystem, including +// software, applications, hardware, advanced system engineering and early +// testbed platforms, in support of the nation's exascale computing imperative. + +// libCEED + PETSc Example: CEED BPs 3-6 with BDDC +// +// This example demonstrates a simple usage of libCEED with PETSc to solve the +// CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps. +// +// The code uses higher level communication protocols in DMPlex. 
+// +// Build with: +// +// make bddc [PETSC_DIR=] [CEED_DIR=] +// +// Sample runs: +// +// bddc -problem bp3 +// bddc -problem bp4 +// bddc -problem bp5 -ceed /cpu/self +// bddc -problem bp6 -ceed /gpu/cuda +// +//TESTARGS -ceed {ceed_resource} -test -problem bp3 -degree 3 + +/// @file +/// CEED BPs 3-6 BDDC example using PETSc +const char help[] = "Solve CEED BPs using BDDC with PETSc and DMPlex\n"; + +#include "bddc.h" + +// The BDDC example uses vectors in three spaces +// +// Fine mesh: Broken mesh: Vertex mesh: +// x----x----x x----x x----x x x x +// | | | | | | | +// | | | | | | | +// | | | | | | | +// | | | | | | | +// x----x----x x----x x----x x x x +// +// Vectors are organized as follows +// - *_Pi : vector on the vertex mesh +// - *_r : vector on the broken mesh, all points but vertices +// - *_Gamma : vector on the broken mesh, face/vertex/edge points +// - *_I : vector on the broken mesh, interior points +// - * : all other vectors are on the fine mesh + +int main(int argc, char **argv) { + PetscInt ierr; + MPI_Comm comm; + char filename[PETSC_MAX_PATH_LEN], + ceed_resource[PETSC_MAX_PATH_LEN] = "/cpu/self"; + double my_rt_start, my_rt, rt_min, rt_max; + PetscInt degree = 3, q_extra, l_size, xl_size, g_size, l_Pi_size, + xl_Pi_size, g_Pi_size, dim = 3, m_elem[3] = {3, 3, 3}, num_comp_u = 1; + PetscScalar *r; + PetscScalar eps = 1.0; + PetscBool test_mode, benchmark_mode, read_mesh, write_solution; + PetscLogStage solve_stage; + DM dm, dm_Pi; + KSP ksp; + PC pc; + Mat mat_O, mat_Pi, mat_pr; + Vec X, X_loc, X_Pi, X_Pi_loc, rhs, rhs_loc, mult, mask_r, mask_Gamma, + mask_I; + PetscMemType mem_type; + UserO user_O, user_Pi; + UserBDDC user_bddc; + Ceed ceed; + CeedData ceed_data + CeedDataBDDC ceed_data_bddc; + CeedVector rhs_ceed, mult_ceed, target; + CeedQFunction qf_error, qf_restrict, qf_prolong; + CeedOperator op_error; + BPType bp_choice; + CoarsenType coarsen; + + ierr = PetscInitialize(&argc, &argv, NULL, help); + if (ierr) return ierr; + comm =
PETSC_COMM_WORLD; + + // Parse command line options + ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); CHKERRQ(ierr); + bp_choice = CEED_BP3; + ierr = PetscOptionsEnum("-problem", + "CEED benchmark problem to solve", NULL, + bp_types, (PetscEnum)bp_choice, (PetscEnum *)&bp_choice, + NULL); CHKERRQ(ierr); + num_comp_u = bp_options[bp_choice].num_comp_u; + test_mode = PETSC_FALSE; + ierr = PetscOptionsBool("-test", + "Testing mode (do not print unless error is large)", + NULL, test_mode, &test_mode, NULL); CHKERRQ(ierr); + benchmark_mode = PETSC_FALSE; + ierr = PetscOptionsBool("-benchmark", + "Benchmarking mode (prints benchmark statistics)", + NULL, benchmark_mode, &benchmark_mode, NULL); + CHKERRQ(ierr); + write_solution = PETSC_FALSE; + ierr = PetscOptionsBool("-write_solution", + "Write solution for visualization", + NULL, write_solution, &write_solution, NULL); + CHKERRQ(ierr); + ierr = PetscOptionsScalar("-eps", + "Epsilon parameter for Kershaw mesh transformation", + NULL, eps, &eps, NULL); + if (eps > 1 || eps <= 0) SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, + "-eps %D must be (0,1]", eps); + degree = test_mode ? 
3 : 2; + ierr = PetscOptionsInt("-degree", "Polynomial degree of tensor product basis", + NULL, degree, °ree, NULL); CHKERRQ(ierr); + if (degree < 2) SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, + "-degree %D must be at least 2", degree); + q_extra = bp_options[bp_choice].q_extra; + ierr = PetscOptionsInt("-q_extra", "Number of extra quadrature points", + NULL, q_extra, &q_extra, NULL); CHKERRQ(ierr); + ierr = PetscOptionsString("-ceed", "CEED resource specifier", + NULL, ceed_resource, ceed_resource, + sizeof(ceed_resource), NULL); CHKERRQ(ierr); + coarsen = COARSEN_UNIFORM; + ierr = PetscOptionsEnum("-injection", + "Injection strategy to use", NULL, + coarsen_types, (PetscEnum)coarsen, + (PetscEnum *)&coarsen, NULL); CHKERRQ(ierr); + read_mesh = PETSC_FALSE; + ierr = PetscOptionsString("-mesh", "Read mesh from file", NULL, + filename, filename, sizeof(filename), &read_mesh); + CHKERRQ(ierr); + if (!read_mesh) { + PetscInt tmp = dim; + ierr = PetscOptionsIntArray("-cells","Number of cells per dimension", NULL, + m_elem, &tmp, NULL); CHKERRQ(ierr); + } + ierr = PetscOptionsEnd(); CHKERRQ(ierr); + + // Set up libCEED + CeedInit(ceed_resource, &ceed); + CeedMemType mem_type_backend; + CeedGetPreferredMemType(ceed, &mem_type_backend); + + // Setup DMs + if (read_mesh) { + ierr = DMPlexCreateFromFile(PETSC_COMM_WORLD, filename, PETSC_TRUE, &dm_orig); + CHKERRQ(ierr); + } else { + ierr = DMPlexCreateBoxMesh(PETSC_COMM_WORLD, dim, PETSC_FALSE, m_elem, NULL, + NULL, NULL, PETSC_TRUE, &dm_orig); CHKERRQ(ierr); + } + + { + DM dm_dist = NULL; + PetscPartitioner part; + + ierr = DMPlexGetPartitioner(dm_orig, &part); CHKERRQ(ierr); + ierr = PetscPartitionerSetFromOptions(part); CHKERRQ(ierr); + ierr = DMPlexDistribute(dm_orig, 0, NULL, &dm_dist); CHKERRQ(ierr); + if (dm_dist) { + ierr = DMDestroy(&dm_orig); CHKERRQ(ierr); + dm = dm_dist; + } + } + ierr = DMClone(dm, &dm_Pi); CHKERRQ(ierr); + + // Apply Kershaw mesh transformation + ierr = Kershaw(dm, eps); 
CHKERRQ(ierr); + + VecType vec_type; + switch (mem_type_backend) { + case CEED_MEM_HOST: vec_type = VECSTANDARD; break; + case CEED_MEM_DEVICE: { + const char *resolved; + CeedGetResource(ceed, &resolved); + if (strstr(resolved, "/gpu/cuda")) vec_type = VECCUDA; + else if (strstr(resolved, "/gpu/hip/occa")) + vec_type = VECSTANDARD; // https://github.com/CEED/libCEED/issues/678 + else if (strstr(resolved, "/gpu/hip")) vec_type = VECHIP; + else vec_type = VECSTANDARD; + } + } + + // Setup DM + ierr = DMSetVecType(dm, vec_type); CHKERRQ(ierr); + ierr = DMSetFromOptions(dm); CHKERRQ(ierr); + ierr = SetupDMByDegree(dm, degree, num_comp_u, dim, + bp_options[bp_choice].enforce_bc, bp_options[bp_choice].bc_func); + CHKERRQ(ierr); + + // Set up subdomain vertex DM + ierr = DMClone(dm, &dm_Pi); CHKERRQ(ierr); + ierr = DMSetVecType(dm_Pi, vec_type); CHKERRQ(ierr); + ierr = SetupVertexDMFromDM(dm, dm_Pi, num_comp_u, + bp_options[bp_choice].enforce_bc, + bp_options[bp_choice].bc_func); + CHKERRQ(ierr); + + // Create vectors + ierr = DMCreateGlobalVector(dm, &X); CHKERRQ(ierr); + ierr = VecGetLocalSize(X, &l_size); CHKERRQ(ierr); + ierr = VecGetSize(X, &g_size); CHKERRQ(ierr); + ierr = DMCreateLocalVector(dm, &X_loc); CHKERRQ(ierr); + ierr = VecGetSize(X_loc &xl_size); CHKERRQ(ierr); + ierr = DMCreateGlobalVector(dm_Pi, &X); CHKERRQ(ierr); + ierr = VecGetLocalSize(X_Pi, &l_Pi_size); CHKERRQ(ierr); + ierr = VecGetSize(X_Pi, &g_Pi_size); CHKERRQ(ierr); + ierr = DMCreateLocalVector(dm_Pi, &X_Pi_loc); CHKERRQ(ierr); + ierr = VecGetSize(X_Pi_loc &xl_Pi_size); CHKERRQ(ierr); + + // Operator + ierr = PetscMalloc1(1, &user_O); CHKERRQ(ierr); + ierr = MatCreateShell(comm, l_size, l_size, g_size, g_size, + user_O, &mat_O); CHKERRQ(ierr); + ierr = MatShellSetOperation(mat_O, MATOP_MULT, + (void(*)(void))MatMult_Ceed); CHKERRQ(ierr); + ierr = MatShellSetOperation(mat_O, MATOP_GET_DIAGONAL, + (void(*)(void))MatGetDiag); CHKERRQ(ierr); + ierr = MatShellSetVecType(mat_O, vec_type); 
CHKERRQ(ierr); + + // Interface vertex operator + ierr = PetscMalloc1(1, &user_Pi); CHKERRQ(ierr); + ierr = MatCreateShell(comm, l_Pi_size, l_Pi_size, g_Pi_size, + g_Pi_size, user_Pi, &mat_Pi); CHKERRQ(ierr); + ierr = MatShellSetOperation(mat_O, MATOP_MULT, + (void(*)(void))MatMult_Ceed); CHKERRQ(ierr); + ierr = MatShellSetOperation(mat_Pi, MATOP_GET_DIAGONAL, + (void(*)(void))MatGetDiag); CHKERRQ(ierr); + ierr = MatShellSetVecType(mat_Pi, vec_type); CHKERRQ(ierr); + + // Injection operator + ierr = PetscMalloc1(1, &user_pr); CHKERRQ(ierr); + ierr = MatCreateShell(comm, l_size, l_Pi_size, g_size, g_Pi_size, + user_pr, &mat_pr); CHKERRQ(ierr); + ierr = MatShellSetOperation(mat_pr, MATOP_MULT, + (void(*)(void))MatMult_Inject); CHKERRQ(ierr); + ierr = MatShellSetOperation(mat_pr, MATOP_MULT_TRANSPOSE, + (void(*)(void))MatMult_Inject_T); CHKERRQ(ierr); + ierr = MatShellSetVecType(mat_pr, vec_type); CHKERRQ(ierr); + + // Print global grid information + if (!test_mode) { + PetscInt P = degree + 1, Q = P + q_extra; + + const char *used_resource; + CeedGetResource(ceed, &used_resource); + + ierr = VecGetType(X, &vec_type); CHKERRQ(ierr); + + ierr = PetscPrintf(comm, + "\n-- CEED Benchmark Problem %d -- libCEED + PETSc + BDDC --\n" + " PETSc:\n" + " PETSc Vec Type : %s\n" + " libCEED:\n" + " libCEED Backend : %s\n" + " libCEED Backend MemType : %s\n" + " Mesh:\n" + " Number of 1D Basis Nodes (p) : %d\n" + " Number of 1D Quadrature Points (q) : %d\n" + " Global Nodes : %D\n" + " Owned Nodes : %D\n" + " DoF per node : %D\n", + bp_choice+1, vec_type, used_resource, + CeedMemTypes[mem_type_backend], + P, Q, g_size/num_comp_u, l_size/num_comp_u, + num_comp_u); CHKERRQ(ierr); + } + + // Create RHS vector + ierr = VecDuplicate(X, &rhs); CHKERRQ(ierr); + ierr = VecDuplicate(X_loc, &rhs_loc); CHKERRQ(ierr); + ierr = VecZeroEntries(rhs_loc); CHKERRQ(ierr); + ierr = VecGetArrayAndMemType(rhs_loc, &r, &mem_type); CHKERRQ(ierr); + CeedVectorCreate(ceed, xl_size, &rhs_ceed); + 
CeedVectorSetArray(rhs_ceed, MemTypeP2C(mem_type), CEED_USE_POINTER, r); + + // Set up libCEED operator + ierr = PetscMalloc1(1, &ceed_data); CHKERRQ(ierr); + ierr = SetupLibceedByDegree(dm, ceed, degree, dim, q_extra, + dim, num_comp_u, g_size, xl_size, bp_options[bp_choice], + ceed_data, true, rhs_ceed, &target); + CHKERRQ(ierr); + + // Set up libCEED operator on interface vertices + ierr = PetscMalloc1(1, &ceed_data_bddc); CHKERRQ(ierr); + ierr = SetupLibceedBDDC(dm, ceed_data, ceed_data_bddc, g_Pi_size, + xl_Pi_size, bp_options[bp_choice]); + CHKERRQ(ierr); + + // Gather RHS + CeedVectorTakeArray(rhs_ceed, MemTypeP2C(mem_type), NULL); + ierr = VecRestoreArrayAndMemType(rhs_loc, &r); CHKERRQ(ierr); + ierr = VecZeroEntries(rhs); CHKERRQ(ierr); + ierr = DMLocalToGlobal(dm, rhs_loc, ADD_VALUES, rhs); CHKERRQ(ierr); + CeedVectorDestroy(&rhs_ceed); + + // Create the injection/restriction QFunction + CeedQFunctionCreateIdentity(ceed, num_comp_u, CEED_EVAL_NONE, CEED_EVAL_INTERP, + &qf_restrict); + CeedQFunctionCreateIdentity(ceed, num_comp_u, CEED_EVAL_INTERP, CEED_EVAL_NONE, + &qf_prolong); + + // Create the error QFunction + CeedQFunctionCreateInterior(ceed, 1, bp_options[bp_choice].error, + bp_options[bp_choice].error_loc, &qf_error); + CeedQFunctionAddInput(qf_error, "u", num_comp_u, CEED_EVAL_INTERP); + CeedQFunctionAddInput(qf_error, "true_soln", num_comp_u, CEED_EVAL_NONE); + CeedQFunctionAddOutput(qf_error, "error", num_comp_u, CEED_EVAL_NONE); + + // Create the error operator + CeedOperatorCreate(ceed, qf_error, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, + &op_error); + CeedOperatorSetField(op_error, "u", ceed_data[fine_level]->elem_restr_u, + ceed_data[fine_level]->basis_u, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_error, "true_soln", + ceed_data[fine_level]->elem_restr_u_i, + CEED_BASIS_COLLOCATED, target); + CeedOperatorSetField(op_error, "error", ceed_data[fine_level]->elem_restr_u_i, + CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); + + // PETSc 
pointwise mult vectors + // -- Calculate multiplicity + { + ierr = VecSet(X_loc, 1.0); CHKERRQ(ierr); + + // Local-to-global + ierr = VecZeroEntries(X); CHKERRQ(ierr); + ierr = DMLocalToGlobal(dm, X_loc, ADD_VALUES, X); + CHKERRQ(ierr); + ierr = VecZeroEntries(X_loc); CHKERRQ(ierr); + + // Global-to-local + ierr = DMGlobalToLocal(dm, X, INSERT_VALUES, X_loc); + CHKERRQ(ierr); + ierr = VecZeroEntries(X); CHKERRQ(ierr); + + // CEED vector + PetscScalar *x; + ierr = VecGetArrayAndMemType(X_loc, &x, &mem_type); CHKERRQ(ierr); + CeedInt len; + CeedVectorGetLength(ceed_data->x_ceed, &len);) + CeedVectorCreate(ceed_data->ceed, len, &mult_ceed); + CeedVectorSetArray(mult_ceed, MemTypeP2C(mem_type), CEED_USE_POINTER, x); + + // Multiplicity + CeedVector e_vec; + CeedElemRestrictionCreateVector(ceed_data->elem_restr_u, NULL, &e_vec); + CeedVectorSetValue(e_vec, 0.0); + CeedElemRestrictionApply(ceed_data->elem_restr_u, CEED_NOTRANSPOSE, mult_ceed, + e_vec, CEED_REQUEST_IMMEDIATE); + CeedVectorSetValue(mult_ceed, 0.0); + CeedElemRestrictionApply(ceed_data->elem_restr_u, CEED_TRANSPOSE, e_vec, + mult_ceed, CEED_REQUEST_IMMEDIATE); + CeedVectorSyncArray(mult_ceed, MemTypeP2C(mem_type)); + CeedVectorDestroy(&e_vec); + + // Restore vector + ierr = VecRestoreArrayAndMemType(X_loc, &x); CHKERRQ(ierr); + + // Multiplicity scaling + ierr = VecReciprocal(X_loc); + + // Copy to Ceed vector + ierr = VecGetArrayAndMemType(X_loc, &x, &mem_type); CHKERRQ(ierr); + CeedVectorSetArray(mult_ceed, MemTypeP2C(mem_type), CEED_COPY_VALUES, x); + ierr = VecRestoreArrayAndMemType(X_loc, &x); CHKERRQ(ierr); + ierr = VecZeroEntries(X_loc); CHKERRQ(ierr); + } + + // -- Masks for subdomains + { + CeedInt length_r; + CeedVectorGetLength(ceed_data_bddc->x_r_ceed, &length_r);) + ierr = VecDuplicate(X_loc, &mask_r); CHKERRQ(ierr); + ierr = VecSetSizes(mask_r, length_r, PETSC_DECIDE); CHKERRQ(ierr); + ierr = VecDuplicate(mask_r, &mask_Gamma); CHKERRQ(ierr); + ierr = VecDuplicate(mask_r, &mask_I); 
CHKERRQ(ierr); + + // Set mask contents + CeedScalar *mask_r_array, *mask_Gamma_array, *mask_I_array; + CeedInt num_elem, elem_size; + CeedElemRestrictionGetNumElements(ceed_data_bddc->elem_restr_r, &num_elem); + CeedElemRestrictionGetElementSize(ceed_data_bddc->elem_restr_r, &elem_size); + ierr = VecGetArray(mask_r_ceed, &mask_r_array); + ierr = VecGetArray(mask_Gamma_ceed, &mask_Gamma_array); + ierr = VecGetArray(mask_I_ceed, &mask_I_array); + for (CeedInt e=0; estrides[0]*n + ceed_data_bddc->strides[1]*c + + ceed_data_bddc->strides[2]*e; + mask_r_array[index] = r ? 1.0 : 0.0; + mask_Gamma_array[index] = Gamma ? 1.0 : 0.0; + mask_I_array[index] = I ? 1.0 : 0.0; + } + } + } + ierr = VecRestoreArray(mask_r_ceed, &mask_r_array); CHKERRQ(ierr); + ierr = VecRestoreArray(mask_Gamma_ceed, &mask_Gamma_array); CHKERRQ(ierr); + ierr = VecRestoreArray(mask_I_ceed, &mask_I_array); CHKERRQ(ierr); + } + + // Set up MatShell user data + user_O->comm = comm; + user_O->dm = dm; + user_O->X_loc = X_loc; + ierr = VecDuplicate(X_loc, &user_O->Y_loc); CHKERRQ(ierr); + user_O->x_ceed = ceed_data->x_ceed; + user_O->y_ceed = ceed_data->y_ceed; + user_O->op = ceed_data->op_apply; + user_O->ceed = ceed; + +// TODO + // Set up PCShell user data +// TODO + + // Set up KSP + ierr = KSPCreate(comm, &ksp); CHKERRQ(ierr); + { + ierr = KSPSetType(ksp, KSPCG); CHKERRQ(ierr); + ierr = KSPSetNormType(ksp, KSP_NORM_NATURAL); CHKERRQ(ierr); + ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, + PETSC_DEFAULT); CHKERRQ(ierr); + } + ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); + ierr = KSPSetOperators(ksp, mat_O, mat_O); CHKERRQ(ierr); + + // Set up PCShell + ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); + { + ierr = PCSetType(pc, PCSHELL); CHKERRQ(ierr); + ierr = PCShellSetContext(pc, user_bddc); CHKERRQ(ierr); + ierr = PCShellSetApply(pc, (void(*)(void))PCShellApply_BDDC); CHKERRQ(ierr); +// TODO + //ierr = PCShellSetSetup(pc, ); CHKERRQ(ierr); +// TODO + } + + // First run, if 
benchmarking + if (benchmark_mode) { + ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1); + CHKERRQ(ierr); + ierr = VecZeroEntries(X); CHKERRQ(ierr); + my_rt_start = MPI_Wtime(); + ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); + my_rt = MPI_Wtime() - my_rt_start; + ierr = MPI_Allreduce(MPI_IN_PLACE, &my_rt, 1, MPI_DOUBLE, MPI_MIN, comm); + CHKERRQ(ierr); + // Set maxits based on first iteration timing + if (my_rt > 0.02) { + ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 5); + CHKERRQ(ierr); + } else { + ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 20); + CHKERRQ(ierr); + } + } + + // Timed solve + ierr = VecZeroEntries(X); CHKERRQ(ierr); + ierr = PetscBarrier((PetscObject)ksp); CHKERRQ(ierr); + + // -- Performance logging + ierr = PetscLogStageRegister("Solve Stage", &solve_stage); CHKERRQ(ierr); + ierr = PetscLogStagePush(solve_stage); CHKERRQ(ierr); + + // -- Solve + my_rt_start = MPI_Wtime(); + ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); + my_rt = MPI_Wtime() - my_rt_start; + + // -- Performance logging + ierr = PetscLogStagePop(); + + // Output results + { + KSPType ksp_type; + KSPConvergedReason reason; + PCType pc_type; + PetscReal rnorm; + PetscInt its; + ierr = KSPGetType(ksp, &ksp_type); CHKERRQ(ierr); + ierr = KSPGetConvergedReason(ksp, &reason); CHKERRQ(ierr); + ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr); + ierr = KSPGetResidualNorm(ksp, &rnorm); CHKERRQ(ierr); + ierr = PCGetType(pc, &pc_type); CHKERRQ(ierr); + if (!test_mode || reason < 0 || rnorm > 1e-8) { + ierr = PetscPrintf(comm, + " KSP:\n" + " KSP Type : %s\n" + " KSP Convergence : %s\n" + " Total KSP Iterations : %D\n" + " Final rnorm : %e\n", + ksp_type, KSPConvergedReasons[reason], its, + (double)rnorm); CHKERRQ(ierr); + ierr = PetscPrintf(comm, + " BDDC:\n" + " PC Type : %s\n" + PCTypes[pc_type]); CHKERRQ(ierr); + } + if (!test_mode) { + ierr = PetscPrintf(comm," Performance:\n"); CHKERRQ(ierr); + } + { + PetscReal 
max_error; + ierr = ComputeErrorMax(user_O, op_error, X, target, + &max_error); CHKERRQ(ierr); + PetscReal tol = 5e-2; + if (!test_mode || max_error > tol) { + ierr = MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, comm); + CHKERRQ(ierr); + ierr = MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, comm); + CHKERRQ(ierr); + ierr = PetscPrintf(comm, + " Pointwise Error (max) : %e\n" + " CG Solve Time : %g (%g) sec\n", + (double)max_error, rt_max, rt_min); CHKERRQ(ierr); + } + } + if (benchmark_mode && (!test_mode)) { + ierr = PetscPrintf(comm, + " DoFs/Sec in CG : %g (%g) million\n", + 1e-6*g_size*its/rt_max, + 1e-6*g_size*its/rt_min); + CHKERRQ(ierr); + } + } + + if (write_solution) { + PetscViewer vtk_viewer_soln; + + ierr = PetscViewerCreate(comm, &vtk_viewer_soln); CHKERRQ(ierr); + ierr = PetscViewerSetType(vtk_viewer_soln, PETSCVIEWERVTK); CHKERRQ(ierr); + ierr = PetscViewerFileSetName(vtk_viewer_soln, "solution.vtu"); CHKERRQ(ierr); + ierr = VecView(X, vtk_viewer_soln); CHKERRQ(ierr); + ierr = PetscViewerDestroy(&vtk_viewer_soln); CHKERRQ(ierr); + } + + // Cleanup + ierr = VecDestroy(&X); CHKERRQ(ierr); + ierr = VecDestroy(&X_loc); CHKERRQ(ierr); + ierr = VecDestroy(&mult); CHKERRQ(ierr); + ierr = VecDestroy(&user_O->Y_loc); CHKERRQ(ierr); + ierr = MatDestroy(&mat_O); CHKERRQ(ierr); + ierr = PetscFree(user_O); CHKERRQ(ierr); + ierr = MatDestroy(&mat_pr); CHKERRQ(ierr); + ierr = PetscFree(user_pr); CHKERRQ(ierr); + ierr = CeedDataDestroy(i, ceed_data); CHKERRQ(ierr); + ierr = CeedDataBDDCDestroy(i, ceed_data_bddc); CHKERRQ(ierr); + ierr = DMDestroy(&dm); CHKERRQ(ierr); + ierr = DMDestroy(&dm_Pi); CHKERRQ(ierr); + ierr = VecDestroy(&rhs); CHKERRQ(ierr); + ierr = VecDestroy(&rhs_loc); CHKERRQ(ierr); + ierr = KSPDestroy(&ksp); CHKERRQ(ierr); + CeedVectorDestroy(&target); + CeedQFunctionDestroy(&qf_error); + CeedQFunctionDestroy(&qf_restrict); + CeedQFunctionDestroy(&qf_prolong); + CeedOperatorDestroy(&op_error); + CeedDestroy(&ceed); + return 
PetscFinalize(); +} diff --git a/examples/petsc/bddc.h b/examples/petsc/bddc.h new file mode 100644 index 0000000000..5fab5c6606 --- /dev/null +++ b/examples/petsc/bddc.h @@ -0,0 +1,55 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +// reserved. See files LICENSE and NOTICE for details. +// +// This file is part of CEED, a collection of benchmarks, miniapps, software +// libraries and APIs for efficient high-order finite element and spectral +// element discretizations for exascale applications. For more information and +// source code availability see http://github.com/ceed. +// +// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, +// a collaborative effort of two U.S. Department of Energy organizations (Office +// of Science and the National Nuclear Security Administration) responsible for +// the planning and preparation of a capable exascale ecosystem, including +// software, applications, hardware, advanced system engineering and early +// testbed platforms, in support of the nation's exascale computing imperative. + +#ifndef bddc_h +#define bddc_h + +#include "include/bpsproblemdata.h" +#include "include/petscmacros.h" +#include "include/petscutils.h" +#include "include/matops.h" +#include "include/structs.h" +#include "include/libceedsetup.h" + +#include +#include +#include +#include +#include +#include +#include + +#if PETSC_VERSION_LT(3,12,0) +#ifdef PETSC_HAVE_CUDA +#include +// Note: With PETSc prior to version 3.12.0, providing the source path to +// include 'cublas_v2.h' will be needed to use 'petsccuda.h'. 
+#endif +#endif + +// ----------------------------------------------------------------------------- +// Command Line Options +// ----------------------------------------------------------------------------- + +// Injection options +typedef enum { + INJECTION_SCALED = 0, INJECTION_HARMONIC = 1 +} InjectionType; +static const char *const injection_types [] = {"scaled", "harmonic", + "InjectionType", "INJECTION", 0 + }; + +#endif // bddc_h diff --git a/examples/petsc/include/libceedsetup.h b/examples/petsc/include/libceedsetup.h index 0f41310fff..16b702a064 100644 --- a/examples/petsc/include/libceedsetup.h +++ b/examples/petsc/include/libceedsetup.h @@ -17,5 +17,9 @@ PetscErrorCode SetupLibceedByDegree(DM dm, Ceed ceed, CeedInt degree, PetscErrorCode CeedLevelTransferSetup(Ceed ceed, CeedInt num_levels, CeedInt num_comp_u, CeedData *data, CeedInt *leveldegrees, CeedQFunction qf_restrict, CeedQFunction qf_prolong); +PetscErrorCode SetupLibceedBDDC(DM dm_vertex, CeedData data_fine, + CeedData data_vertex, + PetscInt g_vertex_size, PetscInt xl_vertex_size, + BPData bp_data); #endif // libceedsetup_h diff --git a/examples/petsc/include/matops.h b/examples/petsc/include/matops.h index fa416e730e..4215bfa74d 100644 --- a/examples/petsc/include/matops.h +++ b/examples/petsc/include/matops.h @@ -13,6 +13,7 @@ PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y); PetscErrorCode FormResidual_Ceed(SNES snes, Vec X, Vec Y, void *ctx); PetscErrorCode MatMult_Prolong(Mat A, Vec X, Vec Y); PetscErrorCode MatMult_Restrict(Mat A, Vec X, Vec Y); +PetscErrorCode PCShellApply_BDDC(Mat A, Vec X, Vec Y); PetscErrorCode ComputeErrorMax(UserO user, CeedOperator op_error, Vec X, CeedVector target, PetscReal *max_error); diff --git a/examples/petsc/include/petscutils.h b/examples/petsc/include/petscutils.h index a208febdf8..74933dfde5 100644 --- a/examples/petsc/include/petscutils.h +++ b/examples/petsc/include/petscutils.h @@ -18,6 +18,8 @@ typedef PetscErrorCode (*BCFunction)(PetscInt dim,
PetscReal time, PetscErrorCode SetupDMByDegree(DM dm, PetscInt degree, PetscInt num_comp_u, PetscInt topo_dim, bool enforce_bc, BCFunction bc_func); +PetscErrorCode SetupVertexDMFromDM(DM dm, DM dm_vertex, PetscInt num_comp_u, + bool enforce_bc, BCFunction bc_func); PetscErrorCode CreateRestrictionFromPlex(Ceed ceed, DM dm, CeedInt P, CeedInt topo_dim, CeedInt height, DMLabel domain_label, CeedInt value, CeedElemRestriction *elem_restr); diff --git a/examples/petsc/include/structs.h b/examples/petsc/include/structs.h index 63f0abc7d9..e2dc010990 100644 --- a/examples/petsc/include/structs.h +++ b/examples/petsc/include/structs.h @@ -29,6 +29,16 @@ struct UserProlongRestr_ { CeedOperator op_prolong, op_restrict; Ceed ceed; }; +// Data for PETSc PCshell +typedef struct UserBDDC_ *UserBDDC; +struct UserBDDC_ { + MPI_Comm comm; + DM dm, dm_Pi; + Vec X_loc, Y_loc, diag; + CeedVector x_ceed, y_ceed; + CeedOperator op; + Ceed ceed; +}; // ----------------------------------------------------------------------------- // libCEED Data Structs @@ -45,6 +55,16 @@ struct CeedData_ { CeedVector q_data, x_ceed, y_ceed; }; +// libCEED data struct for BDDC +typedef struct CeedDataBDDC_ *CeedDataBDDC; +struct CeedDataBDDC_ { + CeedBasis basis_Pi; + CeedInt strides[3]; + CeedElemRestriction elem_restr_Pi, elem_restr_r; + CeedOperator op_Pi_r, op_r_Pi, op_Pi_Pi, op_r_r, op_r_r_inv; + CeedVector x_Pi_ceed, y_Pi_ceed, x_r_ceed, y_r_ceed, mult_ceed; +}; + // BP specific data typedef struct { CeedInt num_comp_x, num_comp_u, topo_dim, q_data_size, q_extra; diff --git a/examples/petsc/src/libceedsetup.c b/examples/petsc/src/libceedsetup.c index acefefb4e7..3f1c3e708e 100644 --- a/examples/petsc/src/libceedsetup.c +++ b/examples/petsc/src/libceedsetup.c @@ -57,11 +57,9 @@ PetscErrorCode SetupLibceedByDegree(DM dm, Ceed ceed, CeedInt degree, P = degree + 1; Q = P + q_extra; CeedBasisCreateTensorH1Lagrange(ceed, topo_dim, num_comp_u, P, Q, - bp_data.q_mode, - &basis_u); + bp_data.q_mode, 
&basis_u); CeedBasisCreateTensorH1Lagrange(ceed, topo_dim, num_comp_x, 2, Q, - bp_data.q_mode, - &basis_x); + bp_data.q_mode, &basis_x); CeedBasisGetNumQuadraturePoints(basis_u, &num_qpts); // CEED restrictions @@ -155,8 +153,7 @@ PetscErrorCode SetupLibceedByDegree(DM dm, Ceed ceed, CeedInt degree, CeedOperatorSetField(op_setup_rhs, "x", elem_restr_x, basis_x, CEED_VECTOR_ACTIVE); CeedOperatorSetField(op_setup_rhs, "q_data", elem_restr_qd_i, - CEED_BASIS_COLLOCATED, - q_data); + CEED_BASIS_COLLOCATED, q_data); CeedOperatorSetField(op_setup_rhs, "true_soln", elem_restr_u_i, CEED_BASIS_COLLOCATED, *target); CeedOperatorSetField(op_setup_rhs, "rhs", elem_restr_u, basis_u, @@ -254,4 +251,109 @@ PetscErrorCode CeedLevelTransferSetup(Ceed ceed, CeedInt num_levels, PetscFunctionReturn(0); }; +// ----------------------------------------------------------------------------- +// Set up libCEED for BDDC interface vertices +// ----------------------------------------------------------------------------- +PetscErrorCode SetupLibceedBDDC(DM dm_vertex, CeedData data_fine, + CeedDataBDDC data_vertex, + PetscInt g_vertex_size, PetscInt xl_vertex_size, +BPData bp_data { + int ierr; + Ceed ceed = data_fine->ceed; + CeedBasis basis_Pi, basis_u = data_fine->basis_u; + CeedElemRestriction elem_restr_Pi, elem_restr_r; + CeedOperator op_Pi_r, op_r_Pi, op_Pi_Pi, op_r_r, op_r_r_inv,; + CeedVector x_Pi_ceed, y_Pi_ceed, x_r_ceed, y_r_ceed, mask_r_ceed, mask_Gamma_ceed, mask_I_ceed; + CeedInt topo_dim, num_comp_u, P, Q, num_qpts, num_elem, elem_size, + q_data_size = bp_data.q_data_size; + + // CEED basis + CeedBasisGetDimension(basis_u, &topo_dim); + CeedBasisGetNumComponents(basis_u, &num_comp_u); + CeedBasisGetNumNodes1D(basis_u, &P); + elem_size = CeedIntPow(P, topo_dim); + CeedBasisGetNumQuadraturePoints1D(basis_u, &Q); + CeedBasisGetNumQuadraturePoints(basis_u, &num_qpts); + CeedScalar *interp_1d, *grad_1d, *q_ref_1d, *q_weight_1d; + interp_1d = calloc(2*Q * sizeof(CeedScalar)); + 
CeedScalar *temp; + CeedBasisGetInterp1D(basis_u, &temp); + memcpy(interp_1d, temp, Q * sizeof(CeedScalar)); + memcpy(&interp_1d[1*Q], temp[(P-1)*Q], Q * sizeof(CeedScalar)); + grad_1d = calloc(2*Q * sizeof(CeedScalar)); + CeedBasisGetGrad1D(basis_u, &temp); + memcpy(grad_1d, temp, Q * sizeof(CeedScalar)); + memcpy(&grad_1d[1*Q], temp[(P-1)*Q], Q * sizeof(CeedScalar)); + q_ref_1d = calloc(Q * sizeof(CeedScalar)); + CeedBasisGetQRef(basis_u, &temp); + memcpy(q_ref_1d, temp, Q * sizeof(CeedScalar)); + q_weight_1d = calloc(Q * sizeof(CeedScalar)); + CeedBasisGetQWeights(basis_u, &temp); + memcpy(q_weight_1d, temp, Q * sizeof(CeedScalar)); + CeedBasisCreateTensorH1(ceed, topo_dim, num_comp_u, 1, Q, + interp_1d, grad_1d, q_ref_1d, + q_weight_1d, &basis_Pi); + + // CEED restrictions + // -- Interface vertex restriction + ierr = CreateRestrictionFromPlex(ceed, dm_vertex, P, topo_dim, 0, 0, 0, &elem_restr_Pi); + CHKERRQ(ierr); + + // -- Subdomain restriction + ierr = DMPlexGetHeightStratum(dm_vertex, 0, &c_start, &c_end); CHKERRQ(ierr); + num_elem = c_end - c_start; + CeedInt strides = [num_comp_u, 1, num_comp_u*elem_size]; + CeedElemRestrictionCreateStrided(ceed, num_elem, elem_size, num_comp_u, + num_comp_u *num_elem*elem_size, + strides, &elem_restr_r); + + // Create the persistent vectors that will be needed + CeedVectorCreate(ceed, xl_vertex_size, &x_Pi_ceed); + CeedVectorCreate(ceed, xl_vertex_size, &y_Pi_ceed); + CeedVectorCreate(ceed, num_comp_u *elem_size*num_elem, &x_r_ceed); + CeedVectorCreate(ceed, num_comp_u *elem_size*num_elem, &y_r_ceed); + + // Create the mass or diff operator + CeedQFunction qf_apply = data_fine->qf_apply; + // -- Interface nodes + CeedOperatorSetField(op_Pi_Pi, "u", elem_restr_Pi, basis_u, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_Pi_Pi, "q_data", data_fine->elem_restr_qd_i, + CEED_BASIS_COLLOCATED, data_fine->q_data); + CeedOperatorSetField(op_Pi_Pi, "v", elem_restr_Pi, basis_u, CEED_VECTOR_ACTIVE); + // -- Interface vertices to 
subdomain + CeedOperatorSetField(op_r_Pi, "u", elem_restr_r, basis_u, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_r_Pi, "q_data", data_fine->elem_restr_qd_i, + CEED_BASIS_COLLOCATED, data_fine->q_data); + CeedOperatorSetField(op_r_Pi, "v", elem_restr_Pi, basis_u, CEED_VECTOR_ACTIVE); + // -- Subdomain to interface vertices + CeedOperatorSetField(op_Pi_r, "u", elem_restr_Pi, basis_u, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_Pi_r, "q_data", data_fine->elem_restr_qd_i, + CEED_BASIS_COLLOCATED, data_fine->q_data); + CeedOperatorSetField(op_Pi_r, "v", elem_restr_r, basis_u, CEED_VECTOR_ACTIVE); + // -- Subdomain to subdomain + CeedOperatorSetField(op_r_r, "u", elem_restr_r, basis_u, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_r_r, "q_data", data_fine->elem_restr_qd_i, + CEED_BASIS_COLLOCATED, data_fine->q_data); + CeedOperatorSetField(op_r_r, "v", elem_restr_r, basis_u, CEED_VECTOR_ACTIVE); + // -- Subdomain FDM inverse + CeedOperatorCreateFDMElementInverse(op_r_r, &op_r_r_inv, CEED_REQUEST_IMMEDIATE); + + // Save libCEED data required for level + data_vertex->basis_Pi = basis_Pi; + data_vertex->elem_restr_Pi = elem_restr_Pi; + data_vertex->elem_restr_r = elem_restr_r; + data_vertex->op_Pi_r = op_Pi_r; + data_vertex->op_r_Pi = op_r_Pi; + data_vertex->op_Pi_Pi = op_Pi_Pi; + data_vertex->op_r_r = op_r_r; + data_vertex->op_r_r_inv = op_r_r_inv; + data_vertex->x_Pi_ceed = x_Pi_ceed; + data_vertex->y_Pi_ceed = y_Pi_ceed; + data_vertex->x_r_ceed = x_r_ceed; + data_vertex->y_r_ceed = y_r_ceed; + + PetscFunctionReturn(0); +}; + + // ----------------------------------------------------------------------------- diff --git a/examples/petsc/src/matops.c b/examples/petsc/src/matops.c index d71366295a..4b0761a23b 100644 --- a/examples/petsc/src/matops.c +++ b/examples/petsc/src/matops.c @@ -205,6 +205,49 @@ PetscErrorCode MatMult_Restrict(Mat A, Vec X, Vec Y) { PetscFunctionReturn(0); }; +// ----------------------------------------------------------------------------- +// 
This function outlines the libCEED BDDC preconditioner action (WIP stub)
+// -----------------------------------------------------------------------------
+PetscErrorCode MatMult_Prolong(Mat A, Vec X, Vec Y) {
+  PetscErrorCode ierr;
+  UserBDDC user;
+
+  PetscFunctionBeginUser;
+
+  // Inject to broken space
+  // -- Scaled injection, point multiply by 1/multiplicity
+  // -- Harmonic injection, scaled with jump map
+  // ---- A_I,I^-1
+  // ---- A_Gamma,I
+  // ---- J^T (jump map)
+  // ---- X_r -= J^T A_Gamma,I A_I,I^-1 X_r
+  // ---- X_Pi copy nodal values from X_r
+
+  // K_u^-1 - update nodal values from subdomain
+  // -- A_r,r^-1
+  // -- A_Pi,r
+  // -- X_Pi -= A_Pi,r A_r,r^-1 X_Pi
+
+  // P - subdomain and Schur complement solve
+  // -- X_r = A_r,r^-1 X_r
+  // -- X_Pi = S_Pi^-1
+
+  // K_u^-T - update subdomain values from nodal
+  // -- A_r,Pi
+  // -- A_r,r^-1
+  // -- X_r -= A_r,r^-1 A_r,Pi X_Pi
+
+  // Restrict to fine space
+  // -- Scaled restriction, point multiply by 1/multiplicity
+  // -- Harmonic injection, scaled with jump map
+  // ---- J^T (jump map)
+  // ---- A_I,Gamma
+  // ---- A_I,I^-1
+  // ---- X -= A_I,I^-1 A_Gamma,I J^T X_r
+
+  PetscFunctionReturn(0);
+};
+
 // -----------------------------------------------------------------------------
 // This function calculates the error in the final solution
 // -----------------------------------------------------------------------------
diff --git a/examples/petsc/src/petscutils.c b/examples/petsc/src/petscutils.c
index 9e393efb27..658d4363a9 100644
--- a/examples/petsc/src/petscutils.c
+++ b/examples/petsc/src/petscutils.c
@@ -221,6 +221,24 @@ PetscErrorCode SetupDMByDegree(DM dm, PetscInt degree, PetscInt num_comp_u,
   PetscFunctionReturn(0);
 };
 
+// -----------------------------------------------------------------------------
+// This function sets up a BDDC vertex only DM from an existing fine DM
+//
+// The fine DM is only queried for its dimension; dm_vertex is then configured
+// by SetupDMByDegree() with degree 1, num_comp_u, and the given BC settings
+// -----------------------------------------------------------------------------
+PetscErrorCode SetupVertexDMFromDM(DM dm, DM dm_vertex, PetscInt num_comp_u,
+                                   bool enforce_bc, BCFunction bc_func) {
+  PetscErrorCode ierr;
+  PetscInt dim;
+
+  PetscFunctionBeginUser;
+  ierr = DMGetDimension(dm, &dim); CHKERRQ(ierr);
+  ierr = SetupDMByDegree(dm_vertex, 1, num_comp_u, dim, enforce_bc, bc_func);
+  CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+};
+
 // -----------------------------------------------------------------------------
 // Utility function - essential BC dofs are encoded in closure indices as -(i+1)
 // -----------------------------------------------------------------------------