MGTwoLevelTransfer: refactor

dealii · Oct 19, 2023 · a5835fc · a5835fc
1 parent b1b25ec
commit a5835fc
Show file tree

Hide file tree

Showing 5 changed files with 137 additions and 42 deletions.
diff --git a/include/deal.II/matrix_free/constraint_info.h b/include/deal.II/matrix_free/constraint_info.h
@@ -176,6 +176,11 @@ namespace internal
 
       inline const typename Number::value_type *
       constraint_pool_end(const unsigned int row) const;
+
+      // TODO
+      std::vector<types::global_dof_index> local_dof_indices;
+      std::vector<types::global_dof_index> local_dof_indices_lex;
+      std::vector<ConstraintKinds>         mask;
     };
 
 
@@ -318,10 +323,8 @@ namespace internal
       const dealii::AffineConstraints<typename Number::value_type> &constraints,
       const std::shared_ptr<const Utilities::MPI::Partitioner>     &partitioner)
     {
-      std::vector<types::global_dof_index> local_dof_indices(
-        cell->get_fe().n_dofs_per_cell());
-      std::vector<types::global_dof_index> local_dof_indices_lex(
-        cell->get_fe().n_dofs_per_cell());
+      local_dof_indices.resize(cell->get_fe().n_dofs_per_cell());
+      local_dof_indices_lex.resize(cell->get_fe().n_dofs_per_cell());
 
       if (mg_level == numbers::invalid_unsigned_int)
         cell->get_dof_indices(local_dof_indices);
@@ -375,7 +378,7 @@ namespace internal
           AssertIndexRange(cell_no, this->hanging_node_constraint_masks.size());
           AssertIndexRange(cell_no, this->active_fe_indices.size());
 
-          std::vector<ConstraintKinds> mask(cell->get_fe().n_components());
+          mask.resize(cell->get_fe().n_components());
           hanging_nodes->setup_constraints(
             cell, {}, lexicographic_numbering, local_dof_indices_lex, mask);
 

diff --git a/include/deal.II/multigrid/mg_transfer_global_coarsening.h b/include/deal.II/multigrid/mg_transfer_global_coarsening.h
@@ -103,6 +103,8 @@ namespace mg
   };
 } // namespace mg
 
+
+
 /**
  * Global coarsening utility functions.
  */
@@ -111,7 +113,7 @@ namespace MGTransferGlobalCoarseningTools
   /**
    * Common polynomial coarsening sequences.
    *
-   * @note These polynomial coarsening sequences up to a degree of 9 are
+   * @note These polynomial coarsening sequences up to a degree of 6 are
    *   precompiled in MGTwoLevelTransfer. See also:
    *   MGTwoLevelTransfer::fast_polynomial_transfer_supported()
    */
@@ -438,7 +440,7 @@ class MGTwoLevelTransferBase<LinearAlgebra::distributed::Vector<Number>>
 /**
  * Class for transfer between two multigrid levels for p- or global coarsening.
  *
- * The implementation of this class is explained in detail in @cite munch2022gc.
+ * @note For more details, see the template specializations.
  */
 template <int dim, typename VectorType>
 class MGTwoLevelTransfer : public MGTwoLevelTransferBase<VectorType>
@@ -487,7 +489,8 @@ class MGTwoLevelTransfer : public MGTwoLevelTransferBase<VectorType>
  * Class for transfer between two multigrid levels for p- or global coarsening.
  * Specialization for LinearAlgebra::distributed::Vector.
  *
- * The implementation of this class is explained in detail in @cite munch2022gc.
+ * The implementation of this class is explained in detail in @cite munch2022gc
+ * and in step-75.
  */
 template <int dim, typename Number>
 class MGTwoLevelTransfer<dim, LinearAlgebra::distributed::Vector<Number>>
@@ -498,8 +501,7 @@ class MGTwoLevelTransfer<dim, LinearAlgebra::distributed::Vector<Number>>
 public:
   /**
    * Set up global coarsening between the given DoFHandler objects (
-   * @p dof_handler_fine and @p dof_handler_coarse). The transfer
-   * can be only performed on active levels.
+   * @p dof_handler_fine and @p dof_handler_coarse).
    */
   void
   reinit_geometric_transfer(
@@ -519,7 +521,8 @@ class MGTwoLevelTransfer<dim, LinearAlgebra::distributed::Vector<Number>>
    * or on coarse-grid levels, i.e., levels without hanging nodes.
    *
    * @note The function polynomial_transfer_supported() can be used to
-   *   check if the given polynomial coarsening strategy is supported.
+   *   check whether fast evaluation of the given polynomial coarsening
+   *   strategy is supported.
    */
   void
   reinit_polynomial_transfer(
@@ -538,8 +541,8 @@ class MGTwoLevelTransfer<dim, LinearAlgebra::distributed::Vector<Number>>
    * underlying Triangulation objects polynomial or geometrical global
    * coarsening is performed.
    *
-   * @note While geometric transfer can be only performed on active levels
-   *   (`numbers::invalid_unsigned_int`), polynomial transfers can also be
+   * @note While geometric transfer can be performed on active levels
+   *   and any multigrid level, polynomial transfers can only be
    *   performed on coarse-grid levels, i.e., levels without hanging nodes.
    *
    * @note The function polynomial_transfer_supported() can be used to
@@ -561,7 +564,7 @@ class MGTwoLevelTransfer<dim, LinearAlgebra::distributed::Vector<Number>>
    *
    * @note Currently, the polynomial coarsening strategies: 1) go-to-one,
    *   2) bisect, and 3) decrease-by-one are precompiled with templates for
-   *   degrees up to 9.
+   *   degrees up to 6.
    */
   static bool
   fast_polynomial_transfer_supported(const unsigned int fe_degree_fine,
@@ -666,13 +669,6 @@ class MGTwoLevelTransfer<dim, LinearAlgebra::distributed::Vector<Number>>
      * 1d restriction matrix for tensor-product elements.
      */
     AlignedVector<VectorizedArrayType> restriction_matrix_1d;
-
-    /**
-     * ShapeInfo description of the coarse cell. Needed during the
-     * fast application of hanging-node constraints.
-     */
-    internal::MatrixFreeFunctions::ShapeInfo<VectorizedArrayType>
-      shape_info_coarse;
   };
 
   /**
@@ -800,11 +796,7 @@ class MGTwoLevelTransferNonNested<dim,
      */
     AdditionalData(const double       tolerance                = 1e-6,
                    const unsigned int rtree_level              = 0,
-                   const bool         enforce_all_points_found = true)
-      : tolerance(tolerance)
-      , rtree_level(rtree_level)
-      , enforce_all_points_found(enforce_all_points_found)
-    {}
+                   const bool         enforce_all_points_found = true);
 
     /**
      * Tolerance parameter. See the constructor of RemotePointEvaluation for
@@ -1011,7 +1003,8 @@ class MGTwoLevelTransferNonNested<dim,
  * for systems involving multiple components of one of these elements. Other
  * elements are currently not implemented.
  *
- * The implementation of this class is explained in detail in @cite munch2022gc.
+ * The implementation of this class is explained in detail in @cite munch2022gc
+ * and in step-75.
  */
 template <int dim, typename Number>
 class MGTransferMF : public dealii::MGLevelGlobalTransfer<
@@ -1031,7 +1024,7 @@ class MGTransferMF : public dealii::MGLevelGlobalTransfer<
   MGTransferMF();
 
   /**
-   * @name Global coarsening.
+   * @name Global coarsening
    */
   /** @{ */
 
@@ -1080,7 +1073,7 @@ class MGTransferMF : public dealii::MGLevelGlobalTransfer<
   /** @} */
 
   /**
-   * @name Local smoothing.
+   * @name Local smoothing
    */
   /** @{ */
 
@@ -1138,7 +1131,7 @@ class MGTransferMF : public dealii::MGLevelGlobalTransfer<
   /** @} */
 
   /**
-   * @name Transfer functions.
+   * @name Transfer functions
    */
   /** @{ */
 
@@ -1235,7 +1228,7 @@ class MGTransferMF : public dealii::MGLevelGlobalTransfer<
   /** @} */
 
   /**
-   * @name Utility functions.
+   * @name Utility functions
    */
   /** @{ */
 

diff --git a/include/deal.II/multigrid/mg_transfer_global_coarsening.templates.h b/include/deal.II/multigrid/mg_transfer_global_coarsening.templates.h
@@ -66,7 +66,16 @@ namespace
   class CellTransferFactory
   {
   public:
-    static const unsigned int max_degree = 9;
+    // Maximum degree to precompile. If no value is given by the user during
+    // compilation, we choose its value as FE_EVAL_FACTORY_DEGREE_MAX (default
+    // value 6).
+    static const unsigned int max_degree =
+#ifndef FE_EVAL_FACTORY_DEGREE_MAX
+      6
+#else
+      FE_EVAL_FACTORY_DEGREE_MAX
+#endif
+      ;
 
     CellTransferFactory(const unsigned int degree_fine,
                         const unsigned int degree_coarse)
@@ -827,7 +836,7 @@ namespace internal
               const auto cell_fine_raw =
                 dof_handler_fine.get_triangulation().create_cell_iterator(
                   cell_id);
-              return cell_fine_raw->as_dof_handler_iterator(dof_handler_fine)
+              cell_fine_raw->as_dof_handler_iterator(dof_handler_fine)
                 ->get_dof_indices(dof_indices);
             }
           else
@@ -1777,6 +1786,8 @@ namespace internal
       transfer.dof_handler_fine = &dof_handler_fine;
       transfer.mg_level_fine    = mg_level_fine;
 
+      auto temp_time = std::chrono::system_clock::now();
+
       std::unique_ptr<FineDoFHandlerViewBase<dim>> dof_handler_fine_view;
 
       if (internal::h_transfer_uses_first_child_policy(dof_handler_fine,
@@ -1853,6 +1864,13 @@ namespace internal
 
       const auto reference_cell = dof_handler_fine.get_fe(0).reference_cell();
 
+      const auto time0 = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                           std::chrono::system_clock::now() - temp_time)
+                           .count() /
+                         1e9;
+
+      temp_time = std::chrono::system_clock::now();
+
       // create partitioners and vectors for internal purposes
       {
         // ... for fine mesh
@@ -1875,6 +1893,14 @@ namespace internal
         }
       }
 
+
+      const auto time1 = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                           std::chrono::system_clock::now() - temp_time)
+                           .count() /
+                         1e9;
+
+      temp_time = std::chrono::system_clock::now();
+
       // helper function: to process the fine level cells; function @p fu_non_refined is
       // performed on cells that are not refined and @fu_refined is performed on
       // children of cells that are refined
@@ -1990,6 +2016,13 @@ namespace internal
           n_dof_indices_coarse[i + 1] += n_dof_indices_coarse[i];
         }
 
+      const auto time2 = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                           std::chrono::system_clock::now() - temp_time)
+                           .count() /
+                         1e9;
+
+      temp_time = std::chrono::system_clock::now();
+
       // indices
       {
         std::vector<types::global_dof_index> local_dof_indices(
@@ -2123,6 +2156,13 @@ namespace internal
         transfer.constraint_info_fine.finalize();
       }
 
+      const auto time3 = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                           std::chrono::system_clock::now() - temp_time)
+                           .count() /
+                         1e9;
+
+      temp_time = std::chrono::system_clock::now();
+
 
       // ------------- prolongation matrix (0) -> identity matrix --------------
 
@@ -2233,6 +2273,13 @@ namespace internal
           }
       }
 
+      const auto time4 = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                           std::chrono::system_clock::now() - temp_time)
+                           .count() /
+                         1e9;
+
+      temp_time = std::chrono::system_clock::now();
+
 
       // ------------------------------- weights -------------------------------
       if (transfer.fine_element_is_continuous)
@@ -2284,6 +2331,15 @@ namespace internal
           if (is_feq)
             compress_weights(transfer);
         }
+
+      const auto time5 = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                           std::chrono::system_clock::now() - temp_time)
+                           .count() /
+                         1e9;
+
+      if (false)
+        std::cout << time0 << " " << time1 << " " << time2 << " " << time3
+                  << " " << time4 << " " << time5 << std::endl;
     }
 
 
@@ -5546,6 +5602,18 @@ MGTwoLevelTransferNonNested<dim, LinearAlgebra::distributed::Vector<Number>>::
 }
 
 
+
+template <int dim, typename Number>
+MGTwoLevelTransferNonNested<dim, LinearAlgebra::distributed::Vector<Number>>::
+  AdditionalData::AdditionalData(const double       tolerance,
+                                 const unsigned int rtree_level,
+                                 const bool         enforce_all_points_found)
+  : tolerance(tolerance)
+  , rtree_level(rtree_level)
+  , enforce_all_points_found(enforce_all_points_found)
+{}
+
+
 DEAL_II_NAMESPACE_CLOSE
 
 #endif
diff --git a/source/multigrid/mg_transfer_matrix_free.cc b/source/multigrid/mg_transfer_matrix_free.cc
@@ -101,6 +101,8 @@ MGTransferMatrixFree<dim, Number>::build(
   const std::vector<std::shared_ptr<const Utilities::MPI::Partitioner>>
     &external_partitioners)
 {
+  auto temp_time = std::chrono::system_clock::now();
+
   Assert(dof_handler.has_level_dofs(),
          ExcMessage(
            "The underlying DoFHandler object has not had its "
@@ -123,6 +125,15 @@ MGTransferMatrixFree<dim, Number>::build(
 
   this->fill_and_communicate_copy_indices(dof_handler);
 
+
+
+  const auto time0 = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                       std::chrono::system_clock::now() - temp_time)
+                       .count() /
+                     1e9;
+
+  temp_time = std::chrono::system_clock::now();
+
   vector_partitioners.resize(0,
                              dof_handler.get_triangulation().n_global_levels() -
                                1);
@@ -159,6 +170,15 @@ MGTransferMatrixFree<dim, Number>::build(
     else
       this->ghosted_level_vector[level].reinit(vector_partitioners[level]);
 
+
+
+  const auto time1 = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                       std::chrono::system_clock::now() - temp_time)
+                       .count() /
+                     1e9;
+
+  temp_time = std::chrono::system_clock::now();
+
   // unpack element info data
   fe_degree             = elem_info.fe_degree;
   element_is_continuous = elem_info.element_is_continuous;
@@ -195,6 +215,17 @@ MGTransferMatrixFree<dim, Number>::build(
         }
     }
 
+
+
+  const auto time2 = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                       std::chrono::system_clock::now() - temp_time)
+                       .count() /
+                     1e9;
+
+  temp_time = std::chrono::system_clock::now();
+
+  std::cout << time0 << " " << time1 << " " << time2 << std::endl;
+
   evaluation_data.resize(n_child_cell_dofs);
 }