Code cleaning (pytorch#2320)
Revert "Revert CMakeLists.txt as build fails with clang due to unused variables (pytorch#2315)", so more compiler warning are enabled.
fixing compiler warnings. Verified on both gcc & clang-14
jjsjann123 committed Jan 12, 2023
1 parent cedd359 commit 1b6393c
Showing 21 changed files with 84 additions and 123 deletions.
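The pattern throughout the diff is the same: drop the blanket -Wno-unused-variable flags from CMake and instead silence individual, intentionally unused values at their declaration site, either with the c10 diagnostic macros (as in benchmark/matmul.cpp and csrc/utils.cpp below) or with a (void) cast on an unused loop index (as in test/test_gpu1.cpp). A minimal sketch of both idioms, not part of the commit, assuming PyTorch's c10 headers are on the include path; kExample is a hypothetical stand-in for constants like kAllSupportedLayout:

// Sketch only: the two warning-suppression idioms used in this commit.
// Assumes <c10/macros/Macros.h> and <c10/util/irange.h> are available.
#include <c10/macros/Macros.h>
#include <c10/util/irange.h>

#include <cstdio>

// Idiom 1: scoped suppression around a declaration that is intentionally
// unused in some build configurations (kExample is hypothetical).
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-variable")
static constexpr int kExample = 3;
C10_DIAGNOSTIC_POP()

int main() {
  // Idiom 2: only the iteration count matters, not the index, so the index
  // is voided explicitly rather than disabling the warning project-wide.
  for (const auto i : c10::irange(3)) {
    (void)i; // Suppress unused variable warning
    std::puts("iteration");
  }
  return 0;
}

Keeping -Werror while dropping -Wno-unused-variable means unused-variable warnings stay fatal everywhere except at these explicitly annotated sites.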
8 changes: 4 additions & 4 deletions third_party/nvfuser/CMakeLists.txt
@@ -132,6 +132,7 @@ list(APPEND NVFUSER_SRCS
)

add_library(${NVFUSER_CODEGEN} SHARED ${NVFUSER_SRCS})
torch_compile_options(${NVFUSER_CODEGEN})

if(NOT USE_ROCM)
target_compile_options(${NVFUSER_CODEGEN} PRIVATE "-DTORCH_CUDA_BUILD_MAIN_LIB")
@@ -158,7 +159,6 @@ else()
target_include_directories(${NVFUSER_CODEGEN} PRIVATE ${Caffe2_HIP_INCLUDE})
endif()
if(NOT MSVC)
target_compile_options(${NVFUSER_CODEGEN} PRIVATE -Wno-unused-variable)
target_compile_options(${NVFUSER_CODEGEN} PRIVATE -Werror)
endif()
target_include_directories(${NVFUSER_CODEGEN} PUBLIC
@@ -186,6 +186,7 @@ if(BUILD_PYTHON)
)

add_library(${NVFUSER} MODULE ${NVFUSER_PYTHON_SRCS})
torch_compile_options(${NVFUSER})
if(NOT USE_ROCM)
target_compile_options(${NVFUSER} PRIVATE "-DTORCH_CUDA_BUILD_MAIN_LIB")
# NB: This must be target_compile_definitions, not target_compile_options,
@@ -212,7 +213,6 @@ if(BUILD_PYTHON)
# avoid using Python3_add_library, copied from functorch
set_target_properties(${NVFUSER} PROPERTIES PREFIX "" DEBUG_POSTFIX "")
if(NOT MSVC)
target_compile_options(${NVFUSER} PRIVATE -Wno-unused-variable)
target_compile_options(${NVFUSER} PRIVATE -Werror)
set_target_properties(${NVFUSER} PROPERTIES SUFFIX ".so")
else()
@@ -339,6 +339,7 @@ if(BUILD_TEST)
${TORCH_ROOT}/test/cpp/jit/test_utils.cpp
${JIT_TEST_SRCS}
${JIT_TEST_CU_SRCS})
torch_compile_options(${NVFUSER_TESTS})

target_compile_definitions(${NVFUSER_TESTS} PRIVATE USE_GTEST)
if(NOT USE_ROCM)
@@ -349,7 +350,6 @@ if(BUILD_TEST)
target_include_directories(${NVFUSER_TESTS} PRIVATE "${NVFUSER_ROOT}" "${TORCH_ROOT}/torch/csrc/api/include/")
target_link_libraries(${NVFUSER_TESTS} PRIVATE ${NVFUSER_CODEGEN} torch ${TORCHLIB_FLAVOR} gtest_main gmock_main)
if(NOT MSVC)
target_compile_options(${NVFUSER_TESTS} PRIVATE -Wno-unused-variable)
set_property(SOURCE ${JIT_TEST_SRCS} APPEND PROPERTY COMPILE_OPTIONS "-Werror")
endif()

@@ -388,12 +388,12 @@ if(BUILD_NVFUSER_BENCHMARK)
list(APPEND BENCHMARK_SRCS ${NVFUSER_ROOT}/benchmark/main.cpp)

add_executable(${NVFUSER_BENCHMARK} ${BENCHMARK_SRCS})
torch_compile_options(${NVFUSER_BENCHMARK})
install(TARGETS ${NVFUSER_BENCHMARK} DESTINATION bin)
target_link_libraries(${NVFUSER_BENCHMARK} PRIVATE torch_library benchmark ${NVFUSER_CODEGEN})
target_include_directories(${NVFUSER_BENCHMARK} PRIVATE ${NVFUSER_ROOT})
if(NOT MSVC)
target_compile_options_if_supported(nvfuser_bench -Werror)
target_compile_options_if_supported(nvfuser_bench -Wno-unused-variable)
target_compile_options_if_supported(nvfuser_bench -Wno-deprecated-copy)
endif()

2 changes: 2 additions & 0 deletions third_party/nvfuser/benchmark/matmul.cpp
@@ -44,10 +44,12 @@ bool hasRequiredSmemSize(size_t required_size) {
// util to track support matmul operand layout.
using MatmulLayout = MmaOptions::MmaInputLayout;

C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-variable")
static constexpr std::array<MatmulLayout, 3> kAllSupportedLayout = {
MatmulLayout::TT,
MatmulLayout::NT,
MatmulLayout::TN};
C10_DIAGNOSTIC_POP()

// Generic interface to get matmul op with the given layout.
TensorView* matmul(TensorView* a, TensorView* b, MatmulLayout layout) {
5 changes: 0 additions & 5 deletions third_party/nvfuser/benchmark/reduction.cpp
@@ -38,11 +38,6 @@ static void setupReduction(Fusion* fusion, DataType dtype, int red_axis) {
}

fusion->addOutput(tv1_cast);

TensorView* output_of_reduction = nullptr;
if (is_fp16) {
output_of_reduction = tv1_cast;
}
}

static void NvFuserScheduler_Reduction(
2 changes: 0 additions & 2 deletions third_party/nvfuser/benchmark/timm.cpp
@@ -623,8 +623,6 @@ static void NvFuserScheduler_TIMM_vit_base_patch16_224_LN_BWD(
benchmark_state.range(2)};

at::manual_seed(0);
// auto bool_options = at::TensorOptions().dtype(at::kBool).device(at::kCUDA,
// 0);
auto fp16_options = at::TensorOptions().dtype(at::kHalf).device(at::kCUDA, 0);
auto fp32_options =
at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
6 changes: 4 additions & 2 deletions third_party/nvfuser/csrc/executor_utils.cpp
@@ -554,7 +554,8 @@ void validateAlignedVectorizeExtents(
" and ",
info.producer_tv->toString());

int64_t vectorized_merged_domain_extent = 1;
// TODO: Rewrite validation of the vectorized dimension
// int64_t vectorized_merged_domain_extent = 1;
for (auto id : info.contig_root_ids) {
auto extent_val = expr_eval.evaluate(id->extent());
TORCH_INTERNAL_ASSERT(
@@ -564,7 +565,8 @@
" as the extent of a vectorized root domain, ",
id->toString(),
", is unknown.");
vectorized_merged_domain_extent *= extent_val->as<int64_t>();
// TODO: Rewrite validation of the vectorized dimension
// vectorized_merged_domain_extent *= extent_val->as<int64_t>();
}

// TODO: Rewrite validation of the vectorized dimension, we can't just used a
@@ -52,8 +52,7 @@ TEST_F(NVFuserTest, KernelDb_Open_CUDA) {
fs::path test_cubin_file = test_db_path / "test1.cubin";
ASSERT_TRUE(Nvf::copy_to_text_file(test_cubin_file.string(), bad_text));
// Execute 1, 2, 3
auto& kernl_db =
Nvf::KernelDb::get(kernel_db_dir, kernel_db_file, true, false, true);
Nvf::KernelDb::get(kernel_db_dir, kernel_db_file, true, false, true);
// Check 1
ASSERT_TRUE(fs::is_regular_file(test_db_file));
// Check 2
@@ -36,7 +36,7 @@ TEST_F(NVFuserTest, PyFusionCache_CUDA) {
std::unique_ptr<RecordFunctor> null_record(nullptr);

try {
auto bad_cache_entry_ptr = fc->queryChildren(null_record.get());
fc->queryChildren(null_record.get());
FAIL() << "Should trigger an assert when the record is looked up!";
} catch (...) {
SUCCEED();
@@ -57,7 +57,7 @@ TEST_F(NVFuserTest, PyFusionCache_CUDA) {
}

try {
auto id = fc->createChild(null_record.get());
fc->createChild(null_record.get());
FAIL() << "Should trigger an assert when the record is looked up!";
} catch (...) {
SUCCEED();
@@ -117,7 +117,7 @@ TEST_F(NVFuserTest, PyFusionCache_CUDA) {

std::unique_ptr<RecordFunctor> end_record(new EndRecord());
try {
auto id = fc->createChild(end_record.get());
fc->createChild(end_record.get());
SUCCEED();
} catch (const std::exception& e) {
FAIL() << "An unexpected assert on Terminal Cache Entry creation!"
@@ -133,7 +133,7 @@ TEST_F(NVFuserTest, PyFusionCache_CUDA) {
}

try {
auto no_cache_entry_ptr = fc->queryChildren(test_record.get());
fc->queryChildren(test_record.get());
FAIL() << "Expected an assert from a terminal entry!";
} catch (...) {
SUCCEED();
@@ -203,7 +203,7 @@ TEST_F(NVFuserTest, PyFusionCache_CUDA) {

std::unique_ptr<RecordFunctor> end_record(new EndRecord());
try {
auto id = fc->createChild(end_record.get());
fc->createChild(end_record.get());
FAIL() << "Expected the cache to assert because it is full!";
} catch (...) {
SUCCEED();
@@ -244,7 +244,7 @@ TEST_F(NVFuserTest, PyFusionCache_CUDA) {

std::unique_ptr<RecordFunctor> end_record(new EndRecord());
try {
auto no_cache_entry_ptr = fc->queryChildren(end_record.get());
fc->queryChildren(end_record.get());
SUCCEED();
} catch (const std::exception& e) {
FAIL() << "An unexpected assert on cache lookup!" << e.what();
1 change: 0 additions & 1 deletion third_party/nvfuser/csrc/register_interface.cpp
@@ -216,7 +216,6 @@ RegisterOperators size_eq_guard({
return;
}

// auto inp = inputs[0].toIntList();
TORCH_INTERNAL_ASSERT(
inputs[1].isIntList(), "reference needs to be of int list");
auto ref = inputs[1].toIntList();
5 changes: 2 additions & 3 deletions third_party/nvfuser/csrc/utils.cpp
@@ -186,8 +186,7 @@ const auto& getEnableOptions() {

} // namespace

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-function"
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-function")
void debugPrint(const c10::TensorTypePtr& type) {
std::stringstream sizes_s;
if (auto sizes = type->symbolic_sizes().sizes()) {
@@ -234,7 +233,7 @@ void debugPrint(const c10::TensorTypePtr& type) {
std::cout << "no stride properties available" << std::endl;
}
}
#pragma clang diagnostic pop
C10_DIAGNOSTIC_POP()

bool is_zero_dim_tensor(const std::shared_ptr<c10::TensorType>& tensor_type) {
return tensor_type && tensor_type->dim().has_value() &&
13 changes: 7 additions & 6 deletions third_party/nvfuser/test/test_gpu1.cpp
@@ -6803,7 +6803,7 @@ TEST_F(NVFuserTest, FusionCacheBefore_CUDA) {
// Before: TV2 = TV1 * 3
// After: TV3 = TV1 * 3;
// TV2 = TV3;
TensorView* tv3 = tv2->cacheBefore();
tv2->cacheBefore();

constexpr int BSX = 32;
tv2->split(-1, BSX);
@@ -6841,7 +6841,7 @@ TEST_F(NVFuserTest, FusionCacheAfter_CUDA) {
// Before: TV1 = TV0 + 1
// After: TV3 = TV0;
// TV1 = TV3 + 1
TensorView* tv3 = tv0->cacheAfter();
tv0->cacheAfter();

constexpr int BSX = 32;
tv2->split(-1, BSX);
@@ -6885,7 +6885,7 @@ TEST_F(NVFuserTest, FusionCacheFork_CUDA) {
// Output: TV3, TV2

// cacheFork !!does not!! automatically apply ComputeAt to the cache
auto tv3 = tv1->cacheFork();
tv1->cacheFork();

constexpr int BSX = 32;
tv2->split(-1, BSX);
@@ -7516,7 +7516,8 @@ TEST_F(NVFuserTest, FusionMagicSchedulerLayerNormBackward_CUDA) {
for (const auto idx : c10::irange(kOuterNumDims)) {
outer_shape.push_back(shape[idx]);
}
for (const auto idx : c10::irange(kOuterNumDims, kM)) {
for (const auto i : c10::irange(kOuterNumDims, kM)) {
(void)i; // Suppress unused variable warning
outer_shape.push_back(1);
}

@@ -7604,7 +7605,8 @@ TEST_F(NVFuserTest, FusionMagicSchedulerRMSNormBackward_CUDA) {
for (const auto idx : c10::irange(kOuterNumDims)) {
outer_shape.push_back(shape[idx]);
}
for (const auto idx : c10::irange(kOuterNumDims, kM)) {
for (const auto i : c10::irange(kOuterNumDims, kM)) {
(void)i; // Suppress unused variable warning
outer_shape.push_back(1);
}

@@ -7884,7 +7886,6 @@ TEST_F(NVFuserTest, FusionMagicSchedulerInstanceNormalization_CUDA) {
FusionExecutorCache executor_cache(std::move(fusion));

auto cg_outputs = executor_cache.runFusionWithInputs(aten_inputs);
auto cg_outputs_full = {at_run_mean, at_run_var, cg_outputs[0]};

auto aten_outputs = at::instance_norm(
at_input,