Code cleaning (pytorch#2320)
Revert "Revert CMakeLists.txt as build fails with clang due to unused variables (pytorch#2315)", so more compiler warning are enabled.
fixing compiler warnings. Verified on both gcc & clang-14
jjsjann123 committed Jan 12, 2023
1 parent cedd359 commit 1b6393c
Showing 21 changed files with 84 additions and 123 deletions.
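The pattern throughout the diff is the same: drop the blanket -Wno-unused-variable flags from CMake and instead silence individual, intentionally unused values at their declaration site, either with the c10 diagnostic macros (as in benchmark/matmul.cpp and csrc/utils.cpp below) or with a (void) cast on an unused loop index (as in test/test_gpu1.cpp). A minimal sketch of both idioms, not part of the commit, assuming PyTorch's c10 headers are on the include path; kExample is a hypothetical stand-in for constants like kAllSupportedLayout:

// Sketch only: the two warning-suppression idioms used in this commit.
// Assumes <c10/macros/Macros.h> and <c10/util/irange.h> are available.
#include <c10/macros/Macros.h>
#include <c10/util/irange.h>

#include <cstdio>

// Idiom 1: scoped suppression around a declaration that is intentionally
// unused in some build configurations (kExample is hypothetical).
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-variable")
static constexpr int kExample = 3;
C10_DIAGNOSTIC_POP()

int main() {
  // Idiom 2: only the iteration count matters, not the index, so the index
  // is voided explicitly rather than disabling the warning project-wide.
  for (const auto i : c10::irange(3)) {
    (void)i; // Suppress unused variable warning
    std::puts("iteration");
  }
  return 0;
}

Keeping -Werror while dropping -Wno-unused-variable means unused-variable warnings stay fatal everywhere except at these explicitly annotated sites.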
8 changes: 4 additions & 4 deletions third_party/nvfuser/CMakeLists.txt
@@ -132,6 +132,7 @@ list(APPEND NVFUSER_SRCS
)

add_library(${NVFUSER_CODEGEN} SHARED ${NVFUSER_SRCS})
torch_compile_options(${NVFUSER_CODEGEN})

if(NOT USE_ROCM)
target_compile_options(${NVFUSER_CODEGEN} PRIVATE "-DTORCH_CUDA_BUILD_MAIN_LIB")
@@ -158,7 +159,6 @@ else()
target_include_directories(${NVFUSER_CODEGEN} PRIVATE ${Caffe2_HIP_INCLUDE})
endif()
if(NOT MSVC)
target_compile_options(${NVFUSER_CODEGEN} PRIVATE -Wno-unused-variable)
target_compile_options(${NVFUSER_CODEGEN} PRIVATE -Werror)
endif()
target_include_directories(${NVFUSER_CODEGEN} PUBLIC
@@ -186,6 +186,7 @@ if(BUILD_PYTHON)
)

add_library(${NVFUSER} MODULE ${NVFUSER_PYTHON_SRCS})
torch_compile_options(${NVFUSER})
if(NOT USE_ROCM)
target_compile_options(${NVFUSER} PRIVATE "-DTORCH_CUDA_BUILD_MAIN_LIB")
# NB: This must be target_compile_definitions, not target_compile_options,
@@ -212,7 +213,6 @@ if(BUILD_PYTHON)
# avoid using Python3_add_library, copied from functorch
set_target_properties(${NVFUSER} PROPERTIES PREFIX "" DEBUG_POSTFIX "")
if(NOT MSVC)
target_compile_options(${NVFUSER} PRIVATE -Wno-unused-variable)
target_compile_options(${NVFUSER} PRIVATE -Werror)
set_target_properties(${NVFUSER} PROPERTIES SUFFIX ".so")
else()
@@ -339,6 +339,7 @@ if(BUILD_TEST)
${TORCH_ROOT}/test/cpp/jit/test_utils.cpp
${JIT_TEST_SRCS}
${JIT_TEST_CU_SRCS})
torch_compile_options(${NVFUSER_TESTS})

target_compile_definitions(${NVFUSER_TESTS} PRIVATE USE_GTEST)
if(NOT USE_ROCM)
@@ -349,7 +350,6 @@ if(BUILD_TEST)
target_include_directories(${NVFUSER_TESTS} PRIVATE "${NVFUSER_ROOT}" "${TORCH_ROOT}/torch/csrc/api/include/")
target_link_libraries(${NVFUSER_TESTS} PRIVATE ${NVFUSER_CODEGEN} torch ${TORCHLIB_FLAVOR} gtest_main gmock_main)
if(NOT MSVC)
target_compile_options(${NVFUSER_TESTS} PRIVATE -Wno-unused-variable)
set_property(SOURCE ${JIT_TEST_SRCS} APPEND PROPERTY COMPILE_OPTIONS "-Werror")
endif()

@@ -388,12 +388,12 @@ if(BUILD_NVFUSER_BENCHMARK)
list(APPEND BENCHMARK_SRCS ${NVFUSER_ROOT}/benchmark/main.cpp)

add_executable(${NVFUSER_BENCHMARK} ${BENCHMARK_SRCS})
torch_compile_options(${NVFUSER_BENCHMARK})
install(TARGETS ${NVFUSER_BENCHMARK} DESTINATION bin)
target_link_libraries(${NVFUSER_BENCHMARK} PRIVATE torch_library benchmark ${NVFUSER_CODEGEN})
target_include_directories(${NVFUSER_BENCHMARK} PRIVATE ${NVFUSER_ROOT})
if(NOT MSVC)
target_compile_options_if_supported(nvfuser_bench -Werror)
target_compile_options_if_supported(nvfuser_bench -Wno-unused-variable)
target_compile_options_if_supported(nvfuser_bench -Wno-deprecated-copy)
endif()

2 changes: 2 additions & 0 deletions third_party/nvfuser/benchmark/matmul.cpp
@@ -44,10 +44,12 @@ bool hasRequiredSmemSize(size_t required_size) {
// util to track support matmul operand layout.
using MatmulLayout = MmaOptions::MmaInputLayout;

C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-variable")
static constexpr std::array<MatmulLayout, 3> kAllSupportedLayout = {
MatmulLayout::TT,
MatmulLayout::NT,
MatmulLayout::TN};
C10_DIAGNOSTIC_POP()

// Generic interface to get matmul op with the given layout.
TensorView* matmul(TensorView* a, TensorView* b, MatmulLayout layout) {
5 changes: 0 additions & 5 deletions third_party/nvfuser/benchmark/reduction.cpp
@@ -38,11 +38,6 @@ static void setupReduction(Fusion* fusion, DataType dtype, int red_axis) {
}

fusion->addOutput(tv1_cast);

TensorView* output_of_reduction = nullptr;
if (is_fp16) {
output_of_reduction = tv1_cast;
}
}

static void NvFuserScheduler_Reduction(
2 changes: 0 additions & 2 deletions third_party/nvfuser/benchmark/timm.cpp
@@ -623,8 +623,6 @@ static void NvFuserScheduler_TIMM_vit_base_patch16_224_LN_BWD(
benchmark_state.range(2)};

at::manual_seed(0);
// auto bool_options = at::TensorOptions().dtype(at::kBool).device(at::kCUDA,
// 0);
auto fp16_options = at::TensorOptions().dtype(at::kHalf).device(at::kCUDA, 0);
auto fp32_options =
at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
6 changes: 4 additions & 2 deletions third_party/nvfuser/csrc/executor_utils.cpp
@@ -554,7 +554,8 @@ void validateAlignedVectorizeExtents(
" and ",
info.producer_tv->toString());

int64_t vectorized_merged_domain_extent = 1;
// TODO: Rewrite validation of the vectorized dimension
// int64_t vectorized_merged_domain_extent = 1;
for (auto id : info.contig_root_ids) {
auto extent_val = expr_eval.evaluate(id->extent());
TORCH_INTERNAL_ASSERT(
@@ -564,7 +565,8 @@
" as the extent of a vectorized root domain, ",
id->toString(),
", is unknown.");
vectorized_merged_domain_extent *= extent_val->as<int64_t>();
// TODO: Rewrite validation of the vectorized dimension
// vectorized_merged_domain_extent *= extent_val->as<int64_t>();
}

// TODO: Rewrite validation of the vectorized dimension, we can't just used a
@@ -52,8 +52,7 @@ TEST_F(NVFuserTest, KernelDb_Open_CUDA) {
fs::path test_cubin_file = test_db_path / "test1.cubin";
ASSERT_TRUE(Nvf::copy_to_text_file(test_cubin_file.string(), bad_text));
// Execute 1, 2, 3
auto& kernl_db =
Nvf::KernelDb::get(kernel_db_dir, kernel_db_file, true, false, true);
Nvf::KernelDb::get(kernel_db_dir, kernel_db_file, true, false, true);
// Check 1
ASSERT_TRUE(fs::is_regular_file(test_db_file));
// Check 2
@@ -36,7 +36,7 @@ TEST_F(NVFuserTest, PyFusionCache_CUDA) {
std::unique_ptr<RecordFunctor> null_record(nullptr);

try {
auto bad_cache_entry_ptr = fc->queryChildren(null_record.get());
fc->queryChildren(null_record.get());
FAIL() << "Should trigger an assert when the record is looked up!";
} catch (...) {
SUCCEED();
@@ -57,7 +57,7 @@ TEST_F(NVFuserTest, PyFusionCache_CUDA) {
}

try {
auto id = fc->createChild(null_record.get());
fc->createChild(null_record.get());
FAIL() << "Should trigger an assert when the record is looked up!";
} catch (...) {
SUCCEED();
@@ -117,7 +117,7 @@ TEST_F(NVFuserTest, PyFusionCache_CUDA) {

std::unique_ptr<RecordFunctor> end_record(new EndRecord());
try {
auto id = fc->createChild(end_record.get());
fc->createChild(end_record.get());
SUCCEED();
} catch (const std::exception& e) {
FAIL() << "An unexpected assert on Terminal Cache Entry creation!"
@@ -133,7 +133,7 @@ TEST_F(NVFuserTest, PyFusionCache_CUDA) {
}

try {
auto no_cache_entry_ptr = fc->queryChildren(test_record.get());
fc->queryChildren(test_record.get());
FAIL() << "Expected an assert from a terminal entry!";
} catch (...) {
SUCCEED();
@@ -203,7 +203,7 @@ TEST_F(NVFuserTest, PyFusionCache_CUDA) {

std::unique_ptr<RecordFunctor> end_record(new EndRecord());
try {
auto id = fc->createChild(end_record.get());
fc->createChild(end_record.get());
FAIL() << "Expected the cache to assert because it is full!";
} catch (...) {
SUCCEED();
@@ -244,7 +244,7 @@ TEST_F(NVFuserTest, PyFusionCache_CUDA) {

std::unique_ptr<RecordFunctor> end_record(new EndRecord());
try {
auto no_cache_entry_ptr = fc->queryChildren(end_record.get());
fc->queryChildren(end_record.get());
SUCCEED();
} catch (const std::exception& e) {
FAIL() << "An unexpected assert on cache lookup!" << e.what();
1 change: 0 additions & 1 deletion third_party/nvfuser/csrc/register_interface.cpp
@@ -216,7 +216,6 @@ RegisterOperators size_eq_guard({
return;
}

// auto inp = inputs[0].toIntList();
TORCH_INTERNAL_ASSERT(
inputs[1].isIntList(), "reference needs to be of int list");
auto ref = inputs[1].toIntList();
5 changes: 2 additions & 3 deletions third_party/nvfuser/csrc/utils.cpp
@@ -186,8 +186,7 @@ const auto& getEnableOptions() {

} // namespace

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-function"
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-function")
void debugPrint(const c10::TensorTypePtr& type) {
std::stringstream sizes_s;
if (auto sizes = type->symbolic_sizes().sizes()) {
@@ -234,7 +233,7 @@ void debugPrint(const c10::TensorTypePtr& type) {
std::cout << "no stride properties available" << std::endl;
}
}
#pragma clang diagnostic pop
C10_DIAGNOSTIC_POP()

bool is_zero_dim_tensor(const std::shared_ptr<c10::TensorType>& tensor_type) {
return tensor_type && tensor_type->dim().has_value() &&
13 changes: 7 additions & 6 deletions third_party/nvfuser/test/test_gpu1.cpp
@@ -6803,7 +6803,7 @@ TEST_F(NVFuserTest, FusionCacheBefore_CUDA) {
// Before: TV2 = TV1 * 3
// After: TV3 = TV1 * 3;
// TV2 = TV3;
TensorView* tv3 = tv2->cacheBefore();
tv2->cacheBefore();

constexpr int BSX = 32;
tv2->split(-1, BSX);
@@ -6841,7 +6841,7 @@ TEST_F(NVFuserTest, FusionCacheAfter_CUDA) {
// Before: TV1 = TV0 + 1
// After: TV3 = TV0;
// TV1 = TV3 + 1
TensorView* tv3 = tv0->cacheAfter();
tv0->cacheAfter();

constexpr int BSX = 32;
tv2->split(-1, BSX);
@@ -6885,7 +6885,7 @@ TEST_F(NVFuserTest, FusionCacheFork_CUDA) {
// Output: TV3, TV2

// cacheFork !!does not!! automatically apply ComputeAt to the cache
auto tv3 = tv1->cacheFork();
tv1->cacheFork();

constexpr int BSX = 32;
tv2->split(-1, BSX);
@@ -7516,7 +7516,8 @@ TEST_F(NVFuserTest, FusionMagicSchedulerLayerNormBackward_CUDA) {
for (const auto idx : c10::irange(kOuterNumDims)) {
outer_shape.push_back(shape[idx]);
}
for (const auto idx : c10::irange(kOuterNumDims, kM)) {
for (const auto i : c10::irange(kOuterNumDims, kM)) {
(void)i; // Suppress unused variable warning
outer_shape.push_back(1);
}

@@ -7604,7 +7605,8 @@ TEST_F(NVFuserTest, FusionMagicSchedulerRMSNormBackward_CUDA) {
for (const auto idx : c10::irange(kOuterNumDims)) {
outer_shape.push_back(shape[idx]);
}
for (const auto idx : c10::irange(kOuterNumDims, kM)) {
for (const auto i : c10::irange(kOuterNumDims, kM)) {
(void)i; // Suppress unused variable warning
outer_shape.push_back(1);
}

@@ -7884,7 +7886,6 @@ TEST_F(NVFuserTest, FusionMagicSchedulerInstanceNormalization_CUDA) {
FusionExecutorCache executor_cache(std::move(fusion));

auto cg_outputs = executor_cache.runFusionWithInputs(aten_inputs);
auto cg_outputs_full = {at_run_mean, at_run_var, cg_outputs[0]};

auto aten_outputs = at::instance_norm(
at_input,