Skip to content

Commit

Permalink
Minor fix for the debug interface of using PTX directly (pytorch#1917)
Browse files (browse the repository at this point in the history)
  • Loading branch information
naoyam committed Aug 17, 2022
1 parent 6e8f953 commit ee8ef33
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
5 changes: 3 additions & 2 deletions torch/csrc/jit/codegen/cuda/executor.cpp
Expand Up @@ -1098,7 +1098,8 @@ std::vector<at::Tensor> FusionExecutor::runFusion(
void FusionExecutor::compileRtc(
const std::string& code,
const std::string& name,
bool structured) {
bool structured,
CompileOptions options) {
FUSER_PERF_SCOPE("ExecutorRunFusion::compileRtc");
std::string scode;
if (!structured) {
Expand All @@ -1107,7 +1108,7 @@ void FusionExecutor::compileRtc(
scode = code;
}
fusion_id_ = 1;
options_ = CompileOptions();
options_ = options;

std::tie(compiled_kernel_, last_compiler_log_) =
executor_utils::nvrtcCompile(scode, name, fusion_id_);
Expand Down
3 changes: 2 additions & 1 deletion torch/csrc/jit/codegen/cuda/executor.h
Expand Up @@ -141,7 +141,8 @@ class TORCH_CUDA_CU_API FusionExecutor : public NonCopyable {
void compileRtc(
const std::string& code,
const std::string& name,
bool structured = false);
bool structured = false,
CompileOptions options = CompileOptions());

//! Internal tests only. Runs the compiled CUDA kernel from compileRtc.
void runRtc(
Expand Down

0 comments on commit ee8ef33

Please sign in to comment.