Cherry-pick: Inference: fix Mask R-CNN model diff, optimize memory usage, and fix a memory leak. #18532 (#18547)

Fix Mask R-CNN model diff.
Add an interface for setting optim_cache_dir (e.g. when in TRT INT8 mode and loading the model from memory, there needs to be an interface for setting the directory of the TRT calibration table data).

test=release/1.5
NHZlX committed Jul 8, 2019
1 parent 7c73a68 commit bc9fd1f
Showing 19 changed files with 354 additions and 119 deletions.
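A minimal usage sketch of the new SetOptimCacheDir interface with a model loaded from memory and Paddle-TRT INT8 enabled. The buffer contents and cache path are placeholders, not part of this commit:

#include <memory>
#include <string>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

// prog_buf/params_buf are assumed to hold the serialized program and
// parameters (e.g. read from a database or decrypted in memory).
std::unique_ptr<paddle::PaddlePredictor> BuildPredictor(
    const std::string& prog_buf, const std::string& params_buf) {
  paddle::AnalysisConfig config;
  config.SetModelBuffer(prog_buf.data(), prog_buf.size(),
                        params_buf.data(), params_buf.size());
  // New in this commit: with no model directory to fall back on, this
  // tells the analysis passes where to store and load the TRT
  // calibration table and other optimization artifacts.
  config.SetOptimCacheDir("/tmp/paddle_opt_cache");  // placeholder path
  config.EnableUseGpu(100 /*memory pool MB*/, 0 /*GPU id*/);
  config.EnableTensorRtEngine(1 << 20 /*workspace*/, 1 /*max batch*/,
                              3 /*min subgraph size*/,
                              paddle::AnalysisConfig::Precision::kInt8,
                              false /*use_static*/, true /*use_calib_mode*/);
  return paddle::CreatePaddlePredictor(config);
}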
12 changes: 12 additions & 0 deletions paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -504,6 +504,16 @@ PDNode *PDNode::assert_op_has_n_outputs(const std::string &op_type, size_t n) {
return this;
}

PDNode *PDNode::assert_has_n_inputs(size_t n) {
asserts_.emplace_back([=](Node *x) { return x->inputs.size() == n; });
return this;
}

PDNode *PDNode::assert_has_n_outputs(size_t n) {
asserts_.emplace_back([=](Node *x) { return x->outputs.size() == n; });
return this;
}

PDNode *PDNode::assert_more(PDNode::teller_t &&teller) {
asserts_.emplace_back(std::move(teller));
return this;
@@ -1444,11 +1454,13 @@ PDNode *patterns::ConvAffineChannel::operator()(
auto *ac_scale_var = pattern->NewNode(ac_scale_repr())
->AsInput()
->assert_is_persistable_var()
->assert_has_n_outputs(1)
->assert_is_op_input("affine_channel", "Scale");
// AC Bias
auto *ac_bias_var = pattern->NewNode(ac_bias_repr())
->AsInput()
->assert_is_persistable_var()
->assert_has_n_outputs(1)
->assert_is_op_input("affine_channel", "Bias");

// AC output
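The extra assert_has_n_outputs(1) guards correctness: if the same persistable Scale or Bias variable fed more than one op, folding it into a single convolution would corrupt the other consumers. A hypothetical fragment using the new helper (assumes a PDPattern* pattern in scope, as in the code above):

// Reject any graph where the persistable variable has more than one
// consumer op; only then is it safe for the fuse pass to rewrite it.
auto* scale = pattern->NewNode("scale")
                  ->AsInput()
                  ->assert_is_persistable_var()
                  ->assert_has_n_outputs(1)  // exactly one consumer op
                  ->assert_is_op_input("affine_channel", "Scale");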
3 changes: 3 additions & 0 deletions paddle/fluid/framework/ir/graph_pattern_detector.h
@@ -131,6 +131,9 @@ struct PDNode {
const std::unordered_set<std::string>& op_types,
const std::string& argument, int nth);

PDNode* assert_has_n_inputs(size_t n);
PDNode* assert_has_n_outputs(size_t n);

template <typename T>
PDNode* assert_op_attr(const std::string& attr_name, const T& attr) {
asserts_.emplace_back([=](Node* x) {
3 changes: 1 addition & 2 deletions paddle/fluid/inference/analysis/argument.h
@@ -59,7 +59,6 @@ struct Argument {

using unique_ptr_t = std::unique_ptr<void, std::function<void(void*)>>;
using fusion_statis_t = std::unordered_map<std::string, int>;
using engine_opt_info_t = std::map<std::string, std::string>;
using anakin_max_shape_t = std::map<std::string, std::vector<int>>;

bool Has(const std::string& key) const { return valid_fields_.count(key); }
@@ -130,7 +129,7 @@ struct Argument {
DECL_ARGUMENT_FIELD(model_program_path, ModelProgramPath, std::string);
DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string);
DECL_ARGUMENT_FIELD(model_from_memory, ModelFromMemory, bool);
DECL_ARGUMENT_FIELD(engine_opt_info, EngineOptInfo, engine_opt_info_t);
DECL_ARGUMENT_FIELD(optim_cache_dir, OptimCacheDir, std::string);

// The overall graph to work on.
DECL_ARGUMENT_UNIQUE_FIELD(main_graph, MainGraph, framework::ir::Graph);
21 changes: 13 additions & 8 deletions paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -94,11 +94,20 @@ void IRPassManager::CreatePasses(Argument *argument,

bool use_static_engine = argument->tensorrt_use_static_engine();
bool model_from_memory = argument->model_from_memory();
bool int8_valid = !(model_from_memory && enable_int8);
std::string optim_cache_dir = argument->optim_cache_dir();
bool int8_valid =
!(model_from_memory && optim_cache_dir.empty() && enable_int8);
PADDLE_ENFORCE(int8_valid,
"TRT INT8 Now don't support model load from memory.");

if ((!model_from_memory && use_static_engine) || enable_int8) {
"When you are in TRT INT8 mode, and load model from "
"memory, you should set optim_cache_dir using "
"config.SetOptimCacheDir()");
PADDLE_ENFORCE(!(model_from_memory && use_static_engine),
"When you are using Paddle-TRT and loading the model "
"from memory, you should set use_static to false.");

if (!optim_cache_dir.empty()) {
pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir));
} else if (use_static_engine || enable_int8) {
std::string model_opt_cache_dir =
argument->Has("model_dir")
? argument->model_dir()
@@ -110,8 +119,6 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
pass->Set("use_static_engine", new bool(use_static_engine));
pass->Set("model_from_memory", new bool(argument->model_from_memory()));
pass->Set("engine_opt_info", new std::map<std::string, std::string>(
argument->engine_opt_info()));
}
if (pass_name == "ngraph_subgraph_pass") {
pass->Set("program",
@@ -123,8 +130,6 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("use_gpu", new bool(argument->use_gpu()));
pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
pass->Set("model_from_memory", new bool(argument->model_from_memory()));
pass->Set("engine_opt_info", new std::map<std::string, std::string>(
argument->engine_opt_info()));
pass->Set("predictor_id", new int(argument->predictor_id()));
pass->Set("max_input_shape", new std::map<std::string, std::vector<int>>(
argument->anakin_max_input_shape()));
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -226,6 +226,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
std::unique_ptr<tensorrt::TRTInt8Calibrator> calibrator;
if (enable_int8 && calibration_data.size() != 0) {
calibrator.reset(new tensorrt::TRTInt8Calibrator(calibration_data));
LOG(INFO) << "RUN Paddle TRT int8 calibration mode...";
}
// When in INT8 mode with calibration_mode on, the program just produces
// the calibration table data.
2 changes: 2 additions & 0 deletions paddle/fluid/inference/analysis/passes/CMakeLists.txt
@@ -4,13 +4,15 @@ cc_library(memory_optim_pass SRCS memory_optimize_pass.cc DEPS analysis_pass zer
cc_library(ir_params_sync_among_devices_pass SRCS ir_params_sync_among_devices_pass.cc DEPS analysis_pass argument ir_pass_manager)
cc_library(ir_graph_to_program_pass SRCS ir_graph_to_program_pass.cc DEPS analysis_pass graph_to_program_pass)
cc_library(adjust_cudnn_workspace_size_pass SRCS adjust_cudnn_workspace_size_pass.cc DEPS analysis_pass graph_to_program_pass)
cc_library(inference_op_replace_pass SRCS inference_op_replace_pass.cc DEPS analysis_pass graph_to_program_pass)

cc_library(analysis_passes SRCS passes.cc DEPS
ir_graph_build_pass
ir_analysis_pass
ir_params_sync_among_devices_pass
adjust_cudnn_workspace_size_pass
memory_optim_pass
inference_op_replace_pass
ir_graph_to_program_pass
)

47 changes: 47 additions & 0 deletions paddle/fluid/inference/analysis/passes/inference_op_replace_pass.cc
@@ -0,0 +1,47 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h"
#include <unordered_map>

namespace paddle {
namespace inference {
namespace analysis {

void InferenceOpReplacePass::RunImpl(Argument* argument) {
if (!argument->use_gpu()) return;
std::unordered_map<std::string, std::string> replaced_map{
{"conditional_block", "conditional_block_infer"},
};

auto& graph = argument->main_graph();
auto nodes = graph.Nodes();

for (auto& node : nodes) {
if (!node->IsOp()) continue;
auto* op_desc = node->Op();
std::string op_type = op_desc->Type();
if (!replaced_map.count(op_type)) continue;
op_desc->SetType(replaced_map[op_type]);
op_desc->Flush();
}
}

std::string InferenceOpReplacePass::repr() const {
return "inference-op-replace-pass";
}

} // namespace analysis
} // namespace inference
} // namespace paddle
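The header comment below also names while, yet the map above rewrites only conditional_block. Extending the pass would be a one-entry change, sketched here under the assumption that a while_infer op exists (it is not part of this commit):

std::unordered_map<std::string, std::string> replaced_map{
    {"conditional_block", "conditional_block_infer"},
    {"while", "while_infer"},  // hypothetical inference counterpart
};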
43 changes: 43 additions & 0 deletions paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h
@@ -0,0 +1,43 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <vector>

#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/platform/place.h"

namespace paddle {
namespace inference {
namespace analysis {

/*
* Some ops (while, conditional_block, etc.) have different optimization
* points under prediction and training conditions, so dedicated inference
* implementations have been added for them. This pass replaces such ops
* with their inference counterparts (e.g. conditional_block ->
* conditional_block_infer).
*/
class InferenceOpReplacePass : public AnalysisPass {
public:
void RunImpl(Argument *argument) override;
std::string repr() const override;
};

} // namespace analysis
} // namespace inference
} // namespace paddle
31 changes: 26 additions & 5 deletions paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
@@ -18,6 +18,7 @@
#include <functional>
#include <limits>
#include <map>
#include <set>
#include <string>
#include <type_traits>
#include <utility>
@@ -108,11 +109,34 @@ int DataTypeToSpace(framework::proto::VarType_Type type) {
void MemoryOptimizePass::CollectVarMemorySize(
space_table_t* space_table) const {
const int fake_batch_size = 1;
auto valid_var = [&](framework::ir::Node* node) -> bool {
// Variables consumed or produced by control-flow ops or TensorRT
// engines keep their own buffers; reusing them would alias memory
// that is still live inside the sub-block or engine.
std::set<std::string> invalid_op = {"while", "conditional_block",
"tensorrt_engine",
"conditional_block_infer"};
for (auto* tmp : node->inputs) {
CHECK(tmp->IsOp());
if (invalid_op.count(tmp->Op()->Type())) return false;
}
for (auto* tmp : node->outputs) {
CHECK(tmp->IsOp());
if (invalid_op.count(tmp->Op()->Type())) return false;
}
return true;
};
// Collect tensors from graph.
for (auto* node : graph_->Nodes()) {
if (node->IsVar() &&
node->Var()->GetType() ==
framework::proto::VarType::Type::VarType_Type_LOD_TENSOR) {
framework::proto::VarType::Type::VarType_Type_LOD_TENSOR &&
valid_var(node)) {
// Parameters will not be reused.
if (node->Var()->Persistable()) continue;
auto shape = node->Var()->GetShape();
@@ -135,12 +159,9 @@ void MakeSimpleReusePlan(
std::unordered_map<std::string, int>* cluster_size) {
std::vector<MemNode> mem_nodes;
for (auto& data : lifecycles) {
if (!space_table.count(data.first)) continue;
MemNode temp_node;
temp_node.name = data.first;
PADDLE_ENFORCE(
space_table.count(data.first),
"%s variable should be in the spacetable during memory optimize",
data.first);
temp_node.size = space_table.at(data.first);
temp_node.cluster = -1;
temp_node.lifetime = data.second;
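The two memory-optimize changes work together: variables adjacent to control-flow or TensorRT ops are now excluded from space_table, and MakeSimpleReusePlan skips lifecycle entries without a recorded size instead of aborting. A standalone sketch of the exclusion rule (simplified, not the committed code):

#include <set>
#include <string>
#include <vector>

// A variable touching any of these ops must keep its own buffer: its
// tensor may live inside a sub-block or the TRT engine, so reusing the
// memory would alias data that is still in use.
bool MustKeepOwnBuffer(const std::vector<std::string>& adjacent_op_types) {
  static const std::set<std::string> kInvalidOps = {
      "while", "conditional_block", "tensorrt_engine",
      "conditional_block_infer"};
  for (const auto& type : adjacent_op_types) {
    if (kInvalidOps.count(type)) return true;
  }
  return false;
}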
3 changes: 3 additions & 0 deletions paddle/fluid/inference/analysis/passes/passes.cc
@@ -14,6 +14,7 @@

#include "paddle/fluid/inference/analysis/passes/passes.h"
#include "paddle/fluid/inference/analysis/passes/adjust_cudnn_workspace_size_pass.h"
#include "paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h"
@@ -38,6 +39,8 @@ PassRegistry::PassRegistry() {
std::unique_ptr<AnalysisPass>(new IrParamsSyncAmongDevicesPass));
passes_.emplace("adjust_cudnn_workspace_size_pass",
std::unique_ptr<AnalysisPass>(new AdjustCudnnWorkSpacePass));
passes_.emplace("inference_op_replace_pass",
std::unique_ptr<AnalysisPass>(new InferenceOpReplacePass));
passes_.emplace(
"ir_graph_to_program_pass",
std::unique_ptr<IrGraphToProgramPass>(new IrGraphToProgramPass));
6 changes: 1 addition & 5 deletions paddle/fluid/inference/api/analysis_config.cc
@@ -90,6 +90,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(model_from_memory_); // the memory model reuses prog_file_ and
// params_file_ fields.

CP_MEMBER(opt_cache_dir_);
prog_file_ = std::move(other.prog_file_);
params_file_ = std::move(other.params_file_);

@@ -406,11 +407,6 @@ void AnalysisConfig::SetModelBuffer(const char *prog_buffer,
Update();
}

void AnalysisConfig::SetEngineOptInfo(
std::map<std::string, std::string> engine_opt_info) {
engine_opt_info_ = engine_opt_info;
}

NativeConfig AnalysisConfig::ToNativeConfig() const {
NativeConfig config;
config.model_dir = model_dir_;
2 changes: 1 addition & 1 deletion paddle/fluid/inference/api/analysis_predictor.cc
@@ -368,10 +368,10 @@ void AnalysisPredictor::PrepareArgument() {
argument_.SetStaticMemoryOptimForceUpdate(
config_.static_memory_optim_force_update_);
argument_.SetModelFromMemory(config_.model_from_memory_);
argument_.SetEngineOptInfo(config_.engine_opt_info_);
// Analyze inference_program
argument_.SetUseAnakin(config_.anakin_engine_enabled());
argument_.SetPredictorID(predictor_id_);
argument_.SetOptimCacheDir(config_.opt_cache_dir_);
if (!config_.model_dir().empty()) {
argument_.SetModelDir(config_.model_dir());
} else {
12 changes: 8 additions & 4 deletions paddle/fluid/inference/api/paddle_analysis_config.h
@@ -61,6 +61,11 @@ struct AnalysisConfig {
/** Set parameter composed file path.
*/
void SetParamsFile(const std::string& x) { params_file_ = x; }
/** Set the optimization cache directory.
*/
void SetOptimCacheDir(const std::string& opt_cache_dir) {
opt_cache_dir_ = opt_cache_dir;
}
/** Get the model directory path.
*/
const std::string& model_dir() const { return model_dir_; }
@@ -143,7 +148,7 @@
int max_batch_size = 1, int min_subgraph_size = 3,
Precision precision = Precision::kFloat32,
bool use_static = false,
bool use_calib_mode = false);
bool use_calib_mode = true);
/** A boolean state telling whether the TensorRT engine is used.
*/
bool tensorrt_engine_enabled() const { return use_tensorrt_; }
@@ -223,7 +228,6 @@ struct AnalysisConfig {
/** A boolean state telling whether the model is set from the CPU memory.
*/
bool model_from_memory() const { return model_from_memory_; }
void SetEngineOptInfo(std::map<std::string, std::string> engine_opt_info);

/** Turn on memory optimize
* NOTE still in development, will release later.
@@ -311,15 +315,15 @@ struct AnalysisConfig {
bool anakin_auto_config_layout_{false};
std::vector<std::string> anakin_passes_filter_;
std::vector<std::string> anakin_ops_filter_;
std::map<std::string, std::string> engine_opt_info_;

bool use_mkldnn_quantizer_{false};
std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config_;
// If the config is already used on a predictor, it becomes invalid.
mutable bool is_valid_{true};
// Any config can only be used with one predictor.
// Variables held by config can take up a lot of memory in some cases.
// So we release the memory when the predictor is set up.
mutable bool is_valid_{true};
std::string opt_cache_dir_;
};

} // namespace paddle
4 changes: 2 additions & 2 deletions paddle/fluid/inference/api/paddle_pass_builder.h
@@ -73,8 +73,8 @@ class PaddlePassBuilder {
protected:
std::vector<std::string> analysis_passes_{
{"ir_graph_build_pass", "ir_analysis_pass",
"ir_params_sync_among_devices_pass",
"adjust_cudnn_workspace_size_pass"}};
"ir_params_sync_among_devices_pass", "adjust_cudnn_workspace_size_pass",
"inference_op_replace_pass"}};
std::vector<std::string> passes_;
};

