Commit dbe08e9b authored by yuguo960516yuguo
Browse files

2.4.2

parent b5499578
......@@ -28,7 +28,7 @@ class DLPackTensor {
std::remove_reference<decltype(::DLTensor::shape[0])>::type; // int64_t
// lanes is only used in CPU to enable vectorization
explicit DLPackTensor(const Tensor& tensor, LaneType lanes = 1);
explicit DLPackTensor(const phi::DenseTensor& tensor, LaneType lanes = 1);
inline operator const ::DLTensor&() const { return t_; }
......@@ -44,5 +44,7 @@ class DLPackTensor {
ShapeType shape_[DDim::kMaxRank];
};
// Declaration only: takes a phi::DenseTensor by const reference and returns a
// DLManagedTensor*.
// NOTE(review): presumably the caller releases the result through the DLPack
// deleter; ownership is not visible from this declaration, so confirm against
// the definition.
DLManagedTensor* toDLPack(const phi::DenseTensor& src);
} // namespace framework
} // namespace paddle
......@@ -87,6 +87,15 @@ class InferShapeArgumentMappingContext : public phi::ArgumentMappingContext {
});
}
bool IsSelectedRowsInputs(const std::string& name) const override {
auto var_types = ctx_.GetInputsVarType(name);
return std::all_of(var_types.begin(),
var_types.end(),
[](const proto::VarType::Type& type) {
return type == proto::VarType::SELECTED_ROWS;
});
}
bool IsSelectedRowsInput(const std::string& name) const override {
auto var_type = ctx_.GetInputVarType(name);
return var_type == proto::VarType::SELECTED_ROWS;
......@@ -155,6 +164,16 @@ int64_t CompatMetaTensor::numel() const {
}
}
bool CompatMetaTensor::is_selected_rows() const {
if (is_runtime_) {
auto* var = PADDLE_GET_CONST(Variable*, var_);
return var->IsType<phi::SelectedRows>();
} else {
auto* var = PADDLE_GET_CONST(VarDesc*, var_);
return var->GetType() == proto::VarType::SELECTED_ROWS;
}
}
bool CompatMetaTensor::is_dense() const {
if (is_runtime_) {
auto* var = PADDLE_GET_CONST(Variable*, var_);
......@@ -182,7 +201,7 @@ DDim CompatMetaTensor::dims() const {
if (var->IsType<phi::DenseTensor>()) {
return var->Get<phi::DenseTensor>().dims();
} else if (var->IsType<phi::SelectedRows>()) {
return var->Get<phi::SelectedRows>().dims();
return var->Get<phi::SelectedRows>().GetCompleteDims();
} else if (var->IsType<phi::SparseCooTensor>()) {
return var->Get<phi::SparseCooTensor>().dims();
} else if (var->IsType<framework::LoDTensorArray>()) {
......@@ -260,8 +279,7 @@ void CompatMetaTensor::set_dims(const DDim& dims) {
auto* tensor = var->GetMutable<phi::DenseTensor>();
phi::DenseTensorUtils::GetMutableMeta(tensor)->dims = dims;
} else if (var->IsType<phi::SelectedRows>()) {
auto* tensor = var->GetMutable<phi::SelectedRows>()->mutable_value();
phi::DenseTensorUtils::GetMutableMeta(tensor)->dims = dims;
var->GetMutable<phi::SelectedRows>()->set_height(dims[0]);
} else if (var->IsType<phi::SparseCooTensor>()) {
auto* tensor = var->GetMutable<phi::SparseCooTensor>();
phi::DenseTensorUtils::GetMutableMeta(tensor)->dims = dims;
......
......@@ -59,6 +59,8 @@ class CompatMetaTensor : public phi::MetaTensor {
// Returns the value of the initialized_ member.
bool initialized() const override {
  return initialized_;
}
bool is_selected_rows() const;
bool is_tensor_array() const;
bool is_dense() const;
......
......@@ -148,6 +148,7 @@ pass_library(delete_c_identity_op_pass inference)
pass_library(preln_residual_bias_fuse_pass inference)
pass_library(delete_fill_constant_op_pass inference)
pass_library(constant_folding_pass inference)
pass_library(auto_mixed_precision_pass inference)
pass_library(simplify_with_basic_ops_pass base)
pass_library(fc_elementwise_layernorm_fuse_pass base)
pass_library(skip_layernorm_fuse_pass base)
......
This diff is collapsed.
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/phi/common/backend.h"
#include "paddle/phi/common/data_type.h"
namespace paddle {
namespace framework {
namespace ir {
// Graph pass declared on top of FusePassBase. The only overridden entry point
// is ApplyImpl(); every other member function is a private const helper whose
// definition lives outside this header.
//
// ApplyImpl() and all helpers are const member functions, so every piece of
// working state below is declared `mutable` to let them update it.
class AutoMixedPrecisionPass : public FusePassBase {
public:
// Shorthand for the protobuf variable-type message used by the framework.
using VarType = framework::proto::VarType;
public:
AutoMixedPrecisionPass() = default;
~AutoMixedPrecisionPass() = default;
protected:
// Pass entry point; receives the graph to process.
void ApplyImpl(Graph* graph) const override;
private:
// NOTE(review): the helpers below are only declared here. Their names suggest
// the stages of the pass (setup, op-type bookkeeping, precision selection,
// cast insertion, weight conversion) -- confirm against the .cc file.
void Init(Graph* graph) const;
void SetDefaultBlacklist() const;
void SetOpUniqueType() const;
void RestoreOpOriginType() const;
inline std::string GetOpOriginalType(const std::string& op_type) const;
void GetOpPrecision() const;
void UpdateOpPrecision() const;
void InsertCastOp() const;
void ProcessOpWithDtypeAttr() const;
bool InputVarsNotConvert(Node* op_node, const std::string& var_name) const;
bool OutputVarsNotConvert(Node* op_node, const std::string& var_name) const;
void SetVarPrecision() const;
void ConvertWeightsData() const;
private:
// Both flags default to false.
// NOTE(review): presumably skip_pass_ short-circuits the pass and
// keep_io_types_ preserves the model's input/output dtypes -- confirm.
mutable bool skip_pass_{false};
mutable bool keep_io_types_{false};
// Target low-precision type; float16 or bfloat16 for now. Defaults to FLOAT16.
mutable phi::DataType low_precision_{phi::DataType::FLOAT16};
// Backend the pass targets; defaults to GPU.
mutable phi::Backend backend_{phi::Backend::GPU};
// Set of names kept in a black list.
// NOTE(review): presumably op types that must not run in low precision --
// confirm against SetDefaultBlacklist().
mutable std::unordered_set<std::string> black_list_;
// subgraph id -> pointer to subgraph
mutable std::vector<Graph*> subgraphes_;
// var name -> real var node
mutable std::unordered_map<std::string, Node*> real_vars_;
// subgraph id -> all op nodes in subgraph
mutable std::vector<std::vector<Node*>> all_op_nodes_;
// op's unique type -> the op's original type
mutable std::unordered_map<std::string, std::string> op_original_type_;
// Unique types of the ops that run at low precision (membership == true).
mutable std::unordered_set<std::string> op_run_low_precision_;
// NOTE(review): presumably names of variables to be converted to the
// low-precision type -- confirm against SetVarPrecision().
mutable std::unordered_set<std::string> vars_convert_to_low_precision_;
};
// Declaration only; the definition is not in this header.
// Takes an op type, a backend, a precision and a black list of names, and
// returns a bool.
// NOTE(review): presumably true when `op_type` has a kernel for
// `backend`/`precision` and is not in `black_list` -- confirm against the
// definition.
bool OpSupportPrecision(const std::string& op_type,
phi::Backend backend,
phi::DataType precision,
const std::unordered_set<std::string>& black_list);
// Declaration only; the definition is not in this header.
// NOTE(review): presumably inserts a cast from `from_type` to `to_type`
// between `var_node` and `op_node` in `graph`, using `suffix` to name new
// nodes and `cache` to reuse a cast already created for the same source node
// -- confirm against the definition. `suffix` and `cache` are passed by
// pointer, so the callee is able to modify them.
void DoInsertCastOp(Graph* graph,
Node* var_node,
Node* op_node,
proto::VarType::Type from_type,
proto::VarType::Type to_type,
framework::BlockDesc* block_desc,
int* suffix,
std::unordered_map<Node*, Node*>* cache);
} // namespace ir
} // namespace framework
} // namespace paddle
......@@ -142,6 +142,9 @@ void ConstantFoldingPass::ApplyImpl(ir::Graph *graph) const {
}
out_desc->SetShape(out_shape);
out_desc->SetPersistable(true);
auto *var_desc_out = op_node->Op()->Block()->Var(out_name);
var_desc_out->SetShape(out_shape);
var_desc_out->SetPersistable(true);
auto *global_out_tensor = scope->Var(out_name)->GetMutable<LoDTensor>();
*global_out_tensor = *local_out_tensor;
}
......
......@@ -29,6 +29,11 @@ void FillConstData(LoDTensor* out_t, T value) {
}
void DeleteFillConstantOpPass::ApplyImpl(ir::Graph* graph) const {
bool with_dynamic_shape = Get<bool>("with_dynamic_shape");
// Dynamic shape is not supported by this pass; skip it.
if (with_dynamic_shape) {
return;
}
FusePassBase::Init("delete_fill_constant_op_pass", graph);
GraphPatternDetector detector;
auto fill_constant_op =
......
......@@ -75,7 +75,6 @@ Graph::Graph(const ProgramDesc &program,
}
} else {
auto var_nodes = InitFromProgram(program_, start_op_index, end_op_index);
ResolveHazard(var_nodes);
}
}
......@@ -88,7 +87,6 @@ Graph::Graph(const BlockDesc &block,
const int64_t end_op_index)
: main_graph_(main_graph) {
auto var_nodes = InitFromBlock(block, start_op_index, end_op_index);
ResolveHazard(var_nodes);
}
// TODO(levi): delete this interface after when we can convert all
......
......@@ -1045,6 +1045,7 @@ PDNode *patterns::SeqConvEltAddRelu::operator()(
PDNode *patterns::Squeeze2Transpose2::operator()() {
auto *squeeze2_op_in = pattern->NewNode(squeeze2_op_in_repr())
->AsInput()
->assert_has_n_outputs(1)
->assert_is_op_input("squeeze2", "X");
auto *squeeze2_op = pattern->NewNode(squeeze2_op_repr())
->assert_is_op("squeeze2")
......
......@@ -130,86 +130,6 @@ TEST(GraphTest, Basic) {
ASSERT_EQ(nodes.size(), 5UL);
}
// Builds a two-op program in block 0 where "sum" reads variable "a" and the
// later "dummy" op writes "a". After converting the program to a graph, the
// test expects "sum" to carry a control-dependency variable as its second
// output, "dummy" to carry one as its second input, and both to be the very
// same node.
TEST(GraphTest, WriteAfterRead) {
  ProgramDesc prog;
  auto *block = prog.MutableBlock(0);

  // First op: sum(a) -> b.
  auto *sum_op = block->AppendOp();
  sum_op->SetType("sum");
  sum_op->SetInput("X", {"a"});
  sum_op->SetOutput("Out", {"b"});
  sum_op->SetAttr("op_role", 1);

  // Second op: dummy(c) -> a, i.e. it overwrites what the first op read.
  auto *dummy_op = block->AppendOp();
  dummy_op->SetType("dummy");
  dummy_op->SetInput("X", {"c"});
  dummy_op->SetOutput("Out", {"a"});
  dummy_op->SetAttr("op_role", 1);

  block->Var("a")->SetType(proto::VarType::LOD_TENSOR);
  block->Var("b")->SetType(proto::VarType::LOD_TENSOR);
  block->Var("c")->SetType(proto::VarType::LOD_TENSOR);

  std::unique_ptr<ir::Graph> g(new ir::Graph(prog));

  ir::Node *dep_from_sum = nullptr;
  ir::Node *dep_into_dummy = nullptr;
  for (ir::Node *node : g->Nodes()) {
    const std::string node_name = node->Name();
    if (node_name == "sum") {
      ASSERT_EQ(node->outputs[0]->Name(), "b");
      ASSERT_TRUE(ir::IsControlDepVar(*node->outputs[1]));
      dep_from_sum = node->outputs[1];
      ASSERT_EQ(node->outputs.size(), 2UL);
    } else if (node_name == "dummy") {
      ASSERT_EQ(node->inputs[0]->Name(), "c");
      ASSERT_TRUE(ir::IsControlDepVar(*node->inputs[1]));
      dep_into_dummy = node->inputs[1];
      ASSERT_EQ(node->inputs.size(), 2UL);
    }
  }
  ASSERT_EQ(dep_from_sum, dep_into_dummy);
}
// Builds a two-op program in block 0 where both "sum" and the later "dummy"
// op write variable "b". After converting the program to a graph, the test
// expects "sum" to carry a control-dependency variable as its second output,
// "dummy" to carry one as its second input, both to be found, and both to be
// the very same node.
TEST(GraphTest, WriteAfterWrite) {
  ProgramDesc prog;
  auto *block = prog.MutableBlock(0);

  // First op: sum(a) -> b.
  auto *sum_op = block->AppendOp();
  sum_op->SetType("sum");
  sum_op->SetInput("X", {"a"});
  sum_op->SetOutput("Out", {"b"});
  sum_op->SetAttr("op_role", 1);

  // Second op: dummy(c) -> b, i.e. it writes the same output again.
  auto *dummy_op = block->AppendOp();
  dummy_op->SetType("dummy");
  dummy_op->SetInput("X", {"c"});
  dummy_op->SetOutput("Out", {"b"});
  dummy_op->SetAttr("op_role", 1);

  block->Var("a")->SetType(proto::VarType::LOD_TENSOR);
  block->Var("b")->SetType(proto::VarType::LOD_TENSOR);
  block->Var("c")->SetType(proto::VarType::LOD_TENSOR);

  std::unique_ptr<ir::Graph> g(new ir::Graph(prog));

  ir::Node *dep_from_sum = nullptr;
  ir::Node *dep_into_dummy = nullptr;
  for (ir::Node *node : g->Nodes()) {
    const std::string node_name = node->Name();
    if (node_name == "sum") {
      ASSERT_EQ(node->outputs[0]->Name(), "b");
      ASSERT_TRUE(ir::IsControlDepVar(*node->outputs[1]));
      ASSERT_EQ(node->outputs.size(), 2UL);
      dep_from_sum = node->outputs[1];
    } else if (node_name == "dummy") {
      ASSERT_EQ(node->inputs[0]->Name(), "c");
      ASSERT_TRUE(ir::IsControlDepVar(*node->inputs[1]));
      dep_into_dummy = node->inputs[1];
      ASSERT_EQ(node->inputs.size(), 2UL);
    }
  }
  ASSERT_NE(dep_from_sum, nullptr);
  ASSERT_NE(dep_into_dummy, nullptr);
  ASSERT_EQ(dep_from_sum, dep_into_dummy);
}
TEST(GraphTest, TestException) {
ProgramDesc prog;
std::unique_ptr<ir::Graph> g(new ir::Graph(prog));
......@@ -350,12 +270,13 @@ TEST(GraphTest, TestMultiBlock) {
op = prog.MutableBlock(1)->AppendOp();
op->SetType("dummy");
op->SetInput("X", {"c"});
op->SetOutput("Out", {"a"});
op->SetOutput("Out", {"d"});
op->SetAttr("op_role", 1);
prog.MutableBlock(1)->Var("a")->SetType(proto::VarType::LOD_TENSOR);
prog.MutableBlock(1)->Var("b")->SetType(proto::VarType::LOD_TENSOR);
prog.MutableBlock(1)->Var("c")->SetType(proto::VarType::LOD_TENSOR);
prog.MutableBlock(1)->Var("d")->SetType(proto::VarType::LOD_TENSOR);
// Set contents in block_2.
op = prog.MutableBlock(2)->AppendOp();
......@@ -367,12 +288,13 @@ TEST(GraphTest, TestMultiBlock) {
op = prog.MutableBlock(2)->AppendOp();
op->SetType("dummy");
op->SetInput("X", {"c"});
op->SetOutput("Out", {"b"});
op->SetOutput("Out", {"d"});
op->SetAttr("op_role", 1);
prog.MutableBlock(2)->Var("a")->SetType(proto::VarType::LOD_TENSOR);
prog.MutableBlock(2)->Var("b")->SetType(proto::VarType::LOD_TENSOR);
prog.MutableBlock(2)->Var("c")->SetType(proto::VarType::LOD_TENSOR);
prog.MutableBlock(1)->Var("d")->SetType(proto::VarType::LOD_TENSOR);
// Step2: Convert program into graph, 3 blocks corresponding 3 sub_graphs.
std::unique_ptr<ir::Graph> g(new ir::Graph(prog));
......@@ -399,45 +321,29 @@ TEST(GraphTest, TestMultiBlock) {
// Check contents in sub_graph_1.
const ir::Graph *g1 = g->GetSubGraph(1);
ir::Node *control_dep1 = nullptr;
ir::Node *control_dep2 = nullptr;
for (ir::Node *n : g1->Nodes()) {
if (n->Name() == "sum") {
ASSERT_EQ(n->outputs[0]->Name(), "b");
ASSERT_TRUE(ir::IsControlDepVar(*n->outputs[1]));
control_dep1 = n->outputs[1];
ASSERT_EQ(n->outputs.size(), 2UL);
ASSERT_EQ(n->outputs.size(), 1UL);
}
if (n->Name() == "dummy") {
ASSERT_EQ(n->inputs[0]->Name(), "c");
ASSERT_TRUE(ir::IsControlDepVar(*n->inputs[1]));
control_dep2 = n->inputs[1];
ASSERT_EQ(n->inputs.size(), 2UL);
ASSERT_EQ(n->inputs.size(), 1UL);
}
}
ASSERT_EQ(control_dep1, control_dep2);
// Check contents in sub_graph_2.
const ir::Graph *g2 = g->GetSubGraph(2);
control_dep1 = nullptr;
control_dep2 = nullptr;
for (ir::Node *n : g2->Nodes()) {
if (n->Name() == "sum") {
ASSERT_EQ(n->outputs[0]->Name(), "b");
ASSERT_TRUE(ir::IsControlDepVar(*n->outputs[1]));
ASSERT_EQ(n->outputs.size(), 2UL);
control_dep1 = n->outputs[1];
ASSERT_EQ(n->outputs.size(), 1UL);
}
if (n->Name() == "dummy") {
ASSERT_EQ(n->inputs[0]->Name(), "c");
ASSERT_TRUE(ir::IsControlDepVar(*n->inputs[1]));
control_dep2 = n->inputs[1];
ASSERT_EQ(n->inputs.size(), 2UL);
ASSERT_EQ(n->inputs.size(), 1UL);
}
}
ASSERT_NE(control_dep1, nullptr);
ASSERT_NE(control_dep2, nullptr);
ASSERT_EQ(control_dep1, control_dep2);
// Step3: Clone graph.
std::shared_ptr<ir::Graph> clone_g = g->Clone();
......
......@@ -331,8 +331,6 @@ void BatchMergePass::ApplyImpl(ir::Graph* graph) const {
copy_node(node);
}
}
result.ResolveHazard(created);
}
} // namespace ir
......
......@@ -183,5 +183,6 @@ void NaiveExecutor::ResetTrtOps(int num) {
}
#endif
}
} // namespace framework
} // namespace paddle
......@@ -50,7 +50,7 @@ USE_OP_ITSELF(concat_grad);
USE_OP_ITSELF(elementwise_mul_grad);
USE_OP_ITSELF(sigmoid_grad);
USE_OP_ITSELF(tanh_grad);
USE_OP(sum);
USE_OP_ITSELF(sum);
USE_OP_ITSELF(slice_grad);
USE_OP_ITSELF(lookup_table_grad);
USE_OP_ITSELF(sqrt);
......@@ -101,6 +101,7 @@ PD_DECLARE_KERNEL(slice_grad, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(cross_entropy_with_softmax, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(cross_entropy_with_softmax_grad, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(sqrt, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(add_n, GPU, ALL_LAYOUT);
namespace paddle {
namespace framework {
......
......@@ -512,6 +512,13 @@ class ExecutionArgumentMappingContext : public phi::ArgumentMappingContext {
});
}
bool IsSelectedRowsInputs(const std::string& name) const override {
auto vars = ctx_.MultiInputVar(name);
return std::all_of(vars.begin(), vars.end(), [](const Variable* var) {
return var->IsType<phi::SelectedRows>();
});
}
bool IsSelectedRowsInput(const std::string& name) const override {
const auto* var = ctx_.InputVar(name);
return var->IsType<phi::SelectedRows>();
......
......@@ -146,6 +146,48 @@ PreparedOp::PreparedOp(const framework::OperatorBase& op,
kernel_signature_(std::move(kernel_signature)),
phi_kernel_(phi_kernel) {}
#ifdef PADDLE_WITH_MLU
// Splits `ops` at every occurrence of `delim` and inserts each piece into
// `*op_set`. Empty pieces are inserted as well: an empty `ops`, two adjacent
// delimiters, or a leading/trailing delimiter each add "" to the set.
static void tokenize(const std::string& ops,
                     char delim,
                     std::unordered_set<std::string>* op_set) {
  std::string::size_type piece_begin = 0;
  std::string::size_type delim_pos = ops.find(delim, piece_begin);
  while (delim_pos != std::string::npos) {
    op_set->insert(ops.substr(piece_begin, delim_pos - piece_begin));
    piece_begin = delim_pos + 1;
    delim_pos = ops.find(delim, piece_begin);
  }
  // Whatever follows the last delimiter (possibly nothing) is the final piece.
  op_set->insert(ops.substr(piece_begin));
}
// Returns true when `op_name` is one of the comma-separated entries of the
// MLU_BLACK_LIST environment variable, false otherwise (including when the
// variable is unset). The environment variable is read once, on the first
// call; later changes to it are not observed.
static bool is_in_mlu_black_list(const std::string& op_name) {
  // A function-local static is initialized exactly once, and concurrent
  // callers block until that initialization has finished (guaranteed since
  // C++11). This avoids guarding the set with a plain non-atomic flag that is
  // read outside a lock, which would be a data race.
  static const std::unordered_set<std::string> mlu_black_list = [] {
    std::unordered_set<std::string> parsed;
    // Query the environment a single time and reuse the result.
    if (const char* env_value = std::getenv("MLU_BLACK_LIST")) {
      tokenize(std::string(env_value), ',', &parsed);
    }
    VLOG(3) << "MLU Black List: ";
    for (const auto& listed_op : parsed) {
      VLOG(3) << listed_op << " ";
    }
    return parsed;
  }();

  return mlu_black_list.find(op_name) != mlu_black_list.end();
}
#endif
template <typename VarType>
PreparedOp PrepareImpl(
const NameVarMap<VarType>& ins,
......@@ -194,6 +236,12 @@ PreparedOp PrepareImpl(
#endif
#ifdef PADDLE_WITH_MLU
if (is_in_mlu_black_list(op.Type())) {
expected_kernel_key.place_ = platform::CPUPlace();
}
#endif
bool has_phi_kernel = false;
const auto* arg_map_fn = phi_op_utils_map.GetArgumentMappingFn(op.Type());
......
......@@ -38,8 +38,7 @@ void Analyzer::RunAnalysis(Argument *argument) {
if (!disable_logs) {
string::PrettyLogH1("--- Running analysis [%s]", pass);
}
if (!argument->enable_analysis_optim() && pass == "ir_analysis_pass")
continue;
if (!argument->enable_ir_optim() && pass == "ir_analysis_pass") continue;
auto *ptr = PassRegistry::Global().Retreive(pass);
PADDLE_ENFORCE_NOT_NULL(ptr,
......
......@@ -31,7 +31,7 @@ TEST(Analyzer, analysis_without_tensorrt) {
Argument argument;
argument.SetDisableLogs(false);
argument.SetModelDir(FLAGS_inference_model_dir);
argument.SetEnableAnalysisOptim(false);
argument.SetEnableIrOptim(false);
argument.SetUseGPU(false);
argument.SetAnalysisPasses({"ir_graph_build_pass",
"ir_analysis_pass",
......@@ -44,7 +44,7 @@ TEST(Analyzer, analysis_without_tensorrt) {
TEST(Analyzer, analysis_with_tensorrt) {
Argument argument;
argument.SetDisableLogs(false);
argument.SetEnableAnalysisOptim(false);
argument.SetEnableIrOptim(false);
argument.SetTensorRtMaxBatchSize(3);
argument.SetTensorRtWorkspaceSize(1 << 20);
argument.SetModelDir(FLAGS_inference_model_dir);
......
......@@ -42,8 +42,6 @@ namespace paddle {
namespace inference {
namespace analysis {
using framework::ir::Graph;
#ifdef PADDLE_WITH_MKLDNN
using VarQuantScale =
std::unordered_map<std::string, std::pair<bool, framework::LoDTensor>>;
......@@ -148,7 +146,7 @@ struct Argument {
DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string);
DECL_ARGUMENT_FIELD(model_from_memory, ModelFromMemory, bool);
DECL_ARGUMENT_FIELD(optim_cache_dir, OptimCacheDir, std::string);
DECL_ARGUMENT_FIELD(enable_analysis_optim, EnableAnalysisOptim, bool);
DECL_ARGUMENT_FIELD(enable_ir_optim, EnableIrOptim, bool);
// For JITLayer
DECL_ARGUMENT_FIELD(skip_load_params, SkipLoadParams, bool);
......@@ -362,6 +360,8 @@ struct Argument {
DECL_ARGUMENT_FIELD(mixed_black_list,
MixedBlackList,
std::unordered_set<std::string>);
DECL_ARGUMENT_FIELD(enable_gpu_mixed, EnableGPUMixed, bool);
DECL_ARGUMENT_FIELD(mixed_precision_mode, MixedPrecisionMode, int);
private:
std::unordered_set<std::string> valid_fields_;
......
......@@ -153,25 +153,6 @@ T &GetFromScope(const framework::Scope &scope, const std::string &name) {
return *var->GetMutable<T>();
}
// Reads the whole file at `model_path` in binary mode and parses its bytes
// into a framework::proto::ProgramDesc, which is returned by value.
// A file that cannot be opened is reported through PADDLE_ENFORCE_EQ with a
// NotFound error. The return value of ParseFromString is not checked.
static framework::proto::ProgramDesc LoadProgramDesc(
    const std::string &model_path) {
  std::ifstream model_stream(model_path, std::ios::in | std::ios::binary);
  PADDLE_ENFORCE_EQ(
      model_stream.is_open(),
      true,
      platform::errors::NotFound(
          "Cannot open file %s, please confirm whether the file exists",
          model_path));

  // Seek to the end to learn the size, then rewind before reading.
  model_stream.seekg(0, std::ios::end);
  std::string serialized(model_stream.tellg(), ' ');
  model_stream.seekg(0, std::ios::beg);
  model_stream.read(&serialized[0], serialized.size());
  model_stream.close();

  framework::proto::ProgramDesc parsed_desc;
  parsed_desc.ParseFromString(serialized);
  return parsed_desc;
}
static bool FileExists(const std::string &filepath) {
std::ifstream file(filepath);
bool exists = file.is_open();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment