OpenDAS / Paddle · Commits

Commit dbe08e9b authored Jun 12, 2023 by yuguo960516yuguo

    2.4.2

Parent: b5499578
Changes: 302 files in total (paginated; 16 pages). Showing 20 changed files with 211 additions and 980 deletions (+211 −980).
paddle/fluid/inference/analysis/ir_pass_manager.cc                           +6  −34
paddle/fluid/inference/analysis/ir_pass_manager.h                            +0  −6
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc          +8  −8
paddle/fluid/inference/analysis/passes/CMakeLists.txt                        +2  −14
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc         +73 −846
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h          +41 −11
paddle/fluid/inference/analysis/passes/inference_op_replace_pass.cc          +1  −1
paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc                   +1  −1
paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc                +3  −2
paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.cc           +1  −1
paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h            +1  −1
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc  +1  −1
paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc               +1  −1
paddle/fluid/inference/analysis/passes/passes.cc                             +0  −3
paddle/fluid/inference/api/analysis_config.cc                                +24 −15
paddle/fluid/inference/api/analysis_predictor.cc                             +32 −26
paddle/fluid/inference/api/demo_ci/.gitignore                                +1  −0
paddle/fluid/inference/api/mkldnn_quantizer.cc                               +2  −4
paddle/fluid/inference/api/paddle_analysis_config.h                          +9  −3
paddle/fluid/inference/api/paddle_pass_builder.cc                            +4  −2
paddle/fluid/inference/analysis/ir_pass_manager.cc

@@ -27,6 +27,7 @@
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/inference/analysis/argument.h"
 #include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/errors.h"

 namespace paddle {
 namespace inference {
@@ -36,15 +37,6 @@ using string::PrettyLogEndl;
 using string::Style;

 IRPassManager::IRPassManager(Argument *argument) {
-  ARGUMENT_CHECK_FIELD(argument, main_program);
-  graph_ = std::unique_ptr<Graph>(new Graph(argument->main_program()));
-  if (argument->Has("scope")) {
-    auto *scope_ptr = argument->scope_ptr();
-    PADDLE_ENFORCE_NOT_NULL(scope_ptr,
-                            platform::errors::PreconditionNotMet(
-                                "The scope ptr should not be nullptr."));
-    graph_->SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
-  }
   disable_logs_ = argument->disable_logs();
   ARGUMENT_CHECK_FIELD(argument, ir_analysis_passes);
@@ -95,10 +87,14 @@ void IRPassManager::CreatePasses(Argument *argument,
         argument->tensorrt_tuned_dynamic_shape();
     pass->Set("with_dynamic_shape", new bool(with_dynamic_shape));

+    // mixed precision related
     pass->Set("model_precision", new int(argument->model_precision()));
+    pass->Set(
+        "mixed_black_list",
+        new std::unordered_set<std::string>(argument->mixed_black_list()));
+    pass->Set("enable_gpu_mixed", new bool(argument->enable_gpu_mixed()));
+    pass->Set("mixed_precision_mode",
+              new int(argument->mixed_precision_mode()));

     if (pass_name == "graph_viz_pass") {
       std::string optim_cache_dir = argument->optim_cache_dir();
@@ -302,42 +298,18 @@ void IRPassManager::CreatePasses(Argument *argument,
 }

 std::unique_ptr<Graph> IRPassManager::Apply(std::unique_ptr<Graph> graph) {
   if (passes_.empty()) {
     return graph;
   }
   PADDLE_ENFORCE_NOT_NULL(
       graph.get(),
-      platform::errors::PreconditionNotMet("Graph cannot be NULL."));
+      platform::errors::InvalidArgument("Graph cannot be null."));
   // Apply all the passes
   for (const auto &pass : passes_) {
     if (pass->Type() != "graph_viz_pass" && !disable_logs_) {
       PrettyLogEndl(Style::H2(), "--- Running IR pass [%s]", pass->Type());
     }
     // delete_fill_constant_op_pass is not apply under trt dynamic shape
     if (pass->Type() == "delete_fill_constant_op_pass") {
       bool use_dynamic = pass->Get<bool>("with_dynamic_shape");
       if (use_dynamic) continue;
     }
     graph.reset(pass->Apply(graph.release()));
   }
   return graph;
 }

 framework::proto::ProgramDesc IRPassManager::AcquireProgram(
     std::unique_ptr<Graph> *graph, ProgramDesc *program) const {
   auto pass =
       framework::ir::PassRegistry::Instance().Get("graph_to_program_pass");

   // Direct using ProgramDesc desc(argument->main_program()) may cause
   // incomplete copies of information.
   ProgramDesc desc;
   desc.CopyFrom(*program->Proto());
   pass->SetNotOwned("program", &desc);
   auto *the_graph = graph->release();
   graph->reset(pass->Apply(the_graph));
   return *desc.Proto();
 }

 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle
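The new `pass->Set(...)` calls above hand the mixed-precision settings to every registered IR pass. A minimal sketch of how a pass on the receiving end could read them back, using the standard `framework::ir::Pass::Get<T>` accessor (the same one the diff already uses for "with_dynamic_shape"); the pass class `MyPrecisionAwarePass` and its body are hypothetical:

    // Hypothetical consumer pass; the attribute names match the ones
    // registered in IRPassManager::CreatePasses above.
    void MyPrecisionAwarePass::ApplyImpl(framework::ir::Graph *graph) const {
      bool enable_gpu_mixed = Get<bool>("enable_gpu_mixed");
      int mixed_precision_mode = Get<int>("mixed_precision_mode");
      const auto &black_list =
          Get<std::unordered_set<std::string>>("mixed_black_list");
      if (!enable_gpu_mixed) return;  // nothing to do for pure-FP32 execution
      // ... walk `graph` and lower ops whose type is not in `black_list`,
      // using `mixed_precision_mode` as the target precision ...
    }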
paddle/fluid/inference/analysis/ir_pass_manager.h

@@ -48,15 +48,9 @@ class IRPassManager final {
   std::unique_ptr<Graph> Apply(std::unique_ptr<Graph> graph);

   framework::proto::ProgramDesc AcquireProgram(std::unique_ptr<Graph> *graph,
                                                ProgramDesc *program) const;

-  framework::ir::Graph &graph() const { return *graph_; }
-
  private:
   void CreatePasses(Argument *argument,
                     const std::vector<std::string> &passes);

-  std::unique_ptr<Graph> graph_;
-
   std::vector<std::unique_ptr<Pass>> passes_;
   bool disable_logs_{false};
 };
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc

@@ -94,13 +94,13 @@ void OutputProcess(framework::ir::Graph *graph,
                           backend,
                           precision,
                           blacklist)) {
-        AddCastOp(graph,
-                  var_node,
-                  next_op,
-                  framework::proto::VarType::FP32,
-                  to_type,
-                  &suffix,
-                  block_desc,
-                  &var_to_cast_op_map);
+        InsertCastOp(graph,
+                     var_node,
+                     next_op,
+                     framework::proto::VarType::FP32,
+                     to_type,
+                     block_desc,
+                     &suffix,
+                     &var_to_cast_op_map);
         var_node->Var()->SetDataType(framework::proto::VarType::FP32);
       }
paddle/fluid/inference/analysis/passes/CMakeLists.txt

@@ -13,7 +13,7 @@ cc_library(
 cc_library(
   convert_to_mixed_precision
   SRCS convert_to_mixed_precision.cc
-  DEPS analysis_pass ir_graph_build_pass)
+  DEPS analysis_pass ir_graph_build_pass auto_mixed_precision_pass)
 cc_library(
   ir_params_sync_among_devices_pass
   SRCS ir_params_sync_among_devices_pass.cc
@@ -30,17 +30,6 @@ cc_library(
   inference_op_replace_pass
   SRCS inference_op_replace_pass.cc
   DEPS analysis_pass graph_to_program_pass)
-if(WITH_TESTING)
-  cc_library(
-    ir_graph_clean_pass
-    SRCS ir_graph_clean_pass.cc
-    DEPS analysis_pass gtest)
-else()
-  cc_library(
-    ir_graph_clean_pass
-    SRCS ir_graph_clean_pass.cc
-    DEPS analysis_pass)
-endif()
 cc_library(
   analysis_passes
@@ -52,8 +41,7 @@ cc_library(
        memory_optim_pass
        convert_to_mixed_precision
        inference_op_replace_pass
-       ir_graph_to_program_pass
-       ir_graph_clean_pass)
+       ir_graph_to_program_pass)

 set(analysis_deps
     ${analysis_deps}
     analysis_passes subgraph_detector
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc

(This diff is collapsed in the page view and is not reproduced here.)
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h

@@ -15,14 +15,12 @@
 #pragma once

 #include <string>
 #include <unordered_map>
 #include <unordered_set>

 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/ir/graph.h"
-#include "paddle/fluid/framework/ir/graph_helper.h"
-#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/phi/common/backend.h"
 #include "paddle/phi/common/data_type.h"
@@ -30,20 +28,52 @@ namespace paddle {
 namespace inference {
 namespace analysis {

+class ConvertToMixedPrecisionPass {
+ public:
+  explicit ConvertToMixedPrecisionPass(
+      const std::string& model_file,
+      const std::string& params_file,
+      const std::string& mixed_model_file,
+      const std::string& mixed_params_file,
+      phi::DataType mixed_precision,
+      phi::Backend backend,
+      bool keep_io_types,
+      const std::unordered_set<std::string>& black_list);
+
+  void Run();
+
+ private:
+  void LoadModel();
+  void SaveMixedModel();
+
+ private:
+  std::string model_file_;
+  std::string params_file_;
+  std::string mixed_model_file_;
+  std::string mixed_params_file_;
+  phi::DataType mixed_precision_;
+  phi::Backend backend_;
+  bool keep_io_types_;
+  std::unordered_set<std::string> black_list_;
+
+  framework::Scope scope_;
+  std::unique_ptr<framework::ir::Graph> main_graph_{nullptr};
+};
+
 bool OpSupportPrecision(const std::string& op_type,
                         phi::Backend backend,
                         phi::DataType precision,
-                        const std::unordered_set<std::string>& blacklist);
+                        const std::unordered_set<std::string>& black_list);

-void AddCastOp(
+void InsertCastOp(
     framework::ir::Graph* graph,
-    framework::ir::Node* node,
-    framework::ir::Node* next_op,
+    framework::ir::Node* var_node,
+    framework::ir::Node* op_node,
     framework::proto::VarType::Type from_type,
     framework::proto::VarType::Type to_type,
-    int* suffix,
     framework::BlockDesc* block_desc,
-    std::unordered_map<framework::ir::Node*, framework::ir::Node*>* map);
+    int* suffix,
+    std::unordered_map<framework::ir::Node*, framework::ir::Node*>* visited);

 void ConvertToMixedPrecision(const std::string& model_file,
                              const std::string& params_file,
@@ -51,8 +81,8 @@ void ConvertToMixedPrecision(const std::string& model_file,
                              const std::string& mixed_params_file,
                              phi::DataType mixed_precision,
                              phi::Backend backend,
-                             bool keep_io_types = true,
-                             std::unordered_set<std::string> black_list = {});
+                             bool keep_io_types,
+                             const std::unordered_set<std::string>& black_list);

 }  // namespace analysis
 }  // namespace inference
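For reference, a minimal call under the new `ConvertToMixedPrecision` signature, where `keep_io_types` and `black_list` are now required rather than defaulted; the file names and the blacklisted op are placeholders:

    #include "paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h"

    int main() {
      // Convert an FP32 model to FP16 for the GPU backend; ops listed in the
      // black list are kept in FP32.
      paddle::inference::analysis::ConvertToMixedPrecision(
          "model.pdmodel",
          "model.pdiparams",
          "mixed.pdmodel",
          "mixed.pdiparams",
          phi::DataType::FLOAT16,
          phi::Backend::GPU,
          /*keep_io_types=*/true,
          /*black_list=*/{"softmax"});
      return 0;
    }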
paddle/fluid/inference/analysis/passes/inference_op_replace_pass.cc

@@ -40,7 +40,7 @@ void InferenceOpReplacePass::RunImpl(Argument* argument) {
 }

 std::string InferenceOpReplacePass::repr() const {
-  return "inference-op-replace-pass";
+  return "inference_op_replace_pass";
 }

 }  // namespace analysis
paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc

@@ -105,7 +105,7 @@ void IrAnalysisPass::CollectFusionStatis(Argument* argument) {
       framework::ir::kFuseStatisAttr));
 }

-std::string IrAnalysisPass::repr() const { return "ir-analysis-pass"; }
+std::string IrAnalysisPass::repr() const { return "ir_analysis_pass"; }

 }  // namespace analysis
 }  // namespace inference
paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc

@@ -64,7 +64,8 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
         "set."));
   }

-  auto graph = std::unique_ptr<Graph>(new Graph(argument->main_program()));
+  auto graph = std::unique_ptr<framework::ir::Graph>(
+      new framework::ir::Graph(argument->main_program()));
   argument->SetMainGraph(graph.release());
   auto *scope_ptr = argument->scope_ptr();
   PADDLE_ENFORCE_NOT_NULL(scope_ptr,
@@ -125,7 +126,7 @@ std::unique_ptr<framework::ProgramDesc> IrGraphBuildPass::LoadModel(
   }
 }

-std::string IrGraphBuildPass::repr() const { return "ir-graph-build-pass"; }
+std::string IrGraphBuildPass::repr() const { return "ir_graph_build_pass"; }

 }  // namespace analysis
 }  // namespace inference
paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.cc

@@ -31,7 +31,7 @@ void IrGraphToProgramPass::RunImpl(Argument *argument) {
         new int(argument->memory_optim_sort_kind()));
   }

-  std::unique_ptr<Graph> graph(argument->main_graph_ptr());
+  std::unique_ptr<framework::ir::Graph> graph(argument->main_graph_ptr());

   // Direct using ProgramDesc desc(argument->main_program()) may cause
   // incomplete copies of information.
paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h

@@ -28,7 +28,7 @@ class IrGraphToProgramPass : public AnalysisPass {
  public:
   void RunImpl(Argument *argument) override;

-  std::string repr() const override { return "ir-graph-to-param-pass"; }
+  std::string repr() const override { return "ir_graph_to_param_pass"; }
 };

 }  // namespace analysis
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc

@@ -169,7 +169,7 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
 }

 std::string IrParamsSyncAmongDevicesPass::repr() const {
-  return "ir-params-sync-among-devices-pass";
+  return "ir_params_sync_among_devices_pass";
 }

 }  // namespace analysis
paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc

@@ -295,7 +295,7 @@ void UpdateOpDescsByReuse(
   }
 }

-std::string MemoryOptimizePass::repr() const { return "memory optimize pass"; }
+std::string MemoryOptimizePass::repr() const { return "memory_optimize_pass"; }

 void MemoryOptimizePass::RunImpl(Argument *argument) {
   // Memory optimization.
paddle/fluid/inference/analysis/passes/passes.cc

@@ -18,7 +18,6 @@
 #include "paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"
-#include "paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
 #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
@@ -34,8 +33,6 @@ PassRegistry::PassRegistry() {
                   std::unique_ptr<AnalysisPass>(new IrAnalysisPass));
   passes_.emplace("ir_graph_build_pass",
                   std::unique_ptr<AnalysisPass>(new IrGraphBuildPass));
-  passes_.emplace("ir_graph_clean_pass",
-                  std::unique_ptr<AnalysisPass>(new IrInferCleanGraphPass));
   passes_.emplace("memory_optimize_pass",
                   std::unique_ptr<AnalysisPass>(new MemoryOptimizePass));
   passes_.emplace(
paddle/fluid/inference/api/analysis_config.cc

@@ -85,15 +85,29 @@ void AnalysisConfig::SetModel(const std::string &prog_file_path,
   Update();
 }

 void AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb,
-                                  int device_id) {
+                                  int device_id,
+                                  Precision precision_mode) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   use_gpu_ = true;
   memory_pool_init_size_mb_ = memory_pool_init_size_mb;
   FLAGS_initial_gpu_memory_in_mb = memory_pool_init_size_mb_;
   gpu_device_id_ = device_id;
+  mixed_precision_mode_ = precision_mode;
+  if (precision_mode == Precision::kFloat32) {
+    // default
+  } else if (precision_mode == Precision::kHalf ||
+             precision_mode == Precision::kBf16) {
+    enable_gpu_mixed_ = true;
+  } else {
+    LOG(ERROR)
+        << "The Paddle-GPU inference currently only supports "
+           "float32/float16/bfloat16 precision. Please check the parameters "
+           "you specified in EnableUseGpu or enable_use_gpu function.";
+  }
 #else
-  LOG(ERROR) << "Please compile with gpu to EnableGpu()";
+  LOG(ERROR) << "Please use PaddlePaddle with GPU version.";
   use_gpu_ = false;
 #endif
@@ -279,7 +293,7 @@ void AnalysisConfig::LoadIpuConfig(const std::string &config_path) {
     if (ipu_config_mapper_.find(key) == ipu_config_mapper_.end()) {
       PADDLE_THROW(platform::errors::InvalidArgument(
-          "invalid key {} in IPU config", key));
+          "invalid key %s in IPU config: ", key));
     }
     switch (ipu_config_mapper_.at(key)) {
       case ipu_config_code::ipu_device_num:
@@ -315,7 +329,7 @@ void AnalysisConfig::LoadIpuConfig(const std::string &config_path) {
       default:
         PADDLE_THROW(platform::errors::InvalidArgument(
-            "invalid key {} in IPU config", key));
+            "invalid key %s in IPU config", key));
         break;
     }
   }
@@ -372,8 +386,10 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(gpu_device_id_);
   CP_MEMBER(memory_pool_init_size_mb_);

-  // Mixed related.
+  // Mixed precision related.
   CP_MEMBER(mixed_black_list_);
+  CP_MEMBER(enable_gpu_mixed_);
+  CP_MEMBER(mixed_precision_mode_);

   CP_MEMBER(enable_memory_optim_);
   // TensorRT related.
@@ -740,13 +756,7 @@ void AnalysisConfig::Update() {
       ((use_custom_device() ^ pass_builder_->use_custom_device()))) {
     if (use_gpu()) {
       pass_builder_.reset(new GpuPassStrategy);
-
-      if (use_tensorrt_) {
-        // Append after the Affine_channel_conv_fuse pass.
-        pass_builder()->InsertPass(3, "tensorrt_subgraph_pass");
-      }
     } else if (use_ipu()) {
       VLOG(1) << "IpuPassStrategy has been used for new.";
       pass_builder_.reset(new IpuPassStrategy);
     } else if (use_xpu()) {
       PADDLE_ENFORCE_EQ(
@@ -946,9 +956,6 @@ void AnalysisConfig::Update() {
             "but did not have the option -DWITH_CUSTOM_DEVICE compiled."));
 #endif
   }
-
-  if (ir_debug_) {
-    pass_builder()->TurnOnDebug();
-  }
 }

 std::string AnalysisConfig::SerializeInfoCache() {
@@ -960,6 +967,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << calibration_file_path_;

   ss << use_gpu_;
+  ss << enable_gpu_mixed_;
   ss << use_external_stream_;
   ss << exec_stream_;
   ss << use_fc_padding_;
@@ -1167,6 +1175,7 @@ std::string AnalysisConfig::Summary() {
   os.InsertRow({"use_gpu", use_gpu_ ? "true" : "false"});
   if (use_gpu_) {
     os.InsertRow({"gpu_device_id", std::to_string(gpu_device_id_)});
+    os.InsertRow({"enable_gpu_mixed", std::to_string(enable_gpu_mixed_)});
     os.InsertRow({"memory_pool_init_size",
                   std::to_string(memory_pool_init_size_mb_) + "MB"});
     os.InsertRow(
@@ -1360,7 +1369,7 @@ bool AnalysisConfig::trt_allow_build_at_runtime() {
   return trt_allow_build_at_runtime_;
 }

-void AnalysisConfig::Exp_SetBlackListOpsForMixedModel(
+void AnalysisConfig::Exp_DisableMixedPrecisionOps(
     const std::unordered_set<std::string> &black_list) {
   mixed_black_list_ = black_list;
 }
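Taken together, these config changes expose GPU mixed precision as a single call. A hedged usage sketch of the extended overload and the renamed blacklist setter (model paths and the blacklisted op are placeholders):

    #include "paddle/fluid/inference/api/paddle_analysis_config.h"

    int main() {
      paddle::AnalysisConfig config;
      config.SetModel("model.pdmodel", "model.pdiparams");
      // The third argument is new in this commit; kHalf or kBf16 turns
      // enable_gpu_mixed_ on, anything else logs an error.
      config.EnableUseGpu(/*memory_pool_init_size_mb=*/100,
                          /*device_id=*/0,
                          paddle::AnalysisConfig::Precision::kHalf);
      // Renamed from Exp_SetBlackListOpsForMixedModel; listed ops stay FP32.
      config.Exp_DisableMixedPrecisionOps({"softmax"});
      return 0;
    }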
paddle/fluid/inference/api/analysis_predictor.cc

@@ -1065,7 +1065,7 @@ void AnalysisPredictor::PrepareArgument() {
   argument_.SetUseGPU(config_.use_gpu());
   argument_.SetUseFcPadding(config_.use_fc_padding());
   argument_.SetGPUDeviceId(config_.gpu_device_id());
-  argument_.SetEnableAnalysisOptim(config_.enable_ir_optim_);
+  argument_.SetEnableIrOptim(config_.enable_ir_optim_);
   argument_.SetEnableMemoryOptim(config_.enable_memory_optim());
   argument_.SetModelFromMemory(config_.model_from_memory_);
   // Analyze inference_program
@@ -1210,53 +1210,57 @@ void AnalysisPredictor::PrepareArgument() {
   }
 #endif

-  auto passes = config_.pass_builder()->AllPasses();
+  auto *pass_builder = config_.pass_builder();
   if (model_precision_ != phi::DataType::FLOAT32) {
     LOG(INFO) << "Model is mixed precision type with " << model_precision_
               << ", we will use a new PassStrategy. Note that only the GPU "
                  "backend is supported for now.";
-    passes.clear();
+    pass_builder->ClearPasses();
+    const auto &deleted_passes = pass_builder->GetAllDeletedPasses();
     if (config_.tensorrt_engine_enabled()) {
       for (const auto &pass : kTrtLowerPrecisionPasses) {
-        passes.push_back(pass);
+        if (deleted_passes.count(pass)) continue;
+        pass_builder->AppendPass(pass);
       }
     } else if (config_.use_gpu()) {
       for (const auto &pass : kGpuLowerPrecisionPasses) {
-        passes.push_back(pass);
+        if (deleted_passes.count(pass)) continue;
+        pass_builder->AppendPass(pass);
       }
     }
-    const auto &deleted_passes = config_.pass_builder()->GetAllDeletedPasses();
-    for (const auto &it : deleted_passes) {
-      auto iterator = std::find(passes.begin(), passes.end(), it);
-      if (iterator != passes.end()) {
-        passes.erase(iterator);
-      }
-    }
-    if (config_.ir_debug_) {
-      auto it = std::begin(passes);
-      while (it != std::end(passes)) {
-        if (*it != "graph_viz_pass") {
-          it = passes.insert(it + 1, "graph_viz_pass");
-        } else {
-          ++it;
-        }
-      }
-    }
   }

-  if (!config_.ir_optim()) {
-    passes.clear();
-    LOG(INFO) << "ir_optim is turned off, no IR pass will be executed";
-  }
+  if (!config_.ir_optim()) {
+    argument_.SetEnableIrOptim(false);
+    if (config_.enable_gpu_mixed_) {
+      argument_.SetEnableIrOptim(true);
+      pass_builder->ClearPasses();
+      pass_builder->AppendPass("auto_mixed_precision_pass");
+      LOG(INFO)
+          << "This model run in Paddle-GPU mixed precision mode with no ir "
+             "optimization.";
+    } else {
+      LOG(INFO) << "ir_optim is turned off, no IR pass will be executed.";
+    }
+  } else {
+    if (config_.ir_debug_) {
+      pass_builder->TurnOnDebug();
+    }
+    if (config_.enable_gpu_mixed_) {
+      LOG(INFO) << "This model run in Paddle-GPU mixed precision mode.";
+    }
+  }
+
   argument_.SetDisableLogs(config_.glog_info_disabled());
-  argument_.SetIrAnalysisPasses(passes);
-  argument_.SetAnalysisPasses(config_.pass_builder()->AnalysisPasses());
+  argument_.SetIrAnalysisPasses(pass_builder->AllPasses());
+  argument_.SetAnalysisPasses(pass_builder->AnalysisPasses());
   argument_.SetScopeNotOwned(scope_.get());
+
+  // mixed precison.
+  argument_.SetModelPrecision(static_cast<int>(model_precision_));
+  argument_.SetMixedBlackList(config_.mixed_black_list_);
+  argument_.SetEnableGPUMixed(config_.enable_gpu_mixed_);
+  argument_.SetMixedPrecisionMode(static_cast<int>(
+      paddle::ConvertPrecision(config_.mixed_precision_mode_)));
 }
 // NOTE All the members in AnalysisConfig should be copied to Argument.
@@ -2107,7 +2111,9 @@ std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) {
   }
   x->predictor_stream_ = stream;
   x->Init(scope_, inference_program_);
+#ifdef PADDLE_WITH_TENSORRT
   x->executor_->ResetTrtOps(++AnalysisPredictor::clone_num_);
+#endif
   return std::unique_ptr<PaddlePredictor>(x);
 }
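One visible effect of the Clone() change: the `ResetTrtOps` call is now compiled only under `PADDLE_WITH_TENSORRT`, so cloning onto an external stream also builds without TensorRT. A sketch, assuming the usual predictor API and CUDA runtime (stream handling is illustrative, not prescribed by the commit):

    #include <cuda_runtime.h>

    #include "paddle/fluid/inference/api/paddle_inference_api.h"

    std::unique_ptr<paddle::PaddlePredictor> CloneOnNewStream(
        paddle::PaddlePredictor *predictor) {
      cudaStream_t stream;
      cudaStreamCreate(&stream);
      // After this commit the TensorRT engine reset inside Clone() is
      // guarded, so this path also works on TensorRT-less builds.
      return predictor->Clone(stream);
    }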
paddle/fluid/inference/api/demo_ci/.gitignore  (new file, mode 0 → 100644)

+data
paddle/fluid/inference/api/mkldnn_quantizer.cc

@@ -604,10 +604,8 @@ void AnalysisPredictor::MkldnnQuantizer::PrepareArgument() const {
   if (predictor_.config_.ir_debug_) builder->TurnOnDebug();
   auto passes = builder->AllPasses();
   predictor_.argument_.SetIrAnalysisPasses(passes);
-  predictor_.argument_.SetAnalysisPasses({"ir_graph_clean_pass",
-                                          "ir_analysis_pass",
-                                          "memory_optimize_pass",
-                                          "ir_graph_to_program_pass"});
+  predictor_.argument_.SetAnalysisPasses(
+      {"ir_analysis_pass", "memory_optimize_pass", "ir_graph_to_program_pass"});
   predictor_.argument_.SetQuantVarScales(scales_);
 }
paddle/fluid/inference/api/paddle_analysis_config.h

@@ -247,8 +247,12 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   /// \param memory_pool_init_size_mb initial size of the GPU memory pool in MB.
   /// \param device_id device_id the GPU card to use (default is 0).
+  /// \param precision the precision used in Paddle-GPU inference.
   ///
-  void EnableUseGpu(uint64_t memory_pool_init_size_mb, int device_id = 0);
+  void EnableUseGpu(uint64_t memory_pool_init_size_mb,
+                    int device_id = 0,
+                    Precision precision_mode = Precision::kFloat32);
+
   ///
   /// \brief Turn off GPU.
   ///
@@ -967,7 +971,7 @@ struct PD_INFER_DECL AnalysisConfig {
   /// interface is in the experimental stage and may change in the future. Note
   /// that the blacklist must be the same as the model conversion blacklist.
   ///
-  void Exp_SetBlackListOpsForMixedModel(
+  void Exp_DisableMixedPrecisionOps(
       const std::unordered_set<std::string>& black_list);

   void SetApplyOptim(bool value) { apply_optim_ = value; }
@@ -987,13 +991,15 @@ struct PD_INFER_DECL AnalysisConfig {
   mutable std::string params_file_;
   mutable std::string calibration_file_path_;

-  // Mixed precision.
+  // Mixed precision related.
+  Precision mixed_precision_mode_{Precision::kFloat32};
   std::unordered_set<std::string> mixed_black_list_;

   // GPU related.
   bool use_gpu_{false};
   int gpu_device_id_{0};
   uint64_t memory_pool_init_size_mb_{100};  // initial size is 100MB.
+  bool enable_gpu_mixed_{false};
   bool thread_local_stream_{false};

   bool use_cudnn_{false};
paddle/fluid/inference/api/paddle_pass_builder.cc

@@ -171,8 +171,9 @@ const std::vector<std::string> kGpuLowerPrecisionPasses{
     "multi_devices_fused_multi_transformer_decoder_fuse_qkv_pass",
     "gpu_cpu_map_matmul_v2_to_mul_pass",
     "gpu_cpu_map_matmul_v2_to_matmul_pass",
+    "gpu_cpu_map_matmul_to_mul_pass",
     "fc_fuse_pass",
-    "fc_elementwise_layernorm_fuse_pass",
+    // "fc_elementwise_layernorm_fuse_pass",
     "embedding_eltwise_layernorm_fuse_pass",
     "runtime_context_cache_pass",
 };
@@ -227,9 +228,10 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
         "conv_elementwise_add_fuse_pass",      //
 #endif                                         //
         "transpose_flatten_concat_fuse_pass",  //
-        "constant_folding_pass",
+        "constant_folding_pass",               //
+        // following pass should be located in the last, since it will
+        // work on all fused ops.
+        "auto_mixed_precision_pass",           //
         "runtime_context_cache_pass"});
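Since `auto_mixed_precision_pass` now sits at the tail of `GpuPassStrategy`, it runs after all the fusion passes; if that is unwanted it can be removed through the existing pass-builder API. A small sketch using `DeletePass` (a long-standing `PaddlePassBuilder` method):

    paddle::AnalysisConfig config;
    config.EnableUseGpu(100, 0);  // precision_mode defaults to Precision::kFloat32
    // Drop the new tail pass explicitly, e.g. when debugging a fusion issue.
    config.pass_builder()->DeletePass("auto_mixed_precision_pass");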