Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Paddle
Commits
dbe08e9b
Commit
dbe08e9b
authored
Jun 12, 2023
by
yuguo960516yuguo
Browse files
2.4.2
parent
b5499578
Changes
302
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
211 additions
and
980 deletions
+211
-980
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+6
-34
paddle/fluid/inference/analysis/ir_pass_manager.h
paddle/fluid/inference/analysis/ir_pass_manager.h
+0
-6
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
...id/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+8
-8
paddle/fluid/inference/analysis/passes/CMakeLists.txt
paddle/fluid/inference/analysis/passes/CMakeLists.txt
+2
-14
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc
...d/inference/analysis/passes/convert_to_mixed_precision.cc
+73
-846
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h
...id/inference/analysis/passes/convert_to_mixed_precision.h
+41
-11
paddle/fluid/inference/analysis/passes/inference_op_replace_pass.cc
...id/inference/analysis/passes/inference_op_replace_pass.cc
+1
-1
paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc
paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc
+1
-1
paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
...le/fluid/inference/analysis/passes/ir_graph_build_pass.cc
+3
-2
paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.cc
...uid/inference/analysis/passes/ir_graph_to_program_pass.cc
+1
-1
paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h
...luid/inference/analysis/passes/ir_graph_to_program_pass.h
+1
-1
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
...ence/analysis/passes/ir_params_sync_among_devices_pass.cc
+1
-1
paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
...e/fluid/inference/analysis/passes/memory_optimize_pass.cc
+1
-1
paddle/fluid/inference/analysis/passes/passes.cc
paddle/fluid/inference/analysis/passes/passes.cc
+0
-3
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+24
-15
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+32
-26
paddle/fluid/inference/api/demo_ci/.gitignore
paddle/fluid/inference/api/demo_ci/.gitignore
+1
-0
paddle/fluid/inference/api/mkldnn_quantizer.cc
paddle/fluid/inference/api/mkldnn_quantizer.cc
+2
-4
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+9
-3
paddle/fluid/inference/api/paddle_pass_builder.cc
paddle/fluid/inference/api/paddle_pass_builder.cc
+4
-2
No files found.
paddle/fluid/inference/analysis/ir_pass_manager.cc
View file @
dbe08e9b
...
...
@@ -27,6 +27,7 @@
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/analysis/argument.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/phi/core/errors.h"
namespace
paddle
{
namespace
inference
{
...
...
@@ -36,15 +37,6 @@ using string::PrettyLogEndl;
using
string
::
Style
;
IRPassManager
::
IRPassManager
(
Argument
*
argument
)
{
ARGUMENT_CHECK_FIELD
(
argument
,
main_program
);
graph_
=
std
::
unique_ptr
<
Graph
>
(
new
Graph
(
argument
->
main_program
()));
if
(
argument
->
Has
(
"scope"
))
{
auto
*
scope_ptr
=
argument
->
scope_ptr
();
PADDLE_ENFORCE_NOT_NULL
(
scope_ptr
,
platform
::
errors
::
PreconditionNotMet
(
"The scope ptr should not be nullptr."
));
graph_
->
SetNotOwned
(
framework
::
ir
::
kParamScopeAttr
,
scope_ptr
);
}
disable_logs_
=
argument
->
disable_logs
();
ARGUMENT_CHECK_FIELD
(
argument
,
ir_analysis_passes
);
...
...
@@ -95,10 +87,14 @@ void IRPassManager::CreatePasses(Argument *argument,
argument
->
tensorrt_tuned_dynamic_shape
();
pass
->
Set
(
"with_dynamic_shape"
,
new
bool
(
with_dynamic_shape
));
// mixed precision related
pass
->
Set
(
"model_precision"
,
new
int
(
argument
->
model_precision
()));
pass
->
Set
(
"mixed_black_list"
,
new
std
::
unordered_set
<
std
::
string
>
(
argument
->
mixed_black_list
()));
pass
->
Set
(
"enable_gpu_mixed"
,
new
bool
(
argument
->
enable_gpu_mixed
()));
pass
->
Set
(
"mixed_precision_mode"
,
new
int
(
argument
->
mixed_precision_mode
()));
if
(
pass_name
==
"graph_viz_pass"
)
{
std
::
string
optim_cache_dir
=
argument
->
optim_cache_dir
();
...
...
@@ -302,42 +298,18 @@ void IRPassManager::CreatePasses(Argument *argument,
}
std
::
unique_ptr
<
Graph
>
IRPassManager
::
Apply
(
std
::
unique_ptr
<
Graph
>
graph
)
{
if
(
passes_
.
empty
())
{
return
graph
;
}
PADDLE_ENFORCE_NOT_NULL
(
graph
.
get
(),
platform
::
errors
::
PreconditionNotMet
(
"Graph cannot be NULL."
));
graph
.
get
(),
platform
::
errors
::
InvalidArgument
(
"Graph cannot be null."
));
// Apply all the passes
for
(
const
auto
&
pass
:
passes_
)
{
if
(
pass
->
Type
()
!=
"graph_viz_pass"
&&
!
disable_logs_
)
{
PrettyLogEndl
(
Style
::
H2
(),
"--- Running IR pass [%s]"
,
pass
->
Type
());
}
// delete_fill_constant_op_pass is not apply under trt dynamic shape
if
(
pass
->
Type
()
==
"delete_fill_constant_op_pass"
)
{
bool
use_dynamic
=
pass
->
Get
<
bool
>
(
"with_dynamic_shape"
);
if
(
use_dynamic
)
continue
;
}
graph
.
reset
(
pass
->
Apply
(
graph
.
release
()));
}
return
graph
;
}
framework
::
proto
::
ProgramDesc
IRPassManager
::
AcquireProgram
(
std
::
unique_ptr
<
Graph
>
*
graph
,
ProgramDesc
*
program
)
const
{
auto
pass
=
framework
::
ir
::
PassRegistry
::
Instance
().
Get
(
"graph_to_program_pass"
);
// Direct using ProgramDesc desc(argument->main_program()) may cause
// incomplete copies of information.
ProgramDesc
desc
;
desc
.
CopyFrom
(
*
program
->
Proto
());
pass
->
SetNotOwned
(
"program"
,
&
desc
);
auto
*
the_graph
=
graph
->
release
();
graph
->
reset
(
pass
->
Apply
(
the_graph
));
return
*
desc
.
Proto
();
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/ir_pass_manager.h
View file @
dbe08e9b
...
...
@@ -48,15 +48,9 @@ class IRPassManager final {
std
::
unique_ptr
<
Graph
>
Apply
(
std
::
unique_ptr
<
Graph
>
graph
);
framework
::
proto
::
ProgramDesc
AcquireProgram
(
std
::
unique_ptr
<
Graph
>
*
graph
,
ProgramDesc
*
program
)
const
;
framework
::
ir
::
Graph
&
graph
()
const
{
return
*
graph_
;
}
private:
void
CreatePasses
(
Argument
*
argument
,
const
std
::
vector
<
std
::
string
>
&
passes
);
std
::
unique_ptr
<
Graph
>
graph_
;
std
::
vector
<
std
::
unique_ptr
<
Pass
>>
passes_
;
bool
disable_logs_
{
false
};
};
...
...
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
View file @
dbe08e9b
...
...
@@ -94,14 +94,14 @@ void OutputProcess(framework::ir::Graph *graph,
backend
,
precision
,
blacklist
))
{
Add
CastOp
(
graph
,
var_node
,
next_op
,
framework
::
proto
::
VarType
::
FP32
,
to_type
,
&
suffix
,
block_desc
,
&
var_to_cast_op_map
);
Insert
CastOp
(
graph
,
var_node
,
next_op
,
framework
::
proto
::
VarType
::
FP32
,
to_type
,
block_desc
,
&
suffix
,
&
var_to_cast_op_map
);
var_node
->
Var
()
->
SetDataType
(
framework
::
proto
::
VarType
::
FP32
);
}
}
...
...
paddle/fluid/inference/analysis/passes/CMakeLists.txt
View file @
dbe08e9b
...
...
@@ -13,7 +13,7 @@ cc_library(
cc_library
(
convert_to_mixed_precision
SRCS convert_to_mixed_precision.cc
DEPS analysis_pass ir_graph_build_pass
)
DEPS analysis_pass ir_graph_build_pass
auto_mixed_precision_pass
)
cc_library
(
ir_params_sync_among_devices_pass
SRCS ir_params_sync_among_devices_pass.cc
...
...
@@ -30,17 +30,6 @@ cc_library(
inference_op_replace_pass
SRCS inference_op_replace_pass.cc
DEPS analysis_pass graph_to_program_pass
)
if
(
WITH_TESTING
)
cc_library
(
ir_graph_clean_pass
SRCS ir_graph_clean_pass.cc
DEPS analysis_pass gtest
)
else
()
cc_library
(
ir_graph_clean_pass
SRCS ir_graph_clean_pass.cc
DEPS analysis_pass
)
endif
()
cc_library
(
analysis_passes
...
...
@@ -52,8 +41,7 @@ cc_library(
memory_optim_pass
convert_to_mixed_precision
inference_op_replace_pass
ir_graph_to_program_pass
ir_graph_clean_pass
)
ir_graph_to_program_pass
)
set
(
analysis_deps
${
analysis_deps
}
analysis_passes subgraph_detector
...
...
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc
View file @
dbe08e9b
...
...
@@ -14,807 +14,88 @@
#include "paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h"
#include <algorithm>
#include <iterator>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/auto_mixed_precision_pass.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/var_desc.h"
#include "paddle/fluid/inference/analysis/argument.h"
#include "paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/tensor_meta.h"
using
namespace
paddle
::
framework
;
// NOLINT
#include "paddle/phi/common/backend.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
namespace
{
bool
PhiKernelSupportPrecision
(
const
std
::
string
&
op_type
,
ConvertToMixedPrecisionPass
::
ConvertToMixedPrecisionPass
(
const
std
::
string
&
model_file
,
const
std
::
string
&
params_file
,
const
std
::
string
&
mixed_model_file
,
const
std
::
string
&
mixed_params_file
,
phi
::
DataType
mixed_precision
,
phi
::
Backend
backend
,
phi
::
DataType
data_type
,
phi
::
DataLayout
layout
=
phi
::
DataLayout
::
ALL_LAYOUT
)
{
auto
kernels
=
phi
::
KernelFactory
::
Instance
().
kernels
();
if
(
kernels
.
find
(
op_type
)
==
kernels
.
end
())
{
return
false
;
}
phi
::
KernelKey
kernel_key
(
backend
,
layout
,
data_type
);
return
phi
::
KernelFactory
::
Instance
().
HasKernel
(
op_type
,
kernel_key
);
}
bool
GpuKernelSupportPrecision
(
const
std
::
string
&
op_type
,
phi
::
DataType
data_type
,
phi
::
DataLayout
layout
=
phi
::
DataLayout
::
ALL_LAYOUT
)
{
auto
phi_op_type
=
phi
::
TransToPhiKernelName
(
op_type
);
bool
res
=
PhiKernelSupportPrecision
(
phi_op_type
,
phi
::
Backend
::
GPU
,
data_type
,
layout
);
res
|=
PhiKernelSupportPrecision
(
phi_op_type
,
phi
::
Backend
::
GPUDNN
,
data_type
,
layout
);
if
(
!
res
)
{
auto
&
all_kernels
=
OperatorWithKernel
::
AllOpKernels
();
auto
it
=
all_kernels
.
find
(
op_type
);
if
(
it
!=
all_kernels
.
end
())
{
for
(
auto
&
kern_pair
:
it
->
second
)
{
if
(
platform
::
is_gpu_place
(
kern_pair
.
first
.
place_
)
&&
kern_pair
.
first
.
data_type_
==
framework
::
proto
::
VarType
::
FP16
)
{
res
=
true
;
}
}
}
}
return
res
;
}
class
ConvertToMixedPrecisionPass
{
public:
explicit
ConvertToMixedPrecisionPass
(
const
std
::
string
&
model_file
,
const
std
::
string
&
params_file
,
const
std
::
string
&
mixed_model_file
,
const
std
::
string
&
mixed_params_file
,
phi
::
DataType
mixed_precision
,
phi
::
Backend
backend
,
bool
keep_io_types
,
std
::
unordered_set
<
std
::
string
>
black_list
)
:
model_file_
(
model_file
),
params_file_
(
params_file
),
mixed_model_file_
(
mixed_model_file
),
mixed_params_file_
(
mixed_params_file
),
mixed_precision_
(
mixed_precision
),
backend_
(
backend
),
keep_io_types_
(
keep_io_types
),
black_list_
(
black_list
),
place_
(
paddle
::
CPUPlace
()),
executor_
(
place_
)
{
black_list_
.
insert
(
"assign"
);
black_list_
.
insert
(
"fill_constant"
);
black_list_
.
insert
(
"assign_value"
);
black_list_
.
insert
(
"eye"
);
black_list_
.
insert
(
"fill_any_like"
);
black_list_
.
insert
(
"fill_constant_batch_size_like"
);
}
void
Run
();
private:
void
LoadAndPrepare
();
inline
bool
NodeVarHasDtype
(
framework
::
ir
::
Node
*
node
);
void
ConvertAllFp64ToFp32
(
framework
::
ir
::
Graph
*
graph
);
void
FixCastAttr
(
framework
::
ir
::
Graph
*
graph
);
void
SaveMixedModel
();
void
ConvertTensorDtype
(
int
block_idx
);
void
ProcessInputNode
(
bool
support_precision
,
ir
::
Node
*
in_node
,
ir
::
Node
*
op_node
,
int
*
suffix
,
framework
::
BlockDesc
*
block_desc
,
framework
::
proto
::
VarType
::
Type
to_type
,
int
block_idx
);
void
ProcessOutputNode
(
int
block_idx
,
ir
::
Node
*
var_node
,
framework
::
proto
::
VarType
::
Type
to_type
);
inline
bool
IsFloatVarType
(
framework
::
proto
::
VarType
::
Type
type
);
bool
OutShouldNotConvert
(
ir
::
Node
*
var_node
);
// Just process special cases for weights conversion.
bool
WeightsShouldNotConvert
(
ir
::
Node
*
var_node
);
// To support multi block, we need to consider a lot of special cases.
// Return Node* which first appers in block.
framework
::
ir
::
Node
*
GetRealNode
(
int
block_idx
,
framework
::
ir
::
Node
*
node
);
void
FindVarsInMultiBlock
();
inline
bool
VarIsMultiPrecisionOpsOut
(
int
block_idx
,
framework
::
ir
::
Node
*
op_node
);
private:
// A trick. Patch for strange op, which input name equal to output name, such
// as `fused_multi_transformer`
void
PatchForStrangeOp
();
private:
std
::
string
model_file_
;
std
::
string
params_file_
;
std
::
string
mixed_model_file_
;
std
::
string
mixed_params_file_
;
phi
::
DataType
mixed_precision_
;
phi
::
Backend
backend_
;
bool
keep_io_types_
;
std
::
unordered_set
<
std
::
string
>
black_list_
;
paddle
::
CPUPlace
place_
;
framework
::
Executor
executor_
;
framework
::
Scope
scope_
;
std
::
unordered_map
<
framework
::
ir
::
Node
*
,
framework
::
ir
::
Node
*>
cast_map_
;
std
::
unordered_map
<
std
::
string
,
std
::
pair
<
framework
::
proto
::
VarType
::
Type
,
int
>>
vars_in_multi_block_map_
;
std
::
vector
<
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>>
vars_appear_multi_in_one_block_
;
int
suffix_
{
0
};
std
::
unique_ptr
<
framework
::
ProgramDesc
>
program_desc_
{
nullptr
};
std
::
unique_ptr
<
framework
::
ir
::
Graph
>
main_graph_
{
nullptr
};
std
::
vector
<
framework
::
ir
::
Graph
*>
graphes_
;
};
framework
::
ir
::
Node
*
ConvertToMixedPrecisionPass
::
GetRealNode
(
int
block_idx
,
framework
::
ir
::
Node
*
node
)
{
if
(
vars_in_multi_block_map_
.
count
(
node
->
Name
()))
{
int
var_origin_block_id
=
vars_in_multi_block_map_
.
at
(
node
->
Name
()).
second
;
if
(
block_idx
!=
var_origin_block_id
)
{
auto
graph
=
graphes_
[
var_origin_block_id
];
for
(
auto
nd
:
graph
->
Nodes
())
{
if
(
nd
->
Name
()
==
node
->
Name
())
{
return
nd
;
}
}
}
}
return
node
;
}
inline
bool
ConvertToMixedPrecisionPass
::
NodeVarHasDtype
(
framework
::
ir
::
Node
*
node
)
{
if
(
node
->
IsVar
()
&&
(
node
->
Var
()
->
GetType
()
==
paddle
::
framework
::
proto
::
VarType
::
SELECTED_ROWS
||
node
->
Var
()
->
GetType
()
==
paddle
::
framework
::
proto
::
VarType
::
LOD_TENSOR
||
node
->
Var
()
->
GetType
()
==
paddle
::
framework
::
proto
::
VarType
::
LOD_TENSOR_ARRAY
||
node
->
Var
()
->
GetType
()
==
paddle
::
framework
::
proto
::
VarType
::
STRINGS
||
node
->
Var
()
->
GetType
()
==
paddle
::
framework
::
proto
::
VarType
::
VOCAB
))
{
return
true
;
}
return
false
;
}
// op1(fp32) -> var1, op2(fp16) -> var1
// if and only if op1 and op2 both support fp16, we convert op1 and op2's
// precision.
inline
bool
ConvertToMixedPrecisionPass
::
VarIsMultiPrecisionOpsOut
(
int
block_idx
,
framework
::
ir
::
Node
*
op_node
)
{
CHECK_EQ
(
op_node
->
IsOp
(),
true
);
bool
ret
{
false
};
for
(
auto
*
out
:
op_node
->
outputs
)
{
auto
*
real_node
=
GetRealNode
(
block_idx
,
out
);
if
(
!
real_node
->
Var
()
->
Persistable
()
&&
vars_appear_multi_in_one_block_
[
block_idx
].
count
(
out
->
Name
()))
{
for
(
auto
op_type
:
vars_appear_multi_in_one_block_
[
block_idx
].
at
(
out
->
Name
()))
{
if
(
OpSupportPrecision
(
op_type
,
backend_
,
mixed_precision_
,
black_list_
))
{
ret
=
true
;
VLOG
(
2
)
<<
out
->
Name
()
<<
" is multi precision op's out, so we skip convert to fp16"
;
break
;
}
}
}
if
(
ret
)
break
;
}
return
ret
;
}
void
ConvertToMixedPrecisionPass
::
ProcessInputNode
(
bool
support_precision
,
ir
::
Node
*
in_node
,
ir
::
Node
*
op_node
,
int
*
suffix
,
framework
::
BlockDesc
*
block_desc
,
framework
::
proto
::
VarType
::
Type
to_type
,
int
block_idx
)
{
auto
*
real_node
=
GetRealNode
(
block_idx
,
in_node
);
if
(
!
NodeVarHasDtype
(
real_node
))
return
;
auto
graph
=
graphes_
[
block_idx
];
bool
is_main_block
=
block_idx
==
0
;
auto
*
in_var
=
real_node
->
Var
();
auto
in_var_type
=
in_var
->
GetDataType
();
auto
prev_type
=
in_var_type
;
bool
is_in_multi_block
=
vars_in_multi_block_map_
.
count
(
in_var
->
Name
());
if
(
!
is_main_block
&&
is_in_multi_block
)
{
in_var_type
=
vars_in_multi_block_map_
.
at
(
in_var
->
Name
()).
first
;
}
if
(
support_precision
)
{
if
(
in_var
->
Persistable
()
&&
in_var_type
==
framework
::
proto
::
VarType
::
FP32
)
{
if
(
WeightsShouldNotConvert
(
in_node
))
return
;
in_var
->
SetDataType
(
to_type
);
in_var_type
=
to_type
;
VLOG
(
3
)
<<
" in_node name "
<<
in_var
->
Name
()
<<
" from "
<<
prev_type
<<
" to "
<<
to_type
;
}
else
if
(
!
in_var
->
Persistable
()
&&
IsFloatVarType
(
in_var_type
)
&&
in_var_type
!=
to_type
)
{
AddCastOp
(
graph
,
in_node
,
op_node
,
in_var_type
,
to_type
,
suffix
,
block_desc
,
&
cast_map_
);
VLOG
(
3
)
<<
" in_node name "
<<
in_var
->
Name
()
<<
"("
<<
prev_type
<<
") to "
<<
cast_map_
[
in_node
]
->
Name
()
<<
"("
<<
to_type
<<
")"
;
}
}
else
{
if
(
!
in_var
->
Persistable
()
&&
IsFloatVarType
(
in_var_type
)
&&
in_var_type
!=
to_type
)
{
AddCastOp
(
graph
,
in_node
,
op_node
,
in_var_type
,
to_type
,
suffix
,
block_desc
,
&
cast_map_
);
VLOG
(
3
)
<<
" in_node name "
<<
in_var
->
Name
()
<<
"("
<<
prev_type
<<
") to "
<<
cast_map_
[
in_node
]
->
Name
()
<<
"("
<<
to_type
<<
")"
;
}
bool
keep_io_types
,
const
std
::
unordered_set
<
std
::
string
>&
black_list
)
:
model_file_
(
model_file
),
params_file_
(
params_file
),
mixed_model_file_
(
mixed_model_file
),
mixed_params_file_
(
mixed_params_file
),
mixed_precision_
(
mixed_precision
),
backend_
(
backend
),
keep_io_types_
(
keep_io_types
),
black_list_
(
black_list
)
{
if
(
mixed_precision_
!=
phi
::
DataType
::
FLOAT16
&&
mixed_precision_
!=
phi
::
DataType
::
BFLOAT16
)
{
PADDLE_THROW
(
paddle
::
platform
::
errors
::
InvalidArgument
(
"mixed_precision currently not supported dtype %d, we now only "
"support fp16 and bf16."
,
static_cast
<
int
>
(
mixed_precision_
)));
}
}
void
ConvertToMixedPrecisionPass
::
ProcessOutputNode
(
int
block_idx
,
ir
::
Node
*
var_node
,
framework
::
proto
::
VarType
::
Type
to_type
)
{
auto
*
real_node
=
GetRealNode
(
block_idx
,
var_node
);
if
(
!
NodeVarHasDtype
(
real_node
))
return
;
auto
*
out_var
=
real_node
->
Var
();
auto
prev_type
=
out_var
->
GetDataType
();
if
(
out_var
->
GetDataType
()
==
framework
::
proto
::
VarType
::
FP32
)
{
if
(
OutShouldNotConvert
(
var_node
))
return
;
out_var
->
SetDataType
(
to_type
);
if
(
backend_
!=
phi
::
Backend
::
GPU
)
{
PADDLE_THROW
(
paddle
::
platform
::
errors
::
InvalidArgument
(
"mixed_precision currently not supported place %d, we now only "
"support gpu."
,
static_cast
<
int
>
(
backend_
)));
}
VLOG
(
3
)
<<
" out_node name "
<<
var_node
->
Name
()
<<
" from dtype "
<<
prev_type
<<
" to "
<<
out_var
->
GetDataType
();
}
// Just process special cases.
bool
ConvertToMixedPrecisionPass
::
OutShouldNotConvert
(
ir
::
Node
*
var_node
)
{
auto
op_node
=
var_node
->
inputs
[
0
];
auto
*
op_desc
=
op_node
->
Op
();
// batch_norm's input and output (variance and mean) are the same.
if
(
op_desc
->
Type
()
==
"batch_norm"
)
{
auto
vecs
=
op_desc
->
Output
(
"MeanOut"
);
if
(
std
::
find
(
vecs
.
begin
(),
vecs
.
end
(),
var_node
->
Name
())
!=
vecs
.
end
())
{
return
true
;
}
vecs
=
op_desc
->
Output
(
"VarianceOut"
);
if
(
std
::
find
(
vecs
.
begin
(),
vecs
.
end
(),
var_node
->
Name
())
!=
vecs
.
end
())
{
return
true
;
}
vecs
=
op_desc
->
Output
(
"SavedMean"
);
if
(
std
::
find
(
vecs
.
begin
(),
vecs
.
end
(),
var_node
->
Name
())
!=
vecs
.
end
())
{
return
true
;
}
vecs
=
op_desc
->
Output
(
"SavedVariance"
);
if
(
std
::
find
(
vecs
.
begin
(),
vecs
.
end
(),
var_node
->
Name
())
!=
vecs
.
end
())
{
return
true
;
}
}
return
false
;
}
void
ConvertToMixedPrecisionPass
::
LoadModel
()
{
framework
::
Executor
exe
{
platform
::
CPUPlace
{}};
bool
ConvertToMixedPrecisionPass
::
WeightsShouldNotConvert
(
ir
::
Node
*
var_node
)
{
auto
op_nodes
=
var_node
->
outputs
;
for
(
auto
*
op_node
:
op_nodes
)
{
auto
*
op_desc
=
op_node
->
Op
();
// batch_norm op's bias, mean, scale and variance just be float32, so we can
// not convert the dtype.
if
(
op_desc
->
Type
()
==
"batch_norm"
)
{
auto
vecs
=
op_desc
->
Input
(
"Bias"
);
if
(
std
::
find
(
vecs
.
begin
(),
vecs
.
end
(),
var_node
->
Name
())
!=
vecs
.
end
())
{
return
true
;
}
vecs
=
op_desc
->
Input
(
"Mean"
);
if
(
std
::
find
(
vecs
.
begin
(),
vecs
.
end
(),
var_node
->
Name
())
!=
vecs
.
end
())
{
return
true
;
}
vecs
=
op_desc
->
Input
(
"Scale"
);
if
(
std
::
find
(
vecs
.
begin
(),
vecs
.
end
(),
var_node
->
Name
())
!=
vecs
.
end
())
{
return
true
;
}
vecs
=
op_desc
->
Input
(
"Variance"
);
if
(
std
::
find
(
vecs
.
begin
(),
vecs
.
end
(),
var_node
->
Name
())
!=
vecs
.
end
())
{
return
true
;
}
}
else
if
(
op_desc
->
Type
()
==
"fused_multi_transformer"
)
{
auto
vecs
=
op_desc
->
Input
(
"LnScale"
);
if
(
std
::
find
(
vecs
.
begin
(),
vecs
.
end
(),
var_node
->
Name
())
!=
vecs
.
end
())
{
return
true
;
}
vecs
=
op_desc
->
Input
(
"LnBias"
);
if
(
std
::
find
(
vecs
.
begin
(),
vecs
.
end
(),
var_node
->
Name
())
!=
vecs
.
end
())
{
return
true
;
}
vecs
=
op_desc
->
Input
(
"FFNLnScale"
);
if
(
std
::
find
(
vecs
.
begin
(),
vecs
.
end
(),
var_node
->
Name
())
!=
vecs
.
end
())
{
return
true
;
}
vecs
=
op_desc
->
Input
(
"FFNLnBias"
);
if
(
std
::
find
(
vecs
.
begin
(),
vecs
.
end
(),
var_node
->
Name
())
!=
vecs
.
end
())
{
return
true
;
}
}
}
return
false
;
}
inline
bool
ConvertToMixedPrecisionPass
::
IsFloatVarType
(
framework
::
proto
::
VarType
::
Type
type
)
{
if
(
type
==
framework
::
proto
::
VarType
::
FP16
||
type
==
framework
::
proto
::
VarType
::
FP32
||
type
==
framework
::
proto
::
VarType
::
BF16
)
return
true
;
return
false
;
}
void
ConvertToMixedPrecisionPass
::
LoadAndPrepare
()
{
program_desc_
=
inference
::
Load
(
&
executor_
,
&
scope_
,
model_file_
,
params_file_
);
auto
program_desc
=
inference
::
Load
(
&
exe
,
&
scope_
,
model_file_
,
params_file_
);
main_graph_
=
std
::
unique_ptr
<
framework
::
ir
::
Graph
>
(
new
framework
::
ir
::
Graph
(
*
program_desc_
));
// Remove all control var
IrInferCleanGraphPass
pass
;
Argument
arg
;
arg
.
SetMainGraphNotOwned
(
main_graph_
.
get
());
pass
.
Run
(
&
arg
);
vars_appear_multi_in_one_block_
.
resize
(
program_desc_
->
Size
());
FindVarsInMultiBlock
();
}
void
ConvertToMixedPrecisionPass
::
FindVarsInMultiBlock
()
{
std
::
vector
<
std
::
set
<
std
::
string
>>
block_var_names_set
(
program_desc_
->
Size
());
for
(
size_t
i
=
0
;
i
<
program_desc_
->
Size
();
++
i
)
{
for
(
auto
op
:
program_desc_
->
Block
(
i
).
AllOps
())
{
auto
in_names
=
op
->
InputArgumentNames
();
block_var_names_set
[
i
].
insert
(
in_names
.
begin
(),
in_names
.
end
());
auto
out_names
=
op
->
OutputArgumentNames
();
if
(
op
->
HasAttr
(
"sub_block"
)
==
false
)
{
for
(
auto
&
n
:
out_names
)
{
if
(
block_var_names_set
[
i
].
count
(
n
))
{
vars_appear_multi_in_one_block_
[
i
][
n
].
push_back
(
op
->
Type
());
}
}
}
block_var_names_set
[
i
].
insert
(
out_names
.
begin
(),
out_names
.
end
());
}
}
for
(
size_t
i
=
0
;
i
<
program_desc_
->
Size
()
-
1
;
++
i
)
{
for
(
size_t
j
=
i
+
1
;
j
<
program_desc_
->
Size
();
++
j
)
{
std
::
set
<
std
::
string
>
vars_in_multi_block
;
std
::
set_intersection
(
block_var_names_set
[
i
].
begin
(),
block_var_names_set
[
i
].
end
(),
block_var_names_set
[
j
].
begin
(),
block_var_names_set
[
j
].
end
(),
std
::
inserter
(
vars_in_multi_block
,
vars_in_multi_block
.
begin
()));
for
(
auto
name
:
vars_in_multi_block
)
{
vars_in_multi_block_map_
.
emplace
(
name
,
std
::
make_pair
(
framework
::
proto
::
VarType
::
FP32
,
i
));
}
}
}
}
void
ConvertToMixedPrecisionPass
::
ConvertAllFp64ToFp32
(
framework
::
ir
::
Graph
*
graph
)
{
auto
op_nodes
=
framework
::
ir
::
TopologySortOperations
(
*
graph
);
for
(
auto
*
op_node
:
op_nodes
)
{
if
(
!
op_node
->
IsOp
())
continue
;
auto
op_type
=
op_node
->
Op
()
->
Type
();
if
(
op_type
==
"feed"
||
op_type
==
"fetch"
)
continue
;
if
(
op_type
==
"fill_constant"
)
{
if
(
PADDLE_GET_CONST
(
int
,
op_node
->
Op
()
->
GetAttr
(
"dtype"
))
==
static_cast
<
int
>
(
framework
::
proto
::
VarType
::
FP64
))
op_node
->
Op
()
->
SetAttr
(
"dtype"
,
static_cast
<
int
>
(
framework
::
proto
::
VarType
::
FP32
));
}
else
if
(
op_type
==
"assign_value"
)
{
if
(
PADDLE_GET_CONST
(
int
,
op_node
->
Op
()
->
GetAttr
(
"dtype"
))
==
static_cast
<
int
>
(
framework
::
proto
::
VarType
::
FP64
))
op_node
->
Op
()
->
SetAttr
(
"dtype"
,
static_cast
<
int
>
(
framework
::
proto
::
VarType
::
FP32
));
}
else
if
(
op_type
==
"eye"
)
{
if
(
PADDLE_GET_CONST
(
int
,
op_node
->
Op
()
->
GetAttr
(
"dtype"
))
==
static_cast
<
int
>
(
framework
::
proto
::
VarType
::
FP64
))
op_node
->
Op
()
->
SetAttr
(
"dtype"
,
static_cast
<
int
>
(
framework
::
proto
::
VarType
::
FP32
));
}
else
if
(
op_type
==
"fill_any_like"
)
{
if
(
PADDLE_GET_CONST
(
int
,
op_node
->
Op
()
->
GetAttr
(
"dtype"
))
==
static_cast
<
int
>
(
framework
::
proto
::
VarType
::
FP64
))
op_node
->
Op
()
->
SetAttr
(
"dtype"
,
static_cast
<
int
>
(
framework
::
proto
::
VarType
::
FP32
));
}
else
if
(
op_type
==
"cast"
)
{
if
(
PADDLE_GET_CONST
(
int
,
op_node
->
Op
()
->
GetAttr
(
"in_dtype"
))
==
static_cast
<
int
>
(
framework
::
proto
::
VarType
::
FP64
))
op_node
->
Op
()
->
SetAttr
(
"in_dtype"
,
static_cast
<
int
>
(
framework
::
proto
::
VarType
::
FP32
));
if
(
PADDLE_GET_CONST
(
int
,
op_node
->
Op
()
->
GetAttr
(
"out_dtype"
))
==
static_cast
<
int
>
(
framework
::
proto
::
VarType
::
FP64
))
op_node
->
Op
()
->
SetAttr
(
"out_dtype"
,
static_cast
<
int
>
(
framework
::
proto
::
VarType
::
FP32
));
}
auto
inputs
=
op_node
->
inputs
;
for
(
auto
*
in_node
:
inputs
)
{
auto
*
in_var
=
in_node
->
Var
();
if
(
!
in_var
->
Persistable
()
&&
in_var
->
GetDataType
()
==
framework
::
proto
::
VarType
::
FP64
)
{
in_var
->
SetDataType
(
framework
::
proto
::
VarType
::
FP32
);
}
}
}
new
framework
::
ir
::
Graph
(
*
program_desc
));
main_graph_
->
SetNotOwned
(
framework
::
ir
::
kParamScopeAttr
,
&
scope_
);
}
void
ConvertToMixedPrecisionPass
::
Run
()
{
Load
AndPrepare
();
Load
Model
();
for
(
size_t
i
=
0
;
i
<
main_graph_
->
SubGraphsSize
();
++
i
)
{
auto
graph
=
main_graph_
->
GetSubGraph
(
i
);
graphes_
.
push_back
(
graph
);
VLOG
(
2
)
<<
" -------- handle subgraph "
<<
i
<<
", has "
<<
graph
->
Nodes
().
size
()
<<
" nodes --------"
;
framework
::
ir
::
AutoMixedPrecisionPass
pass
;
pass
.
Set
(
"mixed_precision_mode"
,
new
int
{
static_cast
<
int
>
(
mixed_precision_
)});
pass
.
Set
(
"mixed_black_list"
,
new
std
::
unordered_set
<
std
::
string
>
{
black_list_
});
pass
.
Set
(
"enable_gpu_mixed"
,
new
bool
{
true
});
pass
.
Set
(
"keep_io_types"
,
new
bool
{
keep_io_types_
});
ConvertAllFp64ToFp32
(
graph
);
ConvertTensorDtype
(
i
);
FixCastAttr
(
graph
);
// A trick
PatchForStrangeOp
();
CHECK_EQ
(
ir
::
VarDescIsConsistency
(
*
graph
),
true
);
}
pass
.
Apply
(
main_graph_
.
get
());
SaveMixedModel
();
}
void
ConvertToMixedPrecisionPass
::
ConvertTensorDtype
(
int
block_idx
)
{
auto
graph
=
graphes_
[
block_idx
];
framework
::
proto
::
VarType
::
Type
to_type
;
if
(
mixed_precision_
==
phi
::
DataType
::
FLOAT16
)
{
to_type
=
framework
::
proto
::
VarType
::
FP16
;
}
else
if
(
mixed_precision_
==
phi
::
DataType
::
BFLOAT16
)
{
to_type
=
framework
::
proto
::
VarType
::
BF16
;
}
else
{
PADDLE_THROW
(
paddle
::
platform
::
errors
::
InvalidArgument
(
"mixed_precision currently not supported dtype %d, we now only "
"support fp16 and bf16."
,
static_cast
<
int
>
(
mixed_precision_
)));
}
auto
op_nodes
=
framework
::
ir
::
TopologySortOperations
(
*
graph
);
auto
*
block_desc
=
op_nodes
[
0
]
->
Op
()
->
Block
();
int
num_low_precision
=
0
;
std
::
vector
<
framework
::
ir
::
Node
*>
output_nodes
;
for
(
auto
*
op_node
:
op_nodes
)
{
if
(
!
op_node
->
IsOp
())
continue
;
auto
op_type
=
op_node
->
Op
()
->
Type
();
VLOG
(
3
)
<<
"-------------------- op_type "
<<
op_type
<<
", phi_type "
<<
phi
::
TransToPhiKernelName
(
op_type
);
// 1. set input dtype.
if
(
op_type
==
"feed"
)
{
auto
feed_var
=
op_node
->
outputs
[
0
]
->
Var
();
if
(
!
keep_io_types_
&&
feed_var
->
GetDataType
()
==
framework
::
proto
::
VarType
::
FP32
)
{
feed_var
->
SetDataType
(
to_type
);
}
}
else
if
(
op_type
==
"fetch"
)
{
auto
*
fetch_var
=
op_node
->
inputs
[
0
];
output_nodes
.
push_back
(
fetch_var
);
continue
;
}
else
if
(
op_type
==
"cast"
)
{
continue
;
}
else
if
(
op_node
->
Op
()
->
HasAttr
(
"sub_block"
))
{
// NOLINT
// sub_block op's output dtype should be same as input dtype, if have the
// same name.
std
::
unordered_map
<
std
::
string
,
framework
::
ir
::
Node
*>
in_name_to_node
;
for
(
auto
*
in
:
op_node
->
inputs
)
{
auto
*
real_node
=
GetRealNode
(
block_idx
,
in
);
if
(
NodeVarHasDtype
(
real_node
))
{
in_name_to_node
[
in
->
Name
()]
=
in
;
}
}
for
(
auto
out
:
op_node
->
outputs
)
{
auto
*
real_node
=
GetRealNode
(
block_idx
,
out
);
if
(
NodeVarHasDtype
(
real_node
))
{
if
(
in_name_to_node
.
count
(
out
->
Name
()))
real_node
->
Var
()
->
SetDataType
(
in_name_to_node
[
out
->
Name
()]
->
Var
()
->
GetDataType
());
}
}
continue
;
}
// 2. if op support fp16/bf16 and not in blacklist.
// - cast weight to fp16/bf16.
// - add cast op if the input dtype is not fp16/bf16.
// - set output dtype.
//
// If a var(op's out var) appears multiple times in a block, we should not
// convert to fp16.
else
if
(
black_list_
.
count
(
op_type
)
==
0
&&
// NOLINT
!
VarIsMultiPrecisionOpsOut
(
block_idx
,
op_node
))
{
bool
support_precision
=
OpSupportPrecision
(
op_type
,
backend_
,
mixed_precision_
,
black_list_
);
// if op not has float input, we will not choose the low precision kernel.
{
bool
has_float_input
{
false
};
for
(
auto
in_node
:
op_node
->
inputs
)
{
auto
*
real_node
=
GetRealNode
(
block_idx
,
in_node
);
if
(
real_node
->
Var
()
->
GetDataType
()
==
proto
::
VarType
::
FP16
||
real_node
->
Var
()
->
GetDataType
()
==
proto
::
VarType
::
FP32
||
real_node
->
Var
()
->
GetDataType
()
==
proto
::
VarType
::
FP64
||
real_node
->
Var
()
->
GetDataType
()
==
proto
::
VarType
::
BF16
)
{
has_float_input
=
true
;
break
;
}
}
if
(
!
has_float_input
)
{
support_precision
=
false
;
VLOG
(
2
)
<<
" op doesn't has float input, just skip."
;
}
}
VLOG
(
2
)
<<
" support low precision "
<<
support_precision
;
if
(
support_precision
)
{
VLOG
(
2
)
<<
" process input nodes:"
;
++
num_low_precision
;
auto
inputs
=
op_node
->
inputs
;
// Just for paddle's terriable case: op's input and output has the same
// name.
std
::
unordered_map
<
std
::
string
,
std
::
string
>
names_map
;
for
(
auto
out_node
:
op_node
->
outputs
)
{
for
(
auto
in_node
:
op_node
->
inputs
)
{
if
(
out_node
->
Name
()
==
in_node
->
Name
())
{
names_map
[
out_node
->
Name
()]
=
in_node
->
Name
();
}
}
}
// Process inputs.
for
(
auto
*
in_node
:
inputs
)
{
ProcessInputNode
(
true
,
in_node
,
op_node
,
&
suffix_
,
block_desc
,
to_type
,
block_idx
);
if
(
names_map
.
count
(
in_node
->
Name
())
&&
cast_map_
.
count
(
in_node
))
{
names_map
[
in_node
->
Name
()]
=
cast_map_
[
in_node
]
->
Name
();
}
}
VLOG
(
2
)
<<
" process output nodes:"
;
// Process outputs.
for
(
auto
*
out_node
:
op_node
->
outputs
)
{
ProcessOutputNode
(
block_idx
,
out_node
,
to_type
);
}
}
else
{
auto
inputs
=
op_node
->
inputs
;
for
(
auto
*
in_node
:
inputs
)
{
ProcessInputNode
(
false
,
in_node
,
op_node
,
&
suffix_
,
block_desc
,
framework
::
proto
::
VarType
::
FP32
,
block_idx
);
}
}
}
// 3. check op not support fp16/bf16 or in blacklist.
// - add cast op if the input dtype is not fp32.
else
{
// NOLINT
VLOG
(
3
)
<<
"not to run fp16 op_type: "
<<
op_type
;
auto
ins
=
op_node
->
inputs
;
for
(
auto
*
in_node
:
ins
)
{
auto
*
in_var
=
in_node
->
Var
();
if
(
in_var
->
GetDataType
()
==
to_type
)
{
AddCastOp
(
graph
,
in_node
,
op_node
,
to_type
,
framework
::
proto
::
VarType
::
FP32
,
&
suffix_
,
block_desc
,
&
cast_map_
);
VLOG
(
3
)
<<
"-- "
<<
in_node
->
Name
()
<<
"("
<<
to_type
<<
") to "
<<
cast_map_
[
in_node
]
->
Name
()
<<
"("
<<
framework
::
proto
::
VarType
::
FP32
<<
")"
;
}
}
}
}
// 4. if output_op's dtype is not compatible to output dtype, then just
// insert cast.
for
(
auto
*
node
:
output_nodes
)
{
ir
::
Node
*
fetch_op
{
nullptr
};
for
(
auto
*
op_node
:
node
->
outputs
)
{
if
(
op_node
->
IsOp
()
&&
op_node
->
Op
()
->
Type
()
==
"fetch"
)
{
fetch_op
=
op_node
;
}
}
CHECK_NOTNULL
(
fetch_op
);
auto
var
=
node
->
Var
();
if
(
keep_io_types_
&&
var
->
GetDataType
()
==
to_type
)
{
// fp16/bf16 -> fp32.
AddCastOp
(
graph
,
node
,
fetch_op
,
to_type
,
framework
::
proto
::
VarType
::
FP32
,
&
suffix_
,
block_desc
,
&
cast_map_
);
}
else
if
(
!
keep_io_types_
&&
var
->
GetDataType
()
==
framework
::
proto
::
VarType
::
FP32
)
{
// fp32 -> fp16/bf16
AddCastOp
(
graph
,
node
,
fetch_op
,
framework
::
proto
::
VarType
::
FP32
,
to_type
,
&
suffix_
,
block_desc
,
&
cast_map_
);
}
}
for
(
auto
node
:
graph
->
Nodes
())
{
auto
*
real_node
=
GetRealNode
(
block_idx
,
node
);
if
(
!
NodeVarHasDtype
(
real_node
))
continue
;
if
(
vars_in_multi_block_map_
.
count
(
real_node
->
Name
())
&&
vars_in_multi_block_map_
.
at
(
real_node
->
Name
()).
second
==
block_idx
)
{
vars_in_multi_block_map_
.
at
(
real_node
->
Name
()).
first
=
real_node
->
Var
()
->
GetDataType
();
}
}
if
(
num_low_precision
)
LOG
(
INFO
)
<<
"--- detected "
<<
num_low_precision
<<
" low precision ops in "
<<
block_idx
<<
" subgraph"
;
}
// The pass changes the precision of op inputs and outputs, so the `in_dtype`
// and `out_dtype` attributes recorded on cast ops may no longer agree with
// the variables they connect. For every cast op in `graph` (visited in the
// order returned by TopologySortOperations), both attributes are rewritten
// from the data type of the op's first input and first output variable.
void ConvertToMixedPrecisionPass::FixCastAttr(framework::ir::Graph* graph) {
  for (auto* node : framework::ir::TopologySortOperations(*graph)) {
    // Only operator nodes of type "cast" are touched.
    const bool is_cast_op = node->IsOp() && node->Op()->Type() == "cast";
    if (!is_cast_op) continue;

    auto* src_var_node = node->inputs[0];
    auto* dst_var_node = node->outputs[0];
    auto* cast_desc = node->Op();

    cast_desc->SetAttr(
        "in_dtype", static_cast<int>(src_var_node->Var()->GetDataType()));
    cast_desc->SetAttr(
        "out_dtype", static_cast<int>(dst_var_node->Var()->GetDataType()));
  }
}
void
ConvertToMixedPrecisionPass
::
SaveMixedModel
()
{
framework
::
ProgramDesc
mixed_program_desc
;
framework
::
ir
::
GraphToProgram
(
*
main_graph_
,
&
mixed_program_desc
);
paddle
::
CPUPlace
place
;
auto
parameters
=
scope_
.
LocalVarNames
();
std
::
sort
(
parameters
.
begin
(),
parameters
.
end
());
std
::
unordered_set
<
std
::
string
>
weights_should_be_fp32
;
for
(
auto
*
node
:
main_graph_
->
Nodes
())
{
if
(
!
(
node
->
IsVar
()))
continue
;
if
(
NodeVarHasDtype
(
node
))
{
if
(
node
->
Var
()
->
Persistable
()
&&
node
->
Var
()
->
GetDataType
()
==
paddle
::
framework
::
proto
::
VarType
::
FP32
)
{
VLOG
(
2
)
<<
"weights keep to fp32: "
<<
node
->
Name
();
weights_should_be_fp32
.
insert
(
node
->
Name
());
}
}
}
/* CONVERT_TENSOR_DTYPE(DTYPE, dtype)
 *
 * Converts the float buffer `data` into a freshly allocated CPU buffer of
 * element type `dtype` held by `mixed_tensor`, then clears `*t` and copies
 * `mixed_tensor` back into it.
 *
 *   DTYPE - data-type enum value passed to mixed_tensor.set_type().
 *   dtype - C++ element type used for the converted buffer.
 *
 * The expansion site must have `mixed_tensor`, `t`, `data` and `place` in
 * scope. The macro declares a local named `mixed_data`, so it can be expanded
 * at most once per scope.
 *
 * The loop index takes the type returned by t->numel() instead of `int`, so
 * the element count is never compared against (or truncated to) a narrower
 * signed index on very large tensors.
 */
#define CONVERT_TENSOR_DTYPE(DTYPE, dtype)                                   \
  mixed_tensor.set_type(DTYPE);                                              \
  auto* mixed_data = mixed_tensor.mutable_data<dtype>(platform::CPUPlace()); \
  for (decltype(t->numel()) i = 0; i < t->numel(); ++i) {                    \
    mixed_data[i] = static_cast<dtype>(data[i]);                             \
  }                                                                          \
  t->clear();                                                                \
  paddle::framework::TensorCopySync(mixed_tensor, place, t)
for
(
const
auto
&
param_name
:
parameters
)
{
auto
*
var
=
scope_
.
FindLocalVar
(
param_name
);
if
(
var
->
IsType
<
phi
::
DenseTensor
>
())
{
auto
*
t
=
var
->
GetMutable
<
phi
::
DenseTensor
>
();
if
(
t
->
dtype
()
!=
phi
::
DataType
::
FLOAT32
)
continue
;
phi
::
DenseTensor
mixed_tensor
;
mixed_tensor
.
Resize
(
t
->
dims
());
auto
*
data
=
t
->
mutable_data
<
float
>
(
platform
::
CPUPlace
());
if
(
mixed_precision_
==
phi
::
DataType
::
FLOAT16
&&
!
weights_should_be_fp32
.
count
(
param_name
))
{
CONVERT_TENSOR_DTYPE
(
paddle
::
experimental
::
DataType
::
FLOAT16
,
phi
::
dtype
::
float16
);
}
else
if
(
mixed_precision_
==
phi
::
DataType
::
BFLOAT16
&&
!
weights_should_be_fp32
.
count
(
param_name
))
{
CONVERT_TENSOR_DTYPE
(
paddle
::
experimental
::
DataType
::
BFLOAT16
,
phi
::
dtype
::
bfloat16
);
}
}
}
#undef CONVERT_TENSOR_DTYPE
auto
SerializeParams
=
[
&
]()
->
std
::
string
{
std
::
ostringstream
os
;
phi
::
CPUContext
ctx
;
for
(
const
auto
&
param
:
parameters
)
{
VLOG
(
3
)
<<
"Serialize param: "
<<
param
;
PADDLE_ENFORCE_NOT_NULL
(
scope_
.
FindVar
(
param
),
platform
::
errors
::
NotFound
(
"Block should already have a '%s' variable"
,
param
));
auto
*
tensor
=
scope_
.
FindVar
(
param
)
->
GetMutable
<
framework
::
LoD
Tensor
>
();
auto
*
tensor
=
scope_
.
FindVar
(
param
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
framework
::
SerializeToStream
(
os
,
*
tensor
,
ctx
);
}
return
os
.
str
();
...
...
@@ -831,96 +112,42 @@ void ConvertToMixedPrecisionPass::SaveMixedModel() {
StrToBinary
(
mixed_params_file_
,
SerializeParams
());
}
void
ConvertToMixedPrecisionPass
::
PatchForStrangeOp
()
{
for
(
auto
*
graph
:
graphes_
)
{
for
(
auto
op_node
:
framework
::
ir
::
TopologySortOperations
(
*
graph
))
{
if
(
op_node
->
Name
()
==
"fused_multi_transformer"
)
{
auto
cache_kv_inputs
=
op_node
->
Op
()
->
Input
(
"CacheKV"
);
auto
cache_kv_outputs
=
op_node
->
Op
()
->
Output
(
"CacheKVOut"
);
CHECK_EQ
(
cache_kv_inputs
.
size
(),
cache_kv_outputs
.
size
());
for
(
size_t
i
=
0
;
i
<
cache_kv_inputs
.
size
();
++
i
)
{
op_node
->
Op
()
->
RenameOutput
(
cache_kv_outputs
[
i
],
cache_kv_inputs
[
i
]);
}
}
}
}
bool
OpSupportPrecision
(
const
std
::
string
&
op_type
,
phi
::
Backend
backend
,
phi
::
DataType
precision
,
const
std
::
unordered_set
<
std
::
string
>&
black_list
)
{
return
framework
::
ir
::
OpSupportPrecision
(
op_type
,
backend
,
precision
,
black_list
);
}
}
// namespace
void
Add
CastOp
(
void
Insert
CastOp
(
framework
::
ir
::
Graph
*
graph
,
framework
::
ir
::
Node
*
node
,
framework
::
ir
::
Node
*
next_op
,
framework
::
ir
::
Node
*
var_
node
,
framework
::
ir
::
Node
*
op_node
,
framework
::
proto
::
VarType
::
Type
from_type
,
framework
::
proto
::
VarType
::
Type
to_type
,
int
*
suffix
,
framework
::
BlockDesc
*
block_desc
,
std
::
unordered_map
<
framework
::
ir
::
Node
*
,
framework
::
ir
::
Node
*>*
map
)
{
auto
update_cast_desc
=
[
&
](
framework
::
OpDesc
&
desc
,
const
std
::
string
&
x_name
,
const
std
::
string
&
out_name
,
const
int
in_dtype
,
const
int
out_dtype
)
{
desc
.
SetType
(
"cast"
);
desc
.
SetInput
(
"X"
,
{
x_name
});
desc
.
SetOutput
(
"Out"
,
{
out_name
});
desc
.
SetAttr
(
"in_dtype"
,
in_dtype
);
desc
.
SetAttr
(
"out_dtype"
,
out_dtype
);
desc
.
SetAttr
(
"use_mkldnn"
,
false
);
desc
.
SetAttr
(
"with_quant_attr"
,
false
);
desc
.
Flush
();
};
if
(
map
->
count
(
node
)
==
0
)
{
// insert cast op before node.
std
::
string
cast_input_name
=
node
->
Var
()
->
Name
();
std
::
string
cast_output_name
=
node
->
Var
()
->
Name
()
+
"_cast.tmp_"
+
std
::
to_string
((
*
suffix
)
++
);
CHECK_NOTNULL
(
block_desc
);
framework
::
OpDesc
cast_op_desc
(
block_desc
);
update_cast_desc
(
cast_op_desc
,
cast_input_name
,
cast_output_name
,
static_cast
<
int
>
(
from_type
),
static_cast
<
int
>
(
to_type
));
auto
*
cast_op_node
=
graph
->
CreateOpNode
(
&
cast_op_desc
);
auto
*
cast_output_vardesc
=
block_desc
->
Var
(
cast_output_name
);
cast_output_vardesc
->
SetPersistable
(
false
);
cast_output_vardesc
->
SetDataType
(
to_type
);
cast_output_vardesc
->
SetShape
(
node
->
Var
()
->
GetShape
());
auto
*
cast_output_node
=
graph
->
CreateVarNode
(
cast_output_vardesc
);
IR_NODE_LINK_TO
(
cast_op_node
,
cast_output_node
);
(
*
map
)[
node
]
=
cast_output_node
;
}
next_op
->
Op
()
->
Rename
(
node
->
Name
(),
map
->
at
(
node
)
->
Name
());
IR_NODE_LINK_TO
(
node
,
map
->
at
(
node
)
->
inputs
[
0
]);
IR_NODE_LINK_TO
(
map
->
at
(
node
),
next_op
);
}
// Reports whether `op_type` can run with `precision` on `backend`.
//
// Returns false when `op_type` is listed in `blacklist`. Otherwise the answer
// comes from GpuKernelSupportPrecision() for the GPU backend, and from
// PhiKernelSupportPrecision() (queried with the phi kernel name) for every
// other backend.
bool OpSupportPrecision(const std::string& op_type,
                        phi::Backend backend,
                        phi::DataType precision,
                        const std::unordered_set<std::string>& blacklist) {
  // The kernel name is resolved unconditionally, before any other check.
  const auto kernel_name = phi::TransToPhiKernelName(op_type);

  if (blacklist.count(op_type) != 0) {
    return false;
  }
  if (backend == phi::Backend::GPU) {
    return GpuKernelSupportPrecision(op_type, precision);
  }
  return PhiKernelSupportPrecision(kernel_name, backend, precision);
}
void
ConvertToMixedPrecision
(
const
std
::
string
&
model_file
,
const
std
::
string
&
params_file
,
const
std
::
string
&
mixed_model_file
,
const
std
::
string
&
mixed_params_file
,
phi
::
DataType
mixed_precision
,
phi
::
Backend
backend
,
bool
keep_io_types
,
std
::
unordered_set
<
std
::
string
>
black_list
)
{
int
*
suffix
,
std
::
unordered_map
<
framework
::
ir
::
Node
*
,
framework
::
ir
::
Node
*>*
visited
)
{
framework
::
ir
::
DoInsertCastOp
(
graph
,
var_node
,
op_node
,
from_type
,
to_type
,
block_desc
,
suffix
,
visited
);
}
void
ConvertToMixedPrecision
(
const
std
::
string
&
model_file
,
const
std
::
string
&
params_file
,
const
std
::
string
&
mixed_model_file
,
const
std
::
string
&
mixed_params_file
,
phi
::
DataType
mixed_precision
,
phi
::
Backend
backend
,
bool
keep_io_types
,
const
std
::
unordered_set
<
std
::
string
>&
black_list
)
{
ConvertToMixedPrecisionPass
pass
(
model_file
,
params_file
,
mixed_model_file
,
...
...
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h
View file @
dbe08e9b
...
...
@@ -15,14 +15,12 @@
#pragma once
#include <string>
#include <unordered_map>
#include <unordered_set>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/phi/common/backend.h"
#include "paddle/phi/common/data_type.h"
...
...
@@ -30,20 +28,52 @@ namespace paddle {
namespace
inference
{
namespace
analysis
{
// Holds the configuration and working state used to convert a model to mixed
// precision. The constructor arguments mirror the data members below; Run()
// is the only public operation.
// NOTE(review): the implementations of the constructor, Run() and LoadModel()
// are not visible here; the member comments below are inferred from names and
// should be confirmed against the .cc file.
class ConvertToMixedPrecisionPass {
 public:
  explicit ConvertToMixedPrecisionPass(
      const std::string& model_file,
      const std::string& params_file,
      const std::string& mixed_model_file,
      const std::string& mixed_params_file,
      phi::DataType mixed_precision,
      phi::Backend backend,
      bool keep_io_types,
      const std::unordered_set<std::string>& black_list);

  // Executes the conversion.
  void Run();

 private:
  void LoadModel();
  void SaveMixedModel();

  // Input / output file paths (presumably source model/params and the
  // converted model/params, respectively).
  std::string model_file_;
  std::string params_file_;
  std::string mixed_model_file_;
  std::string mixed_params_file_;

  // Conversion options.
  phi::DataType mixed_precision_;
  phi::Backend backend_;
  bool keep_io_types_;
  std::unordered_set<std::string> black_list_;

  // Working state; the graph pointer starts out null.
  framework::Scope scope_;
  std::unique_ptr<framework::ir::Graph> main_graph_;
};
bool
OpSupportPrecision
(
const
std
::
string
&
op_type
,
phi
::
Backend
backend
,
phi
::
DataType
precision
,
const
std
::
unordered_set
<
std
::
string
>&
blacklist
);
const
std
::
unordered_set
<
std
::
string
>&
black
_
list
);
void
Add
CastOp
(
void
Insert
CastOp
(
framework
::
ir
::
Graph
*
graph
,
framework
::
ir
::
Node
*
node
,
framework
::
ir
::
Node
*
next_op
,
framework
::
ir
::
Node
*
var_
node
,
framework
::
ir
::
Node
*
op_node
,
framework
::
proto
::
VarType
::
Type
from_type
,
framework
::
proto
::
VarType
::
Type
to_type
,
int
*
suffix
,
framework
::
BlockDesc
*
block_desc
,
std
::
unordered_map
<
framework
::
ir
::
Node
*
,
framework
::
ir
::
Node
*>*
map
);
int
*
suffix
,
std
::
unordered_map
<
framework
::
ir
::
Node
*
,
framework
::
ir
::
Node
*>*
visited
);
void
ConvertToMixedPrecision
(
const
std
::
string
&
model_file
,
const
std
::
string
&
params_file
,
...
...
@@ -51,8 +81,8 @@ void ConvertToMixedPrecision(const std::string& model_file,
const
std
::
string
&
mixed_params_file
,
phi
::
DataType
mixed_precision
,
phi
::
Backend
backend
,
bool
keep_io_types
=
true
,
std
::
unordered_set
<
std
::
string
>
black_list
=
{}
);
bool
keep_io_types
,
const
std
::
unordered_set
<
std
::
string
>
&
black_list
);
}
// namespace analysis
}
// namespace inference
...
...
paddle/fluid/inference/analysis/passes/inference_op_replace_pass.cc
View file @
dbe08e9b
...
...
@@ -40,7 +40,7 @@ void InferenceOpReplacePass::RunImpl(Argument* argument) {
}
std
::
string
InferenceOpReplacePass
::
repr
()
const
{
return
"inference
-
op
-
replace
-
pass"
;
return
"inference
_
op
_
replace
_
pass"
;
}
}
// namespace analysis
...
...
paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc
View file @
dbe08e9b
...
...
@@ -105,7 +105,7 @@ void IrAnalysisPass::CollectFusionStatis(Argument* argument) {
framework
::
ir
::
kFuseStatisAttr
));
}
std
::
string
IrAnalysisPass
::
repr
()
const
{
return
"ir
-
analysis
-
pass"
;
}
std
::
string
IrAnalysisPass
::
repr
()
const
{
return
"ir
_
analysis
_
pass"
;
}
}
// namespace analysis
}
// namespace inference
...
...
paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
View file @
dbe08e9b
...
...
@@ -64,7 +64,8 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
"set."
));
}
auto
graph
=
std
::
unique_ptr
<
Graph
>
(
new
Graph
(
argument
->
main_program
()));
auto
graph
=
std
::
unique_ptr
<
framework
::
ir
::
Graph
>
(
new
framework
::
ir
::
Graph
(
argument
->
main_program
()));
argument
->
SetMainGraph
(
graph
.
release
());
auto
*
scope_ptr
=
argument
->
scope_ptr
();
PADDLE_ENFORCE_NOT_NULL
(
scope_ptr
,
...
...
@@ -125,7 +126,7 @@ std::unique_ptr<framework::ProgramDesc> IrGraphBuildPass::LoadModel(
}
}
std
::
string
IrGraphBuildPass
::
repr
()
const
{
return
"ir
-
graph
-
build
-
pass"
;
}
std
::
string
IrGraphBuildPass
::
repr
()
const
{
return
"ir
_
graph
_
build
_
pass"
;
}
}
// namespace analysis
}
// namespace inference
...
...
paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.cc
View file @
dbe08e9b
...
...
@@ -31,7 +31,7 @@ void IrGraphToProgramPass::RunImpl(Argument *argument) {
new
int
(
argument
->
memory_optim_sort_kind
()));
}
std
::
unique_ptr
<
Graph
>
graph
(
argument
->
main_graph_ptr
());
std
::
unique_ptr
<
framework
::
ir
::
Graph
>
graph
(
argument
->
main_graph_ptr
());
// Direct using ProgramDesc desc(argument->main_program()) may cause
// incomplete copies of information.
...
...
paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h
View file @
dbe08e9b
...
...
@@ -28,7 +28,7 @@ class IrGraphToProgramPass : public AnalysisPass {
public:
void
RunImpl
(
Argument
*
argument
)
override
;
std
::
string
repr
()
const
override
{
return
"ir
-
graph
-
to
-
param
-
pass"
;
}
std
::
string
repr
()
const
override
{
return
"ir
_
graph
_
to
_
param
_
pass"
;
}
};
}
// namespace analysis
...
...
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
View file @
dbe08e9b
...
...
@@ -169,7 +169,7 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
}
std
::
string
IrParamsSyncAmongDevicesPass
::
repr
()
const
{
return
"ir
-
params
-
sync
-
among
-
devices
-
pass"
;
return
"ir
_
params
_
sync
_
among
_
devices
_
pass"
;
}
}
// namespace analysis
...
...
paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
View file @
dbe08e9b
...
...
@@ -295,7 +295,7 @@ void UpdateOpDescsByReuse(
}
}
std
::
string
MemoryOptimizePass
::
repr
()
const
{
return
"memory
optimize
pass"
;
}
std
::
string
MemoryOptimizePass
::
repr
()
const
{
return
"memory
_
optimize
_
pass"
;
}
void
MemoryOptimizePass
::
RunImpl
(
Argument
*
argument
)
{
// Memory optimization.
...
...
paddle/fluid/inference/analysis/passes/passes.cc
View file @
dbe08e9b
...
...
@@ -18,7 +18,6 @@
#include "paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
#include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
...
...
@@ -34,8 +33,6 @@ PassRegistry::PassRegistry() {
std
::
unique_ptr
<
AnalysisPass
>
(
new
IrAnalysisPass
));
passes_
.
emplace
(
"ir_graph_build_pass"
,
std
::
unique_ptr
<
AnalysisPass
>
(
new
IrGraphBuildPass
));
passes_
.
emplace
(
"ir_graph_clean_pass"
,
std
::
unique_ptr
<
AnalysisPass
>
(
new
IrInferCleanGraphPass
));
passes_
.
emplace
(
"memory_optimize_pass"
,
std
::
unique_ptr
<
AnalysisPass
>
(
new
MemoryOptimizePass
));
passes_
.
emplace
(
...
...
paddle/fluid/inference/api/analysis_config.cc
View file @
dbe08e9b
...
...
@@ -85,15 +85,29 @@ void AnalysisConfig::SetModel(const std::string &prog_file_path,
Update
();
}
void
AnalysisConfig
::
EnableUseGpu
(
uint64_t
memory_pool_init_size_mb
,
int
device_id
)
{
int
device_id
,
Precision
precision_mode
)
{
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
use_gpu_
=
true
;
memory_pool_init_size_mb_
=
memory_pool_init_size_mb
;
FLAGS_initial_gpu_memory_in_mb
=
memory_pool_init_size_mb_
;
gpu_device_id_
=
device_id
;
mixed_precision_mode_
=
precision_mode
;
if
(
precision_mode
==
Precision
::
kFloat32
)
{
// default
}
else
if
(
precision_mode
==
Precision
::
kHalf
||
precision_mode
==
Precision
::
kBf16
)
{
enable_gpu_mixed_
=
true
;
}
else
{
LOG
(
ERROR
)
<<
"The Paddle-GPU inference currently only supports "
"float32/float16/bfloat16 precision. Please check the parameters "
"you specified in EnableUseGpu or enable_use_gpu function."
;
}
#else
LOG
(
ERROR
)
<<
"Please
compile with gpu to EnableGpu()
"
;
LOG
(
ERROR
)
<<
"Please
use PaddlePaddle with GPU version.
"
;
use_gpu_
=
false
;
#endif
...
...
@@ -279,7 +293,7 @@ void AnalysisConfig::LoadIpuConfig(const std::string &config_path) {
if
(
ipu_config_mapper_
.
find
(
key
)
==
ipu_config_mapper_
.
end
())
{
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"invalid key
{}
in IPU config"
,
key
));
"invalid key
%s
in IPU config
:
"
,
key
));
}
switch
(
ipu_config_mapper_
.
at
(
key
))
{
case
ipu_config_code
::
ipu_device_num
:
...
...
@@ -315,7 +329,7 @@ void AnalysisConfig::LoadIpuConfig(const std::string &config_path) {
default:
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"invalid key
{}
in IPU config"
,
key
));
"invalid key
%s
in IPU config"
,
key
));
break
;
}
}
...
...
@@ -372,8 +386,10 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER
(
gpu_device_id_
);
CP_MEMBER
(
memory_pool_init_size_mb_
);
// Mixed related.
// Mixed
precision
related.
CP_MEMBER
(
mixed_black_list_
);
CP_MEMBER
(
enable_gpu_mixed_
);
CP_MEMBER
(
mixed_precision_mode_
);
CP_MEMBER
(
enable_memory_optim_
);
// TensorRT related.
...
...
@@ -740,13 +756,7 @@ void AnalysisConfig::Update() {
((
use_custom_device
()
^
pass_builder_
->
use_custom_device
())))
{
if
(
use_gpu
())
{
pass_builder_
.
reset
(
new
GpuPassStrategy
);
if
(
use_tensorrt_
)
{
// Append after the Affine_channel_conv_fuse pass.
pass_builder
()
->
InsertPass
(
3
,
"tensorrt_subgraph_pass"
);
}
}
else
if
(
use_ipu
())
{
VLOG
(
1
)
<<
"IpuPassStrategy has been used for new."
;
pass_builder_
.
reset
(
new
IpuPassStrategy
);
}
else
if
(
use_xpu
())
{
PADDLE_ENFORCE_EQ
(
...
...
@@ -946,9 +956,6 @@ void AnalysisConfig::Update() {
"but did not have the option -DWITH_CUSTOM_DEVICE compiled."
));
#endif
}
if
(
ir_debug_
)
{
pass_builder
()
->
TurnOnDebug
();
}
}
std
::
string
AnalysisConfig
::
SerializeInfoCache
()
{
...
...
@@ -960,6 +967,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss
<<
calibration_file_path_
;
ss
<<
use_gpu_
;
ss
<<
enable_gpu_mixed_
;
ss
<<
use_external_stream_
;
ss
<<
exec_stream_
;
ss
<<
use_fc_padding_
;
...
...
@@ -1167,6 +1175,7 @@ std::string AnalysisConfig::Summary() {
os
.
InsertRow
({
"use_gpu"
,
use_gpu_
?
"true"
:
"false"
});
if
(
use_gpu_
)
{
os
.
InsertRow
({
"gpu_device_id"
,
std
::
to_string
(
gpu_device_id_
)});
os
.
InsertRow
({
"enable_gpu_mixed"
,
std
::
to_string
(
enable_gpu_mixed_
)});
os
.
InsertRow
({
"memory_pool_init_size"
,
std
::
to_string
(
memory_pool_init_size_mb_
)
+
"MB"
});
os
.
InsertRow
(
...
...
@@ -1360,7 +1369,7 @@ bool AnalysisConfig::trt_allow_build_at_runtime() {
return
trt_allow_build_at_runtime_
;
}
void
AnalysisConfig
::
Exp_
SetBlackListOpsForMixedModel
(
void
AnalysisConfig
::
Exp_
DisableMixedPrecisionOps
(
const
std
::
unordered_set
<
std
::
string
>
&
black_list
)
{
mixed_black_list_
=
black_list
;
}
...
...
paddle/fluid/inference/api/analysis_predictor.cc
View file @
dbe08e9b
...
...
@@ -1065,7 +1065,7 @@ void AnalysisPredictor::PrepareArgument() {
argument_
.
SetUseGPU
(
config_
.
use_gpu
());
argument_
.
SetUseFcPadding
(
config_
.
use_fc_padding
());
argument_
.
SetGPUDeviceId
(
config_
.
gpu_device_id
());
argument_
.
SetEnable
Analysis
Optim
(
config_
.
enable_ir_optim_
);
argument_
.
SetEnable
Ir
Optim
(
config_
.
enable_ir_optim_
);
argument_
.
SetEnableMemoryOptim
(
config_
.
enable_memory_optim
());
argument_
.
SetModelFromMemory
(
config_
.
model_from_memory_
);
// Analyze inference_program
...
...
@@ -1210,53 +1210,57 @@ void AnalysisPredictor::PrepareArgument() {
}
#endif
auto
pass
es
=
config_
.
pass_builder
()
->
AllPasses
()
;
auto
*
pass
_builder
=
config_
.
pass_builder
();
if
(
model_precision_
!=
phi
::
DataType
::
FLOAT32
)
{
LOG
(
INFO
)
<<
"Model is mixed precision type with "
<<
model_precision_
<<
", we will use a new PassStrategy. Note that only the GPU "
"backend is supported for now."
;
passes
.
clear
();
pass_builder
->
ClearPasses
();
const
auto
&
deleted_passes
=
pass_builder
->
GetAllDeletedPasses
();
if
(
config_
.
tensorrt_engine_enabled
())
{
for
(
const
auto
&
pass
:
kTrtLowerPrecisionPasses
)
{
passes
.
push_back
(
pass
);
if
(
deleted_passes
.
count
(
pass
))
continue
;
pass_builder
->
AppendPass
(
pass
);
}
}
else
if
(
config_
.
use_gpu
())
{
for
(
const
auto
&
pass
:
kGpuLowerPrecisionPasses
)
{
passes
.
push_back
(
pass
);
if
(
deleted_passes
.
count
(
pass
))
continue
;
pass_builder
->
AppendPass
(
pass
);
}
}
}
const
auto
&
deleted_passes
=
config_
.
pass_builder
()
->
GetAllDeletedPasses
();
for
(
const
auto
&
it
:
deleted_passes
)
{
auto
iterator
=
std
::
find
(
passes
.
begin
(),
passes
.
end
(),
it
);
if
(
iterator
!=
passes
.
end
())
{
passes
.
erase
(
iterator
);
}
if
(
!
config_
.
ir_optim
())
{
argument_
.
SetEnableIrOptim
(
false
);
if
(
config_
.
enable_gpu_mixed_
)
{
argument_
.
SetEnableIrOptim
(
true
);
pass_builder
->
ClearPasses
();
pass_builder
->
AppendPass
(
"auto_mixed_precision_pass"
);
LOG
(
INFO
)
<<
"This model run in Paddle-GPU mixed precision mode with no ir "
"optimization."
;
}
else
{
LOG
(
INFO
)
<<
"ir_optim is turned off, no IR pass will be executed."
;
}
}
else
{
if
(
config_
.
ir_debug_
)
{
auto
it
=
std
::
begin
(
passes
);
while
(
it
!=
std
::
end
(
passes
))
{
if
(
*
it
!=
"graph_viz_pass"
)
{
it
=
passes
.
insert
(
it
+
1
,
"graph_viz_pass"
);
}
else
{
++
it
;
}
}
pass_builder
->
TurnOnDebug
();
}
if
(
config_
.
enable_gpu_mixed_
)
{
LOG
(
INFO
)
<<
"This model run in Paddle-GPU mixed precision mode."
;
}
}
if
(
!
config_
.
ir_optim
())
{
passes
.
clear
();
LOG
(
INFO
)
<<
"ir_optim is turned off, no IR pass will be executed"
;
}
argument_
.
SetDisableLogs
(
config_
.
glog_info_disabled
());
argument_
.
SetIrAnalysisPasses
(
pass
es
);
argument_
.
SetAnalysisPasses
(
config_
.
pass_builder
()
->
AnalysisPasses
());
argument_
.
SetIrAnalysisPasses
(
pass
_builder
->
AllPasses
()
);
argument_
.
SetAnalysisPasses
(
pass_builder
->
AnalysisPasses
());
argument_
.
SetScopeNotOwned
(
scope_
.
get
());
// mixed precision.
argument_
.
SetModelPrecision
(
static_cast
<
int
>
(
model_precision_
));
argument_
.
SetMixedBlackList
(
config_
.
mixed_black_list_
);
argument_
.
SetEnableGPUMixed
(
config_
.
enable_gpu_mixed_
);
argument_
.
SetMixedPrecisionMode
(
static_cast
<
int
>
(
paddle
::
ConvertPrecision
(
config_
.
mixed_precision_mode_
)));
}
// NOTE All the members in AnalysisConfig should be copied to Argument.
...
...
@@ -2107,7 +2111,9 @@ std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) {
}
x
->
predictor_stream_
=
stream
;
x
->
Init
(
scope_
,
inference_program_
);
#ifdef PADDLE_WITH_TENSORRT
x
->
executor_
->
ResetTrtOps
(
++
AnalysisPredictor
::
clone_num_
);
#endif
return
std
::
unique_ptr
<
PaddlePredictor
>
(
x
);
}
...
...
paddle/fluid/inference/api/demo_ci/.gitignore
0 → 100644
View file @
dbe08e9b
data
paddle/fluid/inference/api/mkldnn_quantizer.cc
View file @
dbe08e9b
...
...
@@ -604,10 +604,8 @@ void AnalysisPredictor::MkldnnQuantizer::PrepareArgument() const {
if
(
predictor_
.
config_
.
ir_debug_
)
builder
->
TurnOnDebug
();
auto
passes
=
builder
->
AllPasses
();
predictor_
.
argument_
.
SetIrAnalysisPasses
(
passes
);
predictor_
.
argument_
.
SetAnalysisPasses
({
"ir_graph_clean_pass"
,
"ir_analysis_pass"
,
"memory_optimize_pass"
,
"ir_graph_to_program_pass"
});
predictor_
.
argument_
.
SetAnalysisPasses
(
{
"ir_analysis_pass"
,
"memory_optimize_pass"
,
"ir_graph_to_program_pass"
});
predictor_
.
argument_
.
SetQuantVarScales
(
scales_
);
}
...
...
paddle/fluid/inference/api/paddle_analysis_config.h
View file @
dbe08e9b
...
...
@@ -247,8 +247,12 @@ struct PD_INFER_DECL AnalysisConfig {
///
/// \param memory_pool_init_size_mb initial size of the GPU memory pool in MB.
/// \param device_id device_id the GPU card to use (default is 0).
/// \param precision the precision used in Paddle-GPU inference.
///
void
EnableUseGpu
(
uint64_t
memory_pool_init_size_mb
,
int
device_id
=
0
);
void
EnableUseGpu
(
uint64_t
memory_pool_init_size_mb
,
int
device_id
=
0
,
Precision
precision_mode
=
Precision
::
kFloat32
);
///
/// \brief Turn off GPU.
///
...
...
@@ -967,7 +971,7 @@ struct PD_INFER_DECL AnalysisConfig {
/// interface is in the experimental stage and may change in the future. Note
/// that the blacklist must be the same as the model conversion blacklist.
///
void
Exp_
SetBlackListOpsForMixedModel
(
void
Exp_
DisableMixedPrecisionOps
(
const
std
::
unordered_set
<
std
::
string
>&
black_list
);
void
SetApplyOptim
(
bool
value
)
{
apply_optim_
=
value
;
}
...
...
@@ -987,13 +991,15 @@ struct PD_INFER_DECL AnalysisConfig {
mutable
std
::
string
params_file_
;
mutable
std
::
string
calibration_file_path_
;
// Mixed precision.
// Mixed precision related.
Precision
mixed_precision_mode_
{
Precision
::
kFloat32
};
std
::
unordered_set
<
std
::
string
>
mixed_black_list_
;
// GPU related.
bool
use_gpu_
{
false
};
int
gpu_device_id_
{
0
};
uint64_t
memory_pool_init_size_mb_
{
100
};
// initial size is 100MB.
bool
enable_gpu_mixed_
{
false
};
bool
thread_local_stream_
{
false
};
bool
use_cudnn_
{
false
};
...
...
paddle/fluid/inference/api/paddle_pass_builder.cc
View file @
dbe08e9b
...
...
@@ -171,8 +171,9 @@ const std::vector<std::string> kGpuLowerPrecisionPasses{
"multi_devices_fused_multi_transformer_decoder_fuse_qkv_pass"
,
"gpu_cpu_map_matmul_v2_to_mul_pass"
,
"gpu_cpu_map_matmul_v2_to_matmul_pass"
,
"gpu_cpu_map_matmul_to_mul_pass"
,
"fc_fuse_pass"
,
"fc_elementwise_layernorm_fuse_pass"
,
//
"fc_elementwise_layernorm_fuse_pass",
"embedding_eltwise_layernorm_fuse_pass"
,
"runtime_context_cache_pass"
,
};
...
...
@@ -227,9 +228,10 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
"conv_elementwise_add_fuse_pass"
,
//
#endif //
"transpose_flatten_concat_fuse_pass"
,
//
"constant_folding_pass"
,
"constant_folding_pass"
,
//
// following pass should be located in the last, since it will
// work on all fused ops.
"auto_mixed_precision_pass"
,
//
"runtime_context_cache_pass"
});
...
...
Prev
1
2
3
4
5
6
7
…
16
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment