Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Paddle
Commits
dbe08e9b
Commit
dbe08e9b
authored
Jun 12, 2023
by
yuguo960516yuguo
Browse files
2.4.2
parent
b5499578
Changes
302
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
5317 additions
and
3962 deletions
+5317
-3962
python/paddle/fluid/.gitignore
python/paddle/fluid/.gitignore
+2
-0
python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
...slim/tests/test_post_training_quantization_mobilenetv1.py
+232
-155
python/paddle/fluid/contrib/sparsity/supported_layer_list.py
python/paddle/fluid/contrib/sparsity/supported_layer_list.py
+33
-22
python/paddle/fluid/contrib/sparsity/utils.py
python/paddle/fluid/contrib/sparsity/utils.py
+92
-61
python/paddle/fluid/core.py
python/paddle/fluid/core.py
+81
-43
python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py
...paddle/fluid/dygraph/dygraph_to_static/partial_program.py
+288
-161
python/paddle/fluid/dygraph/layers.py
python/paddle/fluid/dygraph/layers.py
+329
-176
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+34
-11
python/paddle/fluid/layers/metric_op.py
python/paddle/fluid/layers/metric_op.py
+116
-90
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+3790
-3027
python/paddle/fluid/tests/.gitignore
python/paddle/fluid/tests/.gitignore
+4
-0
python/paddle/fluid/tests/book/.gitignore
python/paddle/fluid/tests/book/.gitignore
+1
-0
python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_c_setup.py
...le/fluid/tests/custom_kernel/custom_kernel_dot_c_setup.py
+16
-11
python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_setup.py
...ddle/fluid/tests/custom_kernel/custom_kernel_dot_setup.py
+21
-15
python/paddle/fluid/tests/custom_op/custom_relu_op.cu
python/paddle/fluid/tests/custom_op/custom_relu_op.cu
+1
-1
python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
...paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
+138
-92
python/paddle/fluid/tests/unittests/dygraph_to_static/test_backward_without_params.py
...ittests/dygraph_to_static/test_backward_without_params.py
+45
-0
python/paddle/fluid/tests/unittests/dygraph_to_static/test_for_enumerate.py
...d/tests/unittests/dygraph_to_static/test_for_enumerate.py
+14
-38
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_activation.py
...sts/unittests/ir/inference/test_trt_convert_activation.py
+36
-25
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_anchor_generator.py
...ittests/ir/inference/test_trt_convert_anchor_generator.py
+44
-34
No files found.
python/paddle/fluid/.gitignore
0 → 100644
View file @
dbe08e9b
proto
core.so
python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
View file @
dbe08e9b
...
...
@@ -77,13 +77,14 @@ def process_image(sample, mode, color_jitter, rotate):
return
img
,
sample
[
1
]
def
_reader_creator
(
file_list
,
def
_reader_creator
(
file_list
,
mode
,
shuffle
=
False
,
color_jitter
=
False
,
rotate
=
False
,
data_dir
=
DATA_DIR
):
data_dir
=
DATA_DIR
,
):
def
reader
():
with
open
(
file_list
)
as
flist
:
full_lines
=
[
line
.
strip
()
for
line
in
flist
]
...
...
@@ -98,10 +99,9 @@ def _reader_creator(file_list,
continue
yield
img_path
,
int
(
label
)
mapper
=
functools
.
partial
(
process_image
,
mode
=
mode
,
color_jitter
=
color_jitter
,
rotate
=
rotate
)
mapper
=
functools
.
partial
(
process_image
,
mode
=
mode
,
color_jitter
=
color_jitter
,
rotate
=
rotate
)
return
paddle
.
reader
.
xmap_readers
(
mapper
,
reader
,
THREAD
,
BUF_SIZE
)
...
...
@@ -112,11 +112,11 @@ def val(data_dir=DATA_DIR):
class
TestPostTrainingQuantization
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
int8_download
=
'int8/download'
self
.
cache_folder
=
os
.
path
.
expanduser
(
'~/.cache/paddle/dataset/'
+
self
.
int8_download
)
self
.
cache_folder
=
os
.
path
.
expanduser
(
'~/.cache/paddle/dataset/'
+
self
.
int8_download
)
self
.
data_cache_folder
=
''
data_urls
=
[]
data_md5s
=
[]
...
...
@@ -129,31 +129,34 @@ class TestPostTrainingQuantization(unittest.TestCase):
'https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partab'
)
data_md5s
.
append
(
'1e9f15f64e015e58d6f9ec3210ed18b5'
)
self
.
data_cache_folder
=
self
.
download_data
(
data_urls
,
data_md5s
,
"full_data"
,
False
)
self
.
data_cache_folder
=
self
.
download_data
(
data_urls
,
data_md5s
,
"full_data"
,
False
)
else
:
data_urls
.
append
(
'http://paddle-inference-dist.bj.bcebos.com/int8/calibration_test_data.tar.gz'
)
data_md5s
.
append
(
'1b6c1c434172cca1bf9ba1e4d7a3157d'
)
self
.
data_cache_folder
=
self
.
download_data
(
data_urls
,
data_md5s
,
"small_data"
,
False
)
self
.
data_cache_folder
=
self
.
download_data
(
data_urls
,
data_md5s
,
"small_data"
,
False
)
# reader/decorator.py requires the relative path to the data folder
if
not
os
.
path
.
exists
(
"./data/ILSVRC2012"
):
cmd
=
'rm -rf {0} && ln -s {1} {0}'
.
format
(
"data"
,
self
.
data_cache_folder
)
cmd
=
'rm -rf {0} && ln -s {1} {0}'
.
format
(
"data"
,
self
.
data_cache_folder
)
os
.
system
(
cmd
)
self
.
batch_size
=
1
if
os
.
environ
.
get
(
'DATASET'
)
==
'full'
else
50
self
.
sample_iterations
=
50
if
os
.
environ
.
get
(
'DATASET'
)
==
'full'
else
2
self
.
infer_iterations
=
50000
if
os
.
environ
.
get
(
'DATASET'
)
==
'full'
else
2
self
.
infer_iterations
=
(
50000
if
os
.
environ
.
get
(
'DATASET'
)
==
'full'
else
2
)
self
.
root_path
=
tempfile
.
TemporaryDirectory
()
self
.
int8_model
=
os
.
path
.
join
(
self
.
root_path
.
name
,
"post_training_quantization"
)
self
.
int8_model
=
os
.
path
.
join
(
self
.
root_path
.
name
,
"post_training_quantization"
)
def
tearDown
(
self
):
self
.
root_path
.
cleanup
()
...
...
@@ -161,7 +164,8 @@ class TestPostTrainingQuantization(unittest.TestCase):
def
cache_unzipping
(
self
,
target_folder
,
zip_path
):
if
not
os
.
path
.
exists
(
target_folder
):
cmd
=
'mkdir {0} && tar xf {1} -C {0}'
.
format
(
target_folder
,
zip_path
)
target_folder
,
zip_path
)
os
.
system
(
cmd
)
def
download_data
(
self
,
data_urls
,
data_md5s
,
folder_name
,
is_model
=
True
):
...
...
@@ -173,13 +177,15 @@ class TestPostTrainingQuantization(unittest.TestCase):
download
(
data_urls
[
i
],
self
.
int8_download
,
data_md5s
[
i
])
file_names
.
append
(
data_urls
[
i
].
split
(
'/'
)[
-
1
])
zip_path
=
os
.
path
.
join
(
self
.
cache_folder
,
'full_imagenet_val.tar.gz'
)
zip_path
=
os
.
path
.
join
(
self
.
cache_folder
,
'full_imagenet_val.tar.gz'
)
if
not
os
.
path
.
exists
(
zip_path
):
cat_command
=
'cat'
for
file_name
in
file_names
:
cat_command
+=
' '
+
os
.
path
.
join
(
self
.
cache_folder
,
file_name
)
cat_command
+=
' '
+
os
.
path
.
join
(
self
.
cache_folder
,
file_name
)
cat_command
+=
' > '
+
zip_path
os
.
system
(
cat_command
)
...
...
@@ -199,8 +205,16 @@ class TestPostTrainingQuantization(unittest.TestCase):
image_shape
=
[
3
,
224
,
224
]
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
[
infer_program
,
feed_dict
,
fetch_targets
]
=
\
fluid
.
io
.
load_inference_model
(
model_path
,
exe
)
[
infer_program
,
feed_dict
,
fetch_targets
,
]
=
fluid
.
io
.
load_inference_model
(
model_path
,
exe
,
model_filename
=
"inference.pdmodel"
,
params_filename
=
"inference.pdiparams"
,
)
val_reader
=
paddle
.
batch
(
val
(),
batch_size
)
iterations
=
infer_iterations
...
...
@@ -208,23 +222,28 @@ class TestPostTrainingQuantization(unittest.TestCase):
cnt
=
0
periods
=
[]
for
batch_id
,
data
in
enumerate
(
val_reader
()):
image
=
np
.
array
([
x
[
0
].
reshape
(
image_shape
)
for
x
in
data
]).
astype
(
"float32"
)
image
=
np
.
array
([
x
[
0
].
reshape
(
image_shape
)
for
x
in
data
]).
astype
(
"float32"
)
label
=
np
.
array
([
x
[
1
]
for
x
in
data
]).
astype
(
"int64"
)
label
=
label
.
reshape
([
-
1
,
1
])
t1
=
time
.
time
()
_
,
acc1
,
_
=
exe
.
run
(
infer_program
,
feed
=
{
feed_dict
[
0
]:
image
,
feed_dict
[
1
]:
label
},
fetch_list
=
fetch_targets
)
pred
=
exe
.
run
(
infer_program
,
feed
=
{
feed_dict
[
0
]:
image
},
fetch_list
=
fetch_targets
,
)
t2
=
time
.
time
()
period
=
t2
-
t1
periods
.
append
(
period
)
test_info
.
append
(
np
.
mean
(
acc1
)
*
len
(
data
))
pred
=
np
.
array
(
pred
[
0
])
sort_array
=
pred
.
argsort
(
axis
=
1
)
top_1_pred
=
sort_array
[:,
-
1
:][:,
::
-
1
]
top_1
=
np
.
mean
(
label
==
top_1_pred
)
test_info
.
append
(
np
.
mean
(
top_1
)
*
len
(
data
))
cnt
+=
len
(
data
)
if
(
batch_id
+
1
)
%
100
==
0
:
...
...
@@ -238,7 +257,8 @@ class TestPostTrainingQuantization(unittest.TestCase):
acc1
=
np
.
sum
(
test_info
)
/
cnt
return
(
throughput
,
latency
,
acc1
)
def
generate_quantized_model
(
self
,
def
generate_quantized_model
(
self
,
model_path
,
quantizable_op_type
,
batch_size
,
...
...
@@ -248,12 +268,14 @@ class TestPostTrainingQuantization(unittest.TestCase):
is_use_cache_file
=
False
,
is_optimize_model
=
False
,
batch_nums
=
10
,
onnx_format
=
False
):
onnx_format
=
False
,
):
try
:
os
.
system
(
"mkdir "
+
self
.
int8_model
)
except
Exception
as
e
:
print
(
"Failed to create {} due to {}"
.
format
(
self
.
int8_model
,
str
(
e
)))
print
(
"Failed to create {} due to {}"
.
format
(
self
.
int8_model
,
str
(
e
))
)
sys
.
exit
(
-
1
)
place
=
fluid
.
CPUPlace
()
...
...
@@ -261,9 +283,12 @@ class TestPostTrainingQuantization(unittest.TestCase):
scope
=
fluid
.
global_scope
()
val_reader
=
val
()
ptq
=
PostTrainingQuantization
(
executor
=
exe
,
ptq
=
PostTrainingQuantization
(
executor
=
exe
,
sample_generator
=
val_reader
,
model_dir
=
model_path
,
model_filename
=
"inference.pdmodel"
,
params_filename
=
"inference.pdiparams"
,
batch_size
=
batch_size
,
batch_nums
=
batch_nums
,
algo
=
algo
,
...
...
@@ -272,11 +297,17 @@ class TestPostTrainingQuantization(unittest.TestCase):
is_full_quantize
=
is_full_quantize
,
optimize_model
=
is_optimize_model
,
onnx_format
=
onnx_format
,
is_use_cache_file
=
is_use_cache_file
)
is_use_cache_file
=
is_use_cache_file
,
)
ptq
.
quantize
()
ptq
.
save_quantized_model
(
self
.
int8_model
)
ptq
.
save_quantized_model
(
self
.
int8_model
,
model_filename
=
"inference.pdmodel"
,
params_filename
=
"inference.pdiparams"
,
)
def
run_test
(
self
,
def
run_test
(
self
,
model
,
algo
,
round_type
,
...
...
@@ -288,43 +319,62 @@ class TestPostTrainingQuantization(unittest.TestCase):
is_optimize_model
,
diff_threshold
,
onnx_format
=
False
,
batch_nums
=
10
):
batch_nums
=
10
,
):
infer_iterations
=
self
.
infer_iterations
batch_size
=
self
.
batch_size
sample_iterations
=
self
.
sample_iterations
model_cache_folder
=
self
.
download_data
(
data_urls
,
data_md5s
,
model
)
print
(
"Start FP32 inference for {0} on {1} images ..."
.
format
(
model
,
infer_iterations
*
batch_size
))
print
(
"Start FP32 inference for {0} on {1} images ..."
.
format
(
model
,
infer_iterations
*
batch_size
)
)
(
fp32_throughput
,
fp32_latency
,
fp32_acc1
)
=
self
.
run_program
(
os
.
path
.
join
(
model_cache_folder
,
"model"
),
batch_size
,
infer_iterations
)
print
(
"Start INT8 post training quantization for {0} on {1} images ..."
.
format
(
model
,
sample_iterations
*
batch_size
))
self
.
generate_quantized_model
(
os
.
path
.
join
(
model_cache_folder
,
"model"
),
quantizable_op_type
,
batch_size
,
sample_iterations
,
algo
,
round_type
,
is_full_quantize
,
is_use_cache_file
,
is_optimize_model
,
batch_nums
,
onnx_format
)
print
(
"Start INT8 inference for {0} on {1} images ..."
.
format
(
model
,
infer_iterations
*
batch_size
))
(
int8_throughput
,
int8_latency
,
int8_acc1
)
=
self
.
run_program
(
self
.
int8_model
,
batch_size
,
infer_iterations
)
os
.
path
.
join
(
model_cache_folder
,
"MobileNetV1_infer"
),
batch_size
,
infer_iterations
,
)
print
(
"Start INT8 post training quantization for {0} on {1} images ..."
.
format
(
model
,
batch_nums
*
batch_size
)
)
self
.
generate_quantized_model
(
os
.
path
.
join
(
model_cache_folder
,
"MobileNetV1_infer"
),
quantizable_op_type
,
batch_size
,
algo
,
round_type
,
is_full_quantize
,
is_use_cache_file
,
is_optimize_model
,
batch_nums
,
onnx_format
,
)
print
(
"Start INT8 inference for {0} on {1} images ..."
.
format
(
model
,
infer_iterations
*
batch_size
)
)
(
int8_throughput
,
int8_latency
,
int8_acc1
)
=
self
.
run_program
(
self
.
int8_model
,
batch_size
,
infer_iterations
)
print
(
"---Post training quantization of {} method---"
.
format
(
algo
))
print
(
"FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}."
.
format
(
model
,
batch_size
,
fp32_throughput
,
fp32_latency
,
fp32_acc1
))
"FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}."
.
format
(
model
,
batch_size
,
fp32_throughput
,
fp32_latency
,
fp32_acc1
)
)
print
(
"INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.
\n
"
.
format
(
model
,
batch_size
,
int8_throughput
,
int8_latency
,
int8_acc1
))
"INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.
\n
"
.
format
(
model
,
batch_size
,
int8_throughput
,
int8_latency
,
int8_acc1
)
)
sys
.
stdout
.
flush
()
delta_value
=
fp32_acc1
-
int8_acc1
...
...
@@ -332,15 +382,14 @@ class TestPostTrainingQuantization(unittest.TestCase):
class
TestPostTrainingKLForMobilenetv1
(
TestPostTrainingQuantization
):
def
test_post_training_kl_mobilenetv1
(
self
):
model
=
"MobileNet-V1"
algo
=
"KL"
round_type
=
"round"
data_urls
=
[
'http://paddle-i
nference-dist.bj.bcebos.com/int8/m
obile
n
et
v
1_in
t8_model
.tar
.gz
'
'http
s
://paddle-i
magenet-models-name.bj.bcebos.com/dygraph/inference/M
obile
N
et
V
1_in
fer
.tar'
]
data_md5s
=
[
'
13892b0716d26443a8cdea15b3c6438b
'
]
data_md5s
=
[
'
5ee2b1775b11dc233079236cdc216c2e
'
]
quantizable_op_type
=
[
"conv2d"
,
"depthwise_conv2d"
,
...
...
@@ -351,21 +400,30 @@ class TestPostTrainingKLForMobilenetv1(TestPostTrainingQuantization):
is_use_cache_file
=
False
is_optimize_model
=
True
diff_threshold
=
0.025
self
.
run_test
(
model
,
algo
,
round_type
,
data_urls
,
data_md5s
,
quantizable_op_type
,
is_full_quantize
,
is_use_cache_file
,
is_optimize_model
,
diff_threshold
)
batch_nums
=
3
self
.
run_test
(
model
,
algo
,
round_type
,
data_urls
,
data_md5s
,
quantizable_op_type
,
is_full_quantize
,
is_use_cache_file
,
is_optimize_model
,
diff_threshold
,
)
class
TestPostTrainingavgForMobilenetv1
(
TestPostTrainingQuantization
):
def
test_post_training_avg_mobilenetv1
(
self
):
model
=
"MobileNet-V1"
algo
=
"avg"
round_type
=
"round"
data_urls
=
[
'http://paddle-i
nference-dist.bj.bcebos.com/int8/m
obile
n
et
v
1_in
t8_model
.tar
.gz
'
'http
s
://paddle-i
magenet-models-name.bj.bcebos.com/dygraph/inference/M
obile
N
et
V
1_in
fer
.tar'
]
data_md5s
=
[
'
13892b0716d26443a8cdea15b3c6438b
'
]
data_md5s
=
[
'
5ee2b1775b11dc233079236cdc216c2e
'
]
quantizable_op_type
=
[
"conv2d"
,
"depthwise_conv2d"
,
...
...
@@ -375,21 +433,29 @@ class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization):
is_use_cache_file
=
False
is_optimize_model
=
True
diff_threshold
=
0.025
self
.
run_test
(
model
,
algo
,
round_type
,
data_urls
,
data_md5s
,
quantizable_op_type
,
is_full_quantize
,
is_use_cache_file
,
is_optimize_model
,
diff_threshold
)
self
.
run_test
(
model
,
algo
,
round_type
,
data_urls
,
data_md5s
,
quantizable_op_type
,
is_full_quantize
,
is_use_cache_file
,
is_optimize_model
,
diff_threshold
,
)
class
TestPostTraininghistForMobilenetv1
(
TestPostTrainingQuantization
):
def
test_post_training_hist_mobilenetv1
(
self
):
model
=
"MobileNet-V1"
algo
=
"hist"
round_type
=
"round"
data_urls
=
[
'http://paddle-i
nference-dist.bj.bcebos.com/int8/m
obile
n
et
v
1_in
t8_model
.tar
.gz
'
'http
s
://paddle-i
magenet-models-name.bj.bcebos.com/dygraph/inference/M
obile
N
et
V
1_in
fer
.tar'
]
data_md5s
=
[
'
13892b0716d26443a8cdea15b3c6438b
'
]
data_md5s
=
[
'
5ee2b1775b11dc233079236cdc216c2e
'
]
quantizable_op_type
=
[
"conv2d"
,
"depthwise_conv2d"
,
...
...
@@ -400,7 +466,8 @@ class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization):
is_optimize_model
=
True
diff_threshold
=
0.03
batch_nums
=
3
self
.
run_test
(
model
,
self
.
run_test
(
model
,
algo
,
round_type
,
data_urls
,
...
...
@@ -410,19 +477,19 @@ class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization):
is_use_cache_file
,
is_optimize_model
,
diff_threshold
,
batch_nums
=
batch_nums
)
batch_nums
=
batch_nums
,
)
class
TestPostTrainingAbsMaxForMobilenetv1
(
TestPostTrainingQuantization
):
def
test_post_training_abs_max_mobilenetv1
(
self
):
model
=
"MobileNet-V1"
algo
=
"abs_max"
round_type
=
"round"
data_urls
=
[
'http://paddle-i
nference-dist.bj.bcebos.com/int8/m
obile
n
et
v
1_in
t8_model
.tar
.gz
'
'http
s
://paddle-i
magenet-models-name.bj.bcebos.com/dygraph/inference/M
obile
N
et
V
1_in
fer
.tar'
]
data_md5s
=
[
'
13892b0716d26443a8cdea15b3c6438b
'
]
data_md5s
=
[
'
5ee2b1775b11dc233079236cdc216c2e
'
]
quantizable_op_type
=
[
"conv2d"
,
"mul"
,
...
...
@@ -432,21 +499,29 @@ class TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization):
is_optimize_model
=
False
# The accuracy diff of post-training quantization (abs_max) maybe bigger
diff_threshold
=
0.05
self
.
run_test
(
model
,
algo
,
round_type
,
data_urls
,
data_md5s
,
quantizable_op_type
,
is_full_quantize
,
is_use_cache_file
,
is_optimize_model
,
diff_threshold
)
self
.
run_test
(
model
,
algo
,
round_type
,
data_urls
,
data_md5s
,
quantizable_op_type
,
is_full_quantize
,
is_use_cache_file
,
is_optimize_model
,
diff_threshold
,
)
class
TestPostTrainingAvgONNXFormatForMobilenetv1
(
TestPostTrainingQuantization
):
def
test_post_training_onnx_format_mobilenetv1
(
self
):
model
=
"MobileNet-V1"
algo
=
"emd"
round_type
=
"round"
data_urls
=
[
'http://paddle-i
nference-dist.bj.bcebos.com/int8/m
obile
n
et
v
1_in
t8_model
.tar
.gz
'
'http
s
://paddle-i
magenet-models-name.bj.bcebos.com/dygraph/inference/M
obile
N
et
V
1_in
fer
.tar'
]
data_md5s
=
[
'
13892b0716d26443a8cdea15b3c6438b
'
]
data_md5s
=
[
'
5ee2b1775b11dc233079236cdc216c2e
'
]
quantizable_op_type
=
[
"conv2d"
,
"depthwise_conv2d"
,
...
...
@@ -458,7 +533,8 @@ class TestPostTrainingAvgONNXFormatForMobilenetv1(TestPostTrainingQuantization):
onnx_format
=
True
diff_threshold
=
0.05
batch_nums
=
3
self
.
run_test
(
model
,
self
.
run_test
(
model
,
algo
,
round_type
,
data_urls
,
...
...
@@ -469,7 +545,8 @@ class TestPostTrainingAvgONNXFormatForMobilenetv1(TestPostTrainingQuantization):
is_optimize_model
,
diff_threshold
,
onnx_format
=
onnx_format
,
batch_nums
=
batch_nums
)
batch_nums
=
batch_nums
,
)
if
__name__
==
'__main__'
:
...
...
python/paddle/fluid/contrib/sparsity/supported_layer_list.py
View file @
dbe08e9b
...
...
@@ -23,9 +23,9 @@ from ...log_helper import get_logger
__all__
=
[
'add_supported_layer'
]
_logger
=
get_logger
(
__name__
,
logging
.
INFO
,
fmt
=
'%(asctime)s-%(levelname)s: %(message)s'
)
_logger
=
get_logger
(
__name__
,
logging
.
INFO
,
fmt
=
'%(asctime)s-%(levelname)s: %(message)s'
)
def
_default_pruning
(
weight_nparray
,
m
,
n
,
func_name
,
param_name
):
...
...
@@ -38,13 +38,17 @@ def _default_pruning(weight_nparray, m, n, func_name, param_name):
exlude_cond_shape4
=
len
(
shape
)
==
4
and
shape
[
1
]
<
m
if
exlude_cond_shape2
:
_logger
.
warning
(
'{} is not pruned because the first dimension of {} is smaller than {}'
.
format
(
param_name
,
shape
,
m
))
'{} is not pruned because the first dimension of {} is smaller than {}'
.
format
(
param_name
,
shape
,
m
)
)
return
weight_pruned_nparray
,
weight_sparse_mask
if
exlude_cond_shape4
:
_logger
.
warning
(
'{} is not pruned because the second dimension of {} is smaller than {}'
.
format
(
param_name
,
shape
,
m
))
'{} is not pruned because the second dimension of {} is smaller than {}'
.
format
(
param_name
,
shape
,
m
)
)
return
weight_pruned_nparray
,
weight_sparse_mask
checked_func_name
=
sparsity
.
CheckMethod
.
get_checking_method
(
func_name
)
...
...
@@ -60,13 +64,13 @@ def _default_pruning(weight_nparray, m, n, func_name, param_name):
# sparsity/utils is row-major pruning. That is the reason we have to transpose weight
# matrices beforce invoking create_mask. Then we transpose the result mask to make
# sure its shape to be the same as the input weight.
weight_sparse_mask
=
sparsity
.
create_mask
(
weight_nparray
.
T
,
func_name
=
func_name
,
n
=
n
,
m
=
m
).
T
weight_sparse_mask
=
sparsity
.
create_mask
(
weight_nparray
.
T
,
func_name
=
func_name
,
n
=
n
,
m
=
m
).
T
weight_pruned_nparray
=
np
.
multiply
(
weight_nparray
,
weight_sparse_mask
)
assert
sparsity
.
check_sparsity
(
weight_pruned_nparray
.
T
,
n
=
n
,
m
=
m
,
func_name
=
checked_func_name
),
\
'Pruning {} weight matrix failure!!!'
.
format
(
param_name
)
assert
sparsity
.
check_sparsity
(
weight_pruned_nparray
.
T
,
n
=
n
,
m
=
m
,
func_name
=
checked_func_name
),
'Pruning {} weight matrix failure!!!'
.
format
(
param_name
)
return
weight_pruned_nparray
,
weight_sparse_mask
...
...
@@ -78,6 +82,7 @@ supported_layers_and_prune_func_map = {}
def
add_supported_layer
(
layer
,
pruning_func
=
None
):
r
"""
Add supported layers and its corresponding pruning function.
Args:
...
...
@@ -87,19 +92,25 @@ def add_supported_layer(layer, pruning_func=None):
pruning_func (function, optional): a function type which receives five argument (weight_nparray,
m, n, func_name, param_name), weight_nparray is a nparray of weight, param_name is the name of weight,
m, n, and func_name, please see `prune_model` for details.
"""
name
=
None
if
isinstance
(
layer
,
str
):
name
=
layer
elif
isinstance
(
layer
,
paddle
.
fluid
.
dygraph
.
layers
.
Layer
):
name
=
paddle
.
fluid
.
dygraph
.
layers
.
_convert_camel_to_snake
(
type
(
layer
).
__name__
)
type
(
layer
).
__name__
)
elif
issubclass
(
layer
,
paddle
.
fluid
.
dygraph
.
layers
.
Layer
):
name
=
paddle
.
fluid
.
dygraph
.
layers
.
_convert_camel_to_snake
(
layer
.
__name__
)
layer
.
__name__
)
else
:
assert
"The type of layer should be string of Layer, but got {}!"
.
format
(
type
(
layer
))
assert
(
"The type of layer should be string of Layer, but got {}!"
.
format
(
type
(
layer
)
)
)
if
pruning_func
is
None
:
pruning_func
=
_default_pruning
_supported_layers_and_prune_func_map_lock
.
acquire
()
...
...
python/paddle/fluid/contrib/sparsity/utils.py
View file @
dbe08e9b
...
...
@@ -27,9 +27,16 @@ from itertools import permutations
import
threading
__all__
=
[
'calculate_density'
,
'check_mask_1d'
,
'get_mask_1d'
,
'check_mask_2d'
,
'get_mask_2d_greedy'
,
'get_mask_2d_best'
,
'create_mask'
,
'check_sparsity'
,
'MaskAlgo'
,
'CheckMethod'
'calculate_density'
,
'check_mask_1d'
,
'get_mask_1d'
,
'check_mask_2d'
,
'get_mask_2d_greedy'
,
'get_mask_2d_best'
,
'create_mask'
,
'check_sparsity'
,
'MaskAlgo'
,
'CheckMethod'
,
]
...
...
@@ -76,8 +83,9 @@ class CheckMethod(Enum):
CheckMethod.get_checking_method(MaskAlgo.MASK_2D_BEST)
# CheckMethod.CHECK_2D
"""
assert
isinstance
(
mask_algo
,
MaskAlgo
),
\
"mask_algo should be MaskAlgo type"
assert
isinstance
(
mask_algo
,
MaskAlgo
),
"mask_algo should be MaskAlgo type"
if
mask_algo
==
MaskAlgo
.
MASK_1D
:
return
CheckMethod
.
CHECK_1D
else
:
...
...
@@ -86,20 +94,25 @@ class CheckMethod(Enum):
def
calculate_density
(
x
):
r
"""
Return the density of the input tensor.
Args:
x (nparray): The input tensor.
Returns:
float: The density of :attr:`x`.
float, The density of :attr:`x`.
Examples:
.. code-block:: python
import paddle
import numpy as np
x = np.array([[0, 1, 3, 0],
[1, 1, 0, 1]])
paddle.incubate.asp.calculate_density(x) # 0.625
"""
x_flattened
=
x
.
flatten
()
return
float
(
np
.
nonzero
(
x_flattened
)[
0
].
size
)
/
x_flattened
.
size
...
...
@@ -126,7 +139,7 @@ def _reshape_1d(mat, m):
remainder
=
mat
.
shape
[
1
]
%
m
if
mat
.
shape
[
1
]
%
m
>
0
:
mat_padded
=
np
.
zeros
((
mat
.
shape
[
0
],
mat
.
shape
[
1
]
+
(
m
-
remainder
)))
mat_padded
[:,
:
mat
.
shape
[
1
]]
=
mat
mat_padded
[:,
:
mat
.
shape
[
1
]]
=
mat
shape
=
mat_padded
.
shape
return
mat_padded
.
reshape
(
-
1
,
m
),
shape
else
:
...
...
@@ -213,7 +226,7 @@ def get_mask_1d(mat, n, m):
min_order_indices
=
np
.
argsort
(
np
.
absolute
(
sub_mat
))
mask_flattern
[
i
,
min_order_indices
[:
n
].
tolist
()]
=
0
mask_flattern
=
mask_flattern
.
reshape
(
shape
)
mask
[:,
:]
=
mask_flattern
[:,
:
mat
.
shape
[
1
]]
mask
[:,
:]
=
mask_flattern
[:,
:
mat
.
shape
[
1
]]
return
mask
...
...
@@ -239,12 +252,12 @@ def _reshape_2d(mat, m):
remainder_0
=
mat
.
shape
[
0
]
%
m
remainder_1
=
mat
.
shape
[
1
]
%
m
new_shape
=
(
mat
.
shape
[
0
]
if
remainder_0
==
0
\
else
mat
.
shape
[
0
]
+
(
m
-
remainder_0
),
mat
.
shape
[
1
]
if
remainder_1
==
0
\
else
mat
.
shape
[
1
]
+
(
m
-
remainder_1
)
)
new_shape
=
(
mat
.
shape
[
0
]
if
remainder_0
==
0
else
mat
.
shape
[
0
]
+
(
m
-
remainder_0
),
mat
.
shape
[
1
]
if
remainder_1
==
0
else
mat
.
shape
[
1
]
+
(
m
-
remainder_1
),
)
mat_padded
=
np
.
zeros
(
new_shape
)
mat_padded
[:
mat
.
shape
[
0
],
:
mat
.
shape
[
1
]]
=
mat
mat_padded
[:
mat
.
shape
[
0
],
:
mat
.
shape
[
1
]]
=
mat
mat_flattern
=
np
.
empty
(
new_shape
).
reshape
(
-
1
,
m
*
m
)
curr_idx
=
0
...
...
@@ -252,9 +265,9 @@ def _reshape_2d(mat, m):
row_end
=
row_start
+
m
for
col_start
in
range
(
0
,
mat_padded
.
shape
[
1
],
m
):
col_end
=
col_start
+
m
sub_mat
=
np
.
squeeze
(
mat_padded
[
row_start
:
row_end
,
\
col_start
:
col_end
]
\
.
reshape
(
-
1
)
)
sub_mat
=
np
.
squeeze
(
mat_padded
[
row_start
:
row_end
,
col_start
:
col_end
]
.
reshape
(
-
1
)
)
mat_flattern
[
curr_idx
]
=
sub_mat
curr_idx
+=
1
return
mat_flattern
,
mat_padded
.
shape
...
...
@@ -304,8 +317,9 @@ def check_mask_2d(mat, n, m):
mat_padded
,
shape
=
_reshape_2d
(
mat
,
m
)
for
sub_mat
in
mat_padded
:
sub_mask
=
np
.
absolute
(
np
.
squeeze
(
sub_mat
.
reshape
(
m
,
m
)))
>
0
if
(
np
.
sum
(
np
.
sum
(
sub_mask
,
axis
=
1
)
>
(
m
-
n
))
!=
0
)
and
\
(
np
.
sum
(
np
.
sum
(
sub_mask
,
axis
=
0
)
>
(
m
-
n
))
!=
0
):
if
(
np
.
sum
(
np
.
sum
(
sub_mask
,
axis
=
1
)
>
(
m
-
n
))
!=
0
)
and
(
np
.
sum
(
np
.
sum
(
sub_mask
,
axis
=
0
)
>
(
m
-
n
))
!=
0
):
return
False
return
True
...
...
@@ -350,15 +364,17 @@ def get_mask_2d_greedy(mat, n, m):
sub_mask
=
np
.
squeeze
(
mask_padded
[
idx
])
min_order_1d_indices
=
np
.
argsort
(
sub_mat
)
min_order_2d_indices
=
[(
int
(
x
/
m
),
x
%
m
)
for
x
in
min_order_1d_indices
]
min_order_2d_indices
=
[
(
int
(
x
/
m
),
x
%
m
)
for
x
in
min_order_1d_indices
]
row_counter
=
collections
.
Counter
()
col_counter
=
collections
.
Counter
()
for
i
in
range
(
len
(
min_order_1d_indices
)
-
1
,
-
1
,
-
1
):
matrix_entry
=
min_order_2d_indices
[
i
]
if
(
row_counter
[
matrix_entry
[
0
]]
==
n
)
or
\
(
col_counter
[
matrix_entry
[
1
]]
==
n
):
if
(
row_counter
[
matrix_entry
[
0
]]
==
n
)
or
(
col_counter
[
matrix_entry
[
1
]]
==
n
):
continue
sub_mask
[
matrix_entry
[
0
],
matrix_entry
[
1
]]
=
1.0
...
...
@@ -373,7 +389,7 @@ def get_mask_2d_greedy(mat, n, m):
col_end
=
col_start
+
m
mask
[
row_start
:
row_end
,
col_start
:
col_end
]
=
mask_padded
[
curr_idx
]
curr_idx
+=
1
return
mask
[:
mat
.
shape
[
0
],
:
mat
.
shape
[
1
]]
return
mask
[:
mat
.
shape
[
0
],
:
mat
.
shape
[
1
]]
_valid_2d_patterns_lock
=
threading
.
Lock
()
...
...
@@ -406,8 +422,11 @@ def _compute_valid_2d_patterns(n, m):
patterns
=
patterns
+
patterns
patterns
=
np
.
asarray
(
list
(
set
(
permutations
(
patterns
,
m
))))
valid
=
((
patterns
.
sum
(
axis
=
1
)
<=
n
).
sum
(
axis
=
1
)
==
m
).
nonzero
()[
0
].
reshape
(
-
1
)
valid
=
(
((
patterns
.
sum
(
axis
=
1
)
<=
n
).
sum
(
axis
=
1
)
==
m
)
.
nonzero
()[
0
]
.
reshape
(
-
1
)
)
valid_patterns
=
np
.
empty
((
valid
.
shape
[
0
],
m
,
m
))
valid_patterns
[:]
=
patterns
[
valid
[:]]
...
...
@@ -454,9 +473,10 @@ def get_mask_2d_best(mat, n, m):
mat_flattern
,
shape
=
_reshape_2d
(
mat
,
m
)
mask_flattern
=
np
.
ones_like
(
mat_flattern
).
reshape
(
-
1
,
m
,
m
)
pmax
=
np
.
argmax
(
np
.
matmul
(
mat_flattern
,
patterns
.
reshape
(
patterns
.
shape
[
0
],
m
*
m
).
T
),
axis
=
1
)
pmax
=
np
.
argmax
(
np
.
matmul
(
mat_flattern
,
patterns
.
reshape
(
patterns
.
shape
[
0
],
m
*
m
).
T
),
axis
=
1
,
)
mask_flattern
[:]
=
patterns
[
pmax
[:]]
mask
=
np
.
empty
(
shape
)
...
...
@@ -468,7 +488,7 @@ def get_mask_2d_best(mat, n, m):
col_end
=
col_start
+
m
mask
[
row_start
:
row_end
,
col_start
:
col_end
]
=
mask_flattern
[
curr_idx
]
curr_idx
+=
1
return
mask
[:
mat
.
shape
[
0
],
:
mat
.
shape
[
1
]]
return
mask
[:
mat
.
shape
[
0
],
:
mat
.
shape
[
1
]]
def
create_mask
(
tensor
,
func_name
=
MaskAlgo
.
MASK_1D
,
n
=
2
,
m
=
4
):
...
...
@@ -508,9 +528,10 @@ def create_mask(tensor, func_name=MaskAlgo.MASK_1D, n=2, m=4):
dtype
=
tensor
.
dtype
t
=
tensor
.
astype
(
float
)
assert
isinstance
(
func_name
,
MaskAlgo
),
\
"func_name argumet of create_mask is only accepted as type MaskAlgo. "
\
assert
isinstance
(
func_name
,
MaskAlgo
),
(
"func_name argumet of create_mask is only accepted as type MaskAlgo. "
"But got {}"
.
format
(
type
(
func_name
))
)
func
=
getattr
(
sys
.
modules
[
__name__
],
func_name
.
value
,
None
)
if
len
(
shape
)
==
1
:
t
=
t
.
reshape
(
1
,
shape
[
0
])
...
...
@@ -520,14 +541,20 @@ def create_mask(tensor, func_name=MaskAlgo.MASK_1D, n=2, m=4):
t
=
t
.
reshape
(
shape
[
0
]
*
shape
[
1
],
shape
[
2
])
# 4d-tensor conv (h, w, in, out) -> (h*w*out, in) in GemmConvKernel Op
elif
len
(
shape
)
==
4
:
t
=
t
.
transpose
([
0
,
1
,
3
,
2
]).
reshape
(
shape
[
0
]
*
shape
[
1
]
*
shape
[
3
],
shape
[
2
])
t
=
t
.
transpose
([
0
,
1
,
3
,
2
]).
reshape
(
shape
[
0
]
*
shape
[
1
]
*
shape
[
3
],
shape
[
2
]
)
mask
=
func
(
t
,
n
=
n
,
m
=
m
)
return
mask
.
reshape
([
shape
[
0
],
shape
[
1
],
shape
[
3
],
shape
[
2
]]).
transpose
([
0
,
1
,
3
,
2
]).
astype
(
dtype
)
return
(
mask
.
reshape
([
shape
[
0
],
shape
[
1
],
shape
[
3
],
shape
[
2
]])
.
transpose
([
0
,
1
,
3
,
2
])
.
astype
(
dtype
)
)
else
:
raise
ValueError
(
"The dimension of input tensor is not supported in create_mask, "
\
"Only dimension < 4 is supported but got {}"
.
format
(
len
(
shape
)))
raise
ValueError
(
"The dimension of input tensor is not supported in create_mask, "
"Only dimension < 4 is supported but got {}"
.
format
(
len
(
shape
))
)
mask
=
func
(
t
,
n
=
n
,
m
=
m
)
return
mask
.
reshape
(
shape
).
astype
(
dtype
)
...
...
@@ -566,9 +593,10 @@ def check_sparsity(tensor, func_name=CheckMethod.CHECK_1D, n=2, m=4):
shape
=
tensor
.
shape
t
=
tensor
.
astype
(
float
)
assert
type
(
func_name
)
==
CheckMethod
,
\
"func_name argumet of check_sparsity is only accepted as type CheckMethod. "
\
assert
type
(
func_name
)
==
CheckMethod
,
(
"func_name argumet of check_sparsity is only accepted as type CheckMethod. "
"But got {}"
.
format
(
type
(
func_name
))
)
func
=
getattr
(
sys
.
modules
[
__name__
],
func_name
.
value
,
None
)
if
len
(
shape
)
==
1
:
t
=
t
.
reshape
(
1
,
shape
[
0
])
...
...
@@ -578,10 +606,13 @@ def check_sparsity(tensor, func_name=CheckMethod.CHECK_1D, n=2, m=4):
t
=
t
.
reshape
(
shape
[
0
]
*
shape
[
1
],
shape
[
2
])
# 4d-tensor conv (h, w, in, out) -> (h*w*out, in) in GemmConvKernel Op
elif
len
(
shape
)
==
4
:
t
=
t
.
transpose
([
0
,
1
,
3
,
2
]).
reshape
([
shape
[
0
]
*
shape
[
1
]
*
shape
[
3
],
shape
[
2
]])
t
=
t
.
transpose
([
0
,
1
,
3
,
2
]).
reshape
(
[
shape
[
0
]
*
shape
[
1
]
*
shape
[
3
],
shape
[
2
]]
)
else
:
raise
ValueError
(
"The dimension of input tensor is not supported in create_mask, "
\
"Only dimension < 4 is supported but got {}"
.
format
(
len
(
shape
)))
raise
ValueError
(
"The dimension of input tensor is not supported in create_mask, "
"Only dimension < 4 is supported but got {}"
.
format
(
len
(
shape
))
)
return
func
(
t
,
n
=
n
,
m
=
m
)
python/paddle/fluid/core.py
View file @
dbe08e9b
...
...
@@ -35,9 +35,9 @@ try:
if
os
.
name
==
'nt'
:
third_lib_path
=
current_path
+
os
.
sep
+
'..'
+
os
.
sep
+
'libs'
# Will load shared library from 'path' on windows
os
.
environ
[
'path'
]
=
current_path
+
';'
+
third_lib_path
+
';'
+
os
.
environ
[
'path'
]
os
.
environ
[
'path'
]
=
(
current_path
+
';'
+
third_lib_path
+
';'
+
os
.
environ
[
'path'
]
)
sys
.
path
.
insert
(
0
,
third_lib_path
)
# Note: from python3.8, PATH will not take effect
# https://github.com/python/cpython/pull/12302
...
...
@@ -47,20 +47,24 @@ try:
except
ImportError
as
e
:
from
..
import
compat
as
cpt
if
os
.
name
==
'nt'
:
executable_path
=
os
.
path
.
abspath
(
os
.
path
.
dirname
(
sys
.
executable
))
raise
ImportError
(
"""NOTE: You may need to run
\"
set PATH=%s;%%PATH%%
\"
if you encounters
\"
DLL load failed
\"
errors. If you have python
installed in other directory, replace
\"
%s
\"
with your own
directory. The original error is:
\n
%s"""
%
(
executable_path
,
executable_path
,
cpt
.
get_exception_message
(
e
)))
directory. The original error is:
\n
%s"""
%
(
executable_path
,
executable_path
,
cpt
.
get_exception_message
(
e
))
)
else
:
raise
ImportError
(
"""NOTE: You may need to run
\"
export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
\"
if you encounters
\"
libmkldnn.so not found
\"
errors. If you have python
installed in other directory, replace
\"
/usr/local/lib
\"
with your own
directory. The original error is:
\n
"""
+
cpt
.
get_exception_message
(
e
))
directory. The original error is:
\n
"""
+
cpt
.
get_exception_message
(
e
)
)
except
Exception
as
e
:
raise
e
...
...
@@ -70,36 +74,45 @@ def avx_supported():
Whether current system(Linux, MacOS, Windows) is supported with AVX.
"""
from
..
import
compat
as
cpt
sysstr
=
platform
.
system
().
lower
()
has_avx
=
False
if
sysstr
==
'linux'
:
try
:
has_avx
=
os
.
popen
(
'cat /proc/cpuinfo | grep -i avx'
).
read
()
!=
''
pipe
=
os
.
popen
(
'cat /proc/cpuinfo | grep -i avx'
)
has_avx
=
pipe
.
read
()
!=
''
pipe
.
close
()
except
Exception
as
e
:
sys
.
stderr
.
write
(
'Can not get the AVX flag from /proc/cpuinfo.
\n
'
'The original error is: %s
\n
'
%
cpt
.
get_exception_message
(
e
))
sys
.
stderr
.
write
(
'Can not get the AVX flag from /proc/cpuinfo.
\n
'
'The original error is: %s
\n
'
%
cpt
.
get_exception_message
(
e
)
)
return
has_avx
elif
sysstr
==
'darwin'
:
try
:
has_avx
=
os
.
popen
(
'sysctl machdep.cpu.features | grep -i avx'
).
read
()
!=
''
pipe
=
os
.
popen
(
'sysctl machdep.cpu.features | grep -i avx'
)
has_avx
=
pipe
.
read
()
!=
''
pipe
.
close
()
except
Exception
as
e
:
sys
.
stderr
.
write
(
'Can not get the AVX flag from machdep.cpu.features.
\n
'
'The original error is: %s
\n
'
%
cpt
.
get_exception_message
(
e
))
'The original error is: %s
\n
'
%
cpt
.
get_exception_message
(
e
)
)
if
not
has_avx
:
import
subprocess
pipe
=
subprocess
.
Popen
(
'sysctl machdep.cpu.leaf7_features | grep -i avx'
,
shell
=
True
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
)
stderr
=
subprocess
.
PIPE
,
)
_
=
pipe
.
communicate
()
has_avx
=
True
if
pipe
.
returncode
==
0
else
False
return
has_avx
elif
sysstr
==
'windows'
:
import
ctypes
ONE_PAGE
=
ctypes
.
c_size_t
(
0x1000
)
def
asm_func
(
code_str
,
restype
=
ctypes
.
c_uint32
,
argtypes
=
()):
...
...
@@ -109,24 +122,31 @@ def avx_supported():
pfnVirtualAlloc
.
restype
=
ctypes
.
c_void_p
MEM_COMMIT
=
ctypes
.
c_ulong
(
0x1000
)
PAGE_READWRITE
=
ctypes
.
c_ulong
(
0x4
)
address
=
pfnVirtualAlloc
(
None
,
ONE_PAGE
,
MEM_COMMIT
,
PAGE_READWRITE
)
address
=
pfnVirtualAlloc
(
None
,
ONE_PAGE
,
MEM_COMMIT
,
PAGE_READWRITE
)
if
not
address
:
raise
Exception
(
"Failed to VirtualAlloc"
)
# Copy the code into the memory segment
memmove
=
ctypes
.
CFUNCTYPE
(
ctypes
.
c_void_p
,
ctypes
.
c_void_p
,
memmove
=
ctypes
.
CFUNCTYPE
(
ctypes
.
c_void_p
,
ctypes
.
c_void_p
,
ctypes
.
c_void_p
,
ctypes
.
c_size_t
)(
ctypes
.
_memmove_addr
)
ctypes
.
c_size_t
,
)(
ctypes
.
_memmove_addr
)
if
memmove
(
address
,
code_str
,
len
(
code_str
))
<
0
:
raise
Exception
(
"Failed to memmove"
)
# Enable execute permissions
PAGE_EXECUTE
=
ctypes
.
c_ulong
(
0x10
)
pfnVirtualProtect
=
ctypes
.
windll
.
kernel32
.
VirtualProtect
res
=
pfnVirtualProtect
(
ctypes
.
c_void_p
(
address
),
ONE_PAGE
,
PAGE_EXECUTE
,
ctypes
.
byref
(
ctypes
.
c_ulong
(
0
)))
res
=
pfnVirtualProtect
(
ctypes
.
c_void_p
(
address
),
ONE_PAGE
,
PAGE_EXECUTE
,
ctypes
.
byref
(
ctypes
.
c_ulong
(
0
)),
)
if
not
res
:
raise
Exception
(
"Failed VirtualProtect"
)
...
...
@@ -135,7 +155,8 @@ def avx_supported():
pfnGetCurrentProcess
.
restype
=
ctypes
.
c_void_p
prochandle
=
ctypes
.
c_void_p
(
pfnGetCurrentProcess
())
res
=
ctypes
.
windll
.
kernel32
.
FlushInstructionCache
(
prochandle
,
ctypes
.
c_void_p
(
address
),
ONE_PAGE
)
prochandle
,
ctypes
.
c_void_p
(
address
),
ONE_PAGE
)
if
not
res
:
raise
Exception
(
"Failed FlushInstructionCache"
)
...
...
@@ -153,12 +174,14 @@ def avx_supported():
# Convert the code_str into a function that returns uint
func
,
address
=
asm_func
(
code_str
)
retval
=
func
()
ctypes
.
windll
.
kernel32
.
VirtualFree
(
ctypes
.
c_void_p
(
address
),
ctypes
.
c_size_t
(
0
),
ONE_PAGE
)
ctypes
.
windll
.
kernel32
.
VirtualFree
(
ctypes
.
c_void_p
(
address
),
ctypes
.
c_size_t
(
0
),
ONE_PAGE
)
except
Exception
as
e
:
sys
.
stderr
.
write
(
'Failed getting the AVX flag on Windows.
\n
'
'The original error is: %s
\n
'
%
cpt
.
get_exception_message
(
e
))
sys
.
stderr
.
write
(
'Failed getting the AVX flag on Windows.
\n
'
'The original error is: %s
\n
'
%
cpt
.
get_exception_message
(
e
)
)
return
(
retval
&
(
1
<<
avx_bit
))
>
0
else
:
sys
.
stderr
.
write
(
'Do not get AVX flag on %s
\n
'
%
sysstr
)
...
...
@@ -167,10 +190,10 @@ def avx_supported():
def
run_shell_command
(
cmd
):
import
subprocess
out
,
err
=
subprocess
.
Popen
(
cmd
,
stdout
=
subprocess
.
P
IPE
,
stderr
=
subprocess
.
PIPE
,
shell
=
True
).
communicate
()
out
,
err
=
subprocess
.
P
open
(
cmd
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
shell
=
True
).
communicate
()
if
err
:
return
None
else
:
...
...
@@ -179,8 +202,9 @@ def run_shell_command(cmd):
def
get_dso_path
(
core_so
,
dso_name
):
if
core_so
and
dso_name
:
return
run_shell_command
(
"ldd %s|grep %s|awk '{print $3}'"
%
(
core_so
,
dso_name
))
return
run_shell_command
(
"ldd %s|grep %s|awk '{print $3}'"
%
(
core_so
,
dso_name
)
)
else
:
return
None
...
...
@@ -189,6 +213,7 @@ def load_dso(dso_absolute_path):
if
dso_absolute_path
:
try
:
from
ctypes
import
cdll
cdll
.
LoadLibrary
(
dso_absolute_path
)
except
:
warnings
.
warn
(
"Load {} failed"
.
format
(
dso_absolute_path
))
...
...
@@ -247,12 +272,14 @@ if platform.system().lower() == 'linux':
try
:
from
.
import
libpaddle
if
avx_supported
()
and
not
libpaddle
.
is_compiled_with_avx
():
sys
.
stderr
.
write
(
"Hint: Your machine support AVX, but the installed paddlepaddle doesn't have avx core. "
"Hence, no-avx core with worse preformance will be imported.
\n
If you like, you could "
"reinstall paddlepaddle by 'python -m pip install --force-reinstall paddlepaddle-gpu[==version]' "
"to get better performance.
\n
"
)
"to get better performance.
\n
"
)
# assign tensor alias
libpaddle
.
LoDTensor
=
libpaddle
.
Tensor
...
...
@@ -283,6 +310,7 @@ try:
from
.libpaddle
import
_Profiler
,
_ProfilerResult
,
_RecordEvent
from
.libpaddle
import
_set_current_stream
from
.libpaddle
import
_get_phi_kernel_name
if
sys
.
platform
!=
'win32'
:
from
.libpaddle
import
_set_process_pids
from
.libpaddle
import
_erase_process_pids
...
...
@@ -295,12 +323,18 @@ try:
except
Exception
as
e
:
if
has_paddle_dy_lib
:
sys
.
stderr
.
write
(
'Error: Can not import paddle core while this file exists: '
+
current_path
+
os
.
sep
+
'libpaddle.'
+
dy_lib_suffix
+
'
\n
'
)
'Error: Can not import paddle core while this file exists: '
+
current_path
+
os
.
sep
+
'libpaddle.'
+
dy_lib_suffix
+
'
\n
'
)
if
not
avx_supported
()
and
libpaddle
.
is_compiled_with_avx
():
sys
.
stderr
.
write
(
"Error: Your machine doesn't support AVX, but the installed PaddlePaddle is avx core, "
"you should reinstall paddlepaddle with no-avx core.
\n
"
)
"you should reinstall paddlepaddle with no-avx core.
\n
"
)
raise
e
...
...
@@ -317,22 +351,26 @@ def set_paddle_custom_device_lib_path(lib_path):
# set paddle lib path
def
set_paddle_lib_path
():
site_dirs
=
site
.
getsitepackages
()
if
hasattr
(
site
,
'getsitepackages'
)
else
[
x
for
x
in
sys
.
path
if
'site-packages'
in
x
]
site_dirs
=
(
site
.
getsitepackages
()
if
hasattr
(
site
,
'getsitepackages'
)
else
[
x
for
x
in
sys
.
path
if
'site-packages'
in
x
]
)
for
site_dir
in
site_dirs
:
lib_dir
=
os
.
path
.
sep
.
join
([
site_dir
,
'paddle'
,
'libs'
])
if
os
.
path
.
exists
(
lib_dir
):
_set_paddle_lib_path
(
lib_dir
)
set_paddle_custom_device_lib_path
(
os
.
path
.
sep
.
join
([
lib_dir
,
'..'
,
'..'
,
'paddle-plugins'
]))
os
.
path
.
sep
.
join
([
lib_dir
,
'..'
,
'..'
,
'paddle-plugins'
])
)
return
if
hasattr
(
site
,
'USER_SITE'
):
lib_dir
=
os
.
path
.
sep
.
join
([
site
.
USER_SITE
,
'paddle'
,
'libs'
])
if
os
.
path
.
exists
(
lib_dir
):
_set_paddle_lib_path
(
lib_dir
)
set_paddle_custom_device_lib_path
(
os
.
path
.
sep
.
join
([
lib_dir
,
'..'
,
'..'
,
'paddle-plugins'
]))
os
.
path
.
sep
.
join
([
lib_dir
,
'..'
,
'..'
,
'paddle-plugins'
])
)
set_paddle_lib_path
()
python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py
View file @
dbe08e9b
...
...
@@ -18,19 +18,32 @@ import six
import
paddle
from
paddle.fluid
import
framework
,
backward
,
core
,
program_guard
from
paddle.fluid.executor
import
_is_enable_standalone_executor
,
_is_dy2st_enable_standalone_executor
from
paddle.fluid.executor
import
(
_is_enable_standalone_executor
,
_is_dy2st_enable_standalone_executor
,
)
from
paddle.fluid.dygraph
import
layers
from
paddle.fluid.dygraph.base
import
switch_to_static_graph
from
paddle.fluid.dygraph.dygraph_to_static
import
logging_utils
from
paddle.fluid.dygraph.dygraph_to_static.return_transformer
import
RETURN_NO_VALUE_MAGIC_NUM
from
paddle.fluid.dygraph.dygraph_to_static.return_transformer
import
(
RETURN_NO_VALUE_MAGIC_NUM
,
)
from
paddle.fluid.layers.utils
import
flatten
from
paddle.fluid.layers.utils
import
pack_sequence_as
from
paddle.fluid.layers.utils
import
_hash_with_id
from
paddle.fluid.compiler
import
BuildStrategy
from
paddle.fluid.framework
import
_apply_pass
from
paddle.fluid.contrib.mixed_precision.decorator
import
AutoMixedPrecisionLists
from
paddle.fluid.contrib.mixed_precision.fp16_utils
import
rewrite_program
,
cast_model_to_fp16
from
paddle.fluid.dygraph.amp.auto_cast
import
_in_amp_guard
,
_in_pure_fp16_guard
from
paddle.fluid.contrib.mixed_precision.decorator
import
(
AutoMixedPrecisionLists
,
)
from
paddle.fluid.contrib.mixed_precision.fp16_utils
import
(
rewrite_program
,
cast_model_to_fp16
,
)
from
paddle.fluid.dygraph.amp.auto_cast
import
(
_in_amp_guard
,
_in_pure_fp16_guard
,
)
import
paddle.compat
as
cpt
from
paddle
import
_C_ops
,
_legacy_C_ops
...
...
@@ -64,7 +77,8 @@ class NestSequence(object):
var_ids
=
[]
for
idx
,
var
in
enumerate
(
self
.
__input_list
):
if
isinstance
(
var
,
(
framework
.
Variable
,
core
.
VarBase
,
core
.
eager
.
Tensor
)):
var
,
(
framework
.
Variable
,
core
.
VarBase
,
core
.
eager
.
Tensor
)
):
var_ids
.
append
(
idx
)
return
var_ids
...
...
@@ -77,15 +91,17 @@ class NestSequence(object):
warning_types
=
set
()
for
var
in
self
.
__input_list
:
if
not
isinstance
(
var
,
(
framework
.
Variable
,
core
.
VarBase
,
core
.
eager
.
Tensor
)
):
var
,
(
framework
.
Variable
,
core
.
VarBase
,
core
.
eager
.
Tensor
)
):
warning_types
.
add
(
type
(
var
))
if
warning_types
:
logging_utils
.
warn
(
"Output of traced function contains non-tensor type values: {}. "
"Currently, We don't support to update them while training and will return "
"what we first saw. Please try to return them as tensor."
.
format
(
list
(
warning_types
)))
"what we first saw. Please try to return them as tensor."
.
format
(
list
(
warning_types
)
)
)
@
property
def
var_ids
(
self
):
...
...
@@ -139,12 +155,9 @@ class PartialProgramLayer:
Layer: A Layer object that run all ops internally in static mode.
"""
def
__init__
(
self
,
main_program
,
inputs
,
outputs
,
parameters
=
None
,
**
kwargs
):
def
__init__
(
self
,
main_program
,
inputs
,
outputs
,
parameters
=
None
,
**
kwargs
):
super
(
PartialProgramLayer
,
self
).
__init__
()
self
.
_inputs
=
NestSequence
(
inputs
)
self
.
_outputs
=
NestSequence
(
outputs
,
need_check
=
True
)
...
...
@@ -167,7 +180,8 @@ class PartialProgramLayer:
# For AMP training
self
.
_amp_list
=
AutoMixedPrecisionLists
(
custom_white_list
=
custom_white_list
,
custom_black_list
=
custom_black_list
)
custom_black_list
=
custom_black_list
,
)
# program_id -> list(scope)
self
.
_scope_cache
=
{}
...
...
@@ -188,10 +202,6 @@ class PartialProgramLayer:
else
:
return
core
.
Scope
()
@
LazyInitialized
def
__fake_vars
(
self
):
return
_create_fake_var
()
@
LazyInitialized
def
_double_grads
(
self
):
return
self
.
_get_double_grads
(
self
.
_origin_main_program
)
...
...
@@ -203,7 +213,8 @@ class PartialProgramLayer:
return
self
.
_origin_main_program
.
clone
(
for_test
=
is_infer_mode
)
else
:
train_program
=
self
.
_append_backward_desc
(
self
.
_origin_main_program
)
self
.
_origin_main_program
)
# Note: Only set grad type once after initializing train program. So we put it here.
self
.
_set_grad_type
(
self
.
_params
,
train_program
)
return
train_program
...
...
@@ -223,16 +234,18 @@ class PartialProgramLayer:
@
switch_to_static_graph
def
_create_pure_fp16_program
(
self
,
is_infer_mode
=
False
):
pure_fp16_program
=
self
.
_origin_main_program
.
clone
(
for_test
=
is_infer_mode
)
for_test
=
is_infer_mode
)
with
program_guard
(
pure_fp16_program
):
cast_model_to_fp16
(
pure_fp16_program
,
self
.
_amp_list
,
use_fp16_guard
=
False
)
cast_model_to_fp16
(
pure_fp16_program
,
self
.
_amp_list
,
use_fp16_guard
=
False
)
if
is_infer_mode
:
return
pure_fp16_program
else
:
train_pure_fp16_program
=
self
.
_append_backward_desc
(
pure_fp16_program
)
pure_fp16_program
)
self
.
_set_grad_type
(
self
.
_params
,
train_pure_fp16_program
)
return
train_pure_fp16_program
...
...
@@ -240,23 +253,27 @@ class PartialProgramLayer:
def
_create_forward_backward_train_program
(
self
):
whole_program
=
self
.
_create_program
()
forward_end_op_index
=
self
.
_infer_program
.
desc
.
block
(
0
).
op_size
()
return
self
.
_get_forward_backward_program_form
(
whole_program
,
forward_end_op_index
)
return
self
.
_get_forward_backward_program_form
(
whole_program
,
forward_end_op_index
)
@
switch_to_static_graph
def
_create_forward_backward_train_amp_program
(
self
):
whole_program
=
self
.
_create_amp_program
()
forward_end_op_index
=
self
.
_infer_amp_program
.
desc
.
block
(
0
).
op_size
()
return
self
.
_get_forward_backward_program_form
(
whole_program
,
forward_end_op_index
)
return
self
.
_get_forward_backward_program_form
(
whole_program
,
forward_end_op_index
)
@
switch_to_static_graph
def
_create_forward_backward_train_pure_fp16_program
(
self
):
whole_program
=
self
.
_create_pure_fp16_program
()
forward_end_op_index
=
self
.
_infer_pure_fp16_program
.
desc
.
block
(
0
).
op_size
()
return
self
.
_get_forward_backward_program_form
(
whole_program
,
forward_end_op_index
)
0
).
op_size
()
return
self
.
_get_forward_backward_program_form
(
whole_program
,
forward_end_op_index
)
@
LazyInitialized
def
_train_program
(
self
):
...
...
@@ -352,8 +369,9 @@ class PartialProgramLayer:
@
LazyInitialized
def
_train_program_id
(
self
):
program_id
=
_hash_with_id
(
self
.
_train_program
,
self
)
core
.
_set_cached_executor_build_strategy
(
program_id
,
self
.
_build_strategy
)
core
.
_set_cached_executor_build_strategy
(
program_id
,
self
.
_build_strategy
)
return
program_id
@
LazyInitialized
...
...
@@ -363,8 +381,9 @@ class PartialProgramLayer:
@
LazyInitialized
def
_train_amp_program_id
(
self
):
program_id
=
_hash_with_id
(
self
.
_train_amp_program
,
self
)
core
.
_set_cached_executor_build_strategy
(
program_id
,
self
.
_build_strategy
)
core
.
_set_cached_executor_build_strategy
(
program_id
,
self
.
_build_strategy
)
return
program_id
@
LazyInitialized
...
...
@@ -374,8 +393,9 @@ class PartialProgramLayer:
@
LazyInitialized
def
_train_pure_fp16_program_id
(
self
):
program_id
=
_hash_with_id
(
self
.
_train_pure_fp16_program
,
self
)
core
.
_set_cached_executor_build_strategy
(
program_id
,
self
.
_build_strategy
)
core
.
_set_cached_executor_build_strategy
(
program_id
,
self
.
_build_strategy
)
return
program_id
@
LazyInitialized
...
...
@@ -411,8 +431,9 @@ class PartialProgramLayer:
return
main_program
def
prepare_gradient_aggregation
(
self
,
start_idx
,
main_program
,
target_program
):
def
prepare_gradient_aggregation
(
self
,
start_idx
,
main_program
,
target_program
):
"""
Why we need add gradient aggregation operation ?
In some cases, if non leaf nodes are used as output, gradient overwriting will occur, such as
...
...
@@ -431,7 +452,7 @@ class PartialProgramLayer:
"""
if
not
isinstance
(
var
,
framework
.
Variable
)
or
var
.
type
not
in
[
core
.
VarDesc
.
VarType
.
LOD_TENSOR
,
core
.
VarDesc
.
VarType
.
SELECTED_ROWS
core
.
VarDesc
.
VarType
.
SELECTED_ROWS
,
]:
return
False
if
var
.
dtype
not
in
[
paddle
.
float32
,
paddle
.
float64
]:
...
...
@@ -448,20 +469,28 @@ class PartialProgramLayer:
new_grad_name
=
var
.
name
+
suffix
+
"@GRAD"
finded_ops
=
list
(
filter
(
lambda
x
:
x
[
0
]
>=
start_idx
and
any
([
lambda
x
:
x
[
0
]
>=
start_idx
and
any
(
[
out_arg
==
var_grad_name
for
out_arg
in
x
[
1
].
output_arg_names
]),
enumerate
(
target_program
.
block
(
0
).
ops
)))
]
),
enumerate
(
target_program
.
block
(
0
).
ops
),
)
)
# len(finded_ops) may equals zero when stop_gradient works.
# len(finded_ops) may > 1, because we may have fill_constant op.
if
len
(
finded_ops
)
==
0
:
return
None
# step1: create a new var named var.name@GRAD
target_program
.
block
(
0
).
create_var
(
name
=
new_grad_name
,
target_program
.
block
(
0
).
create_var
(
name
=
new_grad_name
,
type
=
var
.
type
,
dtype
=
var
.
dtype
,
shape
=
var
.
shape
)
shape
=
var
.
shape
,
)
# step2: rename the var.name@GRAD to var.name@GRAD@dy2static
for
idx
,
op
in
finded_ops
:
op
.
_rename_input
(
var_grad_name
,
new_grad_name
)
...
...
@@ -472,11 +501,13 @@ class PartialProgramLayer:
finded_ops
[
-
1
][
0
]
+
1
,
type
=
'sum'
,
inputs
=
{
'X'
:
[
var_grad_name
,
new_grad_name
]},
outputs
=
{
"Out"
:
var_grad_name
})
outputs
=
{
"Out"
:
var_grad_name
},
)
return
None
to_processed_vars
=
list
(
filter
(
_need_aggregation
,
self
.
_outputs
.
tolist
()))
filter
(
_need_aggregation
,
self
.
_outputs
.
tolist
())
)
for
_var
in
to_processed_vars
:
_insert_aggregation_ops_for_var
(
target_program
,
_var
)
...
...
@@ -489,11 +520,12 @@ class PartialProgramLayer:
if
isinstance
(
out
,
framework
.
Variable
):
targets
.
append
(
program
.
global_block
().
var
(
out
.
name
))
if
targets
and
self
.
_params
:
if
targets
:
backward
.
gradients
(
targets
=
targets
,
inputs
=
[])
start_idx
=
len
(
main_program
.
block
(
0
).
ops
)
+
2
*
len
(
self
.
_outputs
.
tolist
())
start_idx
=
len
(
main_program
.
block
(
0
).
ops
)
+
2
*
len
(
self
.
_outputs
.
tolist
()
)
self
.
prepare_gradient_aggregation
(
start_idx
,
main_program
,
program
)
...
...
@@ -512,7 +544,10 @@ class PartialProgramLayer:
found_param
=
False
for
block
in
program
.
blocks
:
for
op
in
block
.
ops
:
if
param
.
name
in
op
.
input_arg_names
or
param
.
name
in
op
.
output_arg_names
:
if
(
param
.
name
in
op
.
input_arg_names
or
param
.
name
in
op
.
output_arg_names
):
required_params
.
append
(
param
)
found_param
=
True
break
...
...
@@ -529,15 +564,21 @@ class PartialProgramLayer:
var_desc
=
block
.
vars
[
name
].
desc
var_base
=
None
if
not
framework
.
_in_eager_mode_
:
var_base
=
core
.
VarBase
(
var_desc
.
dtype
(),
var_base
=
core
.
VarBase
(
var_desc
.
dtype
(),
var_desc
.
shape
(),
var_desc
.
name
(),
var_desc
.
type
(),
False
)
var_desc
.
type
(),
False
,
)
else
:
var_base
=
core
.
eager
.
Tensor
(
var_desc
.
dtype
(),
var_base
=
core
.
eager
.
Tensor
(
var_desc
.
dtype
(),
var_desc
.
shape
(),
var_desc
.
name
(),
var_desc
.
type
(),
False
)
var_desc
.
type
(),
False
,
)
double_grads
.
append
(
var_base
)
return
self
.
_valid_vars
(
double_grads
)
...
...
@@ -557,36 +598,62 @@ class PartialProgramLayer:
attrs
=
[
'global_block'
,
self
.
program
.
desc
.
block
(
0
),
'start_op_index'
,
0
,
'end_op_index'
,
self
.
_get_end_op_index
(),
'is_test'
,
not
self
.
training
,
'program_id'
,
self
.
program_id
self
.
program
.
desc
.
block
(
0
),
'start_op_index'
,
0
,
'end_op_index'
,
self
.
_get_end_op_index
(),
'is_test'
,
not
self
.
training
,
'program_id'
,
self
.
program_id
,
]
if
self
.
_cuda_graph_capture_mode
:
attrs
.
extend
(
(
'cuda_graph_capture_mode'
,
self
.
_cuda_graph_capture_mode
,
'cuda_graph_pool_id'
,
self
.
_cuda_graph_pool_id
))
use_interpretorcore
=
_is_enable_standalone_executor
(
)
and
_is_dy2st_enable_standalone_executor
()
(
'cuda_graph_capture_mode'
,
self
.
_cuda_graph_capture_mode
,
'cuda_graph_pool_id'
,
self
.
_cuda_graph_pool_id
,
)
)
use_interpretorcore
=
(
_is_enable_standalone_executor
()
and
_is_dy2st_enable_standalone_executor
()
)
attrs
.
extend
((
'use_interpretorcore'
,
use_interpretorcore
))
if
use_interpretorcore
:
attrs
.
extend
(
(
'forward_global_block'
,
self
.
forward_program
.
desc
.
block
(
0
),
'backward_global_block'
,
self
.
backward_program
.
desc
.
block
(
0
)))
(
'forward_global_block'
,
self
.
forward_program
.
desc
.
block
(
0
),
'backward_global_block'
,
self
.
backward_program
.
desc
.
block
(
0
),
)
)
_legacy_C_ops
.
run_program
(
self
.
_valid_vars
(
in_vars
),
self
.
_valid_vars
(
self
.
_params
),
self
.
_valid_vars
(
in_vars
),
self
.
_valid_vars
(
self
.
_params
),
self
.
_valid_vars
(
out_vars
),
self
.
_create_scope_vec
(
program_id
=
self
.
program_id
,
use_scope_cache
=
True
),
self
.
_double_grads
,
self
.
_cuda_graph_vec
,
*
attrs
)
self
.
_create_scope_vec
(
program_id
=
self
.
program_id
,
use_scope_cache
=
True
),
self
.
_double_grads
,
self
.
_cuda_graph_vec
,
*
attrs
)
else
:
_legacy_C_ops
.
run_program
(
self
.
_valid_vars
(
in_vars
),
_legacy_C_ops
.
run_program
(
self
.
_valid_vars
(
in_vars
),
self
.
_valid_vars
(
self
.
_params
),
self
.
_valid_vars
(
out_vars
),
self
.
_create_scope_vec
(),
self
.
_double_grads
,
self
.
_cuda_graph_vec
,
*
attrs
)
self
.
_double_grads
,
self
.
_cuda_graph_vec
,
*
attrs
)
restored_nest_out
=
self
.
_restore_out
(
out_vars
)
return
self
.
_remove_no_value
(
restored_nest_out
)
...
...
@@ -594,9 +661,11 @@ class PartialProgramLayer:
if
_in_pure_fp16_guard
():
for
i
,
var
in
enumerate
(
in_vars
):
name
=
var
.
name
if
(
self
.
program
.
global_block
().
has_var
(
name
)
if
(
self
.
program
.
global_block
().
has_var
(
name
)
and
self
.
program
.
global_block
().
var
(
name
).
dtype
==
paddle
.
float16
):
==
paddle
.
float16
):
in_vars
[
i
]
=
var
.
astype
(
'float16'
)
in_vars
[
i
].
name
=
name
...
...
@@ -627,25 +696,32 @@ class PartialProgramLayer:
return
self
.
_infer_program
@
switch_to_static_graph
def
_get_forward_backward_program_form
(
self
,
whole_program
,
forward_end_op_index
):
def
_get_forward_backward_program_form
(
self
,
whole_program
,
forward_end_op_index
):
forward_builded_program
=
add_build_strategy_for
(
whole_program
,
0
,
forward_end_op_index
,
self
.
_build_strategy
)
whole_program
,
0
,
forward_end_op_index
,
self
.
_build_strategy
)
backward_start_op_index
=
forward_end_op_index
+
2
*
len
(
self
.
_outputs
.
var_ids
)
self
.
_outputs
.
var_ids
)
backward_end_op_index
=
whole_program
.
desc
.
block
(
0
).
op_size
()
backward_builded_program
=
add_build_strategy_for
(
whole_program
,
backward_start_op_index
,
backward_end_op_index
,
self
.
_build_strategy
)
self
.
_apply_inplace_pass
(
forward_builded_program
,
backward_builded_program
)
whole_program
,
backward_start_op_index
,
backward_end_op_index
,
self
.
_build_strategy
,
)
self
.
_apply_inplace_pass
(
forward_builded_program
,
backward_builded_program
)
return
[
forward_builded_program
,
backward_builded_program
]
def
_apply_inplace_pass
(
self
,
forward_program
,
backward_program
):
attr_types
=
{
"use_cuda"
:
"bool"
,
"mem_opt_skip_vars"
:
"list[str]"
,
"for_partial_block"
:
"bool"
"for_partial_block"
:
"bool"
,
}
empty_startup_program
=
paddle
.
static
.
Program
()
use_cuda
=
True
if
core
.
is_compiled_with_cuda
()
else
False
...
...
@@ -667,22 +743,33 @@ class PartialProgramLayer:
forward_mem_opt_skip_vars
.
append
(
var
.
desc
.
name
())
backward_mem_opt_skip_vars
.
append
(
var
.
desc
.
name
())
for
var_name
in
core
.
parse_safe_eager_deletion_skip_vars
(
backward_program
.
desc
):
backward_program
.
desc
):
forward_mem_opt_skip_vars
.
append
(
var_name
)
attrs
=
{
"use_cuda"
:
use_cuda
,
"mem_opt_skip_vars"
:
forward_mem_opt_skip_vars
,
"for_partial_block"
:
True
"for_partial_block"
:
True
,
}
_apply_pass
(
forward_program
,
empty_startup_program
,
"buffer_shared_inplace_pass"
,
attrs
,
attr_types
)
_apply_pass
(
forward_program
,
empty_startup_program
,
"buffer_shared_inplace_pass"
,
attrs
,
attr_types
,
)
attrs
=
{
"use_cuda"
:
use_cuda
,
"mem_opt_skip_vars"
:
backward_mem_opt_skip_vars
,
"for_partial_block"
:
True
"for_partial_block"
:
True
,
}
_apply_pass
(
backward_program
,
empty_startup_program
,
"buffer_shared_inplace_pass"
,
attrs
,
attr_types
)
_apply_pass
(
backward_program
,
empty_startup_program
,
"buffer_shared_inplace_pass"
,
attrs
,
attr_types
,
)
def
_prepare
(
self
,
inputs
):
"""
...
...
@@ -698,23 +785,28 @@ class PartialProgramLayer:
if
isinstance
(
value
,
np
.
ndarray
):
var
=
None
if
not
framework
.
_in_eager_mode_
:
var
=
core
.
VarBase
(
value
=
value
,
var
=
core
.
VarBase
(
value
=
value
,
name
=
self
.
_inputs
[
i
].
desc
.
name
(),
persistable
=
False
,
place
=
expected_place
,
zero_copy
=
True
)
zero_copy
=
True
,
)
else
:
var
=
core
.
eager
.
Tensor
(
value
=
value
,
var
=
core
.
eager
.
Tensor
(
value
=
value
,
name
=
self
.
_inputs
[
i
].
desc
.
name
(),
persistable
=
False
,
place
=
expected_place
,
zero_copy
=
True
)
zero_copy
=
True
,
)
elif
isinstance
(
value
,
(
core
.
VarBase
,
core
.
eager
.
Tensor
)):
# NOTE(Aurelius84): If var is on CPUPlace, it will be transformed multi times
# into CUDAPlace when it's as input of multi Ops. so we move it in advance
# to avoid this problem.
if
value
.
stop_gradient
and
not
value
.
place
.
_equals
(
expected_place
):
expected_place
):
var
=
value
.
_copy_to
(
expected_place
,
False
)
var
.
stop_gradient
=
True
else
:
...
...
@@ -737,12 +829,21 @@ class PartialProgramLayer:
return
out_varbase_map
[
var_desc
.
name
()]
if
not
framework
.
_in_eager_mode_
:
var_base
=
core
.
VarBase
(
var_desc
.
dtype
(),
var_desc
.
shape
(),
var_desc
.
name
(),
var_desc
.
type
(),
False
)
var_base
=
core
.
VarBase
(
var_desc
.
dtype
(),
var_desc
.
shape
(),
var_desc
.
name
(),
var_desc
.
type
(),
False
,
)
else
:
var_base
=
core
.
eager
.
Tensor
(
var_desc
.
dtype
(),
var_desc
.
shape
(),
var_desc
.
name
(),
var_desc
.
type
(),
False
)
var_base
=
core
.
eager
.
Tensor
(
var_desc
.
dtype
(),
var_desc
.
shape
(),
var_desc
.
name
(),
var_desc
.
type
(),
False
,
)
var_base
.
stop_gradient
=
var
.
stop_gradient
out_varbase_map
[
var_desc
.
name
()]
=
var_base
return
var_base
...
...
@@ -755,20 +856,30 @@ class PartialProgramLayer:
def
_create_scope_vec
(
self
,
program_id
=
None
,
use_scope_cache
=
False
):
# Hold forward variables
tmp_scope_vec
=
None
inner_scope
=
self
.
_get_scope
(
program_id
=
program_id
,
use_scope_cache
=
use_scope_cache
)
inner_scope
=
self
.
_get_scope
(
program_id
=
program_id
,
use_scope_cache
=
use_scope_cache
)
if
not
framework
.
_in_eager_mode_
:
tmp_scope_vec
=
core
.
VarBase
(
core
.
VarDesc
.
VarType
.
FP32
,
[],
tmp_scope_vec
=
core
.
VarBase
(
core
.
VarDesc
.
VarType
.
FP32
,
[],
"program_out_scope"
,
core
.
VarDesc
.
VarType
.
STEP_SCOPES
,
True
)
core
.
VarDesc
.
VarType
.
STEP_SCOPES
,
True
,
)
tmp_scope_vec
.
value
().
set_scope
(
inner_scope
)
else
:
tmp_scope_vec
=
[
inner_scope
]
return
tmp_scope_vec
def
_create_cuda_graph_vec
(
self
):
var
=
core
.
VarBase
(
core
.
VarDesc
.
VarType
.
FP32
,
[],
"cuda_graph"
,
core
.
VarDesc
.
VarType
.
RAW
,
True
)
var
=
core
.
VarBase
(
core
.
VarDesc
.
VarType
.
FP32
,
[],
"cuda_graph"
,
core
.
VarDesc
.
VarType
.
RAW
,
True
,
)
var
.
stop_gradient
=
True
return
var
...
...
@@ -791,8 +902,9 @@ class PartialProgramLayer:
return
main_program
.
clone
(
for_test
=
True
)
def
_is_no_value
(
self
,
var
):
if
isinstance
(
var
,
(
core
.
VarBase
,
core
.
eager
.
Tensor
))
and
var
.
shape
==
[
1
]:
if
isinstance
(
var
,
(
core
.
VarBase
,
core
.
eager
.
Tensor
))
and
var
.
shape
==
[
1
]:
# NOTE: .numpy() will insert MemcpySync operation, it hits performance.
if
var
.
numpy
()[
0
]
==
RETURN_NO_VALUE_MAGIC_NUM
:
return
True
...
...
@@ -808,13 +920,14 @@ class PartialProgramLayer:
return
out_vars
elif
isinstance
(
out_vars
,
(
tuple
,
list
)):
if
isinstance
(
out_vars
,
tuple
):
res
=
tuple
(
var
for
var
in
out_vars
if
not
self
.
_is_no_value
(
var
))
res
=
tuple
(
var
for
var
in
out_vars
if
not
self
.
_is_no_value
(
var
)
)
else
:
# isinstance(out_vars, list)
res
=
[
var
for
var
in
out_vars
if
not
self
.
_is_no_value
(
var
)]
has_removed
=
(
len
(
out_vars
)
>
len
(
res
)
)
has_removed
=
len
(
out_vars
)
>
len
(
res
)
# len(out_vars) > len(res) means we have removed var. This is
# preventing out_vars is empty or just one element at the beginning
if
len
(
res
)
==
0
and
has_removed
:
...
...
@@ -835,7 +948,8 @@ class PartialProgramLayer:
for
param
in
params
:
grad_name
=
param
.
name
+
core
.
grad_var_suffix
()
grad_var
=
train_program
.
desc
.
block
(
0
).
find_var
(
cpt
.
to_bytes
(
grad_name
))
cpt
.
to_bytes
(
grad_name
)
)
# NOTE: cannot find var desc maybe no problem, such as in batch_norm
if
grad_var
is
None
:
continue
...
...
@@ -864,15 +978,18 @@ class PartialProgramLayer:
if
not
isinstance
(
self
.
_params
,
(
list
,
tuple
)):
raise
TypeError
(
"Type of self._params in PartialProgramLayer should be list or tuple, but received %s."
%
type
(
self
.
_params
))
%
type
(
self
.
_params
)
)
param_and_buffer_names_set
=
set
()
for
i
,
var
in
enumerate
(
self
.
_params
):
# self._params constains parameters and buffers with persistable=True.
if
not
isinstance
(
var
,
(
core
.
VarBase
,
core
.
eager
.
Tensor
)):
raise
TypeError
(
'Type of self._params[{}] in PartialProgramLayer should be Parameter or Variable, but received {}.'
.
format
(
i
,
type
(
var
)))
'Type of self._params[{}] in PartialProgramLayer should be Parameter or Variable, but received {}.'
.
format
(
i
,
type
(
var
)
)
)
param_and_buffer_names_set
.
add
(
var
.
name
)
for
block
in
main_program
.
blocks
:
...
...
@@ -886,15 +1003,11 @@ class PartialProgramLayer:
"
\n\t
Revise suggestion: "
"
\n\t\t
1. Please ensure all your sublayers are inheritted from nn.Layer."
"
\n\t\t
2. Please use nn.ParameterList and nn.LayerList as container instead of using a native Python container such as List"
%
name
)
%
name
)
def
_valid_vars
(
self
,
vars
):
"""
Note: run_program_op.InferShape requires `X`/'Out' not be null.
But it's common in dy2static, fake varBase is created to handle the
problem.
"""
return
vars
if
vars
else
self
.
__fake_vars
return
vars
if
vars
else
None
def
_create_fake_var
():
...
...
@@ -903,13 +1016,23 @@ def _create_fake_var():
"""
if
not
framework
.
_in_eager_mode_
:
return
[
core
.
VarBase
(
core
.
VarDesc
.
VarType
.
FP32
,
[],
"Fake_var"
,
core
.
VarDesc
.
VarType
.
RAW
,
False
)
core
.
VarBase
(
core
.
VarDesc
.
VarType
.
FP32
,
[],
"Fake_var"
,
core
.
VarDesc
.
VarType
.
RAW
,
False
,
)
]
else
:
return
[
core
.
eager
.
Tensor
(
core
.
VarDesc
.
VarType
.
FP32
,
[],
"Fake_var"
,
core
.
VarDesc
.
VarType
.
RAW
,
False
)
core
.
eager
.
Tensor
(
core
.
VarDesc
.
VarType
.
FP32
,
[],
"Fake_var"
,
core
.
VarDesc
.
VarType
.
RAW
,
False
,
)
]
...
...
@@ -918,23 +1041,27 @@ def partial_program_from(concrete_program):
if
inputs
and
isinstance
(
inputs
[
0
],
layers
.
Layer
):
inputs
=
inputs
[
1
:]
return
PartialProgramLayer
(
concrete_program
.
main_program
,
inputs
,
return
PartialProgramLayer
(
concrete_program
.
main_program
,
inputs
,
concrete_program
.
outputs
,
concrete_program
.
parameters
,
**
concrete_program
.
kwargs
)
**
concrete_program
.
kwargs
)
@
switch_to_static_graph
def
add_build_strategy_for
(
program
,
start_op_index
,
end_op_index
,
build_strategy
=
None
):
if
(
start_op_index
<
end_op_index
):
def
add_build_strategy_for
(
program
,
start_op_index
,
end_op_index
,
build_strategy
=
None
):
if
start_op_index
<
end_op_index
:
compiled_program
=
paddle
.
static
.
CompiledProgram
(
core
.
Graph
(
program
.
desc
,
start_op_index
,
end_op_index
),
build_strategy
=
build_strategy
)
compiled_program
.
_compile
(
core
.
Scope
(),
framework
.
_current_expected_place
())
build_strategy
=
build_strategy
,
)
compiled_program
.
_compile
(
core
.
Scope
(),
framework
.
_current_expected_place
()
)
ir_graph
=
framework
.
IrGraph
(
compiled_program
.
_graph
)
builded_program
=
ir_graph
.
to_program
()
if
hasattr
(
compiled_program
.
_program
,
'lr_sheduler'
):
...
...
python/paddle/fluid/dygraph/layers.py
View file @
dbe08e9b
...
...
@@ -32,12 +32,25 @@ from . import parallel_helper
from
..
import
unique_name
from
paddle.fluid
import
core
from
.layer_object_helper
import
LayerObjectHelper
from
.layer_hooks
import
record_program_ops_pre_hook
,
set_op_customized_attrs_post_hook
,
LayerOpsRecoder
from
.base
import
program_desc_tracing_guard
,
param_guard
,
in_declarative_mode
,
_convert_into_variable
from
.layer_hooks
import
(
record_program_ops_pre_hook
,
set_op_customized_attrs_post_hook
,
LayerOpsRecoder
,
)
from
.base
import
(
program_desc_tracing_guard
,
param_guard
,
in_declarative_mode
,
_convert_into_variable
,
)
from
paddle.fluid
import
framework
from
..param_attr
import
ParamAttr
from
paddle.fluid.executor
import
Executor
,
global_scope
from
paddle.fluid.framework
import
_non_static_mode
,
convert_np_dtype_to_dtype_
,
in_dygraph_mode
from
paddle.fluid.framework
import
(
_non_static_mode
,
convert_np_dtype_to_dtype_
,
in_dygraph_mode
,
)
from
paddle.fluid.framework
import
Program
,
program_guard
from
paddle.fluid.framework
import
_current_expected_place
as
_get_device
from
paddle.fluid.core
import
VarDesc
...
...
@@ -67,7 +80,7 @@ def _addindent(string, indent):
class
HookRemoveHelper
(
object
):
"""
A HookRemoveHelper that can be used to remove hook.
"""
"""A HookRemoveHelper that can be used to remove hook."""
next_hook_id
=
0
...
...
@@ -153,13 +166,14 @@ class Layer(object):
def
train
(
self
):
"""
Sets this Layer and all its sublayers to training mode.
This only effects certain modules like `Dropout` and `BatchNorm`.
Returns:
None
Example
:
:
Example
s
:
.. code-block:: python
import paddle
...
...
@@ -236,6 +250,7 @@ class Layer(object):
def
apply
(
self
,
fn
):
"""
Applies ``fn`` recursively to every sublayer (as returned by ``.sublayers()``)
as well as self. Typical use includes initializing the parameters of a model.
...
...
@@ -243,7 +258,7 @@ class Layer(object):
fn (function): a function to be applied to each sublayer
Returns:
Layer
:
self
Layer
,
self
Example::
.. code-block:: python
...
...
@@ -263,6 +278,7 @@ class Layer(object):
net.apply(init_weights)
print(net.state_dict())
"""
for
layer
in
self
.
children
():
layer
.
apply
(
fn
)
...
...
@@ -272,10 +288,12 @@ class Layer(object):
return
self
def
full_name
(
self
):
"""Full name for this layer, composed by name_scope + "/" + MyLayer.__class__.__name__
"""
Full name for this layer, composed by name_scope + "/" + MyLayer.__class__.__name__
Returns:
str
:
full name of this layer.
str
,
full name of this layer.
Example::
.. code-block:: python
...
...
@@ -297,7 +315,9 @@ class Layer(object):
return
self
.
_full_name
def
register_forward_post_hook
(
self
,
hook
):
"""Register a forward post-hook for Layer. The hook will be called after `forward` function has been computed.
"""
Register a forward post-hook for Layer. The hook will be called after `forward` function has been computed.
It should have the following form, `input` and `output` of the `hook` is `input` and `output` of the `Layer` respectively.
User can use forward post-hook to change the output of the Layer or perform information statistics tasks on the Layer.
...
...
@@ -308,7 +328,7 @@ class Layer(object):
hook(function): a function registered as a forward post-hook
Returns:
HookRemoveHelper
:
a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` .
HookRemoveHelper
,
a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` .
Examples:
.. code-block:: python
...
...
@@ -340,13 +360,16 @@ class Layer(object):
# hook change the linear's output to output * 2, so out0 is equal to out1 * 2.
assert (out0.numpy() == (out1.numpy()) * 2).any()
"""
hook_remove_helper
=
HookRemoveHelper
(
self
.
_forward_post_hooks
)
self
.
_forward_post_hooks
[
hook_remove_helper
.
_hook_id
]
=
hook
return
hook_remove_helper
def
register_forward_pre_hook
(
self
,
hook
):
"""Register a forward pre-hook for Layer. The hook will be called before `forward` function has been computed.
"""
Register a forward pre-hook for Layer. The hook will be called before `forward` function has been computed.
It should have the following form, `input` of the `hook` is `input` of the `Layer`,
hook can either return a tuple or a single modified value in the hook. We will wrap the value into a tuple if
...
...
@@ -359,7 +382,7 @@ class Layer(object):
hook(function): a function registered as a forward pre-hook
Returns:
HookRemoveHelper
:
a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` .
HookRemoveHelper
,
a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` .
Examples:
.. code-block:: python
...
...
@@ -398,12 +421,14 @@ class Layer(object):
self
.
_forward_pre_hooks
[
hook_remove_helper
.
_hook_id
]
=
hook
return
hook_remove_helper
def
create_parameter
(
self
,
def
create_parameter
(
self
,
shape
,
attr
=
None
,
dtype
=
None
,
is_bias
=
False
,
default_initializer
=
None
):
default_initializer
=
None
,
):
"""Create parameters for this layer.
Parameters:
...
...
@@ -443,12 +468,15 @@ class Layer(object):
temp_attr
=
copy
.
deepcopy
(
attr
)
if
isinstance
(
temp_attr
,
six
.
string_types
)
and
temp_attr
==
""
:
temp_attr
=
None
return
self
.
_helper
.
create_parameter
(
temp_attr
,
shape
,
dtype
,
is_bias
,
default_initializer
)
return
self
.
_helper
.
create_parameter
(
temp_attr
,
shape
,
dtype
,
is_bias
,
default_initializer
)
@
deprecated
(
since
=
"2.0.0"
,
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.nn.Layer.create_tensor"
,
reason
=
"New api in create_tensor, easier to use."
)
reason
=
"New api in create_tensor, easier to use."
,
)
def
create_variable
(
self
,
name
=
None
,
persistable
=
None
,
dtype
=
None
):
"""
...
...
@@ -488,14 +516,16 @@ class Layer(object):
if
name
is
not
None
:
var_name
=
"."
.
join
([
self
.
_full_name
,
name
])
else
:
var_name
=
unique_name
.
generate
(
"."
.
join
(
[
self
.
_full_name
,
"_generated_var"
]))
var_name
=
unique_name
.
generate
(
"."
.
join
([
self
.
_full_name
,
"_generated_var"
])
)
return
self
.
_helper
.
main_program
.
current_block
().
create_var
(
name
=
var_name
,
persistable
=
persistable
,
dtype
=
dtype
,
type
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
)
type
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
,
)
# TODO: Add more parameter list when we need them
def
create_tensor
(
self
,
name
=
None
,
persistable
=
None
,
dtype
=
None
):
...
...
@@ -538,20 +568,24 @@ class Layer(object):
if
name
is
not
None
:
var_name
=
"."
.
join
([
self
.
_full_name
,
name
])
else
:
var_name
=
unique_name
.
generate
(
"."
.
join
(
[
self
.
_full_name
,
"_generated_var"
]))
var_name
=
unique_name
.
generate
(
"."
.
join
([
self
.
_full_name
,
"_generated_var"
])
)
return
self
.
_helper
.
main_program
.
current_block
().
create_var
(
name
=
var_name
,
persistable
=
persistable
,
dtype
=
dtype
,
type
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
)
type
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
,
)
def
parameters
(
self
,
include_sublayers
=
True
):
"""Returns a list of all Parameters from current layer and its sub-layers.
"""
Returns a list of all Parameters from current layer and its sub-layers.
Returns:
list of Tensor
:
a list of Parameters.
list of Tensor
,
a list of Parameters.
Examples:
.. code-block:: python
...
...
@@ -563,13 +597,17 @@ class Layer(object):
"""
ret
=
[
param
for
_
,
param
in
self
.
named_parameters
(
include_sublayers
=
include_sublayers
)
param
for
_
,
param
in
self
.
named_parameters
(
include_sublayers
=
include_sublayers
)
]
return
ret
def
children
(
self
):
"""Returns an iterator over immediate children layers.
"""
Returns an iterator over immediate children layers.
Yields:
Layer: a child layer
...
...
@@ -619,13 +657,15 @@ class Layer(object):
yield
name
,
layer
def
sublayers
(
self
,
include_self
=
False
):
"""Returns a list of sub layers.
"""
Returns a list of sub layers.
Parameters:
include_self(bool, optional): Whether return self as sublayers. Default: False
Returns:
list of Layer
:
a list of sub layers.
list of Layer
,
a list of sub layers.
Examples:
.. code-block:: python
...
...
@@ -678,9 +718,11 @@ class Layer(object):
"""
params_set
=
set
()
named_sublayers
=
self
.
named_sublayers
(
prefix
=
prefix
,
include_self
=
True
)
if
include_sublayers
else
zip
(
[
prefix
],
[
self
])
named_sublayers
=
(
self
.
named_sublayers
(
prefix
=
prefix
,
include_self
=
True
)
if
include_sublayers
else
zip
([
prefix
],
[
self
])
)
for
layer_prefix
,
sublayer
in
named_sublayers
:
params
=
sublayer
.
_parameters
.
items
()
for
key
,
param
in
params
:
...
...
@@ -724,9 +766,9 @@ class Layer(object):
if
layer
is
None
:
continue
layer_prefix
=
prefix
+
(
'.'
if
prefix
else
''
)
+
key
for
p
,
l
in
layer
.
named_sublayers
(
prefix
=
layer_prefix
,
include_self
=
True
,
layers_set
=
layers_set
):
for
p
,
l
in
layer
.
named_sublayers
(
prefix
=
layer_prefix
,
include_self
=
True
,
layers_set
=
layers_set
):
yield
p
,
l
def
register_buffer
(
self
,
name
,
tensor
,
persistable
=
True
):
...
...
@@ -769,25 +811,32 @@ class Layer(object):
if
'_buffers'
not
in
self
.
__dict__
:
raise
ValueError
(
"super(YourLayer, self).__init__() should be called first"
)
"super(YourLayer, self).__init__() should be called first"
)
elif
not
isinstance
(
name
,
six
.
string_types
):
raise
TypeError
(
"The name of buffer should be a string, but received {}."
.
format
(
type
(
name
).
__name__
))
"The name of buffer should be a string, but received {}."
.
format
(
type
(
name
).
__name__
)
)
elif
'.'
in
name
:
raise
KeyError
(
"The name of buffer can not contain `.`, "
"because when you access the newly added buffer in the "
"form of `self.**.**`, it will cause AttributeError."
)
"form of `self.**.**`, it will cause AttributeError."
)
elif
name
==
''
:
raise
KeyError
(
"The name of buffer can not be empty."
)
elif
hasattr
(
self
,
name
)
and
name
not
in
self
.
_buffers
:
raise
KeyError
(
"attribute '{}' already exists."
.
format
(
name
))
elif
tensor
is
not
None
and
not
(
type
(
tensor
)
==
core
.
VarBase
or
type
(
tensor
)
==
core
.
eager
.
Tensor
):
elif
tensor
is
not
None
and
not
(
type
(
tensor
)
==
core
.
VarBase
or
type
(
tensor
)
==
core
.
eager
.
Tensor
):
raise
TypeError
(
"The registered buffer should be a Paddle.Tensor, but received {}."
.
format
(
type
(
tensor
).
__name__
))
"The registered buffer should be a Paddle.Tensor, but received {}."
.
format
(
type
(
tensor
).
__name__
)
)
else
:
self
.
_buffers
[
name
]
=
tensor
if
persistable
:
...
...
@@ -797,13 +846,14 @@ class Layer(object):
def
buffers
(
self
,
include_sublayers
=
True
):
"""
Returns a list of all buffers from current layer and its sub-layers.
Parameters:
include_sublayers(bool, optional): Whether include the buffers of sublayers. If True, also include the buffers from sublayers. Default: True
Returns:
list of Tensor
:
a list of buffers.
list of Tensor
,
a list of buffers.
Examples:
.. code-block:: python
...
...
@@ -820,8 +870,10 @@ class Layer(object):
"""
ret
=
[
buffer
for
_
,
buffer
in
self
.
named_buffers
(
include_sublayers
=
include_sublayers
)
buffer
for
_
,
buffer
in
self
.
named_buffers
(
include_sublayers
=
include_sublayers
)
]
return
ret
...
...
@@ -862,9 +914,11 @@ class Layer(object):
"""
buffers_set
=
set
()
named_sublayers
=
self
.
named_sublayers
(
prefix
=
prefix
,
include_self
=
True
)
if
include_sublayers
else
zip
(
[
prefix
],
[
self
])
named_sublayers
=
(
self
.
named_sublayers
(
prefix
=
prefix
,
include_self
=
True
)
if
include_sublayers
else
zip
([
prefix
],
[
self
])
)
for
layer_prefix
,
sublayer
in
named_sublayers
:
buffers
=
sublayer
.
_buffers
.
items
()
for
key
,
buffer
in
buffers
:
...
...
@@ -910,7 +964,7 @@ class Layer(object):
hook_result
=
forward_pre_hook
(
self
,
inputs
)
if
hook_result
is
not
None
:
if
not
isinstance
(
hook_result
,
tuple
):
hook_result
=
(
hook_result
,
)
hook_result
=
(
hook_result
,)
inputs
=
hook_result
if
not
self
.
_built
:
...
...
@@ -920,16 +974,20 @@ class Layer(object):
# TODO(liuyuhui) Only xpu broadcast parameters here.
# The other device is to call _sync_params_buffers in DataParallel
# to realize the parameter synchronization among multiply cards.
if
parallel_helper
.
_is_data_parallel_mode
(
)
and
paddle
.
is_compiled_with_xpu
():
if
(
parallel_helper
.
_is_data_parallel_mode
()
and
paddle
.
is_compiled_with_xpu
()
):
parallel_helper
.
_broadcast_parameters
(
self
.
_parameters
.
values
())
self
.
_parameters
.
values
()
)
self
.
_built
=
True
if
in_profiler_mode
():
with
profiler
.
RecordEvent
(
self
.
__class__
.
__name__
,
profiler
.
TracerEventType
.
Forward
):
with
profiler
.
RecordEvent
(
self
.
__class__
.
__name__
,
profiler
.
TracerEventType
.
Forward
):
outputs
=
self
.
forward
(
*
inputs
,
**
kwargs
)
else
:
outputs
=
self
.
forward
(
*
inputs
,
**
kwargs
)
...
...
@@ -942,8 +1000,14 @@ class Layer(object):
return
outputs
def
__call__
(
self
,
*
inputs
,
**
kwargs
):
if
(
not
in_declarative_mode
())
and
(
not
self
.
_forward_pre_hooks
)
\
and
(
not
self
.
_forward_post_hooks
)
and
(
not
self
.
_built
)
and
in_dygraph_mode
()
and
(
not
in_profiler_mode
()):
if
(
(
not
in_declarative_mode
())
and
(
not
self
.
_forward_pre_hooks
)
and
(
not
self
.
_forward_post_hooks
)
and
(
not
self
.
_built
)
and
in_dygraph_mode
()
and
(
not
in_profiler_mode
())
):
self
.
_build_once
(
*
inputs
,
**
kwargs
)
return
self
.
forward
(
*
inputs
,
**
kwargs
)
else
:
...
...
@@ -964,7 +1028,9 @@ class Layer(object):
raise
ValueError
(
"Layer shouldn't implement backward"
)
def
add_sublayer
(
self
,
name
,
sublayer
):
"""Adds a sub Layer instance.
"""
Adds a sub Layer instance.
Added sublayer can be accessed by self.name
...
...
@@ -972,7 +1038,7 @@ class Layer(object):
name(str): name of this sublayer.
sublayer(Layer): an instance of Layer.
Returns:
Layer
:
the sublayer passed in.
Layer
,
the sublayer passed in.
Examples:
.. code-block:: python
...
...
@@ -999,8 +1065,9 @@ class Layer(object):
model = MySequential(fc1, fc2)
for prefix, layer in model.named_sublayers():
print(prefix, layer)
"""
assert
(
isinstance
(
sublayer
,
Layer
)
or
sublayer
==
None
)
assert
isinstance
(
sublayer
,
Layer
)
or
sublayer
==
None
self
.
_sub_layers
[
name
]
=
sublayer
return
sublayer
...
...
@@ -1014,7 +1081,7 @@ class Layer(object):
name(str): name of this sublayer.
parameter(Parameter): an instance of Parameter.
Returns:
Parameter
:
the parameter passed in.
Parameter
,
the parameter passed in.
Examples:
.. code-block:: python
...
...
@@ -1037,32 +1104,42 @@ class Layer(object):
"""
if
'_parameters'
not
in
self
.
__dict__
:
raise
RuntimeError
(
"super(YourLayer, self).__init__() should be called firstly."
)
"super(YourLayer, self).__init__() should be called firstly."
)
elif
not
isinstance
(
name
,
six
.
string_types
):
raise
TypeError
(
"The name of parameter should be a string, but received {}."
.
format
(
type
(
name
).
__name__
))
"The name of parameter should be a string, but received {}."
.
format
(
type
(
name
).
__name__
)
)
elif
'.'
in
name
:
raise
KeyError
(
"The name of parameter can not contain `.`, "
"because when you access the newly added parameter in the "
"form of `self.**.**`, it will cause AttributeError."
)
"form of `self.**.**`, it will cause AttributeError."
)
elif
name
==
''
:
raise
KeyError
(
"The name of parameter can not be empty."
)
elif
hasattr
(
self
,
name
)
and
name
not
in
self
.
_parameters
:
raise
KeyError
(
"The parameter '{}' already exists."
.
format
(
name
))
elif
parameter
is
not
None
and
not
isinstance
(
parameter
,
framework
.
Parameter
):
elif
parameter
is
not
None
and
not
isinstance
(
parameter
,
framework
.
Parameter
):
raise
TypeError
(
"The parameter to be added should be a Parameter, but received {}."
.
format
(
type
(
parameter
).
__name__
))
"The parameter to be added should be a Parameter, but received {}."
.
format
(
type
(
parameter
).
__name__
)
)
else
:
if
parameter
is
None
:
self
.
_parameters
[
name
]
=
None
if
len
(
self
.
_loaddict_holder
)
>
0
:
assert
parameter
.
name
in
self
.
_loaddict_holder
,
"Parameter not found, Can't not find [ {} ] in state_dict"
.
format
(
parameter
.
name
)
assert
(
parameter
.
name
in
self
.
_loaddict_holder
),
"Parameter not found, Can't not find [ {} ] in state_dict"
.
format
(
parameter
.
name
)
parameter
.
set_value
(
self
.
_loaddict_holder
[
parameter
.
name
])
...
...
@@ -1081,37 +1158,50 @@ class Layer(object):
"""
def
is_already_registered
(
is_pre_hook
):
layers_hooks
=
self
.
_forward_pre_hooks
if
is_pre_hook
else
self
.
_forward_post_hooks
candidate_hook
=
record_program_ops_pre_hook
if
is_pre_hook
else
set_op_customized_attrs_post_hook
layers_hooks
=
(
self
.
_forward_pre_hooks
if
is_pre_hook
else
self
.
_forward_post_hooks
)
candidate_hook
=
(
record_program_ops_pre_hook
if
is_pre_hook
else
set_op_customized_attrs_post_hook
)
already_registed
=
False
if
layers_hooks
:
last_key
=
next
(
reversed
(
layers_hooks
))
already_registed
=
(
layers_hooks
[
last_key
]
==
candidate_hook
)
already_registed
=
layers_hooks
[
last_key
]
==
candidate_hook
return
already_registed
if
not
isinstance
(
attrs
,
dict
):
raise
TypeError
(
"attrs should be type(dict), but received {}"
.
format
(
type
(
attrs
).
__name__
))
type
(
attrs
).
__name__
)
)
# NOTE: Overwrite behavior for same key.
self
.
_customized_attrs
.
update
(
attrs
)
if
not
is_already_registered
(
is_pre_hook
=
True
):
pre_hook_helper
=
self
.
register_forward_pre_hook
(
record_program_ops_pre_hook
)
record_program_ops_pre_hook
)
assert
len
(
self
.
_op_recorder
.
hooks
)
==
0
self
.
_op_recorder
.
hooks
=
[
pre_hook_helper
]
# manually register post_hook to ensure it is inserted into the head.
if
not
is_already_registered
(
is_pre_hook
=
False
):
post_hook_helper
=
self
.
register_forward_post_hook
(
set_op_customized_attrs_post_hook
)
set_op_customized_attrs_post_hook
)
if
len
(
self
.
_forward_post_hooks
)
>
1
:
self
.
_forward_post_hooks
.
move_to_end
(
post_hook_helper
.
_hook_id
,
last
=
False
)
self
.
_forward_post_hooks
.
move_to_end
(
post_hook_helper
.
_hook_id
,
last
=
False
)
assert
len
(
self
.
_op_recorder
.
hooks
)
==
1
...
...
@@ -1144,7 +1234,6 @@ class Layer(object):
return
object
.
__getattribute__
(
self
,
name
)
def
__setattr__
(
self
,
name
,
value
):
def
_remove_if_exist
(
*
dicts
):
for
d
in
dicts
:
if
name
in
d
:
...
...
@@ -1156,10 +1245,14 @@ class Layer(object):
if
isinstance
(
value
,
framework
.
Parameter
):
if
params
is
None
:
raise
ValueError
(
"super(YourLayer, self).__init__() should be called first"
)
"super(YourLayer, self).__init__() should be called first"
)
if
len
(
self
.
_loaddict_holder
)
>
0
:
assert
value
.
name
in
self
.
_loaddict_holder
,
"Parameter not found, Can't not find [ {} ] in state_dict"
.
format
(
value
.
name
)
assert
(
value
.
name
in
self
.
_loaddict_holder
),
"Parameter not found, Can't not find [ {} ] in state_dict"
.
format
(
value
.
name
)
value
.
set_value
(
self
.
_loaddict_holder
[
value
.
name
])
...
...
@@ -1168,9 +1261,10 @@ class Layer(object):
elif
params
is
not
None
and
name
in
params
:
if
value
is
not
None
:
raise
TypeError
(
"assignment to parameter '{}' should be of type Parameter or None, but got '{}'"
.
format
(
name
,
type
(
value
).
__name__
))
"assignment to parameter '{}' should be of type Parameter or None, but got '{}'"
.
format
(
name
,
type
(
value
).
__name__
)
)
params
[
name
]
=
None
else
:
layers
=
self
.
__dict__
.
get
(
'_sub_layers'
,
None
)
...
...
@@ -1185,9 +1279,10 @@ class Layer(object):
elif
layers
is
not
None
and
name
in
layers
:
if
value
is
not
None
:
raise
TypeError
(
"assignment to sublayer '{}' should be of type Layer or None, but got '{}'"
.
format
(
name
,
type
(
value
).
__name__
))
"assignment to sublayer '{}' should be of type Layer or None, but got '{}'"
.
format
(
name
,
type
(
value
).
__name__
)
)
layers
[
name
]
=
None
else
:
_buffers
=
self
.
__dict__
.
get
(
'_buffers'
,
None
)
...
...
@@ -1196,8 +1291,9 @@ class Layer(object):
raise
ValueError
(
"super(YourLayer, self).__init__() should be called first"
)
_remove_if_exist
(
self
.
__dict__
,
self
.
_parameters
,
self
.
_sub_layers
)
_remove_if_exist
(
self
.
__dict__
,
self
.
_parameters
,
self
.
_sub_layers
)
# Set persistable=False by default. Only `register_buffer` can
# add a persistable buffer.
if
name
not
in
self
.
_buffers
:
...
...
@@ -1211,6 +1307,7 @@ class Layer(object):
# value via `assign`.
if
type
(
value
)
==
framework
.
Variable
:
from
paddle
import
assign
# Note(zhhsplendid): the condition below happens in PaddleGan model,
# but should all non-Variable _buffers[name] be re-assign? We
# should consider it in the future. I current wrote this as
...
...
@@ -1218,18 +1315,23 @@ class Layer(object):
if
in_declarative_mode
()
and
_buffers
[
name
]
is
None
:
raise
RuntimeError
(
'In Dy2stat, self.{0} is a buffer and self.{0} is '
'not allowed to be set to Variable when self.{0} is None.'
.
format
(
name
))
elif
_buffers
[
name
]
is
None
or
type
(
getattr
(
self
,
name
))
==
core
.
VarBase
:
'not allowed to be set to Variable when self.{0} is None.'
.
format
(
name
)
)
elif
(
_buffers
[
name
]
is
None
or
type
(
getattr
(
self
,
name
))
==
core
.
VarBase
):
_buffers
[
name
]
=
assign
(
value
)
else
:
assign
(
value
,
getattr
(
self
,
name
))
elif
value
is
not
None
:
raise
TypeError
(
"assignment to buffers '{}' should be of type core.VarBase or None, but got '{}'"
.
format
(
name
,
type
(
value
).
__name__
))
"assignment to buffers '{}' should be of type core.VarBase or None, but got '{}'"
.
format
(
name
,
type
(
value
).
__name__
)
)
else
:
# Assigning None will remove the buffer, but if re-assign a new varBase to it,
# it will be remarked as a buffer with same `persistable` attribute.
...
...
@@ -1316,10 +1418,12 @@ class Layer(object):
self
.
_state_dict_hooks
[
hook_remove_helper
.
_hook_id
]
=
hook
return
hook_remove_helper
def
_obtain_parameters_buffers
(
self
,
def
_obtain_parameters_buffers
(
self
,
destination
=
None
,
include_sublayers
=
True
,
structured_name_prefix
=
""
):
structured_name_prefix
=
""
,
):
"""
The difference from state_dict() is that state_dict_hook will not be called,
but the original types of parameters and buffers will be maintained.
...
...
@@ -1330,7 +1434,10 @@ class Layer(object):
if
data
is
not
None
:
destination
[
structured_name_prefix
+
name
]
=
data
for
name
,
buffer
in
self
.
_buffers
.
items
():
if
buffer
is
not
None
and
name
not
in
self
.
_non_persistable_buffer_names_set
:
if
(
buffer
is
not
None
and
name
not
in
self
.
_non_persistable_buffer_names_set
):
destination
[
structured_name_prefix
+
name
]
=
buffer
if
include_sublayers
:
...
...
@@ -1339,17 +1446,22 @@ class Layer(object):
destination_temp
=
destination
.
copy
()
destination_temp
.
update
(
layer_item
.
_obtain_parameters_buffers
(
destination_temp
,
include_sublayers
,
structured_name_prefix
+
layer_name
+
"."
))
destination_temp
,
include_sublayers
,
structured_name_prefix
+
layer_name
+
"."
,
)
)
destination
=
destination_temp
return
destination
def
_state_dict_impl
(
self
,
def
_state_dict_impl
(
self
,
destination
=
None
,
include_sublayers
=
True
,
structured_name_prefix
=
""
,
include_non_persistable_buffer
=
False
,
use_hook
=
True
):
use_hook
=
True
,
):
"""
Get all parameters and persistable buffers of current layer and its sub-layers. And set them into a dict
...
...
@@ -1367,7 +1479,10 @@ class Layer(object):
destination
[
structured_name_prefix
+
name
]
=
data
for
name
,
buffer
in
self
.
_buffers
.
items
():
if
not
include_non_persistable_buffer
:
if
buffer
is
not
None
and
name
not
in
self
.
_non_persistable_buffer_names_set
:
if
(
buffer
is
not
None
and
name
not
in
self
.
_non_persistable_buffer_names_set
):
destination
[
structured_name_prefix
+
name
]
=
buffer
else
:
if
buffer
is
not
None
:
...
...
@@ -1379,9 +1494,13 @@ class Layer(object):
destination_temp
=
destination
.
copy
()
destination_temp
.
update
(
layer_item
.
_state_dict_impl
(
destination_temp
,
include_sublayers
,
destination_temp
,
include_sublayers
,
structured_name_prefix
+
layer_name
+
"."
,
include_non_persistable_buffer
,
use_hook
))
include_non_persistable_buffer
,
use_hook
,
)
)
destination
=
destination_temp
if
use_hook
:
for
state_dict_hook
in
self
.
_state_dict_hooks
.
values
():
...
...
@@ -1391,12 +1510,15 @@ class Layer(object):
return
destination
def
to_static_state_dict
(
self
,
def
to_static_state_dict
(
self
,
destination
=
None
,
include_sublayers
=
True
,
structured_name_prefix
=
""
,
use_hook
=
True
):
use_hook
=
True
,
):
'''
Get all parameters and buffers of current layer and its sub-layers. And set them into a dict
Parameters:
...
...
@@ -1405,7 +1527,7 @@ class Layer(object):
use_hook(bool, optional) : If true, the operations contained in _state_dict_hooks will be appended to the destination. Default: True
Retruns:
dict
:
a dict contains all the parameters and persistable buffers.
dict
,
a dict contains all the parameters and persistable buffers.
Examples:
.. code-block:: python
...
...
@@ -1423,13 +1545,16 @@ class Layer(object):
include_sublayers
=
include_sublayers
,
structured_name_prefix
=
structured_name_prefix
,
include_non_persistable_buffer
=
True
,
use_hook
=
use_hook
)
use_hook
=
use_hook
,
)
def
state_dict
(
self
,
def
state_dict
(
self
,
destination
=
None
,
include_sublayers
=
True
,
structured_name_prefix
=
""
,
use_hook
=
True
):
use_hook
=
True
,
):
'''
Get all parameters and persistable buffers of current layer and its sub-layers. And set them into a dict
...
...
@@ -1457,7 +1582,8 @@ class Layer(object):
include_sublayers
=
include_sublayers
,
structured_name_prefix
=
structured_name_prefix
,
include_non_persistable_buffer
=
False
,
use_hook
=
use_hook
)
use_hook
=
use_hook
,
)
@
framework
.
deprecate_stat_dict
def
set_state_dict
(
self
,
state_dict
,
use_structured_name
=
True
):
...
...
@@ -1489,22 +1615,31 @@ class Layer(object):
state
=
state_dict
.
get
(
key
,
None
)
if
state
is
None
:
raise
ValueError
(
"{} is not found in the provided dict."
.
format
(
key
))
if
(
isinstance
(
state
,
dict
)
or
isinstance
(
state
,
list
)):
if
(
len
(
state
)
!=
len
(
param
)):
raise
ValueError
(
"{} receieves the length of {}, "
"{} is not found in the provided dict."
.
format
(
key
)
)
if
isinstance
(
state
,
dict
)
or
isinstance
(
state
,
list
):
if
len
(
state
)
!=
len
(
param
):
raise
ValueError
(
"{} receieves the length of {}, "
"but the expected shape is {}"
.
format
(
key
,
len
(
state
),
len
(
param
)))
key
,
len
(
state
),
len
(
param
)
)
)
else
:
return
param
,
state
else
:
state_shape
=
state
.
shape
()
if
inspect
.
ismethod
(
state
.
shape
)
else
state
.
shape
state_shape
=
(
state
.
shape
()
if
inspect
.
ismethod
(
state
.
shape
)
else
state
.
shape
)
if
list
(
state_shape
)
!=
list
(
param
.
shape
):
raise
ValueError
(
"{} receives a shape {}, but the expected shape is {}."
.
format
(
key
,
list
(
state_shape
),
list
(
param
.
shape
)))
"{} receives a shape {}, but the expected shape is {}."
.
format
(
key
,
list
(
state_shape
),
list
(
param
.
shape
)
)
)
return
param
,
state
matched_param_state
=
[]
...
...
@@ -1541,8 +1676,10 @@ class Layer(object):
executor
=
Executor
(
_get_device
()).
_default_executor
# restore parameter states
core
.
_create_loaded_parameter
(
[
param
for
param
,
state
in
matched_param_state
],
global_scope
(),
executor
)
[
param
for
param
,
state
in
matched_param_state
],
global_scope
(),
executor
,
)
for
param
,
state
in
matched_param_state
:
_set_var
(
param
,
state
)
...
...
@@ -1594,11 +1731,13 @@ class Layer(object):
# [ 0.33960250, 0.96878713]])
'''
return
self
.
_to_impl
(
device
=
device
,
return
self
.
_to_impl
(
device
=
device
,
dtype
=
dtype
,
blocking
=
blocking
,
include_sublayers
=
True
,
floating_only
=
False
)
floating_only
=
False
,
)
def
_apply
(
self
,
func
,
device
,
dtype
,
blocking
,
include_sublayers
=
True
):
if
include_sublayers
:
...
...
@@ -1612,8 +1751,9 @@ class Layer(object):
if
param
.
grad
is
not
None
:
with
no_grad
():
grad_applied
=
func
(
param
.
_grad_ivar
(),
device
,
dtype
,
blocking
)
grad_applied
=
func
(
param
.
_grad_ivar
(),
device
,
dtype
,
blocking
)
for
key
,
buf
in
self
.
_buffers
.
items
():
if
buf
is
not
None
:
...
...
@@ -1637,12 +1777,14 @@ class Layer(object):
# Note(zhangbo): Paddle GPU minimum memory allocation unit is 256 bytes, waiting_alloc_memory will comput ‘t’ occupied memory space.
# Coefficient 1.2 is used to avoid OOM that may occur in this critical state when the memory is just enough.
waiting_alloc_memory
=
(
(
np
.
prod
(
t
.
shape
)
*
size_dtype
)
/
256
+
1
)
*
256
*
1.2
((
np
.
prod
(
t
.
shape
)
*
size_dtype
)
/
256
+
1
)
*
256
*
1.2
)
gpu_memory_available
=
core
.
gpu_memory_available
()
if
gpu_memory_available
<
waiting_alloc_memory
:
# Copy param / Tensor to cpu
t_used
=
t
.
_copy_to
(
paddle
.
CPUPlace
(),
blocking
)
# k-v type will error
t_used
=
t
.
_copy_to
(
paddle
.
CPUPlace
(),
blocking
)
# k-v type will error
# Release mem of t
t
.
value
().
get_tensor
().
_clear
()
else
:
...
...
@@ -1653,7 +1795,8 @@ class Layer(object):
# 2. cast param / Tensor to dtype
if
dtype
is
not
None
and
dtype
!=
t_used
.
dtype
:
with
paddle
.
fluid
.
framework
.
_dygraph_place_guard
(
place
=
t_used
.
place
):
place
=
t_used
.
place
):
t_casted
=
t_used
.
cast
(
dtype
=
dtype
)
else
:
t_casted
=
t_used
...
...
@@ -1671,12 +1814,14 @@ class Layer(object):
return
t
def
_to_impl
(
self
,
def
_to_impl
(
self
,
device
=
None
,
dtype
=
None
,
blocking
=
None
,
include_sublayers
=
True
,
floating_only
=
False
):
floating_only
=
False
,
):
'''
Cast the parameters and buffers of Layer by the give device, dtype and blocking.
...
...
@@ -1705,20 +1850,28 @@ class Layer(object):
if
device
is
not
None
:
if
isinstance
(
device
,
str
):
device
=
paddle
.
device
.
_convert_to_place
(
device
)
elif
isinstance
(
device
,
(
core
.
CPUPlace
,
core
.
CUDAPlace
,
core
.
CUDAPinnedPlace
,
core
.
XPUPlace
)):
elif
isinstance
(
device
,
(
core
.
CPUPlace
,
core
.
CUDAPlace
,
core
.
CUDAPinnedPlace
,
core
.
XPUPlace
,
),
):
pass
else
:
raise
ValueError
(
"device value error, must be str, paddle.CPUPlace(), paddle.CUDAPlace(), paddle.CUDAPinnedPlace() or paddle.XPUPlace(), but the type of device is "
+
type
(
device
).
__name__
)
+
type
(
device
).
__name__
)
if
blocking
is
None
:
blocking
=
True
else
:
assert
isinstance
(
blocking
,
bool
),
"blocking value error, must be the True, False or None"
blocking
,
bool
),
"blocking value error, must be the True, False or None"
def
transform
(
t
,
device
,
dtype
,
blocking
):
if
floating_only
and
(
not
paddle
.
is_floating_point
(
t
)):
...
...
python/paddle/fluid/framework.py
View file @
dbe08e9b
...
...
@@ -1352,12 +1352,13 @@ class ParameterMetaClass(VariableMetaClass):
@
six
.
add_metaclass
(
VariableMetaClass
)
class
Variable
(
object
):
"""
**Notes**:
**The constructor of Variable should not be invoked directly.**
**In Static Graph Mode: Please use** `Block.create_var` **to create a Static variable which has no data until being feed.**
Notes:
The constructor of Variable should not be invoked directly.
In Static Graph Mode: Please use ** `Block.create_var` ** to create a Static variable which has no data until being feed.
**
In Dygraph Mode: Please use** :ref:`api_fluid_dygraph_to_variable` **to create a dygraph variable with real data
**
In Dygraph Mode: Please use
** :ref:`api_fluid_dygraph_to_variable` **
to create a dygraph variable with real data
.
In Fluid, every input and output of an OP is a variable. In most
cases, variables are used for holding different kinds of data or training
...
...
@@ -1514,12 +1515,13 @@ class Variable(object):
def
detach
(
self
):
"""
Returns a new Variable, detached from the current graph.
It will share data with origin Variable and without tensor copy.
In addition, the detached Variable doesn't provide gradient propagation.
Returns:
( :ref:`api_guide_Variable_en` | dtype is same as current Variable)
:
The detached Variable.
( :ref:`api_guide_Variable_en` | dtype is same as current Variable)
,
The detached Variable.
Examples:
.. code-block:: python
...
...
@@ -1533,6 +1535,7 @@ class Variable(object):
# create a detached Variable
y = x.detach()
"""
assert
(
...
...
@@ -2085,6 +2088,7 @@ class Variable(object):
@
property
def
T
(
self
):
"""
Permute current Variable with its dimensions reversed.
If `n` is the dimensions of `x` , `x.T` is equivalent to `x.transpose([n-1, n-2, ..., 0])`.
...
...
@@ -2103,6 +2107,7 @@ class Variable(object):
x_T_np = exe.run(paddle.static.default_main_program(), fetch_list=[x_T])[0]
print(x_T_np.shape)
# (5, 3, 2)
"""
if
len
(
self
.
shape
)
==
1
:
return
self
...
...
@@ -2141,7 +2146,7 @@ class Variable(object):
as ``out = assign(tensor)`` .
Returns:
Variable
:
The cloned Variable.
Variable
,
The cloned Variable.
Examples:
.. code-block:: python
...
...
@@ -2171,6 +2176,7 @@ class Variable(object):
def
_set_error_clip
(
self
,
error_clip
):
"""
Set the error_clip.
Args:
...
...
@@ -2178,11 +2184,13 @@ class Variable(object):
Returns:
None
"""
self
.
error_clip
=
error_clip
def
_set_info
(
self
,
key
,
value
):
"""
Set key-value information for this variable.
Args:
...
...
@@ -2191,6 +2199,7 @@ class Variable(object):
Returns:
None
"""
if
not
hasattr
(
self
,
"_info"
):
self
.
_info
=
{}
...
...
@@ -2198,6 +2207,7 @@ class Variable(object):
def
_get_info
(
self
,
key
):
"""
Get the information of this variable corresponding to key.
Args:
...
...
@@ -2205,6 +2215,7 @@ class Variable(object):
Returns:
object
"""
if
hasattr
(
self
,
"_info"
)
and
key
in
self
.
_info
:
return
self
.
_info
[
key
]
...
...
@@ -2212,7 +2223,9 @@ class Variable(object):
def
_slice_indices
(
self
,
slice
,
length
):
"""
Reference implementation for the slice.indices method.
"""
# Compute step and length as integers.
step
=
1
if
slice
.
step
is
None
else
slice
.
step
...
...
@@ -2383,7 +2396,7 @@ class Variable(object):
Default: None
Returns:
Tensor
:
the value in given scope.
Tensor
,
the value in given scope.
Examples:
.. code-block:: python
...
...
@@ -2438,6 +2451,7 @@ class Variable(object):
def
set_value
(
self
,
value
,
scope
=
None
):
'''
Set the value to the tensor in given scope.
Args:
...
...
@@ -2477,6 +2491,7 @@ class Variable(object):
if var.persistable:
t_load = paddle.load(path+var.name+'.pdtensor')
var.set_value(t_load)
'''
# The 'framework' is a low-level module, and 'executor'
...
...
@@ -2547,10 +2562,11 @@ class Variable(object):
def
size
(
self
):
"""
Returns the number of elements for current Variable, which is a int64 Variable with shape [1]
Returns:
Variable
:
the number of elements for current Variable
Variable
,
the number of elements for current Variable
Examples:
.. code-block:: python
...
...
@@ -2564,6 +2580,7 @@ class Variable(object):
# get the number of elements of the Variable
y = x.size()
"""
output
=
self
.
block
.
create_var
(
...
...
@@ -2578,23 +2595,27 @@ class Variable(object):
def
_set_attr
(
self
,
name
,
val
):
"""
Set the value of attribute by attribute's name.
Args:
name(str): the attribute name.
val(int|str|list): the value of the attribute.
"""
self
.
_update_desc_attr
(
name
,
val
)
def
_has_attr
(
self
,
name
):
"""
Whether this Variable has the attribute with the name `name` or not.
Args:
name(str): the attribute name.
Returns:
bool: True if has this attribute.
bool, True if has this attribute.
"""
return
self
.
desc
.
has_attr
(
name
)
...
...
@@ -2624,7 +2645,7 @@ class Variable(object):
name(str): the attribute name.
Returns:
int|str|list
:
The attribute value. The return value
int|str|list
,
The attribute value. The return value
can be any valid attribute type.
"""
return
self
.
desc
.
attr
(
name
)
...
...
@@ -3196,14 +3217,16 @@ class Operator(object):
def
input
(
self
,
name
):
r
"""
Get the input arguments according to the input parameter name.
Args:
name(str): The input parameter name.
Returns:
list
:
return the list of argument names that associated with \
list
,
return the list of argument names that associated with \
the specific parameter name.
"""
return
self
.
desc
.
input
(
name
)
...
...
python/paddle/fluid/layers/metric_op.py
View file @
dbe08e9b
...
...
@@ -20,7 +20,13 @@ from __future__ import print_function
import
warnings
from
..layer_helper
import
LayerHelper
from
..initializer
import
Normal
,
Constant
from
..framework
import
Variable
,
_non_static_mode
,
_varbase_creator
,
_in_legacy_dygraph
,
in_dygraph_mode
from
..framework
import
(
Variable
,
_non_static_mode
,
_varbase_creator
,
_in_legacy_dygraph
,
in_dygraph_mode
,
)
from
..
import
core
from
..param_attr
import
ParamAttr
from
.
import
nn
...
...
@@ -33,22 +39,29 @@ __all__ = ['accuracy', 'auc']
def
accuracy
(
input
,
label
,
k
=
1
,
correct
=
None
,
total
=
None
):
"""
accuracy layer.
Refer to the https://en.wikipedia.org/wiki/Precision_and_recall
This function computes the accuracy using the input and label.
If the correct label occurs in top k predictions, then correct will increment by one.
Note: the dtype of accuracy is determined by input. the input and label dtype can be different.
Note:
the dtype of accuracy is determined by input. the input and label dtype can be different.
Args:
input(Tensor): The input of accuracy layer, which is the predictions of network. A Tensor with type float32,float64.
The shape is ``[sample_number, class_dim]`` .
label(Tensor): The label of dataset. Tensor with type int32,int64. The shape is ``[sample_number, 1]`` .
k(int): The top k predictions for each class will be checked. Data type is int64 or int32.
correct(Tensor): The correct predictions count. A Tensor with type int64 or int32.
total(Tensor): The total entries count. A tensor with type int64 or int32.
k(int, optional): The top k predictions for each class will be checked. Data type is int64 or int32. Default is 1.
correct(Tensor, optional): The correct predictions count. A Tensor with type int64 or int32. Default is None.
total(Tensor, optional): The total entries count. A tensor with type int64 or int32. Default is None.
Returns:
Tensor: The correct rate. A Tensor with type float32.
Tensor, The correct rate. A Tensor with type float32.
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.static as static
...
...
@@ -68,6 +81,7 @@ def accuracy(input, label, k=1, correct=None, total=None):
fetch_list=[result[0]])
print(output)
#[array([0.], dtype=float32)]
"""
if
_non_static_mode
():
if
correct
is
None
:
...
...
@@ -76,15 +90,18 @@ def accuracy(input, label, k=1, correct=None, total=None):
total
=
_varbase_creator
(
dtype
=
"int32"
)
_k
=
k
.
numpy
().
item
(
0
)
if
isinstance
(
k
,
Variable
)
else
k
topk_out
,
topk_indices
=
_legacy_C_ops
.
top_k_v2
(
input
,
'k'
,
_k
,
'sorted'
,
False
)
_acc
,
_
,
_
=
_legacy_C_ops
.
accuracy
(
topk_out
,
topk_indices
,
label
,
correct
,
total
)
topk_out
,
topk_indices
=
_legacy_C_ops
.
top_k_v2
(
input
,
'k'
,
_k
,
'sorted'
,
False
)
_acc
,
_
,
_
=
_legacy_C_ops
.
accuracy
(
topk_out
,
topk_indices
,
label
,
correct
,
total
)
return
_acc
helper
=
LayerHelper
(
"accuracy"
,
**
locals
())
check_variable_and_dtype
(
input
,
'input'
,
[
'float16'
,
'float32'
,
'float64'
],
'accuracy'
)
check_variable_and_dtype
(
input
,
'input'
,
[
'float16'
,
'float32'
,
'float64'
],
'accuracy'
)
topk_out
=
helper
.
create_variable_for_type_inference
(
dtype
=
input
.
dtype
)
topk_indices
=
helper
.
create_variable_for_type_inference
(
dtype
=
"int64"
)
inputs
=
{
"X"
:
[
input
]}
...
...
@@ -93,39 +110,38 @@ def accuracy(input, label, k=1, correct=None, total=None):
else
:
attrs
=
{
'k'
:
k
}
attrs
[
'sorted'
]
=
False
helper
.
append_op
(
type
=
"top_k_v2"
,
helper
.
append_op
(
type
=
"top_k_v2"
,
inputs
=
inputs
,
attrs
=
attrs
,
outputs
=
{
"Out"
:
[
topk_out
],
"Indices"
:
[
topk_indices
]
})
outputs
=
{
"Out"
:
[
topk_out
],
"Indices"
:
[
topk_indices
]},
)
acc_out
=
helper
.
create_variable_for_type_inference
(
dtype
=
"float32"
)
if
correct
is
None
:
correct
=
helper
.
create_variable_for_type_inference
(
dtype
=
"int32"
)
if
total
is
None
:
total
=
helper
.
create_variable_for_type_inference
(
dtype
=
"int32"
)
helper
.
append_op
(
type
=
"accuracy"
,
inputs
=
{
"Out"
:
[
topk_out
],
"Indices"
:
[
topk_indices
],
"Label"
:
[
label
]
},
helper
.
append_op
(
type
=
"accuracy"
,
inputs
=
{
"Out"
:
[
topk_out
],
"Indices"
:
[
topk_indices
],
"Label"
:
[
label
]},
outputs
=
{
"Accuracy"
:
[
acc_out
],
"Correct"
:
[
correct
],
"Total"
:
[
total
],
})
},
)
return
acc_out
def
auc
(
input
,
def
auc
(
input
,
label
,
curve
=
'ROC'
,
num_thresholds
=
2
**
12
-
1
,
topk
=
1
,
slide_steps
=
1
,
ins_tag_weight
=
None
):
ins_tag_weight
=
None
,
):
"""
**Area Under the Curve (AUC) Layer**
...
...
@@ -216,13 +232,14 @@ def auc(input,
helper
=
LayerHelper
(
"auc"
,
**
locals
())
if
ins_tag_weight
is
None
:
ins_tag_weight
=
tensor
.
fill_constant
(
shape
=
[
1
,
1
],
dtype
=
"float32"
,
value
=
1.0
)
ins_tag_weight
=
tensor
.
fill_constant
(
shape
=
[
1
,
1
],
dtype
=
"float32"
,
value
=
1.0
)
check_variable_and_dtype
(
input
,
'input'
,
[
'float32'
,
'float64'
],
'auc'
)
check_variable_and_dtype
(
label
,
'label'
,
[
'int32'
,
'int64'
],
'auc'
)
check_variable_and_dtype
(
ins_tag_weight
,
'ins_tag_weight'
,
[
'float32'
,
'float64'
],
'auc'
)
check_variable_and_dtype
(
ins_tag_weight
,
'ins_tag_weight'
,
[
'float32'
,
'float64'
],
'auc'
)
auc_out
=
helper
.
create_variable_for_type_inference
(
dtype
=
"float64"
)
batch_auc_out
=
helper
.
create_variable_for_type_inference
(
dtype
=
"float64"
)
# make tp, tn, fp, fn persistable, so that can accumulate all batches.
...
...
@@ -236,62 +253,71 @@ def auc(input,
batch_stat_pos
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
,
shape
=
[(
1
+
slide_steps
)
*
(
num_thresholds
+
1
)
+
1
])
shape
=
[(
1
+
slide_steps
)
*
(
num_thresholds
+
1
)
+
1
],
)
batch_stat_neg
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
,
shape
=
[(
1
+
slide_steps
)
*
(
num_thresholds
+
1
)
+
1
])
shape
=
[(
1
+
slide_steps
)
*
(
num_thresholds
+
1
)
+
1
],
)
# for global auc
# Needn't maintain the batch id
stat_pos
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
,
shape
=
[
1
,
num_thresholds
+
1
]
)
stat_neg
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
,
shape
=
[
1
,
num_thresholds
+
1
]
)
stat_pos
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
,
shape
=
[
1
,
num_thresholds
+
1
]
)
stat_neg
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
,
shape
=
[
1
,
num_thresholds
+
1
]
)
for
var
in
[
batch_stat_pos
,
batch_stat_neg
,
stat_pos
,
stat_neg
]:
helper
.
set_variable_initializer
(
var
,
Constant
(
value
=
0.0
,
force_cpu
=
False
))
helper
.
set_variable_initializer
(
var
,
Constant
(
value
=
0.0
,
force_cpu
=
False
)
)
#"InsTagWeight": [ins_tag_weight]
#
"InsTagWeight": [ins_tag_weight]
# Batch AUC
helper
.
append_op
(
type
=
"auc"
,
helper
.
append_op
(
type
=
"auc"
,
inputs
=
{
"Predict"
:
[
input
],
"Label"
:
[
label
],
"StatPos"
:
[
batch_stat_pos
],
"StatNeg"
:
[
batch_stat_neg
]
"StatNeg"
:
[
batch_stat_neg
]
,
},
attrs
=
{
"curve"
:
curve
,
"num_thresholds"
:
num_thresholds
,
"slide_steps"
:
slide_steps
"slide_steps"
:
slide_steps
,
},
outputs
=
{
"AUC"
:
[
batch_auc_out
],
"StatPosOut"
:
[
batch_stat_pos
],
"StatNegOut"
:
[
batch_stat_neg
]
})
"StatNegOut"
:
[
batch_stat_neg
],
},
)
# Global AUC
helper
.
append_op
(
type
=
"auc"
,
helper
.
append_op
(
type
=
"auc"
,
inputs
=
{
"Predict"
:
[
input
],
"Label"
:
[
label
],
"StatPos"
:
[
stat_pos
],
"StatNeg"
:
[
stat_neg
]
"StatNeg"
:
[
stat_neg
]
,
},
attrs
=
{
"curve"
:
curve
,
"num_thresholds"
:
num_thresholds
,
"slide_steps"
:
0
"slide_steps"
:
0
,
},
outputs
=
{
"AUC"
:
[
auc_out
],
"StatPosOut"
:
[
stat_pos
],
"StatNegOut"
:
[
stat_neg
]
})
return
auc_out
,
batch_auc_out
,
[
batch_stat_pos
,
batch_stat_neg
,
stat_pos
,
stat_neg
]
"StatNegOut"
:
[
stat_neg
],
},
)
return
(
auc_out
,
batch_auc_out
,
[
batch_stat_pos
,
batch_stat_neg
,
stat_pos
,
stat_neg
],
)
python/paddle/fluid/layers/nn.py
View file @
dbe08e9b
This source diff could not be displayed because it is too large. You can
view the blob
instead.
python/paddle/fluid/tests/.gitignore
0 → 100644
View file @
dbe08e9b
image/
fit_a_line.model/
tmp
cuda_profiler.txt
python/paddle/fluid/tests/book/.gitignore
0 → 100644
View file @
dbe08e9b
*.inference.model
python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_c_setup.py
View file @
dbe08e9b
...
...
@@ -13,18 +13,19 @@
# limitations under the License.
import
os
from
paddle.fluid
import
core
from
distutils.sysconfig
import
get_python_lib
from
distutils.core
import
setup
,
Extension
from
setuptools
import
Extension
,
setup
from
setuptools.command.build_ext
import
build_ext
from
paddle.fluid
import
core
# refer: https://note.qidong.name/2018/03/setup-warning-strict-prototypes
# Avoid a gcc warning below:
# cc1plus: warning: command line option ‘-Wstrict-prototypes’ is valid
# for C/ObjC but not for C++
class
BuildExt
(
build_ext
):
def
build_extensions
(
self
):
if
'-Wstrict-prototypes'
in
self
.
compiler
.
compiler_so
:
self
.
compiler
.
compiler_so
.
remove
(
'-Wstrict-prototypes'
)
...
...
@@ -48,8 +49,9 @@ paddle_custom_kernel_include = [
os
.
path
.
join
(
site_packages_path
,
'paddle'
,
'include'
),
]
# include path third_party
compile_third_party_path
=
os
.
path
.
join
(
os
.
environ
[
'PADDLE_BINARY_DIR'
],
'third_party'
)
compile_third_party_path
=
os
.
path
.
join
(
os
.
environ
[
'PADDLE_BINARY_DIR'
],
'third_party'
)
paddle_custom_kernel_include
+=
[
os
.
path
.
join
(
compile_third_party_path
,
'install/gflags/include'
),
# gflags
os
.
path
.
join
(
compile_third_party_path
,
'install/glog/include'
),
# glog
...
...
@@ -69,10 +71,13 @@ custom_kernel_dot_module = Extension(
include_dirs
=
paddle_custom_kernel_include
,
library_dirs
=
paddle_custom_kernel_library_dir
,
libraries
=
libs
,
extra_compile_args
=
paddle_extra_compile_args
)
extra_compile_args
=
paddle_extra_compile_args
,
)
setup
(
name
=
'custom_kernel_dot_c'
,
setup
(
name
=
'custom_kernel_dot_c'
,
version
=
'1.0'
,
description
=
'custom kernel fot compiling'
,
cmdclass
=
{
'build_ext'
:
BuildExt
},
ext_modules
=
[
custom_kernel_dot_module
])
ext_modules
=
[
custom_kernel_dot_module
],
)
python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_setup.py
View file @
dbe08e9b
...
...
@@ -14,18 +14,17 @@
import
os
import
site
from
paddle.fluid
import
core
from
distutils.sysconfig
import
get_python_lib
from
distutils.core
import
setup
,
Extension
from
setuptools
import
Extension
,
setup
from
setuptools.command.build_ext
import
build_ext
from
paddle.fluid
import
core
# refer: https://note.qidong.name/2018/03/setup-warning-strict-prototypes
# Avoid a gcc warning below:
# cc1plus: warning: command line option ‘-Wstrict-prototypes’ is valid
# for C/ObjC but not for C++
class
BuildExt
(
build_ext
):
def
build_extensions
(
self
):
if
'-Wstrict-prototypes'
in
self
.
compiler
.
compiler_so
:
self
.
compiler
.
compiler_so
.
remove
(
'-Wstrict-prototypes'
)
...
...
@@ -46,12 +45,15 @@ if core.is_compiled_with_npu():
# include path
site_packages_path
=
site
.
getsitepackages
()
paddle_custom_kernel_include
=
list
(
map
(
lambda
path
:
os
.
path
.
join
(
path
,
'paddle'
,
'include'
),
site_packages_path
))
map
(
lambda
path
:
os
.
path
.
join
(
path
,
'paddle'
,
'include'
),
site_packages_path
)
)
# include path third_party
compile_third_party_path
=
os
.
path
.
join
(
os
.
environ
[
'PADDLE_BINARY_DIR'
],
'third_party'
)
compile_third_party_path
=
os
.
path
.
join
(
os
.
environ
[
'PADDLE_BINARY_DIR'
],
'third_party'
)
paddle_custom_kernel_include
+=
[
os
.
path
.
join
(
compile_third_party_path
,
'install/gflags/include'
),
# gflags
os
.
path
.
join
(
compile_third_party_path
,
'install/glog/include'
),
# glog
...
...
@@ -59,7 +61,8 @@ paddle_custom_kernel_include += [
# libs path
paddle_custom_kernel_library_dir
=
list
(
map
(
lambda
path
:
os
.
path
.
join
(
path
,
'paddle'
,
'fluid'
),
site_packages_path
))
map
(
lambda
path
:
os
.
path
.
join
(
path
,
'paddle'
,
'fluid'
),
site_packages_path
)
)
# libs
libs
=
[
':libpaddle.so'
]
...
...
@@ -70,10 +73,13 @@ custom_kernel_dot_module = Extension(
include_dirs
=
paddle_custom_kernel_include
,
library_dirs
=
paddle_custom_kernel_library_dir
,
libraries
=
libs
,
extra_compile_args
=
paddle_extra_compile_args
)
extra_compile_args
=
paddle_extra_compile_args
,
)
setup
(
name
=
'custom_kernel_dot'
,
setup
(
name
=
'custom_kernel_dot'
,
version
=
'1.0'
,
description
=
'custom kernel fot compiling'
,
cmdclass
=
{
'build_ext'
:
BuildExt
},
ext_modules
=
[
custom_kernel_dot_module
])
ext_modules
=
[
custom_kernel_dot_module
],
)
python/paddle/fluid/tests/custom_op/custom_relu_op.cu
View file @
dbe08e9b
...
...
@@ -44,7 +44,7 @@ __global__ void relu_cuda_double_backward_kernel(const data_t* out_data,
data_t
*
ddout_data
,
int64_t
num
)
{
int64_t
gid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
for
(
int64_t
i
=
num
;
i
<
num
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int64_t
i
=
gid
;
i
<
num
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
ddout_data
[
i
]
=
ddx_data
[
i
]
*
(
out_data
[
i
]
>
static_cast
<
data_t
>
(
0.
)
?
static_cast
<
data_t
>
(
1.
)
:
static_cast
<
data_t
>
(
0.
));
...
...
python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
View file @
dbe08e9b
...
...
@@ -21,6 +21,7 @@ import paddle.static as static
import
tempfile
import
subprocess
import
numpy
as
np
from
paddle
import
fluid
from
paddle.vision.transforms
import
Compose
,
Normalize
from
paddle.utils.cpp_extension.extension_utils
import
run_cmd
from
paddle.fluid.framework
import
_test_eager_guard
...
...
@@ -43,12 +44,9 @@ def custom_relu_dynamic(func, device, dtype, np_x, use_func=True):
return
out
.
numpy
(),
t
.
grad
.
numpy
()
def
custom_relu_static
(
func
,
device
,
dtype
,
np_x
,
use_func
=
True
,
test_infer
=
False
):
def
custom_relu_static
(
func
,
device
,
dtype
,
np_x
,
use_func
=
True
,
test_infer
=
False
):
paddle
.
enable_static
()
paddle
.
set_device
(
device
)
...
...
@@ -62,9 +60,11 @@ def custom_relu_static(func,
exe
=
static
.
Executor
()
exe
.
run
(
static
.
default_startup_program
())
# in static mode, x data has been covered by out
out_v
=
exe
.
run
(
static
.
default_main_program
(),
out_v
=
exe
.
run
(
static
.
default_main_program
(),
feed
=
{
'X'
:
np_x
},
fetch_list
=
[
out
.
name
])
fetch_list
=
[
out
.
name
],
)
paddle
.
disable_static
()
return
out_v
...
...
@@ -87,11 +87,11 @@ def custom_relu_static_pe(func, device, dtype, np_x, use_func=True):
# in static mode, x data has been covered by out
compiled_prog
=
static
.
CompiledProgram
(
static
.
default_main_program
()
).
with_data_parallel
(
loss_name
=
out
.
name
,
places
=
places
)
out_v
=
exe
.
run
(
compiled_prog
,
feed
=
{
'X'
:
np_x
},
fetch_list
=
[
out
.
name
]
)
static
.
default_main_program
()
).
with_data_parallel
(
loss_name
=
out
.
name
,
places
=
places
)
out_v
=
exe
.
run
(
compiled_prog
,
feed
=
{
'X'
:
np_x
},
fetch_list
=
[
out
.
name
]
)
paddle
.
disable_static
()
return
out_v
...
...
@@ -103,9 +103,9 @@ def custom_relu_static_inference(func, device, np_data, np_label, path_prefix):
with
static
.
scope_guard
(
static
.
Scope
()):
with
static
.
program_guard
(
static
.
Program
()):
# simple module
data
=
static
.
data
(
name
=
'data'
,
shape
=
[
None
,
1
,
28
,
28
],
dtype
=
'float32'
)
data
=
static
.
data
(
name
=
'data'
,
shape
=
[
None
,
1
,
28
,
28
],
dtype
=
'float32'
)
label
=
static
.
data
(
name
=
'label'
,
shape
=
[
None
,
1
],
dtype
=
'int64'
)
hidden
=
static
.
nn
.
fc
(
data
,
size
=
128
)
...
...
@@ -124,23 +124,21 @@ def custom_relu_static_inference(func, device, np_data, np_label, path_prefix):
# train
for
i
in
range
(
4
):
avg_loss_v
=
exe
.
run
(
static
.
default_main_program
(),
feed
=
{
'data'
:
np_data
,
'label'
:
np_label
},
fetch_list
=
[
avg_loss
])
avg_loss_v
=
exe
.
run
(
static
.
default_main_program
(),
feed
=
{
'data'
:
np_data
,
'label'
:
np_label
},
fetch_list
=
[
avg_loss
],
)
# save inference model
static
.
save_inference_model
(
path_prefix
,
[
data
],
[
predict
],
exe
)
# get train predict value
predict_v
=
exe
.
run
(
static
.
default_main_program
(),
feed
=
{
'data'
:
np_data
,
'label'
:
np_label
},
fetch_list
=
[
predict
])
predict_v
=
exe
.
run
(
static
.
default_main_program
(),
feed
=
{
'data'
:
np_data
,
'label'
:
np_label
},
fetch_list
=
[
predict
],
)
return
predict_v
...
...
@@ -151,30 +149,37 @@ def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
t
=
paddle
.
to_tensor
(
np_x
,
dtype
=
dtype
,
stop_gradient
=
False
)
out
=
func
(
t
)
if
use_func
else
paddle
.
nn
.
functional
.
relu
(
t
)
out
.
stop_gradient
=
False
dx
=
paddle
.
grad
(
outputs
=
[
out
]
,
inputs
=
[
t
]
,
dx
=
paddle
.
grad
(
outputs
=
out
,
inputs
=
t
,
grad_outputs
=
paddle
.
ones_like
(
t
)
,
create_graph
=
True
,
retain_graph
=
True
)
retain_graph
=
True
,
)
dx
[
0
].
backward
()
ddout
=
paddle
.
grad
(
outputs
=
dx
[
0
],
inputs
=
out
.
grad
,
grad_outputs
=
paddle
.
ones_like
(
t
),
create_graph
=
False
,
)
assert
d
x
[
0
].
grad
is
not
None
return
dx
[
0
].
numpy
(),
d
x
[
0
].
grad
.
numpy
()
assert
d
dout
[
0
].
numpy
()
is
not
None
return
dx
[
0
].
numpy
(),
d
dout
[
0
]
.
numpy
()
class
TestNewCustomOpSetUpInstall
(
unittest
.
TestCase
):
def
setUp
(
self
):
cur_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
# compile, install the custom op egg into site-packages under background
if
os
.
name
==
'nt'
:
cmd
=
'cd /d {} && python custom_relu_setup.py install'
.
format
(
cur_dir
)
cur_dir
)
else
:
cmd
=
'cd {} && {} custom_relu_setup.py install'
.
format
(
cur_dir
,
sys
.
executable
)
cur_dir
,
sys
.
executable
)
run_cmd
(
cmd
)
# NOTE(Aurelius84): Normally, it's no need to add following codes for users.
...
...
@@ -190,16 +195,18 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
custom_egg_path
=
[
x
for
x
in
os
.
listdir
(
site_dir
)
if
'custom_relu_module_setup'
in
x
]
assert
len
(
custom_egg_path
)
==
1
,
"Matched egg number is %d."
%
len
(
custom_egg_path
)
assert
len
(
custom_egg_path
)
==
1
,
"Matched egg number is %d."
%
len
(
custom_egg_path
)
sys
.
path
.
append
(
os
.
path
.
join
(
site_dir
,
custom_egg_path
[
0
]))
# usage: import the package directly
import
custom_relu_module_setup
# `custom_relu_dup` is same as `custom_relu_dup`
self
.
custom_ops
=
[
custom_relu_module_setup
.
custom_relu
,
custom_relu_module_setup
.
custom_relu_dup
custom_relu_module_setup
.
custom_relu_dup
,
]
self
.
dtypes
=
[
'float32'
,
'float64'
]
...
...
@@ -222,13 +229,16 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
8
]).
astype
(
dtype
)
for
custom_op
in
self
.
custom_ops
:
out
=
custom_relu_static
(
custom_op
,
device
,
dtype
,
x
)
pd_out
=
custom_relu_static
(
custom_op
,
device
,
dtype
,
x
,
False
)
pd_out
=
custom_relu_static
(
custom_op
,
device
,
dtype
,
x
,
False
)
np
.
testing
.
assert_array_equal
(
out
,
pd_out
,
err_msg
=
'custom op out: {},
\n
paddle api out: {}'
.
format
(
out
,
pd_out
))
err_msg
=
'custom op out: {},
\n
paddle api out: {}'
.
format
(
out
,
pd_out
),
)
def
test_static_pe
(
self
):
for
device
in
self
.
devices
:
...
...
@@ -238,13 +248,16 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
8
]).
astype
(
dtype
)
for
custom_op
in
self
.
custom_ops
:
out
=
custom_relu_static_pe
(
custom_op
,
device
,
dtype
,
x
)
pd_out
=
custom_relu_static_pe
(
custom_op
,
device
,
dtype
,
x
,
False
)
pd_out
=
custom_relu_static_pe
(
custom_op
,
device
,
dtype
,
x
,
False
)
np
.
testing
.
assert_array_equal
(
out
,
pd_out
,
err_msg
=
'custom op out: {},
\n
paddle api out: {}'
.
format
(
out
,
pd_out
))
err_msg
=
'custom op out: {},
\n
paddle api out: {}'
.
format
(
out
,
pd_out
),
)
def
func_dynamic
(
self
):
for
device
in
self
.
devices
:
...
...
@@ -253,20 +266,26 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
continue
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
8
]).
astype
(
dtype
)
for
custom_op
in
self
.
custom_ops
:
out
,
x_grad
=
custom_relu_dynamic
(
custom_op
,
device
,
dtype
,
x
)
out
,
x_grad
=
custom_relu_dynamic
(
custom_op
,
device
,
dtype
,
x
)
pd_out
,
pd_x_grad
=
custom_relu_dynamic
(
custom_op
,
device
,
dtype
,
x
,
False
)
custom_op
,
device
,
dtype
,
x
,
False
)
np
.
testing
.
assert_array_equal
(
out
,
pd_out
,
err_msg
=
'custom op out: {},
\n
paddle api out: {}'
.
format
(
out
,
pd_out
))
err_msg
=
'custom op out: {},
\n
paddle api out: {}'
.
format
(
out
,
pd_out
),
)
np
.
testing
.
assert_array_equal
(
x_grad
,
pd_x_grad
,
err_msg
=
'custom op x grad: {},
\n
paddle api x grad: {}'
.
format
(
x_grad
,
pd_x_grad
))
err_msg
=
'custom op x grad: {},
\n
paddle api x grad: {}'
.
format
(
x_grad
,
pd_x_grad
),
)
def
test_dynamic
(
self
):
with
_test_eager_guard
():
...
...
@@ -279,22 +298,29 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
np_label
=
np
.
random
.
random
((
1
,
1
)).
astype
(
"int64"
)
path_prefix
=
"custom_op_inference/custom_relu"
for
device
in
self
.
devices
:
predict
=
custom_relu_static_inference
(
self
.
custom_ops
[
0
],
device
,
np_data
,
np_label
,
path_prefix
)
predict
=
custom_relu_static_inference
(
self
.
custom_ops
[
0
],
device
,
np_data
,
np_label
,
path_prefix
)
# load inference model
with
static
.
scope_guard
(
static
.
Scope
()):
exe
=
static
.
Executor
()
[
inference_program
,
feed_target_names
,
fetch_targets
]
=
static
.
load_inference_model
(
path_prefix
,
exe
)
predict_infer
=
exe
.
run
(
inference_program
,
[
inference_program
,
feed_target_names
,
fetch_targets
,
]
=
static
.
load_inference_model
(
path_prefix
,
exe
)
predict_infer
=
exe
.
run
(
inference_program
,
feed
=
{
feed_target_names
[
0
]:
np_data
},
fetch_list
=
fetch_targets
)
fetch_list
=
fetch_targets
,
)
np
.
testing
.
assert_array_equal
(
predict
,
predict_infer
,
err_msg
=
'custom op predict: {},
\n
custom op infer predict: {}'
.
format
(
predict
,
predict_infer
))
err_msg
=
'custom op predict: {},
\n
custom op infer predict: {}'
.
format
(
predict
,
predict_infer
),
)
paddle
.
disable_static
()
def
test_static_save_and_run_inference_predictor
(
self
):
...
...
@@ -304,62 +330,80 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
path_prefix
=
"custom_op_inference/custom_relu"
from
paddle.inference
import
Config
from
paddle.inference
import
create_predictor
for
device
in
self
.
devices
:
predict
=
custom_relu_static_inference
(
self
.
custom_ops
[
0
],
device
,
np_data
,
np_label
,
path_prefix
)
predict
=
custom_relu_static_inference
(
self
.
custom_ops
[
0
],
device
,
np_data
,
np_label
,
path_prefix
)
# load inference model
config
=
Config
(
path_prefix
+
".pdmodel"
,
path_prefix
+
".pdiparams"
)
config
=
Config
(
path_prefix
+
".pdmodel"
,
path_prefix
+
".pdiparams"
)
predictor
=
create_predictor
(
config
)
input_tensor
=
predictor
.
get_input_handle
(
predictor
.
get_input_names
()[
0
])
predictor
.
get_input_names
()[
0
]
)
input_tensor
.
reshape
(
np_data
.
shape
)
input_tensor
.
copy_from_cpu
(
np_data
.
copy
())
predictor
.
run
()
output_tensor
=
predictor
.
get_output_handle
(
predictor
.
get_output_names
()[
0
])
predictor
.
get_output_names
()[
0
]
)
predict_infer
=
output_tensor
.
copy_to_cpu
()
self
.
assertTrue
(
np
.
isclose
(
predict
,
predict_infer
,
rtol
=
5e-5
).
any
(),
"custom op predict: {},
\n
custom op infer predict: {}"
.
format
(
predict
,
predict_infer
))
predict
,
predict_infer
),
)
paddle
.
disable_static
()
def
test_func_double_grad_dynamic
(
self
):
def
test_double_grad_dynamic
(
self
):
fluid
.
set_flags
({
"FLAGS_retain_grad_for_all_tensor"
:
True
})
for
device
in
self
.
devices
:
for
dtype
in
self
.
dtypes
:
if
device
==
'cpu'
and
dtype
==
'float16'
:
continue
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
8
]).
astype
(
dtype
)
out
,
dx_grad
=
custom_relu_double_grad_dynamic
(
self
.
custom_ops
[
0
],
device
,
dtype
,
x
)
self
.
custom_ops
[
0
],
device
,
dtype
,
x
)
pd_out
,
pd_dx_grad
=
custom_relu_double_grad_dynamic
(
self
.
custom_ops
[
0
],
device
,
dtype
,
x
,
False
)
self
.
custom_ops
[
0
],
device
,
dtype
,
x
,
False
)
np
.
testing
.
assert_array_equal
(
out
,
pd_out
,
err_msg
=
'custom op out: {},
\n
paddle api out: {}'
.
format
(
out
,
pd_out
))
out
,
pd_out
),
)
np
.
testing
.
assert_array_equal
(
dx_grad
,
pd_dx_grad
,
err_msg
=
'custom op dx grad: {},
\n
paddle api dx grad: {}'
.
format
(
dx_grad
,
pd_dx_grad
))
err_msg
=
'custom op dx grad: {},
\n
paddle api dx grad: {}'
.
format
(
dx_grad
,
pd_dx_grad
),
)
fluid
.
set_flags
({
"FLAGS_retain_grad_for_all_tensor"
:
False
})
def
test_with_dataloader
(
self
):
for
device
in
self
.
devices
:
paddle
.
set_device
(
device
)
# data loader
transform
=
Compose
(
[
Normalize
(
mean
=
[
127.5
],
std
=
[
127.5
],
data_format
=
'CHW'
)])
train_dataset
=
paddle
.
vision
.
datasets
.
MNIST
(
mode
=
'train'
,
transform
=
transform
)
train_loader
=
paddle
.
io
.
DataLoader
(
train_dataset
,
[
Normalize
(
mean
=
[
127.5
],
std
=
[
127.5
],
data_format
=
'CHW'
)]
)
train_dataset
=
paddle
.
vision
.
datasets
.
MNIST
(
mode
=
'train'
,
transform
=
transform
)
train_loader
=
paddle
.
io
.
DataLoader
(
train_dataset
,
batch_size
=
64
,
shuffle
=
True
,
drop_last
=
True
,
num_workers
=
0
)
num_workers
=
0
,
)
for
batch_id
,
(
image
,
_
)
in
enumerate
(
train_loader
()):
out
=
self
.
custom_ops
[
0
](
image
)
...
...
@@ -368,7 +412,9 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
out
,
pd_out
,
err_msg
=
'custom op out: {},
\n
paddle api out: {}'
.
format
(
out
,
pd_out
))
out
,
pd_out
),
)
if
batch_id
==
5
:
break
...
...
python/paddle/fluid/tests/unittests/dygraph_to_static/test_backward_without_params.py
0 → 100644
View file @
dbe08e9b
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
numpy
as
np
import
paddle
class Net(paddle.nn.Layer):
    """A minimal parameter-free Layer: forward simply adds 1 to its input.

    Used to exercise backward() through a to_static-converted forward when
    the network owns no trainable parameters.
    """

    def __init__(self):
        super().__init__()

    @paddle.jit.to_static
    def forward(self, x):
        # Elementwise add of a scalar constant; no parameters involved.
        return x + 1
class TestBackwardWithoutParams(unittest.TestCase):
    """Check that gradients flow back to the input of a parameter-free net."""

    def test_run(self):
        model = Net()
        x = paddle.ones([2, 2])
        x.stop_gradient = False

        y = model(x)
        loss = paddle.mean(y)
        loss.backward()

        # mean over 4 elements of (x + 1) => d(loss)/dx is 1/4 everywhere.
        expected = np.full(x.shape, 0.25)
        np.testing.assert_equal(x.grad.numpy(), expected)
if __name__ == '__main__':
    # Run the test suite when executed as a script.
    unittest.main()
python/paddle/fluid/tests/unittests/dygraph_to_static/test_for_enumerate.py
View file @
dbe08e9b
...
...
@@ -292,7 +292,6 @@ def for_tuple_as_enumerate_value(x_array):
# 20. test for function in a class
class
ForwardContainsForLayer
(
paddle
.
nn
.
Layer
):
def
__init__
(
self
):
super
(
ForwardContainsForLayer
,
self
).
__init__
()
self
.
high
=
5
...
...
@@ -328,8 +327,8 @@ def for_original_tuple():
# 23. for zip error
@
paddle
.
jit
.
to_static
(
input_spec
=
[
InputSpec
(
shape
=
[
None
,
10
]),
InputSpec
(
shape
=
[
None
,
10
])]
)
input_spec
=
[
InputSpec
(
shape
=
[
None
,
10
]),
InputSpec
(
shape
=
[
None
,
10
])]
)
def
for_zip_error
(
x
,
y
):
for
i
,
j
in
zip
(
x
,
y
):
a
=
i
+
j
...
...
@@ -338,8 +337,8 @@ def for_zip_error(x, y):
# 24. for zip
@
paddle
.
jit
.
to_static
(
input_spec
=
[
InputSpec
(
shape
=
[
2
,
10
]),
InputSpec
(
shape
=
[
2
,
10
])]
)
input_spec
=
[
InputSpec
(
shape
=
[
2
,
10
]),
InputSpec
(
shape
=
[
2
,
10
])]
)
def
for_zip
(
x
,
y
):
for
i
,
j
in
zip
(
x
,
y
):
a
=
i
+
j
...
...
@@ -347,10 +346,12 @@ def for_zip(x, y):
class
TestTransformBase
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
place
=
fluid
.
CUDAPlace
(
0
)
if
fluid
.
is_compiled_with_cuda
()
else
fluid
.
CPUPlace
()
self
.
place
=
(
fluid
.
CUDAPlace
(
0
)
if
fluid
.
is_compiled_with_cuda
()
else
fluid
.
CPUPlace
()
)
self
.
set_input
()
self
.
set_test_func
()
...
...
@@ -359,7 +360,8 @@ class TestTransformBase(unittest.TestCase):
def
set_test_func
(
self
):
raise
NotImplementedError
(
"For Enumerate test should implement set_test_func"
)
"For Enumerate test should implement set_test_func"
)
def
_run
(
self
,
to_static
):
program_translator
.
enable
(
to_static
)
...
...
@@ -374,22 +376,21 @@ class TestTransformBase(unittest.TestCase):
class TestTransform(TestTransformBase):
    def transformed_result_compare(self):
        """Assert dygraph and static-graph outputs agree elementwise."""
        dygraph_results = self.get_dygraph_output()
        if not isinstance(dygraph_results, (tuple, list)):
            dygraph_results = (dygraph_results,)

        self.dygraph_func.eval()
        static_results = self.get_static_output()
        if not isinstance(static_results, (tuple, list)):
            static_results = (static_results,)

        for dy_res, st_res in zip(dygraph_results, static_results):
            np.testing.assert_allclose(
                dy_res.numpy(), st_res.numpy(), rtol=1e-05
            )
class
TestTransformForOriginalList
(
TestTransform
):
def
_run
(
self
,
to_static
):
program_translator
.
enable
(
to_static
)
with
fluid
.
dygraph
.
guard
():
...
...
@@ -397,7 +398,6 @@ class TestTransformForOriginalList(TestTransform):
class
TestTransformError
(
TestTransformBase
):
def
transformed_error
(
self
,
etype
):
with
self
.
assertRaises
(
etype
):
dy_out
=
self
.
get_dygraph_output
()
...
...
@@ -405,7 +405,6 @@ class TestTransformError(TestTransformBase):
class
TestForInRange
(
TestTransform
):
    def set_input(self):
        # Test input: a one-element numpy array.
        self.input = np.array([5])
...
...
@@ -417,7 +416,6 @@ class TestForInRange(TestTransform):
class
TestForIterList
(
TestTransform
):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_iter_list
...
...
@@ -426,19 +424,16 @@ class TestForIterList(TestTransform):
class TestForEnumerateSimple(TestForIterList):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_enumerate_list
class TestForInRangeWithBreak(TestForInRange):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_in_range_with_break
class
TestForIterVarNumpy
(
TestTransform
):
    def set_input(self):
        # Test input: a five-element numpy vector.
        self.input = np.array([1, 2, 3, 4, 5])
...
...
@@ -450,103 +445,86 @@ class TestForIterVarNumpy(TestTransform):
class TestForEnumerateVarNumpy(TestForIterVarNumpy):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_enumerate_var_numpy
class TestForEnumerateVarNumpyWithStart(TestForIterVarNumpy):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_enumerate_var_numpy_with_start
class TestForEnumerateVarNumpyWithBreak(TestForIterVarNumpy):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_enumerate_var_numpy_with_break
class TestForEnumerateVarNumpyWithContinue(TestForIterVarNumpy):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_enumerate_var_numpy_with_continue
class TestForEnumerateVarNumpyWithStartAndBreak(TestForIterVarNumpy):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_enumerate_var_numpy_with_start_break
class TestForEnumerateVarNumpyWithStartAndContinue(TestForIterVarNumpy):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_enumerate_var_numpy_with_start_continue
class TestForIterVar(TestForIterVarNumpy):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_iter_var
class TestForIterVarIdx(TestForIterVarNumpy):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_iter_var_idx
class TestForEnumerateVar(TestForIterVarNumpy):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_enumerate_var
class TestForEnumerateVarWithNestedRange(TestForIterVarNumpy):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_enumerate_var_with_nested_range
class TestForIterVarList(TestForInRange):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_iter_var_list
class TestForEnumerateVarList(TestForInRange):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_enumerate_var_list
class TestForTupleAsIterVar(TestForIterVarNumpy):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_tuple_as_iter_var
class TestForTupleAsEnumerateIter(TestForIterVarNumpy):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_tuple_as_enumerate_iter
class TestForTupleAsEnumerateValue(TestForIterVarNumpy):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_tuple_as_enumerate_value
class TestForwardContainsForLayer(TestForIterVarNumpy):
    def set_test_func(self):
        # Callable under test is a ForwardContainsForLayer instance
        # (invoked like the plain functions in the other cases).
        self.dygraph_func = ForwardContainsForLayer()
class
TestForOriginalList
(
TestTransformForOriginalList
):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_original_list
...
...
@@ -555,7 +533,6 @@ class TestForOriginalList(TestTransformForOriginalList):
class
TestForOriginalTuple
(
TestTransformForOriginalList
):
    def set_test_func(self):
        # Select the dygraph function exercised by this case.
        self.dygraph_func = for_original_tuple
...
...
@@ -564,7 +541,6 @@ class TestForOriginalTuple(TestTransformForOriginalList):
class
TestForZip
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
temp_dir
=
tempfile
.
TemporaryDirectory
()
...
...
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_activation.py
View file @
dbe08e9b
...
...
@@ -22,12 +22,10 @@ from typing import Optional, List, Callable, Dict, Any, Set
class
TrtConvertActivationTest
(
TrtLayerAutoScanTest
):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        # Every sampled program configuration is accepted for this converter.
        return True
def
sample_program_configs
(
self
):
def
generate_input1
(
dims
,
batch
,
attrs
:
List
[
Dict
[
str
,
Any
]]):
if
dims
==
1
:
return
np
.
random
.
random
([
32
]).
astype
(
np
.
float32
)
...
...
@@ -41,11 +39,19 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest):
for
dims
in
[
1
,
2
,
3
,
4
]:
for
batch
in
[
1
,
4
]:
for
op_type
in
[
"relu"
,
"sigmoid"
,
"tanh"
,
"relu6"
,
"elu"
,
"selu"
,
"softsign"
,
"stanh"
,
"thresholded_relu"
,
"softplus"
"relu"
,
"sigmoid"
,
"tanh"
,
"relu6"
,
"elu"
,
"selu"
,
"softsign"
,
"stanh"
,
"thresholded_relu"
,
"softplus"
,
]:
# few samples to reduce time
#for beta in [-0.2, 0.5, 0.67, 3]:
#
for beta in [-0.2, 0.5, 0.67, 3]:
# for alpha in [-0.2, 0.5, 0.67, 3]:
for
beta
in
[
0.67
]:
for
alpha
in
[
0.67
]:
...
...
@@ -62,33 +68,34 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest):
if
op_type
==
"softplus"
:
dics
=
[{
"beta"
:
beta
}]
ops_config
=
[{
ops_config
=
[
{
"op_type"
:
op_type
,
"op_inputs"
:
{
"X"
:
[
"input_data"
]
},
"op_outputs"
:
{
"Out"
:
[
"output_data"
]
},
"op_attrs"
:
dics
[
0
]
}]
"op_inputs"
:
{
"X"
:
[
"input_data"
]},
"op_outputs"
:
{
"Out"
:
[
"output_data"
]},
"op_attrs"
:
dics
[
0
],
}
]
ops
=
self
.
generate_op_config
(
ops_config
)
program_config
=
ProgramConfig
(
ops
=
ops
,
weights
=
{},
inputs
=
{
"input_data"
:
TensorConfig
(
data_gen
=
partial
(
generate_input1
,
dims
,
batch
,
dics
))
"input_data"
:
TensorConfig
(
data_gen
=
partial
(
generate_input1
,
dims
,
batch
,
dics
)
)
},
outputs
=
[
"output_data"
])
outputs
=
[
"output_data"
],
)
yield
program_config
def
sample_predictor_configs
(
self
,
program_config
)
->
(
paddle_infer
.
Config
,
List
[
int
],
float
):
self
,
program_config
)
->
(
paddle_infer
.
Config
,
List
[
int
],
float
):
def
generate_dynamic_shape
(
attrs
):
if
self
.
dims
==
1
:
self
.
dynamic_shape
.
min_input_shape
=
{
"input_data"
:
[
1
]}
...
...
@@ -131,19 +138,23 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest):
clear_dynamic_shape
()
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Float32
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
False
),
1e-5
attrs
,
False
),
1e-5
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Half
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
False
),
1e-5
attrs
,
False
),
1e-3
# for dynamic_shape
generate_dynamic_shape
(
attrs
)
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Float32
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
True
),
1e-5
attrs
,
True
),
1e-5
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Half
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
True
),
1e-5
attrs
,
True
),
1e-3
    def test(self):
        # Drive the auto-scan harness over all sampled configs.
        self.run_test()
...
...
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_anchor_generator.py
View file @
dbe08e9b
...
...
@@ -22,60 +22,66 @@ from typing import Optional, List, Callable, Dict, Any, Set
class
TrtConvertAnchorGeneratorTest
(
TrtLayerAutoScanTest
):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        # Every sampled program configuration is accepted for this converter.
        return True
def
sample_program_configs
(
self
):
def
generate_input1
(
batch
,
attrs
:
List
[
Dict
[
str
,
Any
]]):
return
np
.
random
.
random
([
batch
,
3
,
64
,
64
]).
astype
(
np
.
float32
)
for
batch
in
[
1
,
2
,
4
]:
for
anchor_sizes
in
[[
64.0
,
128.0
,
256.0
,
512.0
]]:
for
aspect_ratios
in
[[
0.5
,
1
,
2
],
[
0.4
,
1.2
,
3
]]:
for
variances
in
[[
1.0
,
1.0
,
1.0
,
1.0
],
[
0.5
,
1.0
,
0.5
,
1.0
]]:
for
variances
in
[
[
1.0
,
1.0
,
1.0
,
1.0
],
[
0.5
,
1.0
,
0.5
,
1.0
],
]:
for
stride
in
[[
16.0
,
16.0
],
[
16.0
,
32.0
]]:
for
offset
in
[
0.5
,
0.8
]:
dics
=
[{
dics
=
[
{
"anchor_sizes"
:
anchor_sizes
,
"aspect_ratios"
:
aspect_ratios
,
"variances"
:
variances
,
"stride"
:
stride
,
"offset"
:
offset
}]
"offset"
:
offset
,
}
]
ops_config
=
[{
ops_config
=
[
{
"op_type"
:
"anchor_generator"
,
"op_inputs"
:
{
"Input"
:
[
"input_data"
]
},
"op_inputs"
:
{
"Input"
:
[
"input_data"
]},
"op_outputs"
:
{
"Anchors"
:
[
"output_anchors"
],
"Variances"
:
[
"output_variances"
]
"Variances"
:
[
"output_variances"
]
,
},
"op_attrs"
:
dics
[
0
]
}]
"op_attrs"
:
dics
[
0
],
}
]
ops
=
self
.
generate_op_config
(
ops_config
)
program_config
=
ProgramConfig
(
ops
=
ops
,
weights
=
{},
inputs
=
{
"input_data"
:
TensorConfig
(
data_gen
=
partial
(
generate_input1
,
batch
,
dics
))
"input_data"
:
TensorConfig
(
data_gen
=
partial
(
generate_input1
,
batch
,
dics
)
)
},
outputs
=
[
"output_anchors"
,
"output_variances"
])
"output_anchors"
,
"output_variances"
,
],
)
yield
program_config
def
sample_predictor_configs
(
self
,
program_config
)
->
(
paddle_infer
.
Config
,
List
[
int
],
float
):
self
,
program_config
)
->
(
paddle_infer
.
Config
,
List
[
int
],
float
):
def
generate_dynamic_shape
(
attrs
):
self
.
dynamic_shape
.
min_input_shape
=
{
"input_data"
:
[
1
,
3
,
32
,
32
]}
self
.
dynamic_shape
.
max_input_shape
=
{
"input_data"
:
[
4
,
3
,
64
,
64
]}
...
...
@@ -100,19 +106,23 @@ class TrtConvertAnchorGeneratorTest(TrtLayerAutoScanTest):
clear_dynamic_shape
()
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Float32
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
False
),
1e-5
attrs
,
False
),
1e-5
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Half
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
False
),
1e-5
attrs
,
False
),
1e-3
# for dynamic_shape
generate_dynamic_shape
(
attrs
)
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Float32
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
True
),
1e-5
attrs
,
True
),
1e-5
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Half
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
True
),
1e-5
attrs
,
True
),
1e-3
    def test(self):
        # Drive the auto-scan harness over all sampled configs.
        self.run_test()
...
...
Prev
1
…
6
7
8
9
10
11
12
13
14
…
16
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment