Commit dbe08e9b authored by yuguo960516yuguo's avatar yuguo960516yuguo
Browse files

2.4.2

parent b5499578
...@@ -77,13 +77,14 @@ def process_image(sample, mode, color_jitter, rotate): ...@@ -77,13 +77,14 @@ def process_image(sample, mode, color_jitter, rotate):
return img, sample[1] return img, sample[1]
def _reader_creator(file_list, def _reader_creator(
mode, file_list,
shuffle=False, mode,
color_jitter=False, shuffle=False,
rotate=False, color_jitter=False,
data_dir=DATA_DIR): rotate=False,
data_dir=DATA_DIR,
):
def reader(): def reader():
with open(file_list) as flist: with open(file_list) as flist:
full_lines = [line.strip() for line in flist] full_lines = [line.strip() for line in flist]
...@@ -98,10 +99,9 @@ def _reader_creator(file_list, ...@@ -98,10 +99,9 @@ def _reader_creator(file_list,
continue continue
yield img_path, int(label) yield img_path, int(label)
mapper = functools.partial(process_image, mapper = functools.partial(
mode=mode, process_image, mode=mode, color_jitter=color_jitter, rotate=rotate
color_jitter=color_jitter, )
rotate=rotate)
return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE) return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)
...@@ -112,11 +112,11 @@ def val(data_dir=DATA_DIR): ...@@ -112,11 +112,11 @@ def val(data_dir=DATA_DIR):
class TestPostTrainingQuantization(unittest.TestCase): class TestPostTrainingQuantization(unittest.TestCase):
def setUp(self): def setUp(self):
self.int8_download = 'int8/download' self.int8_download = 'int8/download'
self.cache_folder = os.path.expanduser('~/.cache/paddle/dataset/' + self.cache_folder = os.path.expanduser(
self.int8_download) '~/.cache/paddle/dataset/' + self.int8_download
)
self.data_cache_folder = '' self.data_cache_folder = ''
data_urls = [] data_urls = []
data_md5s = [] data_md5s = []
...@@ -129,31 +129,34 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -129,31 +129,34 @@ class TestPostTrainingQuantization(unittest.TestCase):
'https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partab' 'https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partab'
) )
data_md5s.append('1e9f15f64e015e58d6f9ec3210ed18b5') data_md5s.append('1e9f15f64e015e58d6f9ec3210ed18b5')
self.data_cache_folder = self.download_data(data_urls, data_md5s, self.data_cache_folder = self.download_data(
"full_data", False) data_urls, data_md5s, "full_data", False
)
else: else:
data_urls.append( data_urls.append(
'http://paddle-inference-dist.bj.bcebos.com/int8/calibration_test_data.tar.gz' 'http://paddle-inference-dist.bj.bcebos.com/int8/calibration_test_data.tar.gz'
) )
data_md5s.append('1b6c1c434172cca1bf9ba1e4d7a3157d') data_md5s.append('1b6c1c434172cca1bf9ba1e4d7a3157d')
self.data_cache_folder = self.download_data(data_urls, data_md5s, self.data_cache_folder = self.download_data(
"small_data", False) data_urls, data_md5s, "small_data", False
)
# reader/decorator.py requires the relative path to the data folder # reader/decorator.py requires the relative path to the data folder
if not os.path.exists("./data/ILSVRC2012"): if not os.path.exists("./data/ILSVRC2012"):
cmd = 'rm -rf {0} && ln -s {1} {0}'.format("data", cmd = 'rm -rf {0} && ln -s {1} {0}'.format(
self.data_cache_folder) "data", self.data_cache_folder
)
os.system(cmd) os.system(cmd)
self.batch_size = 1 if os.environ.get('DATASET') == 'full' else 50 self.batch_size = 1 if os.environ.get('DATASET') == 'full' else 50
self.sample_iterations = 50 if os.environ.get( self.infer_iterations = (
'DATASET') == 'full' else 2 50000 if os.environ.get('DATASET') == 'full' else 2
self.infer_iterations = 50000 if os.environ.get( )
'DATASET') == 'full' else 2
self.root_path = tempfile.TemporaryDirectory() self.root_path = tempfile.TemporaryDirectory()
self.int8_model = os.path.join(self.root_path.name, self.int8_model = os.path.join(
"post_training_quantization") self.root_path.name, "post_training_quantization"
)
def tearDown(self): def tearDown(self):
self.root_path.cleanup() self.root_path.cleanup()
...@@ -161,7 +164,8 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -161,7 +164,8 @@ class TestPostTrainingQuantization(unittest.TestCase):
def cache_unzipping(self, target_folder, zip_path): def cache_unzipping(self, target_folder, zip_path):
if not os.path.exists(target_folder): if not os.path.exists(target_folder):
cmd = 'mkdir {0} && tar xf {1} -C {0}'.format( cmd = 'mkdir {0} && tar xf {1} -C {0}'.format(
target_folder, zip_path) target_folder, zip_path
)
os.system(cmd) os.system(cmd)
def download_data(self, data_urls, data_md5s, folder_name, is_model=True): def download_data(self, data_urls, data_md5s, folder_name, is_model=True):
...@@ -173,13 +177,15 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -173,13 +177,15 @@ class TestPostTrainingQuantization(unittest.TestCase):
download(data_urls[i], self.int8_download, data_md5s[i]) download(data_urls[i], self.int8_download, data_md5s[i])
file_names.append(data_urls[i].split('/')[-1]) file_names.append(data_urls[i].split('/')[-1])
zip_path = os.path.join(self.cache_folder, zip_path = os.path.join(
'full_imagenet_val.tar.gz') self.cache_folder, 'full_imagenet_val.tar.gz'
)
if not os.path.exists(zip_path): if not os.path.exists(zip_path):
cat_command = 'cat' cat_command = 'cat'
for file_name in file_names: for file_name in file_names:
cat_command += ' ' + os.path.join(self.cache_folder, cat_command += ' ' + os.path.join(
file_name) self.cache_folder, file_name
)
cat_command += ' > ' + zip_path cat_command += ' > ' + zip_path
os.system(cat_command) os.system(cat_command)
...@@ -199,8 +205,16 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -199,8 +205,16 @@ class TestPostTrainingQuantization(unittest.TestCase):
image_shape = [3, 224, 224] image_shape = [3, 224, 224]
place = fluid.CPUPlace() place = fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
[infer_program, feed_dict, fetch_targets] = \ [
fluid.io.load_inference_model(model_path, exe) infer_program,
feed_dict,
fetch_targets,
] = fluid.io.load_inference_model(
model_path,
exe,
model_filename="inference.pdmodel",
params_filename="inference.pdiparams",
)
val_reader = paddle.batch(val(), batch_size) val_reader = paddle.batch(val(), batch_size)
iterations = infer_iterations iterations = infer_iterations
...@@ -208,23 +222,28 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -208,23 +222,28 @@ class TestPostTrainingQuantization(unittest.TestCase):
cnt = 0 cnt = 0
periods = [] periods = []
for batch_id, data in enumerate(val_reader()): for batch_id, data in enumerate(val_reader()):
image = np.array([x[0].reshape(image_shape) image = np.array([x[0].reshape(image_shape) for x in data]).astype(
for x in data]).astype("float32") "float32"
)
label = np.array([x[1] for x in data]).astype("int64") label = np.array([x[1] for x in data]).astype("int64")
label = label.reshape([-1, 1]) label = label.reshape([-1, 1])
t1 = time.time() t1 = time.time()
_, acc1, _ = exe.run(infer_program, pred = exe.run(
feed={ infer_program,
feed_dict[0]: image, feed={feed_dict[0]: image},
feed_dict[1]: label fetch_list=fetch_targets,
}, )
fetch_list=fetch_targets)
t2 = time.time() t2 = time.time()
period = t2 - t1 period = t2 - t1
periods.append(period) periods.append(period)
test_info.append(np.mean(acc1) * len(data)) pred = np.array(pred[0])
sort_array = pred.argsort(axis=1)
top_1_pred = sort_array[:, -1:][:, ::-1]
top_1 = np.mean(label == top_1_pred)
test_info.append(np.mean(top_1) * len(data))
cnt += len(data) cnt += len(data)
if (batch_id + 1) % 100 == 0: if (batch_id + 1) % 100 == 0:
...@@ -238,22 +257,25 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -238,22 +257,25 @@ class TestPostTrainingQuantization(unittest.TestCase):
acc1 = np.sum(test_info) / cnt acc1 = np.sum(test_info) / cnt
return (throughput, latency, acc1) return (throughput, latency, acc1)
def generate_quantized_model(self, def generate_quantized_model(
model_path, self,
quantizable_op_type, model_path,
batch_size, quantizable_op_type,
algo="KL", batch_size,
round_type="round", algo="KL",
is_full_quantize=False, round_type="round",
is_use_cache_file=False, is_full_quantize=False,
is_optimize_model=False, is_use_cache_file=False,
batch_nums=10, is_optimize_model=False,
onnx_format=False): batch_nums=10,
onnx_format=False,
):
try: try:
os.system("mkdir " + self.int8_model) os.system("mkdir " + self.int8_model)
except Exception as e: except Exception as e:
print("Failed to create {} due to {}".format( print(
self.int8_model, str(e))) "Failed to create {} due to {}".format(self.int8_model, str(e))
)
sys.exit(-1) sys.exit(-1)
place = fluid.CPUPlace() place = fluid.CPUPlace()
...@@ -261,70 +283,98 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -261,70 +283,98 @@ class TestPostTrainingQuantization(unittest.TestCase):
scope = fluid.global_scope() scope = fluid.global_scope()
val_reader = val() val_reader = val()
ptq = PostTrainingQuantization(executor=exe, ptq = PostTrainingQuantization(
sample_generator=val_reader, executor=exe,
model_dir=model_path, sample_generator=val_reader,
batch_size=batch_size, model_dir=model_path,
batch_nums=batch_nums, model_filename="inference.pdmodel",
algo=algo, params_filename="inference.pdiparams",
quantizable_op_type=quantizable_op_type, batch_size=batch_size,
round_type=round_type, batch_nums=batch_nums,
is_full_quantize=is_full_quantize, algo=algo,
optimize_model=is_optimize_model, quantizable_op_type=quantizable_op_type,
onnx_format=onnx_format, round_type=round_type,
is_use_cache_file=is_use_cache_file) is_full_quantize=is_full_quantize,
optimize_model=is_optimize_model,
onnx_format=onnx_format,
is_use_cache_file=is_use_cache_file,
)
ptq.quantize() ptq.quantize()
ptq.save_quantized_model(self.int8_model) ptq.save_quantized_model(
self.int8_model,
def run_test(self, model_filename="inference.pdmodel",
model, params_filename="inference.pdiparams",
algo, )
round_type,
data_urls, def run_test(
data_md5s, self,
quantizable_op_type, model,
is_full_quantize, algo,
is_use_cache_file, round_type,
is_optimize_model, data_urls,
diff_threshold, data_md5s,
onnx_format=False, quantizable_op_type,
batch_nums=10): is_full_quantize,
is_use_cache_file,
is_optimize_model,
diff_threshold,
onnx_format=False,
batch_nums=10,
):
infer_iterations = self.infer_iterations infer_iterations = self.infer_iterations
batch_size = self.batch_size batch_size = self.batch_size
sample_iterations = self.sample_iterations
model_cache_folder = self.download_data(data_urls, data_md5s, model) model_cache_folder = self.download_data(data_urls, data_md5s, model)
print("Start FP32 inference for {0} on {1} images ...".format( print(
model, infer_iterations * batch_size)) "Start FP32 inference for {0} on {1} images ...".format(
model, infer_iterations * batch_size
)
)
(fp32_throughput, fp32_latency, fp32_acc1) = self.run_program( (fp32_throughput, fp32_latency, fp32_acc1) = self.run_program(
os.path.join(model_cache_folder, "model"), batch_size, os.path.join(model_cache_folder, "MobileNetV1_infer"),
infer_iterations) batch_size,
infer_iterations,
print("Start INT8 post training quantization for {0} on {1} images ...". )
format(model, sample_iterations * batch_size))
self.generate_quantized_model(os.path.join(model_cache_folder, "model"), print(
quantizable_op_type, batch_size, "Start INT8 post training quantization for {0} on {1} images ...".format(
sample_iterations, algo, round_type, model, batch_nums * batch_size
is_full_quantize, is_use_cache_file, )
is_optimize_model, batch_nums, )
onnx_format) self.generate_quantized_model(
os.path.join(model_cache_folder, "MobileNetV1_infer"),
print("Start INT8 inference for {0} on {1} images ...".format( quantizable_op_type,
model, infer_iterations * batch_size)) batch_size,
(int8_throughput, int8_latency, algo,
int8_acc1) = self.run_program(self.int8_model, batch_size, round_type,
infer_iterations) is_full_quantize,
is_use_cache_file,
is_optimize_model,
batch_nums,
onnx_format,
)
print(
"Start INT8 inference for {0} on {1} images ...".format(
model, infer_iterations * batch_size
)
)
(int8_throughput, int8_latency, int8_acc1) = self.run_program(
self.int8_model, batch_size, infer_iterations
)
print("---Post training quantization of {} method---".format(algo)) print("---Post training quantization of {} method---".format(algo))
print( print(
"FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}." "FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.".format(
.format(model, batch_size, fp32_throughput, fp32_latency, model, batch_size, fp32_throughput, fp32_latency, fp32_acc1
fp32_acc1)) )
)
print( print(
"INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.\n" "INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.\n".format(
.format(model, batch_size, int8_throughput, int8_latency, model, batch_size, int8_throughput, int8_latency, int8_acc1
int8_acc1)) )
)
sys.stdout.flush() sys.stdout.flush()
delta_value = fp32_acc1 - int8_acc1 delta_value = fp32_acc1 - int8_acc1
...@@ -332,15 +382,14 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -332,15 +382,14 @@ class TestPostTrainingQuantization(unittest.TestCase):
class TestPostTrainingKLForMobilenetv1(TestPostTrainingQuantization): class TestPostTrainingKLForMobilenetv1(TestPostTrainingQuantization):
def test_post_training_kl_mobilenetv1(self): def test_post_training_kl_mobilenetv1(self):
model = "MobileNet-V1" model = "MobileNet-V1"
algo = "KL" algo = "KL"
round_type = "round" round_type = "round"
data_urls = [ data_urls = [
'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
] ]
data_md5s = ['13892b0716d26443a8cdea15b3c6438b'] data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
quantizable_op_type = [ quantizable_op_type = [
"conv2d", "conv2d",
"depthwise_conv2d", "depthwise_conv2d",
...@@ -351,21 +400,30 @@ class TestPostTrainingKLForMobilenetv1(TestPostTrainingQuantization): ...@@ -351,21 +400,30 @@ class TestPostTrainingKLForMobilenetv1(TestPostTrainingQuantization):
is_use_cache_file = False is_use_cache_file = False
is_optimize_model = True is_optimize_model = True
diff_threshold = 0.025 diff_threshold = 0.025
self.run_test(model, algo, round_type, data_urls, data_md5s, batch_nums = 3
quantizable_op_type, is_full_quantize, is_use_cache_file, self.run_test(
is_optimize_model, diff_threshold) model,
algo,
round_type,
data_urls,
data_md5s,
quantizable_op_type,
is_full_quantize,
is_use_cache_file,
is_optimize_model,
diff_threshold,
)
class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization): class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization):
def test_post_training_avg_mobilenetv1(self): def test_post_training_avg_mobilenetv1(self):
model = "MobileNet-V1" model = "MobileNet-V1"
algo = "avg" algo = "avg"
round_type = "round" round_type = "round"
data_urls = [ data_urls = [
'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
] ]
data_md5s = ['13892b0716d26443a8cdea15b3c6438b'] data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
quantizable_op_type = [ quantizable_op_type = [
"conv2d", "conv2d",
"depthwise_conv2d", "depthwise_conv2d",
...@@ -375,21 +433,29 @@ class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization): ...@@ -375,21 +433,29 @@ class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization):
is_use_cache_file = False is_use_cache_file = False
is_optimize_model = True is_optimize_model = True
diff_threshold = 0.025 diff_threshold = 0.025
self.run_test(model, algo, round_type, data_urls, data_md5s, self.run_test(
quantizable_op_type, is_full_quantize, is_use_cache_file, model,
is_optimize_model, diff_threshold) algo,
round_type,
data_urls,
data_md5s,
quantizable_op_type,
is_full_quantize,
is_use_cache_file,
is_optimize_model,
diff_threshold,
)
class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization): class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization):
def test_post_training_hist_mobilenetv1(self): def test_post_training_hist_mobilenetv1(self):
model = "MobileNet-V1" model = "MobileNet-V1"
algo = "hist" algo = "hist"
round_type = "round" round_type = "round"
data_urls = [ data_urls = [
'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
] ]
data_md5s = ['13892b0716d26443a8cdea15b3c6438b'] data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
quantizable_op_type = [ quantizable_op_type = [
"conv2d", "conv2d",
"depthwise_conv2d", "depthwise_conv2d",
...@@ -400,29 +466,30 @@ class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization): ...@@ -400,29 +466,30 @@ class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization):
is_optimize_model = True is_optimize_model = True
diff_threshold = 0.03 diff_threshold = 0.03
batch_nums = 3 batch_nums = 3
self.run_test(model, self.run_test(
algo, model,
round_type, algo,
data_urls, round_type,
data_md5s, data_urls,
quantizable_op_type, data_md5s,
is_full_quantize, quantizable_op_type,
is_use_cache_file, is_full_quantize,
is_optimize_model, is_use_cache_file,
diff_threshold, is_optimize_model,
batch_nums=batch_nums) diff_threshold,
batch_nums=batch_nums,
)
class TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization): class TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization):
def test_post_training_abs_max_mobilenetv1(self): def test_post_training_abs_max_mobilenetv1(self):
model = "MobileNet-V1" model = "MobileNet-V1"
algo = "abs_max" algo = "abs_max"
round_type = "round" round_type = "round"
data_urls = [ data_urls = [
'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
] ]
data_md5s = ['13892b0716d26443a8cdea15b3c6438b'] data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
quantizable_op_type = [ quantizable_op_type = [
"conv2d", "conv2d",
"mul", "mul",
...@@ -432,21 +499,29 @@ class TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization): ...@@ -432,21 +499,29 @@ class TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization):
is_optimize_model = False is_optimize_model = False
# The accuracy diff of post-training quantization (abs_max) maybe bigger # The accuracy diff of post-training quantization (abs_max) maybe bigger
diff_threshold = 0.05 diff_threshold = 0.05
self.run_test(model, algo, round_type, data_urls, data_md5s, self.run_test(
quantizable_op_type, is_full_quantize, is_use_cache_file, model,
is_optimize_model, diff_threshold) algo,
round_type,
data_urls,
data_md5s,
quantizable_op_type,
is_full_quantize,
is_use_cache_file,
is_optimize_model,
diff_threshold,
)
class TestPostTrainingAvgONNXFormatForMobilenetv1(TestPostTrainingQuantization): class TestPostTrainingAvgONNXFormatForMobilenetv1(TestPostTrainingQuantization):
def test_post_training_onnx_format_mobilenetv1(self): def test_post_training_onnx_format_mobilenetv1(self):
model = "MobileNet-V1" model = "MobileNet-V1"
algo = "emd" algo = "emd"
round_type = "round" round_type = "round"
data_urls = [ data_urls = [
'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
] ]
data_md5s = ['13892b0716d26443a8cdea15b3c6438b'] data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
quantizable_op_type = [ quantizable_op_type = [
"conv2d", "conv2d",
"depthwise_conv2d", "depthwise_conv2d",
...@@ -458,18 +533,20 @@ class TestPostTrainingAvgONNXFormatForMobilenetv1(TestPostTrainingQuantization): ...@@ -458,18 +533,20 @@ class TestPostTrainingAvgONNXFormatForMobilenetv1(TestPostTrainingQuantization):
onnx_format = True onnx_format = True
diff_threshold = 0.05 diff_threshold = 0.05
batch_nums = 3 batch_nums = 3
self.run_test(model, self.run_test(
algo, model,
round_type, algo,
data_urls, round_type,
data_md5s, data_urls,
quantizable_op_type, data_md5s,
is_full_quantize, quantizable_op_type,
is_use_cache_file, is_full_quantize,
is_optimize_model, is_use_cache_file,
diff_threshold, is_optimize_model,
onnx_format=onnx_format, diff_threshold,
batch_nums=batch_nums) onnx_format=onnx_format,
batch_nums=batch_nums,
)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -23,9 +23,9 @@ from ...log_helper import get_logger ...@@ -23,9 +23,9 @@ from ...log_helper import get_logger
__all__ = ['add_supported_layer'] __all__ = ['add_supported_layer']
_logger = get_logger(__name__, _logger = get_logger(
logging.INFO, __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
fmt='%(asctime)s-%(levelname)s: %(message)s') )
def _default_pruning(weight_nparray, m, n, func_name, param_name): def _default_pruning(weight_nparray, m, n, func_name, param_name):
...@@ -38,13 +38,17 @@ def _default_pruning(weight_nparray, m, n, func_name, param_name): ...@@ -38,13 +38,17 @@ def _default_pruning(weight_nparray, m, n, func_name, param_name):
exlude_cond_shape4 = len(shape) == 4 and shape[1] < m exlude_cond_shape4 = len(shape) == 4 and shape[1] < m
if exlude_cond_shape2: if exlude_cond_shape2:
_logger.warning( _logger.warning(
'{} is not pruned because the first dimension of {} is smaller than {}' '{} is not pruned because the first dimension of {} is smaller than {}'.format(
.format(param_name, shape, m)) param_name, shape, m
)
)
return weight_pruned_nparray, weight_sparse_mask return weight_pruned_nparray, weight_sparse_mask
if exlude_cond_shape4: if exlude_cond_shape4:
_logger.warning( _logger.warning(
'{} is not pruned because the second dimension of {} is smaller than {}' '{} is not pruned because the second dimension of {} is smaller than {}'.format(
.format(param_name, shape, m)) param_name, shape, m
)
)
return weight_pruned_nparray, weight_sparse_mask return weight_pruned_nparray, weight_sparse_mask
checked_func_name = sparsity.CheckMethod.get_checking_method(func_name) checked_func_name = sparsity.CheckMethod.get_checking_method(func_name)
...@@ -60,13 +64,13 @@ def _default_pruning(weight_nparray, m, n, func_name, param_name): ...@@ -60,13 +64,13 @@ def _default_pruning(weight_nparray, m, n, func_name, param_name):
# sparsity/utils is row-major pruning. That is the reason we have to transpose weight # sparsity/utils is row-major pruning. That is the reason we have to transpose weight
# matrices beforce invoking create_mask. Then we transpose the result mask to make # matrices beforce invoking create_mask. Then we transpose the result mask to make
# sure its shape to be the same as the input weight. # sure its shape to be the same as the input weight.
weight_sparse_mask = sparsity.create_mask(weight_nparray.T, weight_sparse_mask = sparsity.create_mask(
func_name=func_name, weight_nparray.T, func_name=func_name, n=n, m=m
n=n, ).T
m=m).T
weight_pruned_nparray = np.multiply(weight_nparray, weight_sparse_mask) weight_pruned_nparray = np.multiply(weight_nparray, weight_sparse_mask)
assert sparsity.check_sparsity(weight_pruned_nparray.T, n=n, m=m, func_name=checked_func_name), \ assert sparsity.check_sparsity(
'Pruning {} weight matrix failure!!!'.format(param_name) weight_pruned_nparray.T, n=n, m=m, func_name=checked_func_name
), 'Pruning {} weight matrix failure!!!'.format(param_name)
return weight_pruned_nparray, weight_sparse_mask return weight_pruned_nparray, weight_sparse_mask
...@@ -78,28 +82,35 @@ supported_layers_and_prune_func_map = {} ...@@ -78,28 +82,35 @@ supported_layers_and_prune_func_map = {}
def add_supported_layer(layer, pruning_func=None): def add_supported_layer(layer, pruning_func=None):
r""" r"""
Add supported layers and its corresponding pruning function. Add supported layers and its corresponding pruning function.
Args: Args:
name (string|Layer): The name or type of layer, needed to support. If layer is `Layer` then name (string|Layer): The name or type of layer, needed to support. If layer is `Layer` then
it would be turn to string internally. ASP would use this name to match parameter's name and call it would be turn to string internally. ASP would use this name to match parameter's name and call
its the corresponding pruning function. its the corresponding pruning function.
pruning_func (function, optional): a function type which receives five argument (weight_nparray, pruning_func (function, optional): a function type which receives five argument (weight_nparray,
m, n, func_name, param_name), weight_nparray is a nparray of weight, param_name is the name of weight, m, n, func_name, param_name), weight_nparray is a nparray of weight, param_name is the name of weight,
m, n, and func_name, please see `prune_model` for details. m, n, and func_name, please see `prune_model` for details.
""" """
name = None name = None
if isinstance(layer, str): if isinstance(layer, str):
name = layer name = layer
elif isinstance(layer, paddle.fluid.dygraph.layers.Layer): elif isinstance(layer, paddle.fluid.dygraph.layers.Layer):
name = paddle.fluid.dygraph.layers._convert_camel_to_snake( name = paddle.fluid.dygraph.layers._convert_camel_to_snake(
type(layer).__name__) type(layer).__name__
)
elif issubclass(layer, paddle.fluid.dygraph.layers.Layer): elif issubclass(layer, paddle.fluid.dygraph.layers.Layer):
name = paddle.fluid.dygraph.layers._convert_camel_to_snake( name = paddle.fluid.dygraph.layers._convert_camel_to_snake(
layer.__name__) layer.__name__
)
else: else:
assert "The type of layer should be string of Layer, but got {}!".format( assert (
type(layer)) "The type of layer should be string of Layer, but got {}!".format(
type(layer)
)
)
if pruning_func is None: if pruning_func is None:
pruning_func = _default_pruning pruning_func = _default_pruning
_supported_layers_and_prune_func_map_lock.acquire() _supported_layers_and_prune_func_map_lock.acquire()
......
...@@ -27,9 +27,16 @@ from itertools import permutations ...@@ -27,9 +27,16 @@ from itertools import permutations
import threading import threading
__all__ = [ __all__ = [
'calculate_density', 'check_mask_1d', 'get_mask_1d', 'check_mask_2d', 'calculate_density',
'get_mask_2d_greedy', 'get_mask_2d_best', 'create_mask', 'check_sparsity', 'check_mask_1d',
'MaskAlgo', 'CheckMethod' 'get_mask_1d',
'check_mask_2d',
'get_mask_2d_greedy',
'get_mask_2d_best',
'create_mask',
'check_sparsity',
'MaskAlgo',
'CheckMethod',
] ]
...@@ -76,8 +83,9 @@ class CheckMethod(Enum): ...@@ -76,8 +83,9 @@ class CheckMethod(Enum):
CheckMethod.get_checking_method(MaskAlgo.MASK_2D_BEST) CheckMethod.get_checking_method(MaskAlgo.MASK_2D_BEST)
# CheckMethod.CHECK_2D # CheckMethod.CHECK_2D
""" """
assert isinstance(mask_algo, MaskAlgo), \ assert isinstance(
"mask_algo should be MaskAlgo type" mask_algo, MaskAlgo
), "mask_algo should be MaskAlgo type"
if mask_algo == MaskAlgo.MASK_1D: if mask_algo == MaskAlgo.MASK_1D:
return CheckMethod.CHECK_1D return CheckMethod.CHECK_1D
else: else:
...@@ -86,20 +94,25 @@ class CheckMethod(Enum): ...@@ -86,20 +94,25 @@ class CheckMethod(Enum):
def calculate_density(x): def calculate_density(x):
r""" r"""
Return the density of the input tensor. Return the density of the input tensor.
Args: Args:
x (nparray): The input tensor. x (nparray): The input tensor.
Returns: Returns:
float: The density of :attr:`x`. float, The density of :attr:`x`.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle
import numpy as np
x = np.array([[0, 1, 3, 0], import paddle
import numpy as np
x = np.array([[0, 1, 3, 0],
[1, 1, 0, 1]]) [1, 1, 0, 1]])
paddle.incubate.asp.calculate_density(x) # 0.625 paddle.incubate.asp.calculate_density(x) # 0.625
""" """
x_flattened = x.flatten() x_flattened = x.flatten()
return float(np.nonzero(x_flattened)[0].size) / x_flattened.size return float(np.nonzero(x_flattened)[0].size) / x_flattened.size
...@@ -108,7 +121,7 @@ def calculate_density(x): ...@@ -108,7 +121,7 @@ def calculate_density(x):
def _reshape_1d(mat, m): def _reshape_1d(mat, m):
r""" r"""
Reshape the input 2D matrix to shape (-1, m). Reshape the input 2D matrix to shape (-1, m).
If the second dimension of :attr:`mat` is not a multiples of :attr:`m`, If the second dimension of :attr:`mat` is not a multiples of :attr:`m`,
then this function would pad the remainder with 0 before reshaping. then this function would pad the remainder with 0 before reshaping.
.. math:: .. math::
...@@ -126,7 +139,7 @@ def _reshape_1d(mat, m): ...@@ -126,7 +139,7 @@ def _reshape_1d(mat, m):
remainder = mat.shape[1] % m remainder = mat.shape[1] % m
if mat.shape[1] % m > 0: if mat.shape[1] % m > 0:
mat_padded = np.zeros((mat.shape[0], mat.shape[1] + (m - remainder))) mat_padded = np.zeros((mat.shape[0], mat.shape[1] + (m - remainder)))
mat_padded[:, :mat.shape[1]] = mat mat_padded[:, : mat.shape[1]] = mat
shape = mat_padded.shape shape = mat_padded.shape
return mat_padded.reshape(-1, m), shape return mat_padded.reshape(-1, m), shape
else: else:
...@@ -136,7 +149,7 @@ def _reshape_1d(mat, m): ...@@ -136,7 +149,7 @@ def _reshape_1d(mat, m):
def check_mask_1d(mat, n, m): def check_mask_1d(mat, n, m):
r""" r"""
Check if every row of the input matrix :attr:`mat` is in 1D `n:m` sparse pattern. Check if every row of the input matrix :attr:`mat` is in 1D `n:m` sparse pattern.
This function would pad the second dimension of :attr:`mat` by zero This function would pad the second dimension of :attr:`mat` by zero
to be a multiples of :attr:`m` if necessary. to be a multiples of :attr:`m` if necessary.
1D `n:m` sparse pattern: At least :attr:`n` zeros in every :math:`1 \times m` block. 1D `n:m` sparse pattern: At least :attr:`n` zeros in every :math:`1 \times m` block.
...@@ -179,8 +192,8 @@ def check_mask_1d(mat, n, m): ...@@ -179,8 +192,8 @@ def check_mask_1d(mat, n, m):
def get_mask_1d(mat, n, m): def get_mask_1d(mat, n, m):
r""" r"""
Generate 1D `n:m` sparse pattern mask of the input matrix :attr:`mat` Generate 1D `n:m` sparse pattern mask of the input matrix :attr:`mat`
in row-directory. This function would pad the second dimension of :attr:`mat` in row-directory. This function would pad the second dimension of :attr:`mat`
by zero to be a multiples of :attr:`m` before mask generation. by zero to be a multiples of :attr:`m` before mask generation.
1D `n:m` sparse pattern: At least :attr:`n` zeros in every :math:`1 \times m` block. 1D `n:m` sparse pattern: At least :attr:`n` zeros in every :math:`1 \times m` block.
...@@ -213,7 +226,7 @@ def get_mask_1d(mat, n, m): ...@@ -213,7 +226,7 @@ def get_mask_1d(mat, n, m):
min_order_indices = np.argsort(np.absolute(sub_mat)) min_order_indices = np.argsort(np.absolute(sub_mat))
mask_flattern[i, min_order_indices[:n].tolist()] = 0 mask_flattern[i, min_order_indices[:n].tolist()] = 0
mask_flattern = mask_flattern.reshape(shape) mask_flattern = mask_flattern.reshape(shape)
mask[:, :] = mask_flattern[:, :mat.shape[1]] mask[:, :] = mask_flattern[:, : mat.shape[1]]
return mask return mask
...@@ -239,12 +252,12 @@ def _reshape_2d(mat, m): ...@@ -239,12 +252,12 @@ def _reshape_2d(mat, m):
remainder_0 = mat.shape[0] % m remainder_0 = mat.shape[0] % m
remainder_1 = mat.shape[1] % m remainder_1 = mat.shape[1] % m
new_shape = (mat.shape[0] if remainder_0 == 0 \ new_shape = (
else mat.shape[0] + (m - remainder_0), mat.shape[0] if remainder_0 == 0 else mat.shape[0] + (m - remainder_0),
mat.shape[1] if remainder_1 == 0 \ mat.shape[1] if remainder_1 == 0 else mat.shape[1] + (m - remainder_1),
else mat.shape[1] + (m - remainder_1)) )
mat_padded = np.zeros(new_shape) mat_padded = np.zeros(new_shape)
mat_padded[:mat.shape[0], :mat.shape[1]] = mat mat_padded[: mat.shape[0], : mat.shape[1]] = mat
mat_flattern = np.empty(new_shape).reshape(-1, m * m) mat_flattern = np.empty(new_shape).reshape(-1, m * m)
curr_idx = 0 curr_idx = 0
...@@ -252,9 +265,9 @@ def _reshape_2d(mat, m): ...@@ -252,9 +265,9 @@ def _reshape_2d(mat, m):
row_end = row_start + m row_end = row_start + m
for col_start in range(0, mat_padded.shape[1], m): for col_start in range(0, mat_padded.shape[1], m):
col_end = col_start + m col_end = col_start + m
sub_mat = np.squeeze(mat_padded[row_start:row_end, \ sub_mat = np.squeeze(
col_start:col_end] \ mat_padded[row_start:row_end, col_start:col_end].reshape(-1)
.reshape(-1)) )
mat_flattern[curr_idx] = sub_mat mat_flattern[curr_idx] = sub_mat
curr_idx += 1 curr_idx += 1
return mat_flattern, mat_padded.shape return mat_flattern, mat_padded.shape
...@@ -263,10 +276,10 @@ def _reshape_2d(mat, m): ...@@ -263,10 +276,10 @@ def _reshape_2d(mat, m):
def check_mask_2d(mat, n, m): def check_mask_2d(mat, n, m):
r""" r"""
Check if every :math:`m \times m` block of the input matrix :attr:`mat` is in 2D `n:m` sparse pattern. Check if every :math:`m \times m` block of the input matrix :attr:`mat` is in 2D `n:m` sparse pattern.
This function would pad each dimension of :attr:`mat` by zero to be a multiples of This function would pad each dimension of :attr:`mat` by zero to be a multiples of
:attr:`m` if necessary. :attr:`m` if necessary.
2D `n:m` sparse pattern: At least :math:`n \times n` zeros in every :math:`m \times m` block 2D `n:m` sparse pattern: At least :math:`n \times n` zeros in every :math:`m \times m` block
under the constraint of at least :attr:`n` zeros for each row and column. under the constraint of at least :attr:`n` zeros for each row and column.
Args: Args:
...@@ -304,18 +317,19 @@ def check_mask_2d(mat, n, m): ...@@ -304,18 +317,19 @@ def check_mask_2d(mat, n, m):
mat_padded, shape = _reshape_2d(mat, m) mat_padded, shape = _reshape_2d(mat, m)
for sub_mat in mat_padded: for sub_mat in mat_padded:
sub_mask = np.absolute(np.squeeze(sub_mat.reshape(m, m))) > 0 sub_mask = np.absolute(np.squeeze(sub_mat.reshape(m, m))) > 0
if (np.sum(np.sum(sub_mask, axis=1) > (m-n)) != 0) and \ if (np.sum(np.sum(sub_mask, axis=1) > (m - n)) != 0) and (
(np.sum(np.sum(sub_mask, axis=0) > (m-n)) != 0): np.sum(np.sum(sub_mask, axis=0) > (m - n)) != 0
):
return False return False
return True return True
def get_mask_2d_greedy(mat, n, m): def get_mask_2d_greedy(mat, n, m):
r""" r"""
Greedily generate 2D `n:m` sparse pattern mask of the input matrix :attr:`mat`. Greedily generate 2D `n:m` sparse pattern mask of the input matrix :attr:`mat`.
This function would pad each dimension of :attr:`mat` by zero to be a multiples of :attr:`m` before mask generation. This function would pad each dimension of :attr:`mat` by zero to be a multiples of :attr:`m` before mask generation.
2D `n:m` sparse pattern: At least :math:`n \times n` zeros in every :math:`m \times m` block 2D `n:m` sparse pattern: At least :math:`n \times n` zeros in every :math:`m \times m` block
under the constraint of at least :attr:`n` zeros for each row and column. under the constraint of at least :attr:`n` zeros for each row and column.
Greedily generating: For each :math:`m \times m` block, selecting values to keep in descent order. Greedily generating: For each :math:`m \times m` block, selecting values to keep in descent order.
...@@ -350,15 +364,17 @@ def get_mask_2d_greedy(mat, n, m): ...@@ -350,15 +364,17 @@ def get_mask_2d_greedy(mat, n, m):
sub_mask = np.squeeze(mask_padded[idx]) sub_mask = np.squeeze(mask_padded[idx])
min_order_1d_indices = np.argsort(sub_mat) min_order_1d_indices = np.argsort(sub_mat)
min_order_2d_indices = [(int(x / m), x % m) min_order_2d_indices = [
for x in min_order_1d_indices] (int(x / m), x % m) for x in min_order_1d_indices
]
row_counter = collections.Counter() row_counter = collections.Counter()
col_counter = collections.Counter() col_counter = collections.Counter()
for i in range(len(min_order_1d_indices) - 1, -1, -1): for i in range(len(min_order_1d_indices) - 1, -1, -1):
matrix_entry = min_order_2d_indices[i] matrix_entry = min_order_2d_indices[i]
if (row_counter[matrix_entry[0]] == n) or \ if (row_counter[matrix_entry[0]] == n) or (
(col_counter[matrix_entry[1]] == n): col_counter[matrix_entry[1]] == n
):
continue continue
sub_mask[matrix_entry[0], matrix_entry[1]] = 1.0 sub_mask[matrix_entry[0], matrix_entry[1]] = 1.0
...@@ -373,7 +389,7 @@ def get_mask_2d_greedy(mat, n, m): ...@@ -373,7 +389,7 @@ def get_mask_2d_greedy(mat, n, m):
col_end = col_start + m col_end = col_start + m
mask[row_start:row_end, col_start:col_end] = mask_padded[curr_idx] mask[row_start:row_end, col_start:col_end] = mask_padded[curr_idx]
curr_idx += 1 curr_idx += 1
return mask[:mat.shape[0], :mat.shape[1]] return mask[: mat.shape[0], : mat.shape[1]]
_valid_2d_patterns_lock = threading.Lock() _valid_2d_patterns_lock = threading.Lock()
...@@ -384,7 +400,7 @@ def _compute_valid_2d_patterns(n, m): ...@@ -384,7 +400,7 @@ def _compute_valid_2d_patterns(n, m):
r""" r"""
Compute all vaild 2D `n:m` sparse patterns. Compute all vaild 2D `n:m` sparse patterns.
2D `n:m` sparse pattern: At least :math:`n \times n` zeros in every :math:`m \times m` block 2D `n:m` sparse pattern: At least :math:`n \times n` zeros in every :math:`m \times m` block
under the constraint of at least :attr:`n` zeros for each row and column. under the constraint of at least :attr:`n` zeros for each row and column.
Args: Args:
...@@ -406,8 +422,11 @@ def _compute_valid_2d_patterns(n, m): ...@@ -406,8 +422,11 @@ def _compute_valid_2d_patterns(n, m):
patterns = patterns + patterns patterns = patterns + patterns
patterns = np.asarray(list(set(permutations(patterns, m)))) patterns = np.asarray(list(set(permutations(patterns, m))))
valid = ((patterns.sum(axis=1) <= n).sum( valid = (
axis=1) == m).nonzero()[0].reshape(-1) ((patterns.sum(axis=1) <= n).sum(axis=1) == m)
.nonzero()[0]
.reshape(-1)
)
valid_patterns = np.empty((valid.shape[0], m, m)) valid_patterns = np.empty((valid.shape[0], m, m))
valid_patterns[:] = patterns[valid[:]] valid_patterns[:] = patterns[valid[:]]
...@@ -420,11 +439,11 @@ def _compute_valid_2d_patterns(n, m): ...@@ -420,11 +439,11 @@ def _compute_valid_2d_patterns(n, m):
def get_mask_2d_best(mat, n, m): def get_mask_2d_best(mat, n, m):
r""" r"""
Generate 2D `n:m` sparse pattern mask of the input matrix :attr:`mat` Generate 2D `n:m` sparse pattern mask of the input matrix :attr:`mat`
to form sparse matrix with maximun L1 norm .This function would pad each to form sparse matrix with maximun L1 norm .This function would pad each
dimension of :attr:`mat` by zero to be a multiples of :attr:`m` before mask generation. dimension of :attr:`mat` by zero to be a multiples of :attr:`m` before mask generation.
2D `n:m` sparse pattern: At least :math:`n \times n` zeros in every :math:`m \times m` block 2D `n:m` sparse pattern: At least :math:`n \times n` zeros in every :math:`m \times m` block
under the constraint of at least :attr:`n` zeros for each row and column. under the constraint of at least :attr:`n` zeros for each row and column.
*Note*: L1 norm of sparse matrix from `Best` API is greater than or equal to the one from `Greedy`. *Note*: L1 norm of sparse matrix from `Best` API is greater than or equal to the one from `Greedy`.
...@@ -454,9 +473,10 @@ def get_mask_2d_best(mat, n, m): ...@@ -454,9 +473,10 @@ def get_mask_2d_best(mat, n, m):
mat_flattern, shape = _reshape_2d(mat, m) mat_flattern, shape = _reshape_2d(mat, m)
mask_flattern = np.ones_like(mat_flattern).reshape(-1, m, m) mask_flattern = np.ones_like(mat_flattern).reshape(-1, m, m)
pmax = np.argmax(np.matmul(mat_flattern, pmax = np.argmax(
patterns.reshape(patterns.shape[0], m * m).T), np.matmul(mat_flattern, patterns.reshape(patterns.shape[0], m * m).T),
axis=1) axis=1,
)
mask_flattern[:] = patterns[pmax[:]] mask_flattern[:] = patterns[pmax[:]]
mask = np.empty(shape) mask = np.empty(shape)
...@@ -468,7 +488,7 @@ def get_mask_2d_best(mat, n, m): ...@@ -468,7 +488,7 @@ def get_mask_2d_best(mat, n, m):
col_end = col_start + m col_end = col_start + m
mask[row_start:row_end, col_start:col_end] = mask_flattern[curr_idx] mask[row_start:row_end, col_start:col_end] = mask_flattern[curr_idx]
curr_idx += 1 curr_idx += 1
return mask[:mat.shape[0], :mat.shape[1]] return mask[: mat.shape[0], : mat.shape[1]]
def create_mask(tensor, func_name=MaskAlgo.MASK_1D, n=2, m=4): def create_mask(tensor, func_name=MaskAlgo.MASK_1D, n=2, m=4):
...@@ -508,9 +528,10 @@ def create_mask(tensor, func_name=MaskAlgo.MASK_1D, n=2, m=4): ...@@ -508,9 +528,10 @@ def create_mask(tensor, func_name=MaskAlgo.MASK_1D, n=2, m=4):
dtype = tensor.dtype dtype = tensor.dtype
t = tensor.astype(float) t = tensor.astype(float)
assert isinstance(func_name, MaskAlgo), \ assert isinstance(func_name, MaskAlgo), (
"func_name argumet of create_mask is only accepted as type MaskAlgo. " \ "func_name argumet of create_mask is only accepted as type MaskAlgo. "
"But got {}".format(type(func_name)) "But got {}".format(type(func_name))
)
func = getattr(sys.modules[__name__], func_name.value, None) func = getattr(sys.modules[__name__], func_name.value, None)
if len(shape) == 1: if len(shape) == 1:
t = t.reshape(1, shape[0]) t = t.reshape(1, shape[0])
...@@ -520,14 +541,20 @@ def create_mask(tensor, func_name=MaskAlgo.MASK_1D, n=2, m=4): ...@@ -520,14 +541,20 @@ def create_mask(tensor, func_name=MaskAlgo.MASK_1D, n=2, m=4):
t = t.reshape(shape[0] * shape[1], shape[2]) t = t.reshape(shape[0] * shape[1], shape[2])
# 4d-tensor conv (h, w, in, out) -> (h*w*out, in) in GemmConvKernel Op # 4d-tensor conv (h, w, in, out) -> (h*w*out, in) in GemmConvKernel Op
elif len(shape) == 4: elif len(shape) == 4:
t = t.transpose([0, 1, 3, 2]).reshape(shape[0] * shape[1] * shape[3], t = t.transpose([0, 1, 3, 2]).reshape(
shape[2]) shape[0] * shape[1] * shape[3], shape[2]
)
mask = func(t, n=n, m=m) mask = func(t, n=n, m=m)
return mask.reshape([shape[0], shape[1], shape[3], return (
shape[2]]).transpose([0, 1, 3, 2]).astype(dtype) mask.reshape([shape[0], shape[1], shape[3], shape[2]])
.transpose([0, 1, 3, 2])
.astype(dtype)
)
else: else:
raise ValueError("The dimension of input tensor is not supported in create_mask, " \ raise ValueError(
"Only dimension < 4 is supported but got {}".format(len(shape))) "The dimension of input tensor is not supported in create_mask, "
"Only dimension < 4 is supported but got {}".format(len(shape))
)
mask = func(t, n=n, m=m) mask = func(t, n=n, m=m)
return mask.reshape(shape).astype(dtype) return mask.reshape(shape).astype(dtype)
...@@ -566,9 +593,10 @@ def check_sparsity(tensor, func_name=CheckMethod.CHECK_1D, n=2, m=4): ...@@ -566,9 +593,10 @@ def check_sparsity(tensor, func_name=CheckMethod.CHECK_1D, n=2, m=4):
shape = tensor.shape shape = tensor.shape
t = tensor.astype(float) t = tensor.astype(float)
assert type(func_name) == CheckMethod, \ assert type(func_name) == CheckMethod, (
"func_name argumet of check_sparsity is only accepted as type CheckMethod. " \ "func_name argumet of check_sparsity is only accepted as type CheckMethod. "
"But got {}".format(type(func_name)) "But got {}".format(type(func_name))
)
func = getattr(sys.modules[__name__], func_name.value, None) func = getattr(sys.modules[__name__], func_name.value, None)
if len(shape) == 1: if len(shape) == 1:
t = t.reshape(1, shape[0]) t = t.reshape(1, shape[0])
...@@ -578,10 +606,13 @@ def check_sparsity(tensor, func_name=CheckMethod.CHECK_1D, n=2, m=4): ...@@ -578,10 +606,13 @@ def check_sparsity(tensor, func_name=CheckMethod.CHECK_1D, n=2, m=4):
t = t.reshape(shape[0] * shape[1], shape[2]) t = t.reshape(shape[0] * shape[1], shape[2])
# 4d-tensor conv (h, w, in, out) -> (h*w*out, in) in GemmConvKernel Op # 4d-tensor conv (h, w, in, out) -> (h*w*out, in) in GemmConvKernel Op
elif len(shape) == 4: elif len(shape) == 4:
t = t.transpose([0, 1, 3, t = t.transpose([0, 1, 3, 2]).reshape(
2]).reshape([shape[0] * shape[1] * shape[3], shape[2]]) [shape[0] * shape[1] * shape[3], shape[2]]
)
else: else:
raise ValueError("The dimension of input tensor is not supported in create_mask, " \ raise ValueError(
"Only dimension < 4 is supported but got {}".format(len(shape))) "The dimension of input tensor is not supported in create_mask, "
"Only dimension < 4 is supported but got {}".format(len(shape))
)
return func(t, n=n, m=m) return func(t, n=n, m=m)
...@@ -35,9 +35,9 @@ try: ...@@ -35,9 +35,9 @@ try:
if os.name == 'nt': if os.name == 'nt':
third_lib_path = current_path + os.sep + '..' + os.sep + 'libs' third_lib_path = current_path + os.sep + '..' + os.sep + 'libs'
# Will load shared library from 'path' on windows # Will load shared library from 'path' on windows
os.environ[ os.environ['path'] = (
'path'] = current_path + ';' + third_lib_path + ';' + os.environ[ current_path + ';' + third_lib_path + ';' + os.environ['path']
'path'] )
sys.path.insert(0, third_lib_path) sys.path.insert(0, third_lib_path)
# Note: from python3.8, PATH will not take effect # Note: from python3.8, PATH will not take effect
# https://github.com/python/cpython/pull/12302 # https://github.com/python/cpython/pull/12302
...@@ -47,20 +47,24 @@ try: ...@@ -47,20 +47,24 @@ try:
except ImportError as e: except ImportError as e:
from .. import compat as cpt from .. import compat as cpt
if os.name == 'nt': if os.name == 'nt':
executable_path = os.path.abspath(os.path.dirname(sys.executable)) executable_path = os.path.abspath(os.path.dirname(sys.executable))
raise ImportError( raise ImportError(
"""NOTE: You may need to run \"set PATH=%s;%%PATH%%\" """NOTE: You may need to run \"set PATH=%s;%%PATH%%\"
if you encounters \"DLL load failed\" errors. If you have python if you encounters \"DLL load failed\" errors. If you have python
installed in other directory, replace \"%s\" with your own installed in other directory, replace \"%s\" with your own
directory. The original error is: \n %s""" % directory. The original error is: \n %s"""
(executable_path, executable_path, cpt.get_exception_message(e))) % (executable_path, executable_path, cpt.get_exception_message(e))
)
else: else:
raise ImportError( raise ImportError(
"""NOTE: You may need to run \"export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH\" """NOTE: You may need to run \"export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH\"
if you encounters \"libmkldnn.so not found\" errors. If you have python if you encounters \"libmkldnn.so not found\" errors. If you have python
installed in other directory, replace \"/usr/local/lib\" with your own installed in other directory, replace \"/usr/local/lib\" with your own
directory. The original error is: \n""" + cpt.get_exception_message(e)) directory. The original error is: \n"""
+ cpt.get_exception_message(e)
)
except Exception as e: except Exception as e:
raise e raise e
...@@ -70,36 +74,45 @@ def avx_supported(): ...@@ -70,36 +74,45 @@ def avx_supported():
Whether current system(Linux, MacOS, Windows) is supported with AVX. Whether current system(Linux, MacOS, Windows) is supported with AVX.
""" """
from .. import compat as cpt from .. import compat as cpt
sysstr = platform.system().lower() sysstr = platform.system().lower()
has_avx = False has_avx = False
if sysstr == 'linux': if sysstr == 'linux':
try: try:
has_avx = os.popen('cat /proc/cpuinfo | grep -i avx').read() != '' pipe = os.popen('cat /proc/cpuinfo | grep -i avx')
has_avx = pipe.read() != ''
pipe.close()
except Exception as e: except Exception as e:
sys.stderr.write('Can not get the AVX flag from /proc/cpuinfo.\n' sys.stderr.write(
'The original error is: %s\n' % 'Can not get the AVX flag from /proc/cpuinfo.\n'
cpt.get_exception_message(e)) 'The original error is: %s\n' % cpt.get_exception_message(e)
)
return has_avx return has_avx
elif sysstr == 'darwin': elif sysstr == 'darwin':
try: try:
has_avx = os.popen( pipe = os.popen('sysctl machdep.cpu.features | grep -i avx')
'sysctl machdep.cpu.features | grep -i avx').read() != '' has_avx = pipe.read() != ''
pipe.close()
except Exception as e: except Exception as e:
sys.stderr.write( sys.stderr.write(
'Can not get the AVX flag from machdep.cpu.features.\n' 'Can not get the AVX flag from machdep.cpu.features.\n'
'The original error is: %s\n' % cpt.get_exception_message(e)) 'The original error is: %s\n' % cpt.get_exception_message(e)
)
if not has_avx: if not has_avx:
import subprocess import subprocess
pipe = subprocess.Popen( pipe = subprocess.Popen(
'sysctl machdep.cpu.leaf7_features | grep -i avx', 'sysctl machdep.cpu.leaf7_features | grep -i avx',
shell=True, shell=True,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE) stderr=subprocess.PIPE,
)
_ = pipe.communicate() _ = pipe.communicate()
has_avx = True if pipe.returncode == 0 else False has_avx = True if pipe.returncode == 0 else False
return has_avx return has_avx
elif sysstr == 'windows': elif sysstr == 'windows':
import ctypes import ctypes
ONE_PAGE = ctypes.c_size_t(0x1000) ONE_PAGE = ctypes.c_size_t(0x1000)
def asm_func(code_str, restype=ctypes.c_uint32, argtypes=()): def asm_func(code_str, restype=ctypes.c_uint32, argtypes=()):
...@@ -109,24 +122,31 @@ def avx_supported(): ...@@ -109,24 +122,31 @@ def avx_supported():
pfnVirtualAlloc.restype = ctypes.c_void_p pfnVirtualAlloc.restype = ctypes.c_void_p
MEM_COMMIT = ctypes.c_ulong(0x1000) MEM_COMMIT = ctypes.c_ulong(0x1000)
PAGE_READWRITE = ctypes.c_ulong(0x4) PAGE_READWRITE = ctypes.c_ulong(0x4)
address = pfnVirtualAlloc(None, ONE_PAGE, MEM_COMMIT, address = pfnVirtualAlloc(
PAGE_READWRITE) None, ONE_PAGE, MEM_COMMIT, PAGE_READWRITE
)
if not address: if not address:
raise Exception("Failed to VirtualAlloc") raise Exception("Failed to VirtualAlloc")
# Copy the code into the memory segment # Copy the code into the memory segment
memmove = ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.c_void_p, memmove = ctypes.CFUNCTYPE(
ctypes.c_void_p, ctypes.c_void_p,
ctypes.c_size_t)(ctypes._memmove_addr) ctypes.c_void_p,
ctypes.c_void_p,
ctypes.c_size_t,
)(ctypes._memmove_addr)
if memmove(address, code_str, len(code_str)) < 0: if memmove(address, code_str, len(code_str)) < 0:
raise Exception("Failed to memmove") raise Exception("Failed to memmove")
# Enable execute permissions # Enable execute permissions
PAGE_EXECUTE = ctypes.c_ulong(0x10) PAGE_EXECUTE = ctypes.c_ulong(0x10)
pfnVirtualProtect = ctypes.windll.kernel32.VirtualProtect pfnVirtualProtect = ctypes.windll.kernel32.VirtualProtect
res = pfnVirtualProtect(ctypes.c_void_p(address), res = pfnVirtualProtect(
ONE_PAGE, PAGE_EXECUTE, ctypes.c_void_p(address),
ctypes.byref(ctypes.c_ulong(0))) ONE_PAGE,
PAGE_EXECUTE,
ctypes.byref(ctypes.c_ulong(0)),
)
if not res: if not res:
raise Exception("Failed VirtualProtect") raise Exception("Failed VirtualProtect")
...@@ -135,7 +155,8 @@ def avx_supported(): ...@@ -135,7 +155,8 @@ def avx_supported():
pfnGetCurrentProcess.restype = ctypes.c_void_p pfnGetCurrentProcess.restype = ctypes.c_void_p
prochandle = ctypes.c_void_p(pfnGetCurrentProcess()) prochandle = ctypes.c_void_p(pfnGetCurrentProcess())
res = ctypes.windll.kernel32.FlushInstructionCache( res = ctypes.windll.kernel32.FlushInstructionCache(
prochandle, ctypes.c_void_p(address), ONE_PAGE) prochandle, ctypes.c_void_p(address), ONE_PAGE
)
if not res: if not res:
raise Exception("Failed FlushInstructionCache") raise Exception("Failed FlushInstructionCache")
...@@ -153,12 +174,14 @@ def avx_supported(): ...@@ -153,12 +174,14 @@ def avx_supported():
# Convert the code_str into a function that returns uint # Convert the code_str into a function that returns uint
func, address = asm_func(code_str) func, address = asm_func(code_str)
retval = func() retval = func()
ctypes.windll.kernel32.VirtualFree(ctypes.c_void_p(address), ctypes.windll.kernel32.VirtualFree(
ctypes.c_size_t(0), ONE_PAGE) ctypes.c_void_p(address), ctypes.c_size_t(0), ONE_PAGE
)
except Exception as e: except Exception as e:
sys.stderr.write('Failed getting the AVX flag on Windows.\n' sys.stderr.write(
'The original error is: %s\n' % 'Failed getting the AVX flag on Windows.\n'
cpt.get_exception_message(e)) 'The original error is: %s\n' % cpt.get_exception_message(e)
)
return (retval & (1 << avx_bit)) > 0 return (retval & (1 << avx_bit)) > 0
else: else:
sys.stderr.write('Do not get AVX flag on %s\n' % sysstr) sys.stderr.write('Do not get AVX flag on %s\n' % sysstr)
...@@ -167,10 +190,10 @@ def avx_supported(): ...@@ -167,10 +190,10 @@ def avx_supported():
def run_shell_command(cmd): def run_shell_command(cmd):
import subprocess import subprocess
out, err = subprocess.Popen(cmd,
stdout=subprocess.PIPE, out, err = subprocess.Popen(
stderr=subprocess.PIPE, cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True
shell=True).communicate() ).communicate()
if err: if err:
return None return None
else: else:
...@@ -179,8 +202,9 @@ def run_shell_command(cmd): ...@@ -179,8 +202,9 @@ def run_shell_command(cmd):
def get_dso_path(core_so, dso_name):
    """Look up a dependency of ``core_so`` through ``ldd``.

    Builds the pipeline ``ldd <core_so> | grep <dso_name> | awk '{print $3}'``
    (third whitespace-separated field of every matching line) and hands it
    to ``run_shell_command``.

    Args:
        core_so: Path of the shared object to inspect.
        dso_name: Pattern given to ``grep`` to select the dependency line.

    Returns:
        Whatever ``run_shell_command`` returns for the pipeline, or ``None``
        when either argument is empty or ``None``.
    """
    if not (core_so and dso_name):
        return None

    # Imported locally so the block does not depend on a file-level import.
    import shlex

    # The command is executed through a shell, so both operands are quoted:
    # a path containing spaces or shell metacharacters would otherwise be
    # split or interpreted instead of being passed through verbatim.
    return run_shell_command(
        "ldd %s|grep %s|awk '{print $3}'"
        % (shlex.quote(core_so), shlex.quote(dso_name))
    )
...@@ -189,6 +213,7 @@ def load_dso(dso_absolute_path): ...@@ -189,6 +213,7 @@ def load_dso(dso_absolute_path):
if dso_absolute_path: if dso_absolute_path:
try: try:
from ctypes import cdll from ctypes import cdll
cdll.LoadLibrary(dso_absolute_path) cdll.LoadLibrary(dso_absolute_path)
except: except:
warnings.warn("Load {} failed".format(dso_absolute_path)) warnings.warn("Load {} failed".format(dso_absolute_path))
...@@ -247,12 +272,14 @@ if platform.system().lower() == 'linux': ...@@ -247,12 +272,14 @@ if platform.system().lower() == 'linux':
try: try:
from . import libpaddle from . import libpaddle
if avx_supported() and not libpaddle.is_compiled_with_avx(): if avx_supported() and not libpaddle.is_compiled_with_avx():
sys.stderr.write( sys.stderr.write(
"Hint: Your machine support AVX, but the installed paddlepaddle doesn't have avx core. " "Hint: Your machine support AVX, but the installed paddlepaddle doesn't have avx core. "
"Hence, no-avx core with worse preformance will be imported.\nIf you like, you could " "Hence, no-avx core with worse preformance will be imported.\nIf you like, you could "
"reinstall paddlepaddle by 'python -m pip install --force-reinstall paddlepaddle-gpu[==version]' " "reinstall paddlepaddle by 'python -m pip install --force-reinstall paddlepaddle-gpu[==version]' "
"to get better performance.\n") "to get better performance.\n"
)
# assign tensor alias # assign tensor alias
libpaddle.LoDTensor = libpaddle.Tensor libpaddle.LoDTensor = libpaddle.Tensor
...@@ -283,6 +310,7 @@ try: ...@@ -283,6 +310,7 @@ try:
from .libpaddle import _Profiler, _ProfilerResult, _RecordEvent from .libpaddle import _Profiler, _ProfilerResult, _RecordEvent
from .libpaddle import _set_current_stream from .libpaddle import _set_current_stream
from .libpaddle import _get_phi_kernel_name from .libpaddle import _get_phi_kernel_name
if sys.platform != 'win32': if sys.platform != 'win32':
from .libpaddle import _set_process_pids from .libpaddle import _set_process_pids
from .libpaddle import _erase_process_pids from .libpaddle import _erase_process_pids
...@@ -295,12 +323,18 @@ try: ...@@ -295,12 +323,18 @@ try:
except Exception as e: except Exception as e:
if has_paddle_dy_lib: if has_paddle_dy_lib:
sys.stderr.write( sys.stderr.write(
'Error: Can not import paddle core while this file exists: ' + 'Error: Can not import paddle core while this file exists: '
current_path + os.sep + 'libpaddle.' + dy_lib_suffix + '\n') + current_path
+ os.sep
+ 'libpaddle.'
+ dy_lib_suffix
+ '\n'
)
if not avx_supported() and libpaddle.is_compiled_with_avx(): if not avx_supported() and libpaddle.is_compiled_with_avx():
sys.stderr.write( sys.stderr.write(
"Error: Your machine doesn't support AVX, but the installed PaddlePaddle is avx core, " "Error: Your machine doesn't support AVX, but the installed PaddlePaddle is avx core, "
"you should reinstall paddlepaddle with no-avx core.\n") "you should reinstall paddlepaddle with no-avx core.\n"
)
raise e raise e
...@@ -317,22 +351,26 @@ def set_paddle_custom_device_lib_path(lib_path): ...@@ -317,22 +351,26 @@ def set_paddle_custom_device_lib_path(lib_path):
# set paddle lib path # set paddle lib path
def set_paddle_lib_path():
    """Find the installed ``paddle/libs`` directory and register it.

    Each global site-packages directory is probed first; the user
    site-packages directory is the fallback. For the first existing
    ``<site>/paddle/libs`` directory, the directory is passed to
    ``_set_paddle_lib_path`` and ``<libs>/../../paddle-plugins`` is passed to
    ``set_paddle_custom_device_lib_path``. If no directory exists, nothing
    is registered.

    Returns:
        None.
    """

    def _register(lib_dir):
        """Register ``lib_dir`` when it exists; return whether it did."""
        if not os.path.exists(lib_dir):
            return False
        _set_paddle_lib_path(lib_dir)
        set_paddle_custom_device_lib_path(
            os.path.sep.join([lib_dir, '..', '..', 'paddle-plugins'])
        )
        return True

    if hasattr(site, 'getsitepackages'):
        site_dirs = site.getsitepackages()
    else:
        # Fallback when the site module offers no getsitepackages():
        # scan sys.path for site-packages entries instead.
        site_dirs = [x for x in sys.path if 'site-packages' in x]

    for site_dir in site_dirs:
        if _register(os.path.sep.join([site_dir, 'paddle', 'libs'])):
            # Only the first matching installation is registered.
            return

    # site.USER_SITE is documented as possibly None (until
    # getusersitepackages() has been called); joining None with the path
    # parts would raise TypeError, so the fallback is skipped in that case.
    user_site = getattr(site, 'USER_SITE', None)
    if user_site is not None:
        _register(os.path.sep.join([user_site, 'paddle', 'libs']))
set_paddle_lib_path() set_paddle_lib_path()
...@@ -18,19 +18,32 @@ import six ...@@ -18,19 +18,32 @@ import six
import paddle import paddle
from paddle.fluid import framework, backward, core, program_guard from paddle.fluid import framework, backward, core, program_guard
from paddle.fluid.executor import _is_enable_standalone_executor, _is_dy2st_enable_standalone_executor from paddle.fluid.executor import (
_is_enable_standalone_executor,
_is_dy2st_enable_standalone_executor,
)
from paddle.fluid.dygraph import layers from paddle.fluid.dygraph import layers
from paddle.fluid.dygraph.base import switch_to_static_graph from paddle.fluid.dygraph.base import switch_to_static_graph
from paddle.fluid.dygraph.dygraph_to_static import logging_utils from paddle.fluid.dygraph.dygraph_to_static import logging_utils
from paddle.fluid.dygraph.dygraph_to_static.return_transformer import RETURN_NO_VALUE_MAGIC_NUM from paddle.fluid.dygraph.dygraph_to_static.return_transformer import (
RETURN_NO_VALUE_MAGIC_NUM,
)
from paddle.fluid.layers.utils import flatten from paddle.fluid.layers.utils import flatten
from paddle.fluid.layers.utils import pack_sequence_as from paddle.fluid.layers.utils import pack_sequence_as
from paddle.fluid.layers.utils import _hash_with_id from paddle.fluid.layers.utils import _hash_with_id
from paddle.fluid.compiler import BuildStrategy from paddle.fluid.compiler import BuildStrategy
from paddle.fluid.framework import _apply_pass from paddle.fluid.framework import _apply_pass
from paddle.fluid.contrib.mixed_precision.decorator import AutoMixedPrecisionLists from paddle.fluid.contrib.mixed_precision.decorator import (
from paddle.fluid.contrib.mixed_precision.fp16_utils import rewrite_program, cast_model_to_fp16 AutoMixedPrecisionLists,
from paddle.fluid.dygraph.amp.auto_cast import _in_amp_guard, _in_pure_fp16_guard )
from paddle.fluid.contrib.mixed_precision.fp16_utils import (
rewrite_program,
cast_model_to_fp16,
)
from paddle.fluid.dygraph.amp.auto_cast import (
_in_amp_guard,
_in_pure_fp16_guard,
)
import paddle.compat as cpt import paddle.compat as cpt
from paddle import _C_ops, _legacy_C_ops from paddle import _C_ops, _legacy_C_ops
...@@ -64,7 +77,8 @@ class NestSequence(object): ...@@ -64,7 +77,8 @@ class NestSequence(object):
var_ids = [] var_ids = []
for idx, var in enumerate(self.__input_list): for idx, var in enumerate(self.__input_list):
if isinstance( if isinstance(
var, (framework.Variable, core.VarBase, core.eager.Tensor)): var, (framework.Variable, core.VarBase, core.eager.Tensor)
):
var_ids.append(idx) var_ids.append(idx)
return var_ids return var_ids
...@@ -77,15 +91,17 @@ class NestSequence(object): ...@@ -77,15 +91,17 @@ class NestSequence(object):
warning_types = set() warning_types = set()
for var in self.__input_list: for var in self.__input_list:
if not isinstance( if not isinstance(
var, var, (framework.Variable, core.VarBase, core.eager.Tensor)
(framework.Variable, core.VarBase, core.eager.Tensor)): ):
warning_types.add(type(var)) warning_types.add(type(var))
if warning_types: if warning_types:
logging_utils.warn( logging_utils.warn(
"Output of traced function contains non-tensor type values: {}. " "Output of traced function contains non-tensor type values: {}. "
"Currently, We don't support to update them while training and will return " "Currently, We don't support to update them while training and will return "
"what we first saw. Please try to return them as tensor.". "what we first saw. Please try to return them as tensor.".format(
format(list(warning_types))) list(warning_types)
)
)
@property @property
def var_ids(self): def var_ids(self):
...@@ -139,12 +155,9 @@ class PartialProgramLayer: ...@@ -139,12 +155,9 @@ class PartialProgramLayer:
Layer: A Layer object that run all ops internally in static mode. Layer: A Layer object that run all ops internally in static mode.
""" """
def __init__(self, def __init__(
main_program, self, main_program, inputs, outputs, parameters=None, **kwargs
inputs, ):
outputs,
parameters=None,
**kwargs):
super(PartialProgramLayer, self).__init__() super(PartialProgramLayer, self).__init__()
self._inputs = NestSequence(inputs) self._inputs = NestSequence(inputs)
self._outputs = NestSequence(outputs, need_check=True) self._outputs = NestSequence(outputs, need_check=True)
...@@ -167,7 +180,8 @@ class PartialProgramLayer: ...@@ -167,7 +180,8 @@ class PartialProgramLayer:
# For AMP training # For AMP training
self._amp_list = AutoMixedPrecisionLists( self._amp_list = AutoMixedPrecisionLists(
custom_white_list=custom_white_list, custom_white_list=custom_white_list,
custom_black_list=custom_black_list) custom_black_list=custom_black_list,
)
# program_id -> list(scope) # program_id -> list(scope)
self._scope_cache = {} self._scope_cache = {}
...@@ -188,10 +202,6 @@ class PartialProgramLayer: ...@@ -188,10 +202,6 @@ class PartialProgramLayer:
else: else:
return core.Scope() return core.Scope()
@LazyInitialized
def __fake_vars(self):
return _create_fake_var()
@LazyInitialized
def _double_grads(self):
    """Result of ``_get_double_grads`` applied to the original main program."""
    origin_program = self._origin_main_program
    return self._get_double_grads(origin_program)
...@@ -203,7 +213,8 @@ class PartialProgramLayer: ...@@ -203,7 +213,8 @@ class PartialProgramLayer:
return self._origin_main_program.clone(for_test=is_infer_mode) return self._origin_main_program.clone(for_test=is_infer_mode)
else: else:
train_program = self._append_backward_desc( train_program = self._append_backward_desc(
self._origin_main_program) self._origin_main_program
)
# Note: Only set grad type once after initializing train program. So we put it here. # Note: Only set grad type once after initializing train program. So we put it here.
self._set_grad_type(self._params, train_program) self._set_grad_type(self._params, train_program)
return train_program return train_program
...@@ -223,16 +234,18 @@ class PartialProgramLayer: ...@@ -223,16 +234,18 @@ class PartialProgramLayer:
@switch_to_static_graph
def _create_pure_fp16_program(self, is_infer_mode=False):
    """Build the pure-fp16 variant of the original main program.

    The original main program is cloned with ``for_test=is_infer_mode`` and
    ``cast_model_to_fp16`` is applied to the clone (using ``self._amp_list``
    and ``use_fp16_guard=False``) under ``program_guard``.

    Args:
        is_infer_mode: When true, the cast clone is returned directly.
            Otherwise the backward description is appended and the gradient
            types of ``self._params`` are set on the resulting program.

    Returns:
        The cast clone in inference mode, else the program returned by
        ``_append_backward_desc``.
    """
    fp16_program = self._origin_main_program.clone(for_test=is_infer_mode)
    with program_guard(fp16_program):
        cast_model_to_fp16(fp16_program, self._amp_list, use_fp16_guard=False)

    if is_infer_mode:
        return fp16_program

    # Training path: extend the cast program with its backward part.
    train_program = self._append_backward_desc(fp16_program)
    self._set_grad_type(self._params, train_program)
    return train_program
...@@ -240,23 +253,27 @@ class PartialProgramLayer: ...@@ -240,23 +253,27 @@ class PartialProgramLayer:
def _create_forward_backward_train_program(self):
    """
    Build the forward/backward program pair for the default training program.

    The whole program comes from ``self._create_program()``. The op count of
    block 0 of ``self._infer_program`` is used as the index at which the
    forward section ends.

    Returns:
        Whatever ``self._get_forward_backward_program_form`` returns for the
        whole program and that forward end index.
    """
    whole_program = self._create_program()
    forward_end_op_index = self._infer_program.desc.block(0).op_size()
    return self._get_forward_backward_program_form(
        whole_program, forward_end_op_index
    )
@switch_to_static_graph
def _create_forward_backward_train_amp_program(self):
    """
    Build the forward/backward program pair for the AMP training program.

    The whole program comes from ``self._create_amp_program()``. The op count
    of block 0 of ``self._infer_amp_program`` is used as the index at which
    the forward section ends.

    Returns:
        Whatever ``self._get_forward_backward_program_form`` returns for the
        whole program and that forward end index.
    """
    whole_program = self._create_amp_program()
    forward_end_op_index = self._infer_amp_program.desc.block(0).op_size()
    return self._get_forward_backward_program_form(
        whole_program, forward_end_op_index
    )
@switch_to_static_graph
def _create_forward_backward_train_pure_fp16_program(self):
    """
    Build the forward/backward program pair for the pure-fp16 training program.

    The whole program comes from ``self._create_pure_fp16_program()``. The op
    count of block 0 of ``self._infer_pure_fp16_program`` is used as the index
    at which the forward section ends.

    Returns:
        Whatever ``self._get_forward_backward_program_form`` returns for the
        whole program and that forward end index.
    """
    whole_program = self._create_pure_fp16_program()
    infer_block = self._infer_pure_fp16_program.desc.block(0)
    forward_end_op_index = infer_block.op_size()
    return self._get_forward_backward_program_form(
        whole_program, forward_end_op_index
    )
@LazyInitialized @LazyInitialized
def _train_program(self): def _train_program(self):
...@@ -352,8 +369,9 @@ class PartialProgramLayer: ...@@ -352,8 +369,9 @@ class PartialProgramLayer:
@LazyInitialized
def _train_program_id(self):
    """
    Compute the id of the training program and register its build strategy.

    The id is ``_hash_with_id(self._train_program, self)``. Before it is
    returned, ``self._build_strategy`` is registered under that id through
    ``core._set_cached_executor_build_strategy``.

    Returns:
        The program id produced by ``_hash_with_id``.
    """
    program_id = _hash_with_id(self._train_program, self)
    core._set_cached_executor_build_strategy(
        program_id, self._build_strategy
    )
    return program_id
@LazyInitialized @LazyInitialized
...@@ -363,8 +381,9 @@ class PartialProgramLayer: ...@@ -363,8 +381,9 @@ class PartialProgramLayer:
@LazyInitialized
def _train_amp_program_id(self):
    """
    Compute the id of the AMP training program and register its build strategy.

    The id is ``_hash_with_id(self._train_amp_program, self)``. Before it is
    returned, ``self._build_strategy`` is registered under that id through
    ``core._set_cached_executor_build_strategy``.

    Returns:
        The program id produced by ``_hash_with_id``.
    """
    program_id = _hash_with_id(self._train_amp_program, self)
    core._set_cached_executor_build_strategy(
        program_id, self._build_strategy
    )
    return program_id
@LazyInitialized @LazyInitialized
...@@ -374,8 +393,9 @@ class PartialProgramLayer: ...@@ -374,8 +393,9 @@ class PartialProgramLayer:
@LazyInitialized
def _train_pure_fp16_program_id(self):
    """
    Compute the id of the pure-fp16 training program and register its build
    strategy.

    The id is ``_hash_with_id(self._train_pure_fp16_program, self)``. Before
    it is returned, ``self._build_strategy`` is registered under that id
    through ``core._set_cached_executor_build_strategy``.

    Returns:
        The program id produced by ``_hash_with_id``.
    """
    program_id = _hash_with_id(self._train_pure_fp16_program, self)
    core._set_cached_executor_build_strategy(
        program_id, self._build_strategy
    )
    return program_id
@LazyInitialized @LazyInitialized
...@@ -411,8 +431,9 @@ class PartialProgramLayer: ...@@ -411,8 +431,9 @@ class PartialProgramLayer:
return main_program return main_program
def prepare_gradient_aggregation(self, start_idx, main_program, def prepare_gradient_aggregation(
target_program): self, start_idx, main_program, target_program
):
""" """
Why we need add gradient aggregation operation ? Why we need add gradient aggregation operation ?
In some cases, if non leaf nodes are used as output, gradient overwriting will occur, such as In some cases, if non leaf nodes are used as output, gradient overwriting will occur, such as
...@@ -420,7 +441,7 @@ class PartialProgramLayer: ...@@ -420,7 +441,7 @@ class PartialProgramLayer:
x = 2 * in # <---- x is a non-leaf node in program. x = 2 * in # <---- x is a non-leaf node in program.
y = x + 3 y = x + 3
return x, y return x, y
loss = forward(in)[0].sum() loss = forward(in)[0].sum()
loss.backward() # <----- x@grad will be overwrited by elementwise_add_grad Op loss.backward() # <----- x@grad will be overwrited by elementwise_add_grad Op
""" """
...@@ -430,8 +451,8 @@ class PartialProgramLayer: ...@@ -430,8 +451,8 @@ class PartialProgramLayer:
if exist a op whose inputs is var, then return True if exist a op whose inputs is var, then return True
""" """
if not isinstance(var, framework.Variable) or var.type not in [ if not isinstance(var, framework.Variable) or var.type not in [
core.VarDesc.VarType.LOD_TENSOR, core.VarDesc.VarType.LOD_TENSOR,
core.VarDesc.VarType.SELECTED_ROWS core.VarDesc.VarType.SELECTED_ROWS,
]: ]:
return False return False
if var.dtype not in [paddle.float32, paddle.float64]: if var.dtype not in [paddle.float32, paddle.float64]:
...@@ -448,20 +469,28 @@ class PartialProgramLayer: ...@@ -448,20 +469,28 @@ class PartialProgramLayer:
new_grad_name = var.name + suffix + "@GRAD" new_grad_name = var.name + suffix + "@GRAD"
finded_ops = list( finded_ops = list(
filter( filter(
lambda x: x[0] >= start_idx and any([ lambda x: x[0] >= start_idx
out_arg == var_grad_name and any(
for out_arg in x[1].output_arg_names [
]), enumerate(target_program.block(0).ops))) out_arg == var_grad_name
for out_arg in x[1].output_arg_names
]
),
enumerate(target_program.block(0).ops),
)
)
# len(finded_ops) may equals zero when stop_gradient works. # len(finded_ops) may equals zero when stop_gradient works.
# len(finded_ops) may > 1, because we may have fill_constant op. # len(finded_ops) may > 1, because we may have fill_constant op.
if len(finded_ops) == 0: if len(finded_ops) == 0:
return None return None
# step1: create a new var named var.name@GRAD # step1: create a new var named var.name@GRAD
target_program.block(0).create_var(name=new_grad_name, target_program.block(0).create_var(
type=var.type, name=new_grad_name,
dtype=var.dtype, type=var.type,
shape=var.shape) dtype=var.dtype,
shape=var.shape,
)
# step2: rename the var.name@GRAD to var.name@GRAD@dy2static # step2: rename the var.name@GRAD to var.name@GRAD@dy2static
for idx, op in finded_ops: for idx, op in finded_ops:
op._rename_input(var_grad_name, new_grad_name) op._rename_input(var_grad_name, new_grad_name)
...@@ -472,11 +501,13 @@ class PartialProgramLayer: ...@@ -472,11 +501,13 @@ class PartialProgramLayer:
finded_ops[-1][0] + 1, finded_ops[-1][0] + 1,
type='sum', type='sum',
inputs={'X': [var_grad_name, new_grad_name]}, inputs={'X': [var_grad_name, new_grad_name]},
outputs={"Out": var_grad_name}) outputs={"Out": var_grad_name},
)
return None return None
to_processed_vars = list( to_processed_vars = list(
filter(_need_aggregation, self._outputs.tolist())) filter(_need_aggregation, self._outputs.tolist())
)
for _var in to_processed_vars: for _var in to_processed_vars:
_insert_aggregation_ops_for_var(target_program, _var) _insert_aggregation_ops_for_var(target_program, _var)
...@@ -489,11 +520,12 @@ class PartialProgramLayer: ...@@ -489,11 +520,12 @@ class PartialProgramLayer:
if isinstance(out, framework.Variable): if isinstance(out, framework.Variable):
targets.append(program.global_block().var(out.name)) targets.append(program.global_block().var(out.name))
if targets and self._params: if targets:
backward.gradients(targets=targets, inputs=[]) backward.gradients(targets=targets, inputs=[])
start_idx = len( start_idx = len(main_program.block(0).ops) + 2 * len(
main_program.block(0).ops) + 2 * len(self._outputs.tolist()) self._outputs.tolist()
)
self.prepare_gradient_aggregation(start_idx, main_program, program) self.prepare_gradient_aggregation(start_idx, main_program, program)
...@@ -512,7 +544,10 @@ class PartialProgramLayer: ...@@ -512,7 +544,10 @@ class PartialProgramLayer:
found_param = False found_param = False
for block in program.blocks: for block in program.blocks:
for op in block.ops: for op in block.ops:
if param.name in op.input_arg_names or param.name in op.output_arg_names: if (
param.name in op.input_arg_names
or param.name in op.output_arg_names
):
required_params.append(param) required_params.append(param)
found_param = True found_param = True
break break
...@@ -529,15 +564,21 @@ class PartialProgramLayer: ...@@ -529,15 +564,21 @@ class PartialProgramLayer:
var_desc = block.vars[name].desc var_desc = block.vars[name].desc
var_base = None var_base = None
if not framework._in_eager_mode_: if not framework._in_eager_mode_:
var_base = core.VarBase(var_desc.dtype(), var_base = core.VarBase(
var_desc.shape(), var_desc.dtype(),
var_desc.name(), var_desc.shape(),
var_desc.type(), False) var_desc.name(),
var_desc.type(),
False,
)
else: else:
var_base = core.eager.Tensor(var_desc.dtype(), var_base = core.eager.Tensor(
var_desc.shape(), var_desc.dtype(),
var_desc.name(), var_desc.shape(),
var_desc.type(), False) var_desc.name(),
var_desc.type(),
False,
)
double_grads.append(var_base) double_grads.append(var_base)
return self._valid_vars(double_grads) return self._valid_vars(double_grads)
...@@ -557,36 +598,62 @@ class PartialProgramLayer: ...@@ -557,36 +598,62 @@ class PartialProgramLayer:
attrs = [ attrs = [
'global_block', 'global_block',
self.program.desc.block(0), 'start_op_index', 0, 'end_op_index', self.program.desc.block(0),
self._get_end_op_index(), 'is_test', not self.training, 'start_op_index',
'program_id', self.program_id 0,
'end_op_index',
self._get_end_op_index(),
'is_test',
not self.training,
'program_id',
self.program_id,
] ]
if self._cuda_graph_capture_mode: if self._cuda_graph_capture_mode:
attrs.extend( attrs.extend(
('cuda_graph_capture_mode', self._cuda_graph_capture_mode, (
'cuda_graph_pool_id', self._cuda_graph_pool_id)) 'cuda_graph_capture_mode',
self._cuda_graph_capture_mode,
use_interpretorcore = _is_enable_standalone_executor( 'cuda_graph_pool_id',
) and _is_dy2st_enable_standalone_executor() self._cuda_graph_pool_id,
)
)
use_interpretorcore = (
_is_enable_standalone_executor()
and _is_dy2st_enable_standalone_executor()
)
attrs.extend(('use_interpretorcore', use_interpretorcore)) attrs.extend(('use_interpretorcore', use_interpretorcore))
if use_interpretorcore: if use_interpretorcore:
attrs.extend( attrs.extend(
('forward_global_block', self.forward_program.desc.block(0), (
'backward_global_block', self.backward_program.desc.block(0))) 'forward_global_block',
self.forward_program.desc.block(0),
'backward_global_block',
self.backward_program.desc.block(0),
)
)
_legacy_C_ops.run_program( _legacy_C_ops.run_program(
self._valid_vars(in_vars), self._valid_vars(self._params), self._valid_vars(in_vars),
self._valid_vars(self._params),
self._valid_vars(out_vars), self._valid_vars(out_vars),
self._create_scope_vec(program_id=self.program_id, self._create_scope_vec(
use_scope_cache=True), program_id=self.program_id, use_scope_cache=True
self._double_grads, self._cuda_graph_vec, *attrs) ),
self._double_grads,
self._cuda_graph_vec,
*attrs
)
else: else:
_legacy_C_ops.run_program(self._valid_vars(in_vars), _legacy_C_ops.run_program(
self._valid_vars(self._params), self._valid_vars(in_vars),
self._valid_vars(out_vars), self._valid_vars(self._params),
self._create_scope_vec(), self._valid_vars(out_vars),
self._double_grads, self._cuda_graph_vec, self._create_scope_vec(),
*attrs) self._double_grads,
self._cuda_graph_vec,
*attrs
)
restored_nest_out = self._restore_out(out_vars) restored_nest_out = self._restore_out(out_vars)
return self._remove_no_value(restored_nest_out) return self._remove_no_value(restored_nest_out)
...@@ -594,9 +661,11 @@ class PartialProgramLayer: ...@@ -594,9 +661,11 @@ class PartialProgramLayer:
if _in_pure_fp16_guard(): if _in_pure_fp16_guard():
for i, var in enumerate(in_vars): for i, var in enumerate(in_vars):
name = var.name name = var.name
if (self.program.global_block().has_var(name) if (
and self.program.global_block().var(name).dtype self.program.global_block().has_var(name)
== paddle.float16): and self.program.global_block().var(name).dtype
== paddle.float16
):
in_vars[i] = var.astype('float16') in_vars[i] = var.astype('float16')
in_vars[i].name = name in_vars[i].name = name
...@@ -627,25 +696,32 @@ class PartialProgramLayer: ...@@ -627,25 +696,32 @@ class PartialProgramLayer:
return self._infer_program return self._infer_program
@switch_to_static_graph
def _get_forward_backward_program_form(
    self, whole_program, forward_end_op_index
):
    """
    Split ``whole_program`` into separately built forward and backward programs.

    Both programs are produced by ``add_build_strategy_for`` with
    ``self._build_strategy`` and are then handed to
    ``self._apply_inplace_pass``.

    Args:
        whole_program: Program holding both the forward and backward ops.
        forward_end_op_index (int): Op index in block 0 at which the forward
            section ends.

    Returns:
        list: ``[forward_builded_program, backward_builded_program]``.
    """
    forward_builded_program = add_build_strategy_for(
        whole_program, 0, forward_end_op_index, self._build_strategy
    )

    # NOTE(review): the backward section starts two ops per output after the
    # forward end; presumably those ops are inserted per output when the
    # backward description is appended -- confirm against the caller.
    backward_start_op_index = forward_end_op_index + 2 * len(
        self._outputs.var_ids
    )
    backward_end_op_index = whole_program.desc.block(0).op_size()
    backward_builded_program = add_build_strategy_for(
        whole_program,
        backward_start_op_index,
        backward_end_op_index,
        self._build_strategy,
    )

    self._apply_inplace_pass(
        forward_builded_program, backward_builded_program
    )
    return [forward_builded_program, backward_builded_program]
def _apply_inplace_pass(self, forward_program, backward_program): def _apply_inplace_pass(self, forward_program, backward_program):
attr_types = { attr_types = {
"use_cuda": "bool", "use_cuda": "bool",
"mem_opt_skip_vars": "list[str]", "mem_opt_skip_vars": "list[str]",
"for_partial_block": "bool" "for_partial_block": "bool",
} }
empty_startup_program = paddle.static.Program() empty_startup_program = paddle.static.Program()
use_cuda = True if core.is_compiled_with_cuda() else False use_cuda = True if core.is_compiled_with_cuda() else False
...@@ -667,22 +743,33 @@ class PartialProgramLayer: ...@@ -667,22 +743,33 @@ class PartialProgramLayer:
forward_mem_opt_skip_vars.append(var.desc.name()) forward_mem_opt_skip_vars.append(var.desc.name())
backward_mem_opt_skip_vars.append(var.desc.name()) backward_mem_opt_skip_vars.append(var.desc.name())
for var_name in core.parse_safe_eager_deletion_skip_vars( for var_name in core.parse_safe_eager_deletion_skip_vars(
backward_program.desc): backward_program.desc
):
forward_mem_opt_skip_vars.append(var_name) forward_mem_opt_skip_vars.append(var_name)
attrs = { attrs = {
"use_cuda": use_cuda, "use_cuda": use_cuda,
"mem_opt_skip_vars": forward_mem_opt_skip_vars, "mem_opt_skip_vars": forward_mem_opt_skip_vars,
"for_partial_block": True "for_partial_block": True,
} }
_apply_pass(forward_program, empty_startup_program, _apply_pass(
"buffer_shared_inplace_pass", attrs, attr_types) forward_program,
empty_startup_program,
"buffer_shared_inplace_pass",
attrs,
attr_types,
)
attrs = { attrs = {
"use_cuda": use_cuda, "use_cuda": use_cuda,
"mem_opt_skip_vars": backward_mem_opt_skip_vars, "mem_opt_skip_vars": backward_mem_opt_skip_vars,
"for_partial_block": True "for_partial_block": True,
} }
_apply_pass(backward_program, empty_startup_program, _apply_pass(
"buffer_shared_inplace_pass", attrs, attr_types) backward_program,
empty_startup_program,
"buffer_shared_inplace_pass",
attrs,
attr_types,
)
def _prepare(self, inputs): def _prepare(self, inputs):
""" """
...@@ -698,23 +785,28 @@ class PartialProgramLayer: ...@@ -698,23 +785,28 @@ class PartialProgramLayer:
if isinstance(value, np.ndarray): if isinstance(value, np.ndarray):
var = None var = None
if not framework._in_eager_mode_: if not framework._in_eager_mode_:
var = core.VarBase(value=value, var = core.VarBase(
name=self._inputs[i].desc.name(), value=value,
persistable=False, name=self._inputs[i].desc.name(),
place=expected_place, persistable=False,
zero_copy=True) place=expected_place,
zero_copy=True,
)
else: else:
var = core.eager.Tensor(value=value, var = core.eager.Tensor(
name=self._inputs[i].desc.name(), value=value,
persistable=False, name=self._inputs[i].desc.name(),
place=expected_place, persistable=False,
zero_copy=True) place=expected_place,
zero_copy=True,
)
elif isinstance(value, (core.VarBase, core.eager.Tensor)): elif isinstance(value, (core.VarBase, core.eager.Tensor)):
# NOTE(Aurelius84): If var is on CPUPlace, it will be transformed multi times # NOTE(Aurelius84): If var is on CPUPlace, it will be transformed multi times
# into CUDAPlace when it's as input of multi Ops. so we move it in advance # into CUDAPlace when it's as input of multi Ops. so we move it in advance
# to avoid this problem. # to avoid this problem.
if value.stop_gradient and not value.place._equals( if value.stop_gradient and not value.place._equals(
expected_place): expected_place
):
var = value._copy_to(expected_place, False) var = value._copy_to(expected_place, False)
var.stop_gradient = True var.stop_gradient = True
else: else:
...@@ -737,12 +829,21 @@ class PartialProgramLayer: ...@@ -737,12 +829,21 @@ class PartialProgramLayer:
return out_varbase_map[var_desc.name()] return out_varbase_map[var_desc.name()]
if not framework._in_eager_mode_: if not framework._in_eager_mode_:
var_base = core.VarBase(var_desc.dtype(), var_desc.shape(), var_base = core.VarBase(
var_desc.name(), var_desc.type(), False) var_desc.dtype(),
var_desc.shape(),
var_desc.name(),
var_desc.type(),
False,
)
else: else:
var_base = core.eager.Tensor(var_desc.dtype(), var_desc.shape(), var_base = core.eager.Tensor(
var_desc.name(), var_desc.type(), var_desc.dtype(),
False) var_desc.shape(),
var_desc.name(),
var_desc.type(),
False,
)
var_base.stop_gradient = var.stop_gradient var_base.stop_gradient = var.stop_gradient
out_varbase_map[var_desc.name()] = var_base out_varbase_map[var_desc.name()] = var_base
return var_base return var_base
...@@ -755,20 +856,30 @@ class PartialProgramLayer: ...@@ -755,20 +856,30 @@ class PartialProgramLayer:
def _create_scope_vec(self, program_id=None, use_scope_cache=False):
    """
    Wrap the inner scope in the container expected by the current mode.

    The inner scope is fetched with ``self._get_scope`` using the given
    ``program_id`` and ``use_scope_cache``.

    Args:
        program_id: Forwarded to ``self._get_scope``.
        use_scope_cache (bool): Forwarded to ``self._get_scope``.

    Returns:
        When ``framework._in_eager_mode_`` is falsy, a ``core.VarBase`` of
        type ``STEP_SCOPES`` named ``"program_out_scope"`` whose value holds
        the inner scope; otherwise a one-element list containing the inner
        scope.
    """
    # Hold forward variables
    inner_scope = self._get_scope(
        program_id=program_id, use_scope_cache=use_scope_cache
    )
    if not framework._in_eager_mode_:
        tmp_scope_vec = core.VarBase(
            core.VarDesc.VarType.FP32,
            [],
            "program_out_scope",
            core.VarDesc.VarType.STEP_SCOPES,
            True,
        )
        tmp_scope_vec.value().set_scope(inner_scope)
    else:
        tmp_scope_vec = [inner_scope]
    return tmp_scope_vec
def _create_cuda_graph_vec(self):
    """
    Create the ``core.VarBase`` placeholder named ``"cuda_graph"``.

    Returns:
        A ``core.VarBase`` of var type ``RAW`` (dtype ``FP32``, empty shape)
        with ``stop_gradient`` set to True.
    """
    var = core.VarBase(
        core.VarDesc.VarType.FP32,
        [],
        "cuda_graph",
        core.VarDesc.VarType.RAW,
        True,
    )
    var.stop_gradient = True
    return var
...@@ -791,8 +902,9 @@ class PartialProgramLayer: ...@@ -791,8 +902,9 @@ class PartialProgramLayer:
return main_program.clone(for_test=True) return main_program.clone(for_test=True)
def _is_no_value(self, var): def _is_no_value(self, var):
if isinstance(var, if isinstance(var, (core.VarBase, core.eager.Tensor)) and var.shape == [
(core.VarBase, core.eager.Tensor)) and var.shape == [1]: 1
]:
# NOTE: .numpy() will insert MemcpySync operation, it hits performance. # NOTE: .numpy() will insert MemcpySync operation, it hits performance.
if var.numpy()[0] == RETURN_NO_VALUE_MAGIC_NUM: if var.numpy()[0] == RETURN_NO_VALUE_MAGIC_NUM:
return True return True
...@@ -808,13 +920,14 @@ class PartialProgramLayer: ...@@ -808,13 +920,14 @@ class PartialProgramLayer:
return out_vars return out_vars
elif isinstance(out_vars, (tuple, list)): elif isinstance(out_vars, (tuple, list)):
if isinstance(out_vars, tuple): if isinstance(out_vars, tuple):
res = tuple(var for var in out_vars res = tuple(
if not self._is_no_value(var)) var for var in out_vars if not self._is_no_value(var)
)
else: else:
# isinstance(out_vars, list) # isinstance(out_vars, list)
res = [var for var in out_vars if not self._is_no_value(var)] res = [var for var in out_vars if not self._is_no_value(var)]
has_removed = (len(out_vars) > len(res)) has_removed = len(out_vars) > len(res)
# len(out_vars) > len(res) means we have removed var. This is # len(out_vars) > len(res) means we have removed var. This is
# preventing out_vars is empty or just one element at the beginning # preventing out_vars is empty or just one element at the beginning
if len(res) == 0 and has_removed: if len(res) == 0 and has_removed:
...@@ -835,7 +948,8 @@ class PartialProgramLayer: ...@@ -835,7 +948,8 @@ class PartialProgramLayer:
for param in params: for param in params:
grad_name = param.name + core.grad_var_suffix() grad_name = param.name + core.grad_var_suffix()
grad_var = train_program.desc.block(0).find_var( grad_var = train_program.desc.block(0).find_var(
cpt.to_bytes(grad_name)) cpt.to_bytes(grad_name)
)
# NOTE: cannot find var desc maybe no problem, such as in batch_norm # NOTE: cannot find var desc maybe no problem, such as in batch_norm
if grad_var is None: if grad_var is None:
continue continue
...@@ -864,15 +978,18 @@ class PartialProgramLayer: ...@@ -864,15 +978,18 @@ class PartialProgramLayer:
if not isinstance(self._params, (list, tuple)): if not isinstance(self._params, (list, tuple)):
raise TypeError( raise TypeError(
"Type of self._params in PartialProgramLayer should be list or tuple, but received %s." "Type of self._params in PartialProgramLayer should be list or tuple, but received %s."
% type(self._params)) % type(self._params)
)
param_and_buffer_names_set = set() param_and_buffer_names_set = set()
for i, var in enumerate(self._params): for i, var in enumerate(self._params):
# self._params constains parameters and buffers with persistable=True. # self._params constains parameters and buffers with persistable=True.
if not isinstance(var, (core.VarBase, core.eager.Tensor)): if not isinstance(var, (core.VarBase, core.eager.Tensor)):
raise TypeError( raise TypeError(
'Type of self._params[{}] in PartialProgramLayer should be Parameter or Variable, but received {}.' 'Type of self._params[{}] in PartialProgramLayer should be Parameter or Variable, but received {}.'.format(
.format(i, type(var))) i, type(var)
)
)
param_and_buffer_names_set.add(var.name) param_and_buffer_names_set.add(var.name)
for block in main_program.blocks: for block in main_program.blocks:
...@@ -886,15 +1003,11 @@ class PartialProgramLayer: ...@@ -886,15 +1003,11 @@ class PartialProgramLayer:
"\n\tRevise suggestion: " "\n\tRevise suggestion: "
"\n\t\t1. Please ensure all your sublayers are inheritted from nn.Layer." "\n\t\t1. Please ensure all your sublayers are inheritted from nn.Layer."
"\n\t\t2. Please use nn.ParameterList and nn.LayerList as container instead of using a native Python container such as List" "\n\t\t2. Please use nn.ParameterList and nn.LayerList as container instead of using a native Python container such as List"
% name) % name
)
def _valid_vars(self, vars):
    """
    Normalize an empty variable collection to ``None``.

    Args:
        vars: Collection of variables; may be empty or ``None``.

    Returns:
        ``vars`` itself when it is truthy, otherwise ``None``.
    """
    return vars if vars else None
def _create_fake_var(): def _create_fake_var():
...@@ -903,13 +1016,23 @@ def _create_fake_var(): ...@@ -903,13 +1016,23 @@ def _create_fake_var():
""" """
if not framework._in_eager_mode_: if not framework._in_eager_mode_:
return [ return [
core.VarBase(core.VarDesc.VarType.FP32, [], "Fake_var", core.VarBase(
core.VarDesc.VarType.RAW, False) core.VarDesc.VarType.FP32,
[],
"Fake_var",
core.VarDesc.VarType.RAW,
False,
)
] ]
else: else:
return [ return [
core.eager.Tensor(core.VarDesc.VarType.FP32, [], "Fake_var", core.eager.Tensor(
core.VarDesc.VarType.RAW, False) core.VarDesc.VarType.FP32,
[],
"Fake_var",
core.VarDesc.VarType.RAW,
False,
)
] ]
...@@ -918,23 +1041,27 @@ def partial_program_from(concrete_program): ...@@ -918,23 +1041,27 @@ def partial_program_from(concrete_program):
if inputs and isinstance(inputs[0], layers.Layer): if inputs and isinstance(inputs[0], layers.Layer):
inputs = inputs[1:] inputs = inputs[1:]
return PartialProgramLayer(concrete_program.main_program, inputs, return PartialProgramLayer(
concrete_program.outputs, concrete_program.main_program,
concrete_program.parameters, inputs,
**concrete_program.kwargs) concrete_program.outputs,
concrete_program.parameters,
**concrete_program.kwargs
)
@switch_to_static_graph @switch_to_static_graph
def add_build_strategy_for(program, def add_build_strategy_for(
start_op_index, program, start_op_index, end_op_index, build_strategy=None
end_op_index, ):
build_strategy=None): if start_op_index < end_op_index:
if (start_op_index < end_op_index):
compiled_program = paddle.static.CompiledProgram( compiled_program = paddle.static.CompiledProgram(
core.Graph(program.desc, start_op_index, end_op_index), core.Graph(program.desc, start_op_index, end_op_index),
build_strategy=build_strategy) build_strategy=build_strategy,
compiled_program._compile(core.Scope(), )
framework._current_expected_place()) compiled_program._compile(
core.Scope(), framework._current_expected_place()
)
ir_graph = framework.IrGraph(compiled_program._graph) ir_graph = framework.IrGraph(compiled_program._graph)
builded_program = ir_graph.to_program() builded_program = ir_graph.to_program()
if hasattr(compiled_program._program, 'lr_sheduler'): if hasattr(compiled_program._program, 'lr_sheduler'):
......
...@@ -32,12 +32,25 @@ from . import parallel_helper ...@@ -32,12 +32,25 @@ from . import parallel_helper
from .. import unique_name from .. import unique_name
from paddle.fluid import core from paddle.fluid import core
from .layer_object_helper import LayerObjectHelper from .layer_object_helper import LayerObjectHelper
from .layer_hooks import record_program_ops_pre_hook, set_op_customized_attrs_post_hook, LayerOpsRecoder from .layer_hooks import (
from .base import program_desc_tracing_guard, param_guard, in_declarative_mode, _convert_into_variable record_program_ops_pre_hook,
set_op_customized_attrs_post_hook,
LayerOpsRecoder,
)
from .base import (
program_desc_tracing_guard,
param_guard,
in_declarative_mode,
_convert_into_variable,
)
from paddle.fluid import framework from paddle.fluid import framework
from ..param_attr import ParamAttr from ..param_attr import ParamAttr
from paddle.fluid.executor import Executor, global_scope from paddle.fluid.executor import Executor, global_scope
from paddle.fluid.framework import _non_static_mode, convert_np_dtype_to_dtype_, in_dygraph_mode from paddle.fluid.framework import (
_non_static_mode,
convert_np_dtype_to_dtype_,
in_dygraph_mode,
)
from paddle.fluid.framework import Program, program_guard from paddle.fluid.framework import Program, program_guard
from paddle.fluid.framework import _current_expected_place as _get_device from paddle.fluid.framework import _current_expected_place as _get_device
from paddle.fluid.core import VarDesc from paddle.fluid.core import VarDesc
...@@ -67,7 +80,7 @@ def _addindent(string, indent): ...@@ -67,7 +80,7 @@ def _addindent(string, indent):
class HookRemoveHelper(object): class HookRemoveHelper(object):
""" A HookRemoveHelper that can be used to remove hook. """ """A HookRemoveHelper that can be used to remove hook."""
next_hook_id = 0 next_hook_id = 0
...@@ -153,13 +166,14 @@ class Layer(object): ...@@ -153,13 +166,14 @@ class Layer(object):
def train(self): def train(self):
""" """
Sets this Layer and all its sublayers to training mode. Sets this Layer and all its sublayers to training mode.
This only effects certain modules like `Dropout` and `BatchNorm`. This only effects certain modules like `Dropout` and `BatchNorm`.
Returns: Returns:
None None
Example:: Examples:
.. code-block:: python .. code-block:: python
import paddle import paddle
...@@ -236,6 +250,7 @@ class Layer(object): ...@@ -236,6 +250,7 @@ class Layer(object):
def apply(self, fn): def apply(self, fn):
""" """
Applies ``fn`` recursively to every sublayer (as returned by ``.sublayers()``) Applies ``fn`` recursively to every sublayer (as returned by ``.sublayers()``)
as well as self. Typical use includes initializing the parameters of a model. as well as self. Typical use includes initializing the parameters of a model.
...@@ -243,7 +258,7 @@ class Layer(object): ...@@ -243,7 +258,7 @@ class Layer(object):
fn (function): a function to be applied to each sublayer fn (function): a function to be applied to each sublayer
Returns: Returns:
Layer: self Layer, self
Example:: Example::
.. code-block:: python .. code-block:: python
...@@ -263,6 +278,7 @@ class Layer(object): ...@@ -263,6 +278,7 @@ class Layer(object):
net.apply(init_weights) net.apply(init_weights)
print(net.state_dict()) print(net.state_dict())
""" """
for layer in self.children(): for layer in self.children():
layer.apply(fn) layer.apply(fn)
...@@ -272,10 +288,12 @@ class Layer(object): ...@@ -272,10 +288,12 @@ class Layer(object):
return self return self
def full_name(self): def full_name(self):
"""Full name for this layer, composed by name_scope + "/" + MyLayer.__class__.__name__ """
Full name for this layer, composed by name_scope + "/" + MyLayer.__class__.__name__
Returns: Returns:
str: full name of this layer. str, full name of this layer.
Example:: Example::
.. code-block:: python .. code-block:: python
...@@ -297,7 +315,9 @@ class Layer(object): ...@@ -297,7 +315,9 @@ class Layer(object):
return self._full_name return self._full_name
def register_forward_post_hook(self, hook): def register_forward_post_hook(self, hook):
"""Register a forward post-hook for Layer. The hook will be called after `forward` function has been computed. """
Register a forward post-hook for Layer. The hook will be called after `forward` function has been computed.
It should have the following form, `input` and `output` of the `hook` is `input` and `output` of the `Layer` respectively. It should have the following form, `input` and `output` of the `hook` is `input` and `output` of the `Layer` respectively.
User can use forward post-hook to change the output of the Layer or perform information statistics tasks on the Layer. User can use forward post-hook to change the output of the Layer or perform information statistics tasks on the Layer.
...@@ -308,7 +328,7 @@ class Layer(object): ...@@ -308,7 +328,7 @@ class Layer(object):
hook(function): a function registered as a forward post-hook hook(function): a function registered as a forward post-hook
Returns: Returns:
HookRemoveHelper: a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` . HookRemoveHelper, a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` .
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -340,13 +360,16 @@ class Layer(object): ...@@ -340,13 +360,16 @@ class Layer(object):
# hook change the linear's output to output * 2, so out0 is equal to out1 * 2. # hook change the linear's output to output * 2, so out0 is equal to out1 * 2.
assert (out0.numpy() == (out1.numpy()) * 2).any() assert (out0.numpy() == (out1.numpy()) * 2).any()
""" """
hook_remove_helper = HookRemoveHelper(self._forward_post_hooks) hook_remove_helper = HookRemoveHelper(self._forward_post_hooks)
self._forward_post_hooks[hook_remove_helper._hook_id] = hook self._forward_post_hooks[hook_remove_helper._hook_id] = hook
return hook_remove_helper return hook_remove_helper
def register_forward_pre_hook(self, hook): def register_forward_pre_hook(self, hook):
"""Register a forward pre-hook for Layer. The hook will be called before `forward` function has been computed. """
Register a forward pre-hook for Layer. The hook will be called before `forward` function has been computed.
It should have the following form, `input` of the `hook` is `input` of the `Layer`, It should have the following form, `input` of the `hook` is `input` of the `Layer`,
hook can either return a tuple or a single modified value in the hook. We will wrap the value into a tuple if hook can either return a tuple or a single modified value in the hook. We will wrap the value into a tuple if
...@@ -359,7 +382,7 @@ class Layer(object): ...@@ -359,7 +382,7 @@ class Layer(object):
hook(function): a function registered as a forward pre-hook hook(function): a function registered as a forward pre-hook
Returns: Returns:
HookRemoveHelper: a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` . HookRemoveHelper, a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` .
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -398,12 +421,14 @@ class Layer(object): ...@@ -398,12 +421,14 @@ class Layer(object):
self._forward_pre_hooks[hook_remove_helper._hook_id] = hook self._forward_pre_hooks[hook_remove_helper._hook_id] = hook
return hook_remove_helper return hook_remove_helper
def create_parameter(self, def create_parameter(
shape, self,
attr=None, shape,
dtype=None, attr=None,
is_bias=False, dtype=None,
default_initializer=None): is_bias=False,
default_initializer=None,
):
"""Create parameters for this layer. """Create parameters for this layer.
Parameters: Parameters:
...@@ -443,12 +468,15 @@ class Layer(object): ...@@ -443,12 +468,15 @@ class Layer(object):
temp_attr = copy.deepcopy(attr) temp_attr = copy.deepcopy(attr)
if isinstance(temp_attr, six.string_types) and temp_attr == "": if isinstance(temp_attr, six.string_types) and temp_attr == "":
temp_attr = None temp_attr = None
return self._helper.create_parameter(temp_attr, shape, dtype, is_bias, return self._helper.create_parameter(
default_initializer) temp_attr, shape, dtype, is_bias, default_initializer
)
@deprecated(since="2.0.0",
update_to="paddle.nn.Layer.create_tensor", @deprecated(
reason="New api in create_tensor, easier to use.") since="2.0.0",
update_to="paddle.nn.Layer.create_tensor",
reason="New api in create_tensor, easier to use.",
)
def create_variable(self, name=None, persistable=None, dtype=None): def create_variable(self, name=None, persistable=None, dtype=None):
""" """
...@@ -488,14 +516,16 @@ class Layer(object): ...@@ -488,14 +516,16 @@ class Layer(object):
if name is not None: if name is not None:
var_name = ".".join([self._full_name, name]) var_name = ".".join([self._full_name, name])
else: else:
var_name = unique_name.generate(".".join( var_name = unique_name.generate(
[self._full_name, "_generated_var"])) ".".join([self._full_name, "_generated_var"])
)
return self._helper.main_program.current_block().create_var( return self._helper.main_program.current_block().create_var(
name=var_name, name=var_name,
persistable=persistable, persistable=persistable,
dtype=dtype, dtype=dtype,
type=core.VarDesc.VarType.LOD_TENSOR) type=core.VarDesc.VarType.LOD_TENSOR,
)
# TODO: Add more parameter list when we need them # TODO: Add more parameter list when we need them
def create_tensor(self, name=None, persistable=None, dtype=None): def create_tensor(self, name=None, persistable=None, dtype=None):
...@@ -538,38 +568,46 @@ class Layer(object): ...@@ -538,38 +568,46 @@ class Layer(object):
if name is not None: if name is not None:
var_name = ".".join([self._full_name, name]) var_name = ".".join([self._full_name, name])
else: else:
var_name = unique_name.generate(".".join( var_name = unique_name.generate(
[self._full_name, "_generated_var"])) ".".join([self._full_name, "_generated_var"])
)
return self._helper.main_program.current_block().create_var( return self._helper.main_program.current_block().create_var(
name=var_name, name=var_name,
persistable=persistable, persistable=persistable,
dtype=dtype, dtype=dtype,
type=core.VarDesc.VarType.LOD_TENSOR) type=core.VarDesc.VarType.LOD_TENSOR,
)
def parameters(self, include_sublayers=True): def parameters(self, include_sublayers=True):
"""Returns a list of all Parameters from current layer and its sub-layers. """
Returns a list of all Parameters from current layer and its sub-layers.
Returns: Returns:
list of Tensor : a list of Parameters. list of Tensor, a list of Parameters.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle import paddle
linear = paddle.nn.Linear(1,1) linear = paddle.nn.Linear(1,1)
print(linear.parameters()) # print linear_0.w_0 and linear_0.b_0 print(linear.parameters()) # print linear_0.w_0 and linear_0.b_0
""" """
ret = [ ret = [
param for _, param in self.named_parameters( param
include_sublayers=include_sublayers) for _, param in self.named_parameters(
include_sublayers=include_sublayers
)
] ]
return ret return ret
def children(self): def children(self):
"""Returns an iterator over immediate children layers. """
Returns an iterator over immediate children layers.
Yields: Yields:
Layer: a child layer Layer: a child layer
...@@ -619,13 +657,15 @@ class Layer(object): ...@@ -619,13 +657,15 @@ class Layer(object):
yield name, layer yield name, layer
def sublayers(self, include_self=False): def sublayers(self, include_self=False):
"""Returns a list of sub layers. """
Returns a list of sub layers.
Parameters: Parameters:
include_self(bool, optional): Whether return self as sublayers. Default: False include_self(bool, optional): Whether return self as sublayers. Default: False
Returns: Returns:
list of Layer : a list of sub layers. list of Layer, a list of sub layers.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -678,9 +718,11 @@ class Layer(object): ...@@ -678,9 +718,11 @@ class Layer(object):
""" """
params_set = set() params_set = set()
named_sublayers = self.named_sublayers( named_sublayers = (
prefix=prefix, include_self=True) if include_sublayers else zip( self.named_sublayers(prefix=prefix, include_self=True)
[prefix], [self]) if include_sublayers
else zip([prefix], [self])
)
for layer_prefix, sublayer in named_sublayers: for layer_prefix, sublayer in named_sublayers:
params = sublayer._parameters.items() params = sublayer._parameters.items()
for key, param in params: for key, param in params:
...@@ -724,9 +766,9 @@ class Layer(object): ...@@ -724,9 +766,9 @@ class Layer(object):
if layer is None: if layer is None:
continue continue
layer_prefix = prefix + ('.' if prefix else '') + key layer_prefix = prefix + ('.' if prefix else '') + key
for p, l in layer.named_sublayers(prefix=layer_prefix, for p, l in layer.named_sublayers(
include_self=True, prefix=layer_prefix, include_self=True, layers_set=layers_set
layers_set=layers_set): ):
yield p, l yield p, l
def register_buffer(self, name, tensor, persistable=True): def register_buffer(self, name, tensor, persistable=True):
...@@ -769,25 +811,32 @@ class Layer(object): ...@@ -769,25 +811,32 @@ class Layer(object):
if '_buffers' not in self.__dict__: if '_buffers' not in self.__dict__:
raise ValueError( raise ValueError(
"super(YourLayer, self).__init__() should be called first") "super(YourLayer, self).__init__() should be called first"
)
elif not isinstance(name, six.string_types): elif not isinstance(name, six.string_types):
raise TypeError( raise TypeError(
"The name of buffer should be a string, but received {}.". "The name of buffer should be a string, but received {}.".format(
format(type(name).__name__)) type(name).__name__
)
)
elif '.' in name: elif '.' in name:
raise KeyError( raise KeyError(
"The name of buffer can not contain `.`, " "The name of buffer can not contain `.`, "
"because when you access the newly added buffer in the " "because when you access the newly added buffer in the "
"form of `self.**.**`, it will cause AttributeError.") "form of `self.**.**`, it will cause AttributeError."
)
elif name == '': elif name == '':
raise KeyError("The name of buffer can not be empty.") raise KeyError("The name of buffer can not be empty.")
elif hasattr(self, name) and name not in self._buffers: elif hasattr(self, name) and name not in self._buffers:
raise KeyError("attribute '{}' already exists.".format(name)) raise KeyError("attribute '{}' already exists.".format(name))
elif tensor is not None and not (type(tensor) == core.VarBase elif tensor is not None and not (
or type(tensor) == core.eager.Tensor): type(tensor) == core.VarBase or type(tensor) == core.eager.Tensor
):
raise TypeError( raise TypeError(
"The registered buffer should be a Paddle.Tensor, but received {}." "The registered buffer should be a Paddle.Tensor, but received {}.".format(
.format(type(tensor).__name__)) type(tensor).__name__
)
)
else: else:
self._buffers[name] = tensor self._buffers[name] = tensor
if persistable: if persistable:
...@@ -797,13 +846,14 @@ class Layer(object): ...@@ -797,13 +846,14 @@ class Layer(object):
def buffers(self, include_sublayers=True): def buffers(self, include_sublayers=True):
""" """
Returns a list of all buffers from current layer and its sub-layers. Returns a list of all buffers from current layer and its sub-layers.
Parameters: Parameters:
include_sublayers(bool, optional): Whether include the buffers of sublayers. If True, also include the buffers from sublayers. Default: True include_sublayers(bool, optional): Whether include the buffers of sublayers. If True, also include the buffers from sublayers. Default: True
Returns: Returns:
list of Tensor : a list of buffers. list of Tensor, a list of buffers.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -820,8 +870,10 @@ class Layer(object): ...@@ -820,8 +870,10 @@ class Layer(object):
""" """
ret = [ ret = [
buffer for _, buffer in self.named_buffers( buffer
include_sublayers=include_sublayers) for _, buffer in self.named_buffers(
include_sublayers=include_sublayers
)
] ]
return ret return ret
...@@ -862,9 +914,11 @@ class Layer(object): ...@@ -862,9 +914,11 @@ class Layer(object):
""" """
buffers_set = set() buffers_set = set()
named_sublayers = self.named_sublayers( named_sublayers = (
prefix=prefix, include_self=True) if include_sublayers else zip( self.named_sublayers(prefix=prefix, include_self=True)
[prefix], [self]) if include_sublayers
else zip([prefix], [self])
)
for layer_prefix, sublayer in named_sublayers: for layer_prefix, sublayer in named_sublayers:
buffers = sublayer._buffers.items() buffers = sublayer._buffers.items()
for key, buffer in buffers: for key, buffer in buffers:
...@@ -910,7 +964,7 @@ class Layer(object): ...@@ -910,7 +964,7 @@ class Layer(object):
hook_result = forward_pre_hook(self, inputs) hook_result = forward_pre_hook(self, inputs)
if hook_result is not None: if hook_result is not None:
if not isinstance(hook_result, tuple): if not isinstance(hook_result, tuple):
hook_result = (hook_result, ) hook_result = (hook_result,)
inputs = hook_result inputs = hook_result
if not self._built: if not self._built:
...@@ -920,16 +974,20 @@ class Layer(object): ...@@ -920,16 +974,20 @@ class Layer(object):
# TODO(liuyuhui) Only xpu broadcast parameters here. # TODO(liuyuhui) Only xpu broadcast parameters here.
# The other device is to call _sync_params_buffers in DataParallel # The other device is to call _sync_params_buffers in DataParallel
# to realize the parameter synchronization among multiply cards. # to realize the parameter synchronization among multiply cards.
if parallel_helper._is_data_parallel_mode( if (
) and paddle.is_compiled_with_xpu(): parallel_helper._is_data_parallel_mode()
and paddle.is_compiled_with_xpu()
):
parallel_helper._broadcast_parameters( parallel_helper._broadcast_parameters(
self._parameters.values()) self._parameters.values()
)
self._built = True self._built = True
if in_profiler_mode(): if in_profiler_mode():
with profiler.RecordEvent(self.__class__.__name__, with profiler.RecordEvent(
profiler.TracerEventType.Forward): self.__class__.__name__, profiler.TracerEventType.Forward
):
outputs = self.forward(*inputs, **kwargs) outputs = self.forward(*inputs, **kwargs)
else: else:
outputs = self.forward(*inputs, **kwargs) outputs = self.forward(*inputs, **kwargs)
...@@ -942,8 +1000,14 @@ class Layer(object): ...@@ -942,8 +1000,14 @@ class Layer(object):
return outputs return outputs
def __call__(self, *inputs, **kwargs): def __call__(self, *inputs, **kwargs):
if (not in_declarative_mode()) and (not self._forward_pre_hooks) \ if (
and (not self._forward_post_hooks) and (not self._built) and in_dygraph_mode() and (not in_profiler_mode()): (not in_declarative_mode())
and (not self._forward_pre_hooks)
and (not self._forward_post_hooks)
and (not self._built)
and in_dygraph_mode()
and (not in_profiler_mode())
):
self._build_once(*inputs, **kwargs) self._build_once(*inputs, **kwargs)
return self.forward(*inputs, **kwargs) return self.forward(*inputs, **kwargs)
else: else:
...@@ -964,7 +1028,9 @@ class Layer(object): ...@@ -964,7 +1028,9 @@ class Layer(object):
raise ValueError("Layer shouldn't implement backward") raise ValueError("Layer shouldn't implement backward")
def add_sublayer(self, name, sublayer): def add_sublayer(self, name, sublayer):
"""Adds a sub Layer instance. """
Adds a sub Layer instance.
Added sublayer can be accessed by self.name Added sublayer can be accessed by self.name
...@@ -972,7 +1038,7 @@ class Layer(object): ...@@ -972,7 +1038,7 @@ class Layer(object):
name(str): name of this sublayer. name(str): name of this sublayer.
sublayer(Layer): an instance of Layer. sublayer(Layer): an instance of Layer.
Returns: Returns:
Layer: the sublayer passed in. Layer, the sublayer passed in.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -999,8 +1065,9 @@ class Layer(object): ...@@ -999,8 +1065,9 @@ class Layer(object):
model = MySequential(fc1, fc2) model = MySequential(fc1, fc2)
for prefix, layer in model.named_sublayers(): for prefix, layer in model.named_sublayers():
print(prefix, layer) print(prefix, layer)
""" """
assert (isinstance(sublayer, Layer) or sublayer == None) assert isinstance(sublayer, Layer) or sublayer == None
self._sub_layers[name] = sublayer self._sub_layers[name] = sublayer
return sublayer return sublayer
...@@ -1014,7 +1081,7 @@ class Layer(object): ...@@ -1014,7 +1081,7 @@ class Layer(object):
name(str): name of this sublayer. name(str): name of this sublayer.
parameter(Parameter): an instance of Parameter. parameter(Parameter): an instance of Parameter.
Returns: Returns:
Parameter: the parameter passed in. Parameter, the parameter passed in.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -1037,32 +1104,42 @@ class Layer(object): ...@@ -1037,32 +1104,42 @@ class Layer(object):
""" """
if '_parameters' not in self.__dict__: if '_parameters' not in self.__dict__:
raise RuntimeError( raise RuntimeError(
"super(YourLayer, self).__init__() should be called firstly.") "super(YourLayer, self).__init__() should be called firstly."
)
elif not isinstance(name, six.string_types): elif not isinstance(name, six.string_types):
raise TypeError( raise TypeError(
"The name of parameter should be a string, but received {}.". "The name of parameter should be a string, but received {}.".format(
format(type(name).__name__)) type(name).__name__
)
)
elif '.' in name: elif '.' in name:
raise KeyError( raise KeyError(
"The name of parameter can not contain `.`, " "The name of parameter can not contain `.`, "
"because when you access the newly added parameter in the " "because when you access the newly added parameter in the "
"form of `self.**.**`, it will cause AttributeError.") "form of `self.**.**`, it will cause AttributeError."
)
elif name == '': elif name == '':
raise KeyError("The name of parameter can not be empty.") raise KeyError("The name of parameter can not be empty.")
elif hasattr(self, name) and name not in self._parameters: elif hasattr(self, name) and name not in self._parameters:
raise KeyError("The parameter '{}' already exists.".format(name)) raise KeyError("The parameter '{}' already exists.".format(name))
elif parameter is not None and not isinstance(parameter, elif parameter is not None and not isinstance(
framework.Parameter): parameter, framework.Parameter
):
raise TypeError( raise TypeError(
"The parameter to be added should be a Parameter, but received {}." "The parameter to be added should be a Parameter, but received {}.".format(
.format(type(parameter).__name__)) type(parameter).__name__
)
)
else: else:
if parameter is None: if parameter is None:
self._parameters[name] = None self._parameters[name] = None
if len(self._loaddict_holder) > 0: if len(self._loaddict_holder) > 0:
assert parameter.name in self._loaddict_holder, "Parameter not found, Can't not find [ {} ] in state_dict".format( assert (
parameter.name) parameter.name in self._loaddict_holder
), "Parameter not found, Can't not find [ {} ] in state_dict".format(
parameter.name
)
parameter.set_value(self._loaddict_holder[parameter.name]) parameter.set_value(self._loaddict_holder[parameter.name])
...@@ -1081,37 +1158,50 @@ class Layer(object): ...@@ -1081,37 +1158,50 @@ class Layer(object):
""" """
def is_already_registered(is_pre_hook): def is_already_registered(is_pre_hook):
layers_hooks = self._forward_pre_hooks if is_pre_hook else self._forward_post_hooks layers_hooks = (
candidate_hook = record_program_ops_pre_hook if is_pre_hook else set_op_customized_attrs_post_hook self._forward_pre_hooks
if is_pre_hook
else self._forward_post_hooks
)
candidate_hook = (
record_program_ops_pre_hook
if is_pre_hook
else set_op_customized_attrs_post_hook
)
already_registed = False already_registed = False
if layers_hooks: if layers_hooks:
last_key = next(reversed(layers_hooks)) last_key = next(reversed(layers_hooks))
already_registed = (layers_hooks[last_key] == candidate_hook) already_registed = layers_hooks[last_key] == candidate_hook
return already_registed return already_registed
if not isinstance(attrs, dict): if not isinstance(attrs, dict):
raise TypeError( raise TypeError(
"attrs should be type(dict), but received {}".format( "attrs should be type(dict), but received {}".format(
type(attrs).__name__)) type(attrs).__name__
)
)
# NOTE: Overwrite behavior for same key. # NOTE: Overwrite behavior for same key.
self._customized_attrs.update(attrs) self._customized_attrs.update(attrs)
if not is_already_registered(is_pre_hook=True): if not is_already_registered(is_pre_hook=True):
pre_hook_helper = self.register_forward_pre_hook( pre_hook_helper = self.register_forward_pre_hook(
record_program_ops_pre_hook) record_program_ops_pre_hook
)
assert len(self._op_recorder.hooks) == 0 assert len(self._op_recorder.hooks) == 0
self._op_recorder.hooks = [pre_hook_helper] self._op_recorder.hooks = [pre_hook_helper]
# manually register post_hook to ensure it is inserted into the head. # manually register post_hook to ensure it is inserted into the head.
if not is_already_registered(is_pre_hook=False): if not is_already_registered(is_pre_hook=False):
post_hook_helper = self.register_forward_post_hook( post_hook_helper = self.register_forward_post_hook(
set_op_customized_attrs_post_hook) set_op_customized_attrs_post_hook
)
if len(self._forward_post_hooks) > 1: if len(self._forward_post_hooks) > 1:
self._forward_post_hooks.move_to_end(post_hook_helper._hook_id, self._forward_post_hooks.move_to_end(
last=False) post_hook_helper._hook_id, last=False
)
assert len(self._op_recorder.hooks) == 1 assert len(self._op_recorder.hooks) == 1
...@@ -1144,7 +1234,6 @@ class Layer(object): ...@@ -1144,7 +1234,6 @@ class Layer(object):
return object.__getattribute__(self, name) return object.__getattribute__(self, name)
def __setattr__(self, name, value): def __setattr__(self, name, value):
def _remove_if_exist(*dicts): def _remove_if_exist(*dicts):
for d in dicts: for d in dicts:
if name in d: if name in d:
...@@ -1156,10 +1245,14 @@ class Layer(object): ...@@ -1156,10 +1245,14 @@ class Layer(object):
if isinstance(value, framework.Parameter): if isinstance(value, framework.Parameter):
if params is None: if params is None:
raise ValueError( raise ValueError(
"super(YourLayer, self).__init__() should be called first") "super(YourLayer, self).__init__() should be called first"
)
if len(self._loaddict_holder) > 0: if len(self._loaddict_holder) > 0:
assert value.name in self._loaddict_holder, "Parameter not found, Can't not find [ {} ] in state_dict".format( assert (
value.name) value.name in self._loaddict_holder
), "Parameter not found, Can't not find [ {} ] in state_dict".format(
value.name
)
value.set_value(self._loaddict_holder[value.name]) value.set_value(self._loaddict_holder[value.name])
...@@ -1168,9 +1261,10 @@ class Layer(object): ...@@ -1168,9 +1261,10 @@ class Layer(object):
elif params is not None and name in params: elif params is not None and name in params:
if value is not None: if value is not None:
raise TypeError( raise TypeError(
"assignment to parameter '{}' should be of type Parameter or None, but got '{}'" "assignment to parameter '{}' should be of type Parameter or None, but got '{}'".format(
.format(name, name, type(value).__name__
type(value).__name__)) )
)
params[name] = None params[name] = None
else: else:
layers = self.__dict__.get('_sub_layers', None) layers = self.__dict__.get('_sub_layers', None)
...@@ -1185,9 +1279,10 @@ class Layer(object): ...@@ -1185,9 +1279,10 @@ class Layer(object):
elif layers is not None and name in layers: elif layers is not None and name in layers:
if value is not None: if value is not None:
raise TypeError( raise TypeError(
"assignment to sublayer '{}' should be of type Layer or None, but got '{}'" "assignment to sublayer '{}' should be of type Layer or None, but got '{}'".format(
.format(name, name, type(value).__name__
type(value).__name__)) )
)
layers[name] = None layers[name] = None
else: else:
_buffers = self.__dict__.get('_buffers', None) _buffers = self.__dict__.get('_buffers', None)
...@@ -1196,8 +1291,9 @@ class Layer(object): ...@@ -1196,8 +1291,9 @@ class Layer(object):
raise ValueError( raise ValueError(
"super(YourLayer, self).__init__() should be called first" "super(YourLayer, self).__init__() should be called first"
) )
_remove_if_exist(self.__dict__, self._parameters, _remove_if_exist(
self._sub_layers) self.__dict__, self._parameters, self._sub_layers
)
# Set persistable=False by default. Only `register_buffer` can # Set persistable=False by default. Only `register_buffer` can
# add a persistable buffer. # add a persistable buffer.
if name not in self._buffers: if name not in self._buffers:
...@@ -1211,6 +1307,7 @@ class Layer(object): ...@@ -1211,6 +1307,7 @@ class Layer(object):
# value via `assign`. # value via `assign`.
if type(value) == framework.Variable: if type(value) == framework.Variable:
from paddle import assign from paddle import assign
# Note(zhhsplendid): the condition below happens in PaddleGan model, # Note(zhhsplendid): the condition below happens in PaddleGan model,
# but should all non-Variable _buffers[name] be re-assign? We # but should all non-Variable _buffers[name] be re-assign? We
# should consider it in the future. I current wrote this as # should consider it in the future. I current wrote this as
...@@ -1218,18 +1315,23 @@ class Layer(object): ...@@ -1218,18 +1315,23 @@ class Layer(object):
if in_declarative_mode() and _buffers[name] is None: if in_declarative_mode() and _buffers[name] is None:
raise RuntimeError( raise RuntimeError(
'In Dy2stat, self.{0} is a buffer and self.{0} is ' 'In Dy2stat, self.{0} is a buffer and self.{0} is '
'not allowed to be set to Variable when self.{0} is None.' 'not allowed to be set to Variable when self.{0} is None.'.format(
.format(name)) name
elif _buffers[name] is None or type(getattr( )
self, name)) == core.VarBase: )
elif (
_buffers[name] is None
or type(getattr(self, name)) == core.VarBase
):
_buffers[name] = assign(value) _buffers[name] = assign(value)
else: else:
assign(value, getattr(self, name)) assign(value, getattr(self, name))
elif value is not None: elif value is not None:
raise TypeError( raise TypeError(
"assignment to buffers '{}' should be of type core.VarBase or None, but got '{}'" "assignment to buffers '{}' should be of type core.VarBase or None, but got '{}'".format(
.format(name, name, type(value).__name__
type(value).__name__)) )
)
else: else:
# Assigning None will remove the buffer, but if re-assign a new varBase to it, # Assigning None will remove the buffer, but if re-assign a new varBase to it,
# it will be remarked as a buffer with same `persistable` attribute. # it will be remarked as a buffer with same `persistable` attribute.
...@@ -1316,12 +1418,14 @@ class Layer(object): ...@@ -1316,12 +1418,14 @@ class Layer(object):
self._state_dict_hooks[hook_remove_helper._hook_id] = hook self._state_dict_hooks[hook_remove_helper._hook_id] = hook
return hook_remove_helper return hook_remove_helper
def _obtain_parameters_buffers(self, def _obtain_parameters_buffers(
destination=None, self,
include_sublayers=True, destination=None,
structured_name_prefix=""): include_sublayers=True,
structured_name_prefix="",
):
""" """
The difference from state_dict() is that state_dict_hook will not be called, The difference from state_dict() is that state_dict_hook will not be called,
but the original types of parameters and buffers will be maintained. but the original types of parameters and buffers will be maintained.
""" """
if destination is None: if destination is None:
...@@ -1330,7 +1434,10 @@ class Layer(object): ...@@ -1330,7 +1434,10 @@ class Layer(object):
if data is not None: if data is not None:
destination[structured_name_prefix + name] = data destination[structured_name_prefix + name] = data
for name, buffer in self._buffers.items(): for name, buffer in self._buffers.items():
if buffer is not None and name not in self._non_persistable_buffer_names_set: if (
buffer is not None
and name not in self._non_persistable_buffer_names_set
):
destination[structured_name_prefix + name] = buffer destination[structured_name_prefix + name] = buffer
if include_sublayers: if include_sublayers:
...@@ -1339,17 +1446,22 @@ class Layer(object): ...@@ -1339,17 +1446,22 @@ class Layer(object):
destination_temp = destination.copy() destination_temp = destination.copy()
destination_temp.update( destination_temp.update(
layer_item._obtain_parameters_buffers( layer_item._obtain_parameters_buffers(
destination_temp, include_sublayers, destination_temp,
structured_name_prefix + layer_name + ".")) include_sublayers,
structured_name_prefix + layer_name + ".",
)
)
destination = destination_temp destination = destination_temp
return destination return destination
def _state_dict_impl(self, def _state_dict_impl(
destination=None, self,
include_sublayers=True, destination=None,
structured_name_prefix="", include_sublayers=True,
include_non_persistable_buffer=False, structured_name_prefix="",
use_hook=True): include_non_persistable_buffer=False,
use_hook=True,
):
""" """
Get all parameters and persistable buffers of current layer and its sub-layers. And set them into a dict Get all parameters and persistable buffers of current layer and its sub-layers. And set them into a dict
...@@ -1367,7 +1479,10 @@ class Layer(object): ...@@ -1367,7 +1479,10 @@ class Layer(object):
destination[structured_name_prefix + name] = data destination[structured_name_prefix + name] = data
for name, buffer in self._buffers.items(): for name, buffer in self._buffers.items():
if not include_non_persistable_buffer: if not include_non_persistable_buffer:
if buffer is not None and name not in self._non_persistable_buffer_names_set: if (
buffer is not None
and name not in self._non_persistable_buffer_names_set
):
destination[structured_name_prefix + name] = buffer destination[structured_name_prefix + name] = buffer
else: else:
if buffer is not None: if buffer is not None:
...@@ -1379,9 +1494,13 @@ class Layer(object): ...@@ -1379,9 +1494,13 @@ class Layer(object):
destination_temp = destination.copy() destination_temp = destination.copy()
destination_temp.update( destination_temp.update(
layer_item._state_dict_impl( layer_item._state_dict_impl(
destination_temp, include_sublayers, destination_temp,
include_sublayers,
structured_name_prefix + layer_name + ".", structured_name_prefix + layer_name + ".",
include_non_persistable_buffer, use_hook)) include_non_persistable_buffer,
use_hook,
)
)
destination = destination_temp destination = destination_temp
if use_hook: if use_hook:
for state_dict_hook in self._state_dict_hooks.values(): for state_dict_hook in self._state_dict_hooks.values():
...@@ -1391,12 +1510,15 @@ class Layer(object): ...@@ -1391,12 +1510,15 @@ class Layer(object):
return destination return destination
def to_static_state_dict(self, def to_static_state_dict(
destination=None, self,
include_sublayers=True, destination=None,
structured_name_prefix="", include_sublayers=True,
use_hook=True): structured_name_prefix="",
use_hook=True,
):
''' '''
Get all parameters and buffers of current layer and its sub-layers. And set them into a dict Get all parameters and buffers of current layer and its sub-layers. And set them into a dict
Parameters: Parameters:
...@@ -1405,7 +1527,7 @@ class Layer(object): ...@@ -1405,7 +1527,7 @@ class Layer(object):
use_hook(bool, optional) : If true, the operations contained in _state_dict_hooks will be appended to the destination. Default: True use_hook(bool, optional) : If true, the operations contained in _state_dict_hooks will be appended to the destination. Default: True
Retruns: Retruns:
dict: a dict contains all the parameters and persistable buffers. dict, a dict contains all the parameters and persistable buffers.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -1423,13 +1545,16 @@ class Layer(object): ...@@ -1423,13 +1545,16 @@ class Layer(object):
include_sublayers=include_sublayers, include_sublayers=include_sublayers,
structured_name_prefix=structured_name_prefix, structured_name_prefix=structured_name_prefix,
include_non_persistable_buffer=True, include_non_persistable_buffer=True,
use_hook=use_hook) use_hook=use_hook,
)
def state_dict(self,
destination=None, def state_dict(
include_sublayers=True, self,
structured_name_prefix="", destination=None,
use_hook=True): include_sublayers=True,
structured_name_prefix="",
use_hook=True,
):
''' '''
Get all parameters and persistable buffers of current layer and its sub-layers. And set them into a dict Get all parameters and persistable buffers of current layer and its sub-layers. And set them into a dict
...@@ -1457,7 +1582,8 @@ class Layer(object): ...@@ -1457,7 +1582,8 @@ class Layer(object):
include_sublayers=include_sublayers, include_sublayers=include_sublayers,
structured_name_prefix=structured_name_prefix, structured_name_prefix=structured_name_prefix,
include_non_persistable_buffer=False, include_non_persistable_buffer=False,
use_hook=use_hook) use_hook=use_hook,
)
@framework.deprecate_stat_dict @framework.deprecate_stat_dict
def set_state_dict(self, state_dict, use_structured_name=True): def set_state_dict(self, state_dict, use_structured_name=True):
...@@ -1489,22 +1615,31 @@ class Layer(object): ...@@ -1489,22 +1615,31 @@ class Layer(object):
state = state_dict.get(key, None) state = state_dict.get(key, None)
if state is None: if state is None:
raise ValueError( raise ValueError(
"{} is not found in the provided dict.".format(key)) "{} is not found in the provided dict.".format(key)
if (isinstance(state, dict) or isinstance(state, list)): )
if (len(state) != len(param)): if isinstance(state, dict) or isinstance(state, list):
raise ValueError("{} receieves the length of {}, " if len(state) != len(param):
"but the expected shape is {}".format( raise ValueError(
key, len(state), len(param))) "{} receieves the length of {}, "
"but the expected shape is {}".format(
key, len(state), len(param)
)
)
else: else:
return param, state return param, state
else: else:
state_shape = state.shape() if inspect.ismethod( state_shape = (
state.shape) else state.shape state.shape()
if inspect.ismethod(state.shape)
else state.shape
)
if list(state_shape) != list(param.shape): if list(state_shape) != list(param.shape):
raise ValueError( raise ValueError(
"{} receives a shape {}, but the expected shape is {}.". "{} receives a shape {}, but the expected shape is {}.".format(
format(key, list(state_shape), list(param.shape))) key, list(state_shape), list(param.shape)
)
)
return param, state return param, state
matched_param_state = [] matched_param_state = []
...@@ -1541,8 +1676,10 @@ class Layer(object): ...@@ -1541,8 +1676,10 @@ class Layer(object):
executor = Executor(_get_device())._default_executor executor = Executor(_get_device())._default_executor
# restore parameter states # restore parameter states
core._create_loaded_parameter( core._create_loaded_parameter(
[param for param, state in matched_param_state], global_scope(), [param for param, state in matched_param_state],
executor) global_scope(),
executor,
)
for param, state in matched_param_state: for param, state in matched_param_state:
_set_var(param, state) _set_var(param, state)
...@@ -1559,7 +1696,7 @@ class Layer(object): ...@@ -1559,7 +1696,7 @@ class Layer(object):
blocking(bool|None, optional): If False and the source is in pinned memory, the copy will be blocking(bool|None, optional): If False and the source is in pinned memory, the copy will be
asynchronous with respect to the host. Otherwise, the argument has no effect. If None, the blocking is set True. Default: None. asynchronous with respect to the host. Otherwise, the argument has no effect. If None, the blocking is set True. Default: None.
Returns: Returns:
self self
...@@ -1594,11 +1731,13 @@ class Layer(object): ...@@ -1594,11 +1731,13 @@ class Layer(object):
# [ 0.33960250, 0.96878713]]) # [ 0.33960250, 0.96878713]])
''' '''
return self._to_impl(device=device, return self._to_impl(
dtype=dtype, device=device,
blocking=blocking, dtype=dtype,
include_sublayers=True, blocking=blocking,
floating_only=False) include_sublayers=True,
floating_only=False,
)
def _apply(self, func, device, dtype, blocking, include_sublayers=True): def _apply(self, func, device, dtype, blocking, include_sublayers=True):
if include_sublayers: if include_sublayers:
...@@ -1612,8 +1751,9 @@ class Layer(object): ...@@ -1612,8 +1751,9 @@ class Layer(object):
if param.grad is not None: if param.grad is not None:
with no_grad(): with no_grad():
grad_applied = func(param._grad_ivar(), device, dtype, grad_applied = func(
blocking) param._grad_ivar(), device, dtype, blocking
)
for key, buf in self._buffers.items(): for key, buf in self._buffers.items():
if buf is not None: if buf is not None:
...@@ -1637,12 +1777,14 @@ class Layer(object): ...@@ -1637,12 +1777,14 @@ class Layer(object):
# Note(zhangbo): Paddle GPU minimum memory allocation unit is 256 bytes, waiting_alloc_memory will compute the memory space occupied by ‘t’. # Note(zhangbo): Paddle GPU minimum memory allocation unit is 256 bytes, waiting_alloc_memory will compute the memory space occupied by ‘t’.
# Coefficient 1.2 is used to avoid OOM that may occur in this critical state when the memory is just enough. # Coefficient 1.2 is used to avoid OOM that may occur in this critical state when the memory is just enough.
waiting_alloc_memory = ( waiting_alloc_memory = (
(np.prod(t.shape) * size_dtype) / 256 + 1) * 256 * 1.2 ((np.prod(t.shape) * size_dtype) / 256 + 1) * 256 * 1.2
)
gpu_memory_available = core.gpu_memory_available() gpu_memory_available = core.gpu_memory_available()
if gpu_memory_available < waiting_alloc_memory: if gpu_memory_available < waiting_alloc_memory:
# Copy param / Tensor to cpu # Copy param / Tensor to cpu
t_used = t._copy_to(paddle.CPUPlace(), t_used = t._copy_to(
blocking) # k-v type will error paddle.CPUPlace(), blocking
) # k-v type will error
# Release mem of t # Release mem of t
t.value().get_tensor()._clear() t.value().get_tensor()._clear()
else: else:
...@@ -1653,7 +1795,8 @@ class Layer(object): ...@@ -1653,7 +1795,8 @@ class Layer(object):
# 2. cast param / Tensor to dtype # 2. cast param / Tensor to dtype
if dtype is not None and dtype != t_used.dtype: if dtype is not None and dtype != t_used.dtype:
with paddle.fluid.framework._dygraph_place_guard( with paddle.fluid.framework._dygraph_place_guard(
place=t_used.place): place=t_used.place
):
t_casted = t_used.cast(dtype=dtype) t_casted = t_used.cast(dtype=dtype)
else: else:
t_casted = t_used t_casted = t_used
...@@ -1671,12 +1814,14 @@ class Layer(object): ...@@ -1671,12 +1814,14 @@ class Layer(object):
return t return t
def _to_impl(self, def _to_impl(
device=None, self,
dtype=None, device=None,
blocking=None, dtype=None,
include_sublayers=True, blocking=None,
floating_only=False): include_sublayers=True,
floating_only=False,
):
''' '''
Cast the parameters and buffers of Layer by the given device, dtype and blocking. Cast the parameters and buffers of Layer by the given device, dtype and blocking.
...@@ -1689,7 +1834,7 @@ class Layer(object): ...@@ -1689,7 +1834,7 @@ class Layer(object):
blocking(bool|None, optional): If False and the source is in pinned memory, the copy will be blocking(bool|None, optional): If False and the source is in pinned memory, the copy will be
asynchronous with respect to the host. Otherwise, the argument has no effect. If None, the blocking is set True. Default: None. asynchronous with respect to the host. Otherwise, the argument has no effect. If None, the blocking is set True. Default: None.
include_sublayers(bool|True, optional): If True, deal with self and all sublayers parameters and buffers, if not only deal with self parameters and buffers. Default: True. include_sublayers(bool|True, optional): If True, deal with self and all sublayers parameters and buffers, if not only deal with self parameters and buffers. Default: True.
floating_only(bool|False, optional): If True, only cast all floating point parameters and buffers of Layer by the given device, dtype and blocking. floating_only(bool|False, optional): If True, only cast all floating point parameters and buffers of Layer by the given device, dtype and blocking.
...@@ -1705,20 +1850,28 @@ class Layer(object): ...@@ -1705,20 +1850,28 @@ class Layer(object):
if device is not None: if device is not None:
if isinstance(device, str): if isinstance(device, str):
device = paddle.device._convert_to_place(device) device = paddle.device._convert_to_place(device)
elif isinstance(device, (core.CPUPlace, core.CUDAPlace, elif isinstance(
core.CUDAPinnedPlace, core.XPUPlace)): device,
(
core.CPUPlace,
core.CUDAPlace,
core.CUDAPinnedPlace,
core.XPUPlace,
),
):
pass pass
else: else:
raise ValueError( raise ValueError(
"device value error, must be str, paddle.CPUPlace(), paddle.CUDAPlace(), paddle.CUDAPinnedPlace() or paddle.XPUPlace(), but the type of device is " "device value error, must be str, paddle.CPUPlace(), paddle.CUDAPlace(), paddle.CUDAPinnedPlace() or paddle.XPUPlace(), but the type of device is "
+ type(device).__name__) + type(device).__name__
)
if blocking is None: if blocking is None:
blocking = True blocking = True
else: else:
assert isinstance( assert isinstance(
blocking, blocking, bool
bool), "blocking value error, must be the True, False or None" ), "blocking value error, must be the True, False or None"
def transform(t, device, dtype, blocking): def transform(t, device, dtype, blocking):
if floating_only and (not paddle.is_floating_point(t)): if floating_only and (not paddle.is_floating_point(t)):
......
...@@ -1352,12 +1352,13 @@ class ParameterMetaClass(VariableMetaClass): ...@@ -1352,12 +1352,13 @@ class ParameterMetaClass(VariableMetaClass):
@six.add_metaclass(VariableMetaClass) @six.add_metaclass(VariableMetaClass)
class Variable(object): class Variable(object):
""" """
**Notes**:
**The constructor of Variable should not be invoked directly.**
**In Static Graph Mode: Please use** `Block.create_var` **to create a Static variable which has no data until being feed.** Notes:
The constructor of Variable should not be invoked directly.
In Static Graph Mode: Please use ** `Block.create_var` ** to create a Static variable which has no data until being feed.
**In Dygraph Mode: Please use** :ref:`api_fluid_dygraph_to_variable` **to create a dygraph variable with real data** In Dygraph Mode: Please use ** :ref:`api_fluid_dygraph_to_variable` ** to create a dygraph variable with real data.
In Fluid, every input and output of an OP is a variable. In most In Fluid, every input and output of an OP is a variable. In most
cases, variables are used for holding different kinds of data or training cases, variables are used for holding different kinds of data or training
...@@ -1514,12 +1515,13 @@ class Variable(object): ...@@ -1514,12 +1515,13 @@ class Variable(object):
def detach(self): def detach(self):
""" """
Returns a new Variable, detached from the current graph. Returns a new Variable, detached from the current graph.
It will share data with origin Variable and without tensor copy. It will share data with origin Variable and without tensor copy.
In addition, the detached Variable doesn't provide gradient propagation. In addition, the detached Variable doesn't provide gradient propagation.
Returns: Returns:
( :ref:`api_guide_Variable_en` | dtype is same as current Variable): The detached Variable. ( :ref:`api_guide_Variable_en` | dtype is same as current Variable), The detached Variable.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -1533,6 +1535,7 @@ class Variable(object): ...@@ -1533,6 +1535,7 @@ class Variable(object):
# create a detached Variable # create a detached Variable
y = x.detach() y = x.detach()
""" """
assert ( assert (
...@@ -2085,6 +2088,7 @@ class Variable(object): ...@@ -2085,6 +2088,7 @@ class Variable(object):
@property @property
def T(self): def T(self):
""" """
Permute current Variable with its dimensions reversed. Permute current Variable with its dimensions reversed.
If `n` is the dimensions of `x` , `x.T` is equivalent to `x.transpose([n-1, n-2, ..., 0])`. If `n` is the dimensions of `x` , `x.T` is equivalent to `x.transpose([n-1, n-2, ..., 0])`.
...@@ -2103,6 +2107,7 @@ class Variable(object): ...@@ -2103,6 +2107,7 @@ class Variable(object):
x_T_np = exe.run(paddle.static.default_main_program(), fetch_list=[x_T])[0] x_T_np = exe.run(paddle.static.default_main_program(), fetch_list=[x_T])[0]
print(x_T_np.shape) print(x_T_np.shape)
# (5, 3, 2) # (5, 3, 2)
""" """
if len(self.shape) == 1: if len(self.shape) == 1:
return self return self
...@@ -2141,7 +2146,7 @@ class Variable(object): ...@@ -2141,7 +2146,7 @@ class Variable(object):
as ``out = assign(tensor)`` . as ``out = assign(tensor)`` .
Returns: Returns:
Variable: The cloned Variable. Variable, The cloned Variable.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -2171,6 +2176,7 @@ class Variable(object): ...@@ -2171,6 +2176,7 @@ class Variable(object):
def _set_error_clip(self, error_clip): def _set_error_clip(self, error_clip):
""" """
Set the error_clip. Set the error_clip.
Args: Args:
...@@ -2178,11 +2184,13 @@ class Variable(object): ...@@ -2178,11 +2184,13 @@ class Variable(object):
Returns: Returns:
None None
""" """
self.error_clip = error_clip self.error_clip = error_clip
def _set_info(self, key, value): def _set_info(self, key, value):
""" """
Set key-value information for this variable. Set key-value information for this variable.
Args: Args:
...@@ -2191,6 +2199,7 @@ class Variable(object): ...@@ -2191,6 +2199,7 @@ class Variable(object):
Returns: Returns:
None None
""" """
if not hasattr(self, "_info"): if not hasattr(self, "_info"):
self._info = {} self._info = {}
...@@ -2198,6 +2207,7 @@ class Variable(object): ...@@ -2198,6 +2207,7 @@ class Variable(object):
def _get_info(self, key): def _get_info(self, key):
""" """
Get the information of this variable corresponding to key. Get the information of this variable corresponding to key.
Args: Args:
...@@ -2205,6 +2215,7 @@ class Variable(object): ...@@ -2205,6 +2215,7 @@ class Variable(object):
Returns: Returns:
object object
""" """
if hasattr(self, "_info") and key in self._info: if hasattr(self, "_info") and key in self._info:
return self._info[key] return self._info[key]
...@@ -2212,7 +2223,9 @@ class Variable(object): ...@@ -2212,7 +2223,9 @@ class Variable(object):
def _slice_indices(self, slice, length): def _slice_indices(self, slice, length):
""" """
Reference implementation for the slice.indices method. Reference implementation for the slice.indices method.
""" """
# Compute step and length as integers. # Compute step and length as integers.
step = 1 if slice.step is None else slice.step step = 1 if slice.step is None else slice.step
...@@ -2383,7 +2396,7 @@ class Variable(object): ...@@ -2383,7 +2396,7 @@ class Variable(object):
Default: None Default: None
Returns: Returns:
Tensor: the value in given scope. Tensor, the value in given scope.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -2438,6 +2451,7 @@ class Variable(object): ...@@ -2438,6 +2451,7 @@ class Variable(object):
def set_value(self, value, scope=None): def set_value(self, value, scope=None):
''' '''
Set the value to the tensor in given scope. Set the value to the tensor in given scope.
Args: Args:
...@@ -2477,6 +2491,7 @@ class Variable(object): ...@@ -2477,6 +2491,7 @@ class Variable(object):
if var.persistable: if var.persistable:
t_load = paddle.load(path+var.name+'.pdtensor') t_load = paddle.load(path+var.name+'.pdtensor')
var.set_value(t_load) var.set_value(t_load)
''' '''
# The 'framework' is a low-level module, and 'executor' # The 'framework' is a low-level module, and 'executor'
...@@ -2547,10 +2562,11 @@ class Variable(object): ...@@ -2547,10 +2562,11 @@ class Variable(object):
def size(self): def size(self):
""" """
Returns the number of elements for current Variable, which is a int64 Variable with shape [1] Returns the number of elements for current Variable, which is a int64 Variable with shape [1]
Returns: Returns:
Variable: the number of elements for current Variable Variable, the number of elements for current Variable
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -2564,6 +2580,7 @@ class Variable(object): ...@@ -2564,6 +2580,7 @@ class Variable(object):
# get the number of elements of the Variable # get the number of elements of the Variable
y = x.size() y = x.size()
""" """
output = self.block.create_var( output = self.block.create_var(
...@@ -2578,23 +2595,27 @@ class Variable(object): ...@@ -2578,23 +2595,27 @@ class Variable(object):
def _set_attr(self, name, val): def _set_attr(self, name, val):
""" """
Set the value of attribute by attribute's name. Set the value of attribute by attribute's name.
Args: Args:
name(str): the attribute name. name(str): the attribute name.
val(int|str|list): the value of the attribute. val(int|str|list): the value of the attribute.
""" """
self._update_desc_attr(name, val) self._update_desc_attr(name, val)
def _has_attr(self, name): def _has_attr(self, name):
""" """
Whether this Variable has the attribute with the name `name` or not. Whether this Variable has the attribute with the name `name` or not.
Args: Args:
name(str): the attribute name. name(str): the attribute name.
Returns: Returns:
bool: True if has this attribute. bool, True if has this attribute.
""" """
return self.desc.has_attr(name) return self.desc.has_attr(name)
...@@ -2624,7 +2645,7 @@ class Variable(object): ...@@ -2624,7 +2645,7 @@ class Variable(object):
name(str): the attribute name. name(str): the attribute name.
Returns: Returns:
int|str|list: The attribute value. The return value int|str|list, The attribute value. The return value
can be any valid attribute type. can be any valid attribute type.
""" """
return self.desc.attr(name) return self.desc.attr(name)
...@@ -3196,14 +3217,16 @@ class Operator(object): ...@@ -3196,14 +3217,16 @@ class Operator(object):
def input(self, name): def input(self, name):
r""" r"""
Get the input arguments according to the input parameter name. Get the input arguments according to the input parameter name.
Args: Args:
name(str): The input parameter name. name(str): The input parameter name.
Returns: Returns:
list: return the list of argument names that associated with \ list, return the list of argument names that associated with \
the specific parameter name. the specific parameter name.
""" """
return self.desc.input(name) return self.desc.input(name)
......
...@@ -20,7 +20,13 @@ from __future__ import print_function ...@@ -20,7 +20,13 @@ from __future__ import print_function
import warnings import warnings
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant from ..initializer import Normal, Constant
from ..framework import Variable, _non_static_mode, _varbase_creator, _in_legacy_dygraph, in_dygraph_mode from ..framework import (
Variable,
_non_static_mode,
_varbase_creator,
_in_legacy_dygraph,
in_dygraph_mode,
)
from .. import core from .. import core
from ..param_attr import ParamAttr from ..param_attr import ParamAttr
from . import nn from . import nn
...@@ -33,22 +39,29 @@ __all__ = ['accuracy', 'auc'] ...@@ -33,22 +39,29 @@ __all__ = ['accuracy', 'auc']
def accuracy(input, label, k=1, correct=None, total=None): def accuracy(input, label, k=1, correct=None, total=None):
""" """
accuracy layer. accuracy layer.
Refer to the https://en.wikipedia.org/wiki/Precision_and_recall Refer to the https://en.wikipedia.org/wiki/Precision_and_recall
This function computes the accuracy using the input and label. This function computes the accuracy using the input and label.
If the correct label occurs in top k predictions, then correct will increment by one. If the correct label occurs in top k predictions, then correct will increment by one.
Note: the dtype of accuracy is determined by input. the input and label dtype can be different.
Note:
the dtype of accuracy is determined by input. the input and label dtype can be different.
Args: Args:
input(Tensor): The input of accuracy layer, which is the predictions of network. A Tensor with type float32,float64. input(Tensor): The input of accuracy layer, which is the predictions of network. A Tensor with type float32,float64.
The shape is ``[sample_number, class_dim]`` . The shape is ``[sample_number, class_dim]`` .
label(Tensor): The label of dataset. Tensor with type int32,int64. The shape is ``[sample_number, 1]`` . label(Tensor): The label of dataset. Tensor with type int32,int64. The shape is ``[sample_number, 1]`` .
k(int): The top k predictions for each class will be checked. Data type is int64 or int32. k(int, optional): The top k predictions for each class will be checked. Data type is int64 or int32. Default is 1.
correct(Tensor): The correct predictions count. A Tensor with type int64 or int32. correct(Tensor, optional): The correct predictions count. A Tensor with type int64 or int32. Default is None.
total(Tensor): The total entries count. A tensor with type int64 or int32. total(Tensor, optional): The total entries count. A tensor with type int64 or int32. Default is None.
Returns: Returns:
Tensor: The correct rate. A Tensor with type float32. Tensor, The correct rate. A Tensor with type float32.
Examples: Examples:
.. code-block:: python .. code-block:: python
import numpy as np import numpy as np
import paddle import paddle
import paddle.static as static import paddle.static as static
...@@ -68,6 +81,7 @@ def accuracy(input, label, k=1, correct=None, total=None): ...@@ -68,6 +81,7 @@ def accuracy(input, label, k=1, correct=None, total=None):
fetch_list=[result[0]]) fetch_list=[result[0]])
print(output) print(output)
#[array([0.], dtype=float32)] #[array([0.], dtype=float32)]
""" """
if _non_static_mode(): if _non_static_mode():
if correct is None: if correct is None:
...@@ -76,15 +90,18 @@ def accuracy(input, label, k=1, correct=None, total=None): ...@@ -76,15 +90,18 @@ def accuracy(input, label, k=1, correct=None, total=None):
total = _varbase_creator(dtype="int32") total = _varbase_creator(dtype="int32")
_k = k.numpy().item(0) if isinstance(k, Variable) else k _k = k.numpy().item(0) if isinstance(k, Variable) else k
topk_out, topk_indices = _legacy_C_ops.top_k_v2(input, 'k', _k, topk_out, topk_indices = _legacy_C_ops.top_k_v2(
'sorted', False) input, 'k', _k, 'sorted', False
_acc, _, _ = _legacy_C_ops.accuracy(topk_out, topk_indices, label, )
correct, total) _acc, _, _ = _legacy_C_ops.accuracy(
topk_out, topk_indices, label, correct, total
)
return _acc return _acc
helper = LayerHelper("accuracy", **locals()) helper = LayerHelper("accuracy", **locals())
check_variable_and_dtype(input, 'input', ['float16', 'float32', 'float64'], check_variable_and_dtype(
'accuracy') input, 'input', ['float16', 'float32', 'float64'], 'accuracy'
)
topk_out = helper.create_variable_for_type_inference(dtype=input.dtype) topk_out = helper.create_variable_for_type_inference(dtype=input.dtype)
topk_indices = helper.create_variable_for_type_inference(dtype="int64") topk_indices = helper.create_variable_for_type_inference(dtype="int64")
inputs = {"X": [input]} inputs = {"X": [input]}
...@@ -93,39 +110,38 @@ def accuracy(input, label, k=1, correct=None, total=None): ...@@ -93,39 +110,38 @@ def accuracy(input, label, k=1, correct=None, total=None):
else: else:
attrs = {'k': k} attrs = {'k': k}
attrs['sorted'] = False attrs['sorted'] = False
helper.append_op(type="top_k_v2", helper.append_op(
inputs=inputs, type="top_k_v2",
attrs=attrs, inputs=inputs,
outputs={ attrs=attrs,
"Out": [topk_out], outputs={"Out": [topk_out], "Indices": [topk_indices]},
"Indices": [topk_indices] )
})
acc_out = helper.create_variable_for_type_inference(dtype="float32") acc_out = helper.create_variable_for_type_inference(dtype="float32")
if correct is None: if correct is None:
correct = helper.create_variable_for_type_inference(dtype="int32") correct = helper.create_variable_for_type_inference(dtype="int32")
if total is None: if total is None:
total = helper.create_variable_for_type_inference(dtype="int32") total = helper.create_variable_for_type_inference(dtype="int32")
helper.append_op(type="accuracy", helper.append_op(
inputs={ type="accuracy",
"Out": [topk_out], inputs={"Out": [topk_out], "Indices": [topk_indices], "Label": [label]},
"Indices": [topk_indices], outputs={
"Label": [label] "Accuracy": [acc_out],
}, "Correct": [correct],
outputs={ "Total": [total],
"Accuracy": [acc_out], },
"Correct": [correct], )
"Total": [total],
})
return acc_out return acc_out
def auc(input, def auc(
label, input,
curve='ROC', label,
num_thresholds=2**12 - 1, curve='ROC',
topk=1, num_thresholds=2**12 - 1,
slide_steps=1, topk=1,
ins_tag_weight=None): slide_steps=1,
ins_tag_weight=None,
):
""" """
**Area Under the Curve (AUC) Layer** **Area Under the Curve (AUC) Layer**
...@@ -216,13 +232,14 @@ def auc(input, ...@@ -216,13 +232,14 @@ def auc(input,
helper = LayerHelper("auc", **locals()) helper = LayerHelper("auc", **locals())
if ins_tag_weight is None: if ins_tag_weight is None:
ins_tag_weight = tensor.fill_constant(shape=[1, 1], ins_tag_weight = tensor.fill_constant(
dtype="float32", shape=[1, 1], dtype="float32", value=1.0
value=1.0) )
check_variable_and_dtype(input, 'input', ['float32', 'float64'], 'auc') check_variable_and_dtype(input, 'input', ['float32', 'float64'], 'auc')
check_variable_and_dtype(label, 'label', ['int32', 'int64'], 'auc') check_variable_and_dtype(label, 'label', ['int32', 'int64'], 'auc')
check_variable_and_dtype(ins_tag_weight, 'ins_tag_weight', check_variable_and_dtype(
['float32', 'float64'], 'auc') ins_tag_weight, 'ins_tag_weight', ['float32', 'float64'], 'auc'
)
auc_out = helper.create_variable_for_type_inference(dtype="float64") auc_out = helper.create_variable_for_type_inference(dtype="float64")
batch_auc_out = helper.create_variable_for_type_inference(dtype="float64") batch_auc_out = helper.create_variable_for_type_inference(dtype="float64")
# make tp, tn, fp, fn persistable, so that can accumulate all batches. # make tp, tn, fp, fn persistable, so that can accumulate all batches.
...@@ -236,62 +253,71 @@ def auc(input, ...@@ -236,62 +253,71 @@ def auc(input,
batch_stat_pos = helper.create_global_variable( batch_stat_pos = helper.create_global_variable(
persistable=True, persistable=True,
dtype='int64', dtype='int64',
shape=[(1 + slide_steps) * (num_thresholds + 1) + 1]) shape=[(1 + slide_steps) * (num_thresholds + 1) + 1],
)
batch_stat_neg = helper.create_global_variable( batch_stat_neg = helper.create_global_variable(
persistable=True, persistable=True,
dtype='int64', dtype='int64',
shape=[(1 + slide_steps) * (num_thresholds + 1) + 1]) shape=[(1 + slide_steps) * (num_thresholds + 1) + 1],
)
# for global auc # for global auc
# Needn't maintain the batch id # Needn't maintain the batch id
stat_pos = helper.create_global_variable(persistable=True, stat_pos = helper.create_global_variable(
dtype='int64', persistable=True, dtype='int64', shape=[1, num_thresholds + 1]
shape=[1, num_thresholds + 1]) )
stat_neg = helper.create_global_variable(persistable=True, stat_neg = helper.create_global_variable(
dtype='int64', persistable=True, dtype='int64', shape=[1, num_thresholds + 1]
shape=[1, num_thresholds + 1]) )
for var in [batch_stat_pos, batch_stat_neg, stat_pos, stat_neg]: for var in [batch_stat_pos, batch_stat_neg, stat_pos, stat_neg]:
helper.set_variable_initializer(var, Constant(value=0.0, helper.set_variable_initializer(
force_cpu=False)) var, Constant(value=0.0, force_cpu=False)
)
#"InsTagWeight": [ins_tag_weight] # "InsTagWeight": [ins_tag_weight]
# Batch AUC # Batch AUC
helper.append_op(type="auc", helper.append_op(
inputs={ type="auc",
"Predict": [input], inputs={
"Label": [label], "Predict": [input],
"StatPos": [batch_stat_pos], "Label": [label],
"StatNeg": [batch_stat_neg] "StatPos": [batch_stat_pos],
}, "StatNeg": [batch_stat_neg],
attrs={ },
"curve": curve, attrs={
"num_thresholds": num_thresholds, "curve": curve,
"slide_steps": slide_steps "num_thresholds": num_thresholds,
}, "slide_steps": slide_steps,
outputs={ },
"AUC": [batch_auc_out], outputs={
"StatPosOut": [batch_stat_pos], "AUC": [batch_auc_out],
"StatNegOut": [batch_stat_neg] "StatPosOut": [batch_stat_pos],
}) "StatNegOut": [batch_stat_neg],
},
)
# Global AUC # Global AUC
helper.append_op(type="auc", helper.append_op(
inputs={ type="auc",
"Predict": [input], inputs={
"Label": [label], "Predict": [input],
"StatPos": [stat_pos], "Label": [label],
"StatNeg": [stat_neg] "StatPos": [stat_pos],
}, "StatNeg": [stat_neg],
attrs={ },
"curve": curve, attrs={
"num_thresholds": num_thresholds, "curve": curve,
"slide_steps": 0 "num_thresholds": num_thresholds,
}, "slide_steps": 0,
outputs={ },
"AUC": [auc_out], outputs={
"StatPosOut": [stat_pos], "AUC": [auc_out],
"StatNegOut": [stat_neg] "StatPosOut": [stat_pos],
}) "StatNegOut": [stat_neg],
return auc_out, batch_auc_out, [ },
batch_stat_pos, batch_stat_neg, stat_pos, stat_neg )
] return (
auc_out,
batch_auc_out,
[batch_stat_pos, batch_stat_neg, stat_pos, stat_neg],
)
This source diff could not be displayed because it is too large. You can view the blob instead.
image/
fit_a_line.model/
tmp
cuda_profiler.txt
...@@ -13,18 +13,19 @@ ...@@ -13,18 +13,19 @@
# limitations under the License. # limitations under the License.
import os import os
from paddle.fluid import core
from distutils.sysconfig import get_python_lib from distutils.sysconfig import get_python_lib
from distutils.core import setup, Extension
from setuptools import Extension, setup
from setuptools.command.build_ext import build_ext from setuptools.command.build_ext import build_ext
from paddle.fluid import core
# refer: https://note.qidong.name/2018/03/setup-warning-strict-prototypes # refer: https://note.qidong.name/2018/03/setup-warning-strict-prototypes
# Avoid a gcc warning below: # Avoid a gcc warning below:
# cc1plus: warning: command line option ‘-Wstrict-prototypes’ is valid # cc1plus: warning: command line option ‘-Wstrict-prototypes’ is valid
# for C/ObjC but not for C++ # for C/ObjC but not for C++
class BuildExt(build_ext): class BuildExt(build_ext):
def build_extensions(self): def build_extensions(self):
if '-Wstrict-prototypes' in self.compiler.compiler_so: if '-Wstrict-prototypes' in self.compiler.compiler_so:
self.compiler.compiler_so.remove('-Wstrict-prototypes') self.compiler.compiler_so.remove('-Wstrict-prototypes')
...@@ -48,8 +49,9 @@ paddle_custom_kernel_include = [ ...@@ -48,8 +49,9 @@ paddle_custom_kernel_include = [
os.path.join(site_packages_path, 'paddle', 'include'), os.path.join(site_packages_path, 'paddle', 'include'),
] ]
# include path third_party # include path third_party
compile_third_party_path = os.path.join(os.environ['PADDLE_BINARY_DIR'], compile_third_party_path = os.path.join(
'third_party') os.environ['PADDLE_BINARY_DIR'], 'third_party'
)
paddle_custom_kernel_include += [ paddle_custom_kernel_include += [
os.path.join(compile_third_party_path, 'install/gflags/include'), # gflags os.path.join(compile_third_party_path, 'install/gflags/include'), # gflags
os.path.join(compile_third_party_path, 'install/glog/include'), # glog os.path.join(compile_third_party_path, 'install/glog/include'), # glog
...@@ -69,10 +71,13 @@ custom_kernel_dot_module = Extension( ...@@ -69,10 +71,13 @@ custom_kernel_dot_module = Extension(
include_dirs=paddle_custom_kernel_include, include_dirs=paddle_custom_kernel_include,
library_dirs=paddle_custom_kernel_library_dir, library_dirs=paddle_custom_kernel_library_dir,
libraries=libs, libraries=libs,
extra_compile_args=paddle_extra_compile_args) extra_compile_args=paddle_extra_compile_args,
)
setup(name='custom_kernel_dot_c', setup(
version='1.0', name='custom_kernel_dot_c',
description='custom kernel fot compiling', version='1.0',
cmdclass={'build_ext': BuildExt}, description='custom kernel fot compiling',
ext_modules=[custom_kernel_dot_module]) cmdclass={'build_ext': BuildExt},
ext_modules=[custom_kernel_dot_module],
)
...@@ -14,18 +14,17 @@ ...@@ -14,18 +14,17 @@
import os import os
import site import site
from paddle.fluid import core from setuptools import Extension, setup
from distutils.sysconfig import get_python_lib
from distutils.core import setup, Extension
from setuptools.command.build_ext import build_ext from setuptools.command.build_ext import build_ext
from paddle.fluid import core
# refer: https://note.qidong.name/2018/03/setup-warning-strict-prototypes # refer: https://note.qidong.name/2018/03/setup-warning-strict-prototypes
# Avoid a gcc warning below: # Avoid a gcc warning below:
# cc1plus: warning: command line option ‘-Wstrict-prototypes’ is valid # cc1plus: warning: command line option ‘-Wstrict-prototypes’ is valid
# for C/ObjC but not for C++ # for C/ObjC but not for C++
class BuildExt(build_ext): class BuildExt(build_ext):
def build_extensions(self): def build_extensions(self):
if '-Wstrict-prototypes' in self.compiler.compiler_so: if '-Wstrict-prototypes' in self.compiler.compiler_so:
self.compiler.compiler_so.remove('-Wstrict-prototypes') self.compiler.compiler_so.remove('-Wstrict-prototypes')
...@@ -46,12 +45,15 @@ if core.is_compiled_with_npu(): ...@@ -46,12 +45,15 @@ if core.is_compiled_with_npu():
# include path # include path
site_packages_path = site.getsitepackages() site_packages_path = site.getsitepackages()
paddle_custom_kernel_include = list( paddle_custom_kernel_include = list(
map(lambda path: os.path.join(path, 'paddle', 'include'), map(
site_packages_path)) lambda path: os.path.join(path, 'paddle', 'include'), site_packages_path
)
)
# include path third_party # include path third_party
compile_third_party_path = os.path.join(os.environ['PADDLE_BINARY_DIR'], compile_third_party_path = os.path.join(
'third_party') os.environ['PADDLE_BINARY_DIR'], 'third_party'
)
paddle_custom_kernel_include += [ paddle_custom_kernel_include += [
os.path.join(compile_third_party_path, 'install/gflags/include'), # gflags os.path.join(compile_third_party_path, 'install/gflags/include'), # gflags
os.path.join(compile_third_party_path, 'install/glog/include'), # glog os.path.join(compile_third_party_path, 'install/glog/include'), # glog
...@@ -59,7 +61,8 @@ paddle_custom_kernel_include += [ ...@@ -59,7 +61,8 @@ paddle_custom_kernel_include += [
# libs path # libs path
paddle_custom_kernel_library_dir = list( paddle_custom_kernel_library_dir = list(
map(lambda path: os.path.join(path, 'paddle', 'fluid'), site_packages_path)) map(lambda path: os.path.join(path, 'paddle', 'fluid'), site_packages_path)
)
# libs # libs
libs = [':libpaddle.so'] libs = [':libpaddle.so']
...@@ -70,10 +73,13 @@ custom_kernel_dot_module = Extension( ...@@ -70,10 +73,13 @@ custom_kernel_dot_module = Extension(
include_dirs=paddle_custom_kernel_include, include_dirs=paddle_custom_kernel_include,
library_dirs=paddle_custom_kernel_library_dir, library_dirs=paddle_custom_kernel_library_dir,
libraries=libs, libraries=libs,
extra_compile_args=paddle_extra_compile_args) extra_compile_args=paddle_extra_compile_args,
)
setup(name='custom_kernel_dot', setup(
version='1.0', name='custom_kernel_dot',
description='custom kernel fot compiling', version='1.0',
cmdclass={'build_ext': BuildExt}, description='custom kernel fot compiling',
ext_modules=[custom_kernel_dot_module]) cmdclass={'build_ext': BuildExt},
ext_modules=[custom_kernel_dot_module],
)
...@@ -44,7 +44,7 @@ __global__ void relu_cuda_double_backward_kernel(const data_t* out_data, ...@@ -44,7 +44,7 @@ __global__ void relu_cuda_double_backward_kernel(const data_t* out_data,
data_t* ddout_data, data_t* ddout_data,
int64_t num) { int64_t num) {
int64_t gid = blockIdx.x * blockDim.x + threadIdx.x; int64_t gid = blockIdx.x * blockDim.x + threadIdx.x;
for (int64_t i = num; i < num; i += blockDim.x * gridDim.x) { for (int64_t i = gid; i < num; i += blockDim.x * gridDim.x) {
ddout_data[i] = ddx_data[i] * (out_data[i] > static_cast<data_t>(0.) ddout_data[i] = ddx_data[i] * (out_data[i] > static_cast<data_t>(0.)
? static_cast<data_t>(1.) ? static_cast<data_t>(1.)
: static_cast<data_t>(0.)); : static_cast<data_t>(0.));
......
...@@ -21,6 +21,7 @@ import paddle.static as static ...@@ -21,6 +21,7 @@ import paddle.static as static
import tempfile import tempfile
import subprocess import subprocess
import numpy as np import numpy as np
from paddle import fluid
from paddle.vision.transforms import Compose, Normalize from paddle.vision.transforms import Compose, Normalize
from paddle.utils.cpp_extension.extension_utils import run_cmd from paddle.utils.cpp_extension.extension_utils import run_cmd
from paddle.fluid.framework import _test_eager_guard from paddle.fluid.framework import _test_eager_guard
...@@ -43,12 +44,9 @@ def custom_relu_dynamic(func, device, dtype, np_x, use_func=True): ...@@ -43,12 +44,9 @@ def custom_relu_dynamic(func, device, dtype, np_x, use_func=True):
return out.numpy(), t.grad.numpy() return out.numpy(), t.grad.numpy()
def custom_relu_static(func, def custom_relu_static(
device, func, device, dtype, np_x, use_func=True, test_infer=False
dtype, ):
np_x,
use_func=True,
test_infer=False):
paddle.enable_static() paddle.enable_static()
paddle.set_device(device) paddle.set_device(device)
...@@ -62,9 +60,11 @@ def custom_relu_static(func, ...@@ -62,9 +60,11 @@ def custom_relu_static(func,
exe = static.Executor() exe = static.Executor()
exe.run(static.default_startup_program()) exe.run(static.default_startup_program())
# in static mode, x data has been covered by out # in static mode, x data has been covered by out
out_v = exe.run(static.default_main_program(), out_v = exe.run(
feed={'X': np_x}, static.default_main_program(),
fetch_list=[out.name]) feed={'X': np_x},
fetch_list=[out.name],
)
paddle.disable_static() paddle.disable_static()
return out_v return out_v
...@@ -87,11 +87,11 @@ def custom_relu_static_pe(func, device, dtype, np_x, use_func=True): ...@@ -87,11 +87,11 @@ def custom_relu_static_pe(func, device, dtype, np_x, use_func=True):
# in static mode, x data has been covered by out # in static mode, x data has been covered by out
compiled_prog = static.CompiledProgram( compiled_prog = static.CompiledProgram(
static.default_main_program()).with_data_parallel( static.default_main_program()
loss_name=out.name, places=places) ).with_data_parallel(loss_name=out.name, places=places)
out_v = exe.run(compiled_prog, out_v = exe.run(
feed={'X': np_x}, compiled_prog, feed={'X': np_x}, fetch_list=[out.name]
fetch_list=[out.name]) )
paddle.disable_static() paddle.disable_static()
return out_v return out_v
...@@ -103,9 +103,9 @@ def custom_relu_static_inference(func, device, np_data, np_label, path_prefix): ...@@ -103,9 +103,9 @@ def custom_relu_static_inference(func, device, np_data, np_label, path_prefix):
with static.scope_guard(static.Scope()): with static.scope_guard(static.Scope()):
with static.program_guard(static.Program()): with static.program_guard(static.Program()):
# simple module # simple module
data = static.data(name='data', data = static.data(
shape=[None, 1, 28, 28], name='data', shape=[None, 1, 28, 28], dtype='float32'
dtype='float32') )
label = static.data(name='label', shape=[None, 1], dtype='int64') label = static.data(name='label', shape=[None, 1], dtype='int64')
hidden = static.nn.fc(data, size=128) hidden = static.nn.fc(data, size=128)
...@@ -124,23 +124,21 @@ def custom_relu_static_inference(func, device, np_data, np_label, path_prefix): ...@@ -124,23 +124,21 @@ def custom_relu_static_inference(func, device, np_data, np_label, path_prefix):
# train # train
for i in range(4): for i in range(4):
avg_loss_v = exe.run(static.default_main_program(), avg_loss_v = exe.run(
feed={ static.default_main_program(),
'data': np_data, feed={'data': np_data, 'label': np_label},
'label': np_label fetch_list=[avg_loss],
}, )
fetch_list=[avg_loss])
# save inference model # save inference model
static.save_inference_model(path_prefix, [data], [predict], exe) static.save_inference_model(path_prefix, [data], [predict], exe)
# get train predict value # get train predict value
predict_v = exe.run(static.default_main_program(), predict_v = exe.run(
feed={ static.default_main_program(),
'data': np_data, feed={'data': np_data, 'label': np_label},
'label': np_label fetch_list=[predict],
}, )
fetch_list=[predict])
return predict_v return predict_v
...@@ -151,30 +149,37 @@ def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True): ...@@ -151,30 +149,37 @@ def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
t = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False) t = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False)
out = func(t) if use_func else paddle.nn.functional.relu(t) out = func(t) if use_func else paddle.nn.functional.relu(t)
out.stop_gradient = False dx = paddle.grad(
outputs=out,
dx = paddle.grad(outputs=[out], inputs=t,
inputs=[t], grad_outputs=paddle.ones_like(t),
create_graph=True, create_graph=True,
retain_graph=True) retain_graph=True,
)
dx[0].backward() ddout = paddle.grad(
outputs=dx[0],
inputs=out.grad,
grad_outputs=paddle.ones_like(t),
create_graph=False,
)
assert dx[0].grad is not None assert ddout[0].numpy() is not None
return dx[0].numpy(), dx[0].grad.numpy() return dx[0].numpy(), ddout[0].numpy()
class TestNewCustomOpSetUpInstall(unittest.TestCase): class TestNewCustomOpSetUpInstall(unittest.TestCase):
def setUp(self): def setUp(self):
cur_dir = os.path.dirname(os.path.abspath(__file__)) cur_dir = os.path.dirname(os.path.abspath(__file__))
# compile, install the custom op egg into site-packages under background # compile, install the custom op egg into site-packages under background
if os.name == 'nt': if os.name == 'nt':
cmd = 'cd /d {} && python custom_relu_setup.py install'.format( cmd = 'cd /d {} && python custom_relu_setup.py install'.format(
cur_dir) cur_dir
)
else: else:
cmd = 'cd {} && {} custom_relu_setup.py install'.format( cmd = 'cd {} && {} custom_relu_setup.py install'.format(
cur_dir, sys.executable) cur_dir, sys.executable
)
run_cmd(cmd) run_cmd(cmd)
# NOTE(Aurelius84): Normally, it's no need to add following codes for users. # NOTE(Aurelius84): Normally, it's no need to add following codes for users.
...@@ -190,16 +195,18 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): ...@@ -190,16 +195,18 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
custom_egg_path = [ custom_egg_path = [
x for x in os.listdir(site_dir) if 'custom_relu_module_setup' in x x for x in os.listdir(site_dir) if 'custom_relu_module_setup' in x
] ]
assert len(custom_egg_path assert len(custom_egg_path) == 1, "Matched egg number is %d." % len(
) == 1, "Matched egg number is %d." % len(custom_egg_path) custom_egg_path
)
sys.path.append(os.path.join(site_dir, custom_egg_path[0])) sys.path.append(os.path.join(site_dir, custom_egg_path[0]))
# usage: import the package directly # usage: import the package directly
import custom_relu_module_setup import custom_relu_module_setup
# `custom_relu_dup` is same as `custom_relu` # `custom_relu_dup` is same as `custom_relu`
self.custom_ops = [ self.custom_ops = [
custom_relu_module_setup.custom_relu, custom_relu_module_setup.custom_relu,
custom_relu_module_setup.custom_relu_dup custom_relu_module_setup.custom_relu_dup,
] ]
self.dtypes = ['float32', 'float64'] self.dtypes = ['float32', 'float64']
...@@ -222,13 +229,16 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): ...@@ -222,13 +229,16 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
x = np.random.uniform(-1, 1, [4, 8]).astype(dtype) x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
for custom_op in self.custom_ops: for custom_op in self.custom_ops:
out = custom_relu_static(custom_op, device, dtype, x) out = custom_relu_static(custom_op, device, dtype, x)
pd_out = custom_relu_static(custom_op, device, dtype, x, pd_out = custom_relu_static(
False) custom_op, device, dtype, x, False
)
np.testing.assert_array_equal( np.testing.assert_array_equal(
out, out,
pd_out, pd_out,
err_msg='custom op out: {},\n paddle api out: {}'. err_msg='custom op out: {},\n paddle api out: {}'.format(
format(out, pd_out)) out, pd_out
),
)
def test_static_pe(self): def test_static_pe(self):
for device in self.devices: for device in self.devices:
...@@ -238,13 +248,16 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): ...@@ -238,13 +248,16 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
x = np.random.uniform(-1, 1, [4, 8]).astype(dtype) x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
for custom_op in self.custom_ops: for custom_op in self.custom_ops:
out = custom_relu_static_pe(custom_op, device, dtype, x) out = custom_relu_static_pe(custom_op, device, dtype, x)
pd_out = custom_relu_static_pe(custom_op, device, dtype, x, pd_out = custom_relu_static_pe(
False) custom_op, device, dtype, x, False
)
np.testing.assert_array_equal( np.testing.assert_array_equal(
out, out,
pd_out, pd_out,
err_msg='custom op out: {},\n paddle api out: {}'. err_msg='custom op out: {},\n paddle api out: {}'.format(
format(out, pd_out)) out, pd_out
),
)
def func_dynamic(self): def func_dynamic(self):
for device in self.devices: for device in self.devices:
...@@ -253,20 +266,26 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): ...@@ -253,20 +266,26 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
continue continue
x = np.random.uniform(-1, 1, [4, 8]).astype(dtype) x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
for custom_op in self.custom_ops: for custom_op in self.custom_ops:
out, x_grad = custom_relu_dynamic(custom_op, device, dtype, out, x_grad = custom_relu_dynamic(
x) custom_op, device, dtype, x
)
pd_out, pd_x_grad = custom_relu_dynamic( pd_out, pd_x_grad = custom_relu_dynamic(
custom_op, device, dtype, x, False) custom_op, device, dtype, x, False
)
np.testing.assert_array_equal( np.testing.assert_array_equal(
out, out,
pd_out, pd_out,
err_msg='custom op out: {},\n paddle api out: {}'. err_msg='custom op out: {},\n paddle api out: {}'.format(
format(out, pd_out)) out, pd_out
),
)
np.testing.assert_array_equal( np.testing.assert_array_equal(
x_grad, x_grad,
pd_x_grad, pd_x_grad,
err_msg='custom op x grad: {},\n paddle api x grad: {}'. err_msg='custom op x grad: {},\n paddle api x grad: {}'.format(
format(x_grad, pd_x_grad)) x_grad, pd_x_grad
),
)
def test_dynamic(self): def test_dynamic(self):
with _test_eager_guard(): with _test_eager_guard():
...@@ -279,22 +298,29 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): ...@@ -279,22 +298,29 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
np_label = np.random.random((1, 1)).astype("int64") np_label = np.random.random((1, 1)).astype("int64")
path_prefix = "custom_op_inference/custom_relu" path_prefix = "custom_op_inference/custom_relu"
for device in self.devices: for device in self.devices:
predict = custom_relu_static_inference(self.custom_ops[0], device, predict = custom_relu_static_inference(
np_data, np_label, self.custom_ops[0], device, np_data, np_label, path_prefix
path_prefix) )
# load inference model # load inference model
with static.scope_guard(static.Scope()): with static.scope_guard(static.Scope()):
exe = static.Executor() exe = static.Executor()
[inference_program, feed_target_names, [
fetch_targets] = static.load_inference_model(path_prefix, exe) inference_program,
predict_infer = exe.run(inference_program, feed_target_names,
feed={feed_target_names[0]: np_data}, fetch_targets,
fetch_list=fetch_targets) ] = static.load_inference_model(path_prefix, exe)
predict_infer = exe.run(
inference_program,
feed={feed_target_names[0]: np_data},
fetch_list=fetch_targets,
)
np.testing.assert_array_equal( np.testing.assert_array_equal(
predict, predict,
predict_infer, predict_infer,
err_msg='custom op predict: {},\n custom op infer predict: {}' err_msg='custom op predict: {},\n custom op infer predict: {}'.format(
.format(predict, predict_infer)) predict, predict_infer
),
)
paddle.disable_static() paddle.disable_static()
def test_static_save_and_run_inference_predictor(self): def test_static_save_and_run_inference_predictor(self):
...@@ -304,62 +330,80 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): ...@@ -304,62 +330,80 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
path_prefix = "custom_op_inference/custom_relu" path_prefix = "custom_op_inference/custom_relu"
from paddle.inference import Config from paddle.inference import Config
from paddle.inference import create_predictor from paddle.inference import create_predictor
for device in self.devices: for device in self.devices:
predict = custom_relu_static_inference(self.custom_ops[0], device, predict = custom_relu_static_inference(
np_data, np_label, self.custom_ops[0], device, np_data, np_label, path_prefix
path_prefix) )
# load inference model # load inference model
config = Config(path_prefix + ".pdmodel", config = Config(
path_prefix + ".pdiparams") path_prefix + ".pdmodel", path_prefix + ".pdiparams"
)
predictor = create_predictor(config) predictor = create_predictor(config)
input_tensor = predictor.get_input_handle( input_tensor = predictor.get_input_handle(
predictor.get_input_names()[0]) predictor.get_input_names()[0]
)
input_tensor.reshape(np_data.shape) input_tensor.reshape(np_data.shape)
input_tensor.copy_from_cpu(np_data.copy()) input_tensor.copy_from_cpu(np_data.copy())
predictor.run() predictor.run()
output_tensor = predictor.get_output_handle( output_tensor = predictor.get_output_handle(
predictor.get_output_names()[0]) predictor.get_output_names()[0]
)
predict_infer = output_tensor.copy_to_cpu() predict_infer = output_tensor.copy_to_cpu()
self.assertTrue( self.assertTrue(
np.isclose(predict, predict_infer, rtol=5e-5).any(), np.isclose(predict, predict_infer, rtol=5e-5).any(),
"custom op predict: {},\n custom op infer predict: {}".format( "custom op predict: {},\n custom op infer predict: {}".format(
predict, predict_infer)) predict, predict_infer
),
)
paddle.disable_static() paddle.disable_static()
def test_func_double_grad_dynamic(self): def test_double_grad_dynamic(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
for device in self.devices: for device in self.devices:
for dtype in self.dtypes: for dtype in self.dtypes:
if device == 'cpu' and dtype == 'float16': if device == 'cpu' and dtype == 'float16':
continue continue
x = np.random.uniform(-1, 1, [4, 8]).astype(dtype) x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
out, dx_grad = custom_relu_double_grad_dynamic( out, dx_grad = custom_relu_double_grad_dynamic(
self.custom_ops[0], device, dtype, x) self.custom_ops[0], device, dtype, x
)
pd_out, pd_dx_grad = custom_relu_double_grad_dynamic( pd_out, pd_dx_grad = custom_relu_double_grad_dynamic(
self.custom_ops[0], device, dtype, x, False) self.custom_ops[0], device, dtype, x, False
)
np.testing.assert_array_equal( np.testing.assert_array_equal(
out, out,
pd_out, pd_out,
err_msg='custom op out: {},\n paddle api out: {}'.format( err_msg='custom op out: {},\n paddle api out: {}'.format(
out, pd_out)) out, pd_out
),
)
np.testing.assert_array_equal( np.testing.assert_array_equal(
dx_grad, dx_grad,
pd_dx_grad, pd_dx_grad,
err_msg='custom op dx grad: {},\n paddle api dx grad: {}'. err_msg='custom op dx grad: {},\n paddle api dx grad: {}'.format(
format(dx_grad, pd_dx_grad)) dx_grad, pd_dx_grad
),
)
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_with_dataloader(self): def test_with_dataloader(self):
for device in self.devices: for device in self.devices:
paddle.set_device(device) paddle.set_device(device)
# data loader # data loader
transform = Compose( transform = Compose(
[Normalize(mean=[127.5], std=[127.5], data_format='CHW')]) [Normalize(mean=[127.5], std=[127.5], data_format='CHW')]
train_dataset = paddle.vision.datasets.MNIST(mode='train', )
transform=transform) train_dataset = paddle.vision.datasets.MNIST(
train_loader = paddle.io.DataLoader(train_dataset, mode='train', transform=transform
batch_size=64, )
shuffle=True, train_loader = paddle.io.DataLoader(
drop_last=True, train_dataset,
num_workers=0) batch_size=64,
shuffle=True,
drop_last=True,
num_workers=0,
)
for batch_id, (image, _) in enumerate(train_loader()): for batch_id, (image, _) in enumerate(train_loader()):
out = self.custom_ops[0](image) out = self.custom_ops[0](image)
...@@ -368,7 +412,9 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): ...@@ -368,7 +412,9 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
out, out,
pd_out, pd_out,
err_msg='custom op out: {},\n paddle api out: {}'.format( err_msg='custom op out: {},\n paddle api out: {}'.format(
out, pd_out)) out, pd_out
),
)
if batch_id == 5: if batch_id == 5:
break break
......
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import paddle
class Net(paddle.nn.Layer):
    """Layer that defines no parameters or sub-layers; forward adds one to its input."""

    def __init__(self):
        super().__init__()

    # forward is wrapped by paddle.jit.to_static, so the test below
    # goes through the converted function rather than plain dygraph code.
    @paddle.jit.to_static
    def forward(self, x):
        return x + 1
class TestBackwardWithoutParams(unittest.TestCase):
    """Backward pass through ``Net``, a layer that declares no parameters."""

    def test_run(self):
        """``mean(x + 1)`` over a 2x2 input of ones must give a gradient of 0.25 per element."""
        layer = Net()

        inputs = paddle.ones([2, 2])
        # Opt the input into gradient tracking so `.grad` can be read
        # after backward().
        inputs.stop_gradient = False

        loss = paddle.mean(layer(inputs))
        loss.backward()

        # The mean averages four elements, so each one contributes 1/4.
        expected_grad = np.full(inputs.shape, 0.25)
        np.testing.assert_equal(inputs.grad.numpy(), expected_grad)
# Run the test case(s) defined above when this file is executed directly.
if __name__ == '__main__':
    unittest.main()
...@@ -292,7 +292,6 @@ def for_tuple_as_enumerate_value(x_array): ...@@ -292,7 +292,6 @@ def for_tuple_as_enumerate_value(x_array):
# 20. test for function in a class # 20. test for function in a class
class ForwardContainsForLayer(paddle.nn.Layer): class ForwardContainsForLayer(paddle.nn.Layer):
def __init__(self): def __init__(self):
super(ForwardContainsForLayer, self).__init__() super(ForwardContainsForLayer, self).__init__()
self.high = 5 self.high = 5
...@@ -328,8 +327,8 @@ def for_original_tuple(): ...@@ -328,8 +327,8 @@ def for_original_tuple():
# 23. for zip error # 23. for zip error
@paddle.jit.to_static( @paddle.jit.to_static(
input_spec=[InputSpec(shape=[None, 10]), input_spec=[InputSpec(shape=[None, 10]), InputSpec(shape=[None, 10])]
InputSpec(shape=[None, 10])]) )
def for_zip_error(x, y): def for_zip_error(x, y):
for i, j in zip(x, y): for i, j in zip(x, y):
a = i + j a = i + j
...@@ -338,8 +337,8 @@ def for_zip_error(x, y): ...@@ -338,8 +337,8 @@ def for_zip_error(x, y):
# 24. for zip # 24. for zip
@paddle.jit.to_static( @paddle.jit.to_static(
input_spec=[InputSpec(shape=[2, 10]), input_spec=[InputSpec(shape=[2, 10]), InputSpec(shape=[2, 10])]
InputSpec(shape=[2, 10])]) )
def for_zip(x, y): def for_zip(x, y):
for i, j in zip(x, y): for i, j in zip(x, y):
a = i + j a = i + j
...@@ -347,10 +346,12 @@ def for_zip(x, y): ...@@ -347,10 +346,12 @@ def for_zip(x, y):
class TestTransformBase(unittest.TestCase): class TestTransformBase(unittest.TestCase):
def setUp(self): def setUp(self):
self.place = fluid.CUDAPlace( self.place = (
0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() fluid.CUDAPlace(0)
if fluid.is_compiled_with_cuda()
else fluid.CPUPlace()
)
self.set_input() self.set_input()
self.set_test_func() self.set_test_func()
...@@ -359,7 +360,8 @@ class TestTransformBase(unittest.TestCase): ...@@ -359,7 +360,8 @@ class TestTransformBase(unittest.TestCase):
def set_test_func(self): def set_test_func(self):
raise NotImplementedError( raise NotImplementedError(
"For Enumerate test should implement set_test_func") "For Enumerate test should implement set_test_func"
)
def _run(self, to_static): def _run(self, to_static):
program_translator.enable(to_static) program_translator.enable(to_static)
...@@ -374,22 +376,21 @@ class TestTransformBase(unittest.TestCase): ...@@ -374,22 +376,21 @@ class TestTransformBase(unittest.TestCase):
class TestTransform(TestTransformBase): class TestTransform(TestTransformBase):
def transformed_result_compare(self): def transformed_result_compare(self):
dy_outs = self.get_dygraph_output() dy_outs = self.get_dygraph_output()
if not isinstance(dy_outs, (tuple, list)): if not isinstance(dy_outs, (tuple, list)):
dy_outs = (dy_outs, ) dy_outs = (dy_outs,)
self.dygraph_func.eval()
st_outs = self.get_static_output() st_outs = self.get_static_output()
if not isinstance(st_outs, (tuple, list)): if not isinstance(st_outs, (tuple, list)):
st_outs = (st_outs, ) st_outs = (st_outs,)
for x, y in zip(dy_outs, st_outs): for x, y in zip(dy_outs, st_outs):
np.testing.assert_allclose(x.numpy(), y.numpy(), rtol=1e-05) np.testing.assert_allclose(x.numpy(), y.numpy(), rtol=1e-05)
class TestTransformForOriginalList(TestTransform): class TestTransformForOriginalList(TestTransform):
def _run(self, to_static): def _run(self, to_static):
program_translator.enable(to_static) program_translator.enable(to_static)
with fluid.dygraph.guard(): with fluid.dygraph.guard():
...@@ -397,7 +398,6 @@ class TestTransformForOriginalList(TestTransform): ...@@ -397,7 +398,6 @@ class TestTransformForOriginalList(TestTransform):
class TestTransformError(TestTransformBase): class TestTransformError(TestTransformBase):
def transformed_error(self, etype): def transformed_error(self, etype):
with self.assertRaises(etype): with self.assertRaises(etype):
dy_out = self.get_dygraph_output() dy_out = self.get_dygraph_output()
...@@ -405,7 +405,6 @@ class TestTransformError(TestTransformBase): ...@@ -405,7 +405,6 @@ class TestTransformError(TestTransformBase):
class TestForInRange(TestTransform): class TestForInRange(TestTransform):
def set_input(self): def set_input(self):
self.input = np.array([5]) self.input = np.array([5])
...@@ -417,7 +416,6 @@ class TestForInRange(TestTransform): ...@@ -417,7 +416,6 @@ class TestForInRange(TestTransform):
class TestForIterList(TestTransform): class TestForIterList(TestTransform):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_iter_list self.dygraph_func = for_iter_list
...@@ -426,19 +424,16 @@ class TestForIterList(TestTransform): ...@@ -426,19 +424,16 @@ class TestForIterList(TestTransform):
class TestForEnumerateSimple(TestForIterList): class TestForEnumerateSimple(TestForIterList):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_enumerate_list self.dygraph_func = for_enumerate_list
class TestForInRangeWithBreak(TestForInRange): class TestForInRangeWithBreak(TestForInRange):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_in_range_with_break self.dygraph_func = for_in_range_with_break
class TestForIterVarNumpy(TestTransform): class TestForIterVarNumpy(TestTransform):
def set_input(self): def set_input(self):
self.input = np.array([1, 2, 3, 4, 5]) self.input = np.array([1, 2, 3, 4, 5])
...@@ -450,103 +445,86 @@ class TestForIterVarNumpy(TestTransform): ...@@ -450,103 +445,86 @@ class TestForIterVarNumpy(TestTransform):
class TestForEnumerateVarNumpy(TestForIterVarNumpy): class TestForEnumerateVarNumpy(TestForIterVarNumpy):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_enumerate_var_numpy self.dygraph_func = for_enumerate_var_numpy
class TestForEnumerateVarNumpyWithStart(TestForIterVarNumpy): class TestForEnumerateVarNumpyWithStart(TestForIterVarNumpy):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_enumerate_var_numpy_with_start self.dygraph_func = for_enumerate_var_numpy_with_start
class TestForEnumerateVarNumpyWithBreak(TestForIterVarNumpy): class TestForEnumerateVarNumpyWithBreak(TestForIterVarNumpy):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_enumerate_var_numpy_with_break self.dygraph_func = for_enumerate_var_numpy_with_break
class TestForEnumerateVarNumpyWithContinue(TestForIterVarNumpy): class TestForEnumerateVarNumpyWithContinue(TestForIterVarNumpy):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_enumerate_var_numpy_with_continue self.dygraph_func = for_enumerate_var_numpy_with_continue
class TestForEnumerateVarNumpyWithStartAndBreak(TestForIterVarNumpy): class TestForEnumerateVarNumpyWithStartAndBreak(TestForIterVarNumpy):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_enumerate_var_numpy_with_start_break self.dygraph_func = for_enumerate_var_numpy_with_start_break
class TestForEnumerateVarNumpyWithStartAndContinue(TestForIterVarNumpy): class TestForEnumerateVarNumpyWithStartAndContinue(TestForIterVarNumpy):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_enumerate_var_numpy_with_start_continue self.dygraph_func = for_enumerate_var_numpy_with_start_continue
class TestForIterVar(TestForIterVarNumpy): class TestForIterVar(TestForIterVarNumpy):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_iter_var self.dygraph_func = for_iter_var
class TestForIterVarIdx(TestForIterVarNumpy): class TestForIterVarIdx(TestForIterVarNumpy):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_iter_var_idx self.dygraph_func = for_iter_var_idx
class TestForEnumerateVar(TestForIterVarNumpy): class TestForEnumerateVar(TestForIterVarNumpy):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_enumerate_var self.dygraph_func = for_enumerate_var
class TestForEnumerateVarWithNestedRange(TestForIterVarNumpy): class TestForEnumerateVarWithNestedRange(TestForIterVarNumpy):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_enumerate_var_with_nested_range self.dygraph_func = for_enumerate_var_with_nested_range
class TestForIterVarList(TestForInRange): class TestForIterVarList(TestForInRange):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_iter_var_list self.dygraph_func = for_iter_var_list
class TestForEnumerateVarList(TestForInRange): class TestForEnumerateVarList(TestForInRange):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_enumerate_var_list self.dygraph_func = for_enumerate_var_list
class TestForTupleAsIterVar(TestForIterVarNumpy): class TestForTupleAsIterVar(TestForIterVarNumpy):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_tuple_as_iter_var self.dygraph_func = for_tuple_as_iter_var
class TestForTupleAsEnumerateIter(TestForIterVarNumpy): class TestForTupleAsEnumerateIter(TestForIterVarNumpy):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_tuple_as_enumerate_iter self.dygraph_func = for_tuple_as_enumerate_iter
class TestForTupleAsEnumerateValue(TestForIterVarNumpy): class TestForTupleAsEnumerateValue(TestForIterVarNumpy):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_tuple_as_enumerate_value self.dygraph_func = for_tuple_as_enumerate_value
class TestForwardContainsForLayer(TestForIterVarNumpy): class TestForwardContainsForLayer(TestForIterVarNumpy):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = ForwardContainsForLayer() self.dygraph_func = ForwardContainsForLayer()
class TestForOriginalList(TestTransformForOriginalList): class TestForOriginalList(TestTransformForOriginalList):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_original_list self.dygraph_func = for_original_list
...@@ -555,7 +533,6 @@ class TestForOriginalList(TestTransformForOriginalList): ...@@ -555,7 +533,6 @@ class TestForOriginalList(TestTransformForOriginalList):
class TestForOriginalTuple(TestTransformForOriginalList): class TestForOriginalTuple(TestTransformForOriginalList):
def set_test_func(self): def set_test_func(self):
self.dygraph_func = for_original_tuple self.dygraph_func = for_original_tuple
...@@ -564,7 +541,6 @@ class TestForOriginalTuple(TestTransformForOriginalList): ...@@ -564,7 +541,6 @@ class TestForOriginalTuple(TestTransformForOriginalList):
class TestForZip(unittest.TestCase): class TestForZip(unittest.TestCase):
def setUp(self): def setUp(self):
self.temp_dir = tempfile.TemporaryDirectory() self.temp_dir = tempfile.TemporaryDirectory()
......
...@@ -22,12 +22,10 @@ from typing import Optional, List, Callable, Dict, Any, Set ...@@ -22,12 +22,10 @@ from typing import Optional, List, Callable, Dict, Any, Set
class TrtConvertActivationTest(TrtLayerAutoScanTest): class TrtConvertActivationTest(TrtLayerAutoScanTest):
def is_program_valid(self, program_config: ProgramConfig) -> bool: def is_program_valid(self, program_config: ProgramConfig) -> bool:
return True return True
def sample_program_configs(self): def sample_program_configs(self):
def generate_input1(dims, batch, attrs: List[Dict[str, Any]]): def generate_input1(dims, batch, attrs: List[Dict[str, Any]]):
if dims == 1: if dims == 1:
return np.random.random([32]).astype(np.float32) return np.random.random([32]).astype(np.float32)
...@@ -41,11 +39,19 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest): ...@@ -41,11 +39,19 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest):
for dims in [1, 2, 3, 4]: for dims in [1, 2, 3, 4]:
for batch in [1, 4]: for batch in [1, 4]:
for op_type in [ for op_type in [
"relu", "sigmoid", "tanh", "relu6", "elu", "selu", "relu",
"softsign", "stanh", "thresholded_relu", "softplus" "sigmoid",
"tanh",
"relu6",
"elu",
"selu",
"softsign",
"stanh",
"thresholded_relu",
"softplus",
]: ]:
# few samples to reduce time # few samples to reduce time
#for beta in [-0.2, 0.5, 0.67, 3]: # for beta in [-0.2, 0.5, 0.67, 3]:
# for alpha in [-0.2, 0.5, 0.67, 3]: # for alpha in [-0.2, 0.5, 0.67, 3]:
for beta in [0.67]: for beta in [0.67]:
for alpha in [0.67]: for alpha in [0.67]:
...@@ -62,33 +68,34 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest): ...@@ -62,33 +68,34 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest):
if op_type == "softplus": if op_type == "softplus":
dics = [{"beta": beta}] dics = [{"beta": beta}]
ops_config = [{ ops_config = [
"op_type": op_type, {
"op_inputs": { "op_type": op_type,
"X": ["input_data"] "op_inputs": {"X": ["input_data"]},
}, "op_outputs": {"Out": ["output_data"]},
"op_outputs": { "op_attrs": dics[0],
"Out": ["output_data"] }
}, ]
"op_attrs": dics[0]
}]
ops = self.generate_op_config(ops_config) ops = self.generate_op_config(ops_config)
program_config = ProgramConfig( program_config = ProgramConfig(
ops=ops, ops=ops,
weights={}, weights={},
inputs={ inputs={
"input_data": "input_data": TensorConfig(
TensorConfig(data_gen=partial( data_gen=partial(
generate_input1, dims, batch, dics)) generate_input1, dims, batch, dics
)
)
}, },
outputs=["output_data"]) outputs=["output_data"],
)
yield program_config yield program_config
def sample_predictor_configs( def sample_predictor_configs(
self, program_config) -> (paddle_infer.Config, List[int], float): self, program_config
) -> (paddle_infer.Config, List[int], float):
def generate_dynamic_shape(attrs): def generate_dynamic_shape(attrs):
if self.dims == 1: if self.dims == 1:
self.dynamic_shape.min_input_shape = {"input_data": [1]} self.dynamic_shape.min_input_shape = {"input_data": [1]}
...@@ -131,19 +138,23 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest): ...@@ -131,19 +138,23 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest):
clear_dynamic_shape() clear_dynamic_shape()
self.trt_param.precision = paddle_infer.PrecisionType.Float32 self.trt_param.precision = paddle_infer.PrecisionType.Float32
yield self.create_inference_config(), generate_trt_nodes_num( yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False), 1e-5 attrs, False
), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), generate_trt_nodes_num( yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False), 1e-5 attrs, False
), 1e-3
# for dynamic_shape # for dynamic_shape
generate_dynamic_shape(attrs) generate_dynamic_shape(attrs)
self.trt_param.precision = paddle_infer.PrecisionType.Float32 self.trt_param.precision = paddle_infer.PrecisionType.Float32
yield self.create_inference_config(), generate_trt_nodes_num( yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True), 1e-5 attrs, True
), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), generate_trt_nodes_num( yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True), 1e-5 attrs, True
), 1e-3
def test(self): def test(self):
self.run_test() self.run_test()
......
...@@ -22,60 +22,66 @@ from typing import Optional, List, Callable, Dict, Any, Set ...@@ -22,60 +22,66 @@ from typing import Optional, List, Callable, Dict, Any, Set
class TrtConvertAnchorGeneratorTest(TrtLayerAutoScanTest): class TrtConvertAnchorGeneratorTest(TrtLayerAutoScanTest):
def is_program_valid(self, program_config: ProgramConfig) -> bool: def is_program_valid(self, program_config: ProgramConfig) -> bool:
return True return True
def sample_program_configs(self): def sample_program_configs(self):
def generate_input1(batch, attrs: List[Dict[str, Any]]): def generate_input1(batch, attrs: List[Dict[str, Any]]):
return np.random.random([batch, 3, 64, 64]).astype(np.float32) return np.random.random([batch, 3, 64, 64]).astype(np.float32)
for batch in [1, 2, 4]: for batch in [1, 2, 4]:
for anchor_sizes in [[64.0, 128.0, 256.0, 512.0]]: for anchor_sizes in [[64.0, 128.0, 256.0, 512.0]]:
for aspect_ratios in [[0.5, 1, 2], [0.4, 1.2, 3]]: for aspect_ratios in [[0.5, 1, 2], [0.4, 1.2, 3]]:
for variances in [[1.0, 1.0, 1.0, 1.0], for variances in [
[0.5, 1.0, 0.5, 1.0]]: [1.0, 1.0, 1.0, 1.0],
[0.5, 1.0, 0.5, 1.0],
]:
for stride in [[16.0, 16.0], [16.0, 32.0]]: for stride in [[16.0, 16.0], [16.0, 32.0]]:
for offset in [0.5, 0.8]: for offset in [0.5, 0.8]:
dics = [{ dics = [
"anchor_sizes": anchor_sizes, {
"aspect_ratios": aspect_ratios, "anchor_sizes": anchor_sizes,
"variances": variances, "aspect_ratios": aspect_ratios,
"stride": stride, "variances": variances,
"offset": offset "stride": stride,
}] "offset": offset,
}
ops_config = [{ ]
"op_type": "anchor_generator",
"op_inputs": { ops_config = [
"Input": ["input_data"] {
}, "op_type": "anchor_generator",
"op_outputs": { "op_inputs": {"Input": ["input_data"]},
"Anchors": ["output_anchors"], "op_outputs": {
"Variances": ["output_variances"] "Anchors": ["output_anchors"],
}, "Variances": ["output_variances"],
"op_attrs": dics[0] },
}] "op_attrs": dics[0],
}
]
ops = self.generate_op_config(ops_config) ops = self.generate_op_config(ops_config)
program_config = ProgramConfig( program_config = ProgramConfig(
ops=ops, ops=ops,
weights={}, weights={},
inputs={ inputs={
"input_data": "input_data": TensorConfig(
TensorConfig(data_gen=partial( data_gen=partial(
generate_input1, batch, dics)) generate_input1, batch, dics
)
)
}, },
outputs=[ outputs=[
"output_anchors", "output_variances" "output_anchors",
]) "output_variances",
],
)
yield program_config yield program_config
def sample_predictor_configs( def sample_predictor_configs(
self, program_config) -> (paddle_infer.Config, List[int], float): self, program_config
) -> (paddle_infer.Config, List[int], float):
def generate_dynamic_shape(attrs): def generate_dynamic_shape(attrs):
self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]} self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]} self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
...@@ -100,19 +106,23 @@ class TrtConvertAnchorGeneratorTest(TrtLayerAutoScanTest): ...@@ -100,19 +106,23 @@ class TrtConvertAnchorGeneratorTest(TrtLayerAutoScanTest):
clear_dynamic_shape() clear_dynamic_shape()
self.trt_param.precision = paddle_infer.PrecisionType.Float32 self.trt_param.precision = paddle_infer.PrecisionType.Float32
yield self.create_inference_config(), generate_trt_nodes_num( yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False), 1e-5 attrs, False
), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), generate_trt_nodes_num( yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False), 1e-5 attrs, False
), 1e-3
# for dynamic_shape # for dynamic_shape
generate_dynamic_shape(attrs) generate_dynamic_shape(attrs)
self.trt_param.precision = paddle_infer.PrecisionType.Float32 self.trt_param.precision = paddle_infer.PrecisionType.Float32
yield self.create_inference_config(), generate_trt_nodes_num( yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True), 1e-5 attrs, True
), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), generate_trt_nodes_num( yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True), 1e-5 attrs, True
), 1e-3
def test(self): def test(self):
self.run_test() self.run_test()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment