# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import os import sys import codecs import warnings import argparse LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) sys.path.append(os.path.join(LOCAL_PATH, '..', '..')) import yaml import functools import numpy as np from paddle.inference import create_predictor, PrecisionType from paddle.inference import Config as PredictConfig import paddle import medicalseg.transforms as T from medicalseg.cvlibs import manager from medicalseg.utils import get_sys_env, logger, get_image_list from medicalseg.utils.visualize import get_pseudo_color_map from medicalseg.core.infer import sliding_window_inference from tools import HUnorm, resample from tools import Prep def parse_args(): parser = argparse.ArgumentParser(description='Test') parser.add_argument( "--config", dest="cfg", help="The config file.", default=None, type=str, required=True) parser.add_argument( '--image_path', dest='image_path', help='The directory or path or file list of the images to be predicted.', type=str, default=None, required=True) parser.add_argument( '--batch_size', dest='batch_size', help='Mini batch size of one gpu or cpu.', type=int, default=1) parser.add_argument( '--save_dir', dest='save_dir', help='The directory for saving the predict result.', type=str, default='./output') parser.add_argument( '--device', choices=['cpu', 'gpu'], default="gpu", help="Select which device to inference, defaults to gpu.") parser.add_argument( '--use_trt', default=False, type=eval, choices=[True, False], help='Whether to use Nvidia TensorRT to accelerate prediction.') parser.add_argument( "--precision", default="fp32", type=str, choices=["fp32", "fp16", "int8"], help='The tensorrt precision.') parser.add_argument( '--enable_auto_tune', default=False, type=eval, choices=[True, False], help='Whether to enable tuned dynamic shape. We uses some images to collect ' 'the dynamic shape for trt sub graph, which avoids setting dynamic shape manually.' ) parser.add_argument( '--auto_tuned_shape_file', type=str, default="auto_tune_tmp.pbtxt", help='The temp file to save tuned dynamic shape.') parser.add_argument( '--cpu_threads', default=10, type=int, help='Number of threads to predict when using cpu.') parser.add_argument( '--enable_mkldnn', default=False, type=eval, choices=[True, False], help='Enable to use mkldnn to speed up when using cpu.') parser.add_argument( "--benchmark", type=eval, default=False, help="Whether to log some information about environment, model, configuration and performance." ) parser.add_argument( "--model_name", default="", type=str, help='When `--benchmark` is True, the specified model name is displayed.' ) parser.add_argument( '--with_argmax', dest='with_argmax', help='Perform argmax operation on the predict result.', action='store_true') parser.add_argument( '--print_detail', default=True, type=eval, choices=[True, False], help='Print GLOG information of Paddle Inference.') parser.add_argument( '--use_swl', default=False, type=eval, help='use sliding_window_inference') parser.add_argument('--use_warmup', default=True, type=eval, help='warmup') parser.add_argument( '--img_shape', default=[128], nargs='+', help='"A single value or three values to specify the size in each dimension."' ) parser.add_argument('--is_nhwd', default=True, type=eval, help='is_nhwd') return parser.parse_args() def use_auto_tune(args): return hasattr(PredictConfig, "collect_shape_range_info") \ and hasattr(PredictConfig, "enable_tuned_tensorrt_dynamic_shape") \ and args.device == "gpu" and args.use_trt and args.enable_auto_tune class DeployConfig: def __init__(self, path): with codecs.open(path, 'r', 'utf-8') as file: self.dic = yaml.load(file, Loader=yaml.FullLoader) self._transforms = self.load_transforms(self.dic['Deploy'][ 'transforms']) if self.dic['Deploy']['inference_helper'] is not None: self._inference_helper = self.load_inference_helper(self.dic[ 'Deploy']['inference_helper']) else: self._inference_helper = None self._dir = os.path.dirname(path) @property def transforms(self): return self._transforms @property def inference_helper(self): return self._inference_helper @property def model(self): return os.path.join(self._dir, self.dic['Deploy']['model']) @property def params(self): return os.path.join(self._dir, self.dic['Deploy']['params']) @staticmethod def load_transforms(t_list): com = manager.TRANSFORMS transforms = [] for t in t_list: ctype = t.pop('type', None) if ctype is not None: transforms.append(com[ctype](**t)) return T.Compose(transforms) @staticmethod def load_inference_helper(t): com = manager.INFERENCE_HELPERS inference_helper = None ctype = t.pop('type', None) if ctype is not None: inference_helper = com[ctype](**t) return inference_helper def auto_tune(args, imgs, img_nums): """ Use images to auto tune the dynamic shape for trt sub graph. The tuned shape saved in args.auto_tuned_shape_file. Args: args(dict): input args. imgs(str, list[str]): the path for images. img_nums(int): the nums of images used for auto tune. Returns: None """ logger.info("Auto tune the dynamic shape for GPU TRT.") assert use_auto_tune(args) if not isinstance(imgs, (list, tuple)): imgs = [imgs] num = min(len(imgs), img_nums) cfg = DeployConfig(args.cfg) pred_cfg = PredictConfig(cfg.model, cfg.params) pred_cfg.enable_use_gpu(100, 0) if not args.print_detail: pred_cfg.disable_glog_info() pred_cfg.collect_shape_range_info(args.auto_tuned_shape_file) predictor = create_predictor(pred_cfg) input_names = predictor.get_input_names() input_handle = predictor.get_input_handle(input_names[0]) for i in range(0, num): data = np.array([cfg.transforms(imgs[i])[0]]) input_handle.reshape(data.shape) input_handle.copy_from_cpu(data) try: predictor.run() except: logger.info( "Auto tune fail. Usually, the error is out of GPU memory, " "because the model and image is too large. \n") del predictor if os.path.exists(args.auto_tuned_shape_file): os.remove(args.auto_tuned_shape_file) return logger.info("Auto tune success.\n") class ModelLikeInfer: def __init__(self, input_handle, output_handle, predictor): self.input_handle = input_handle self.output_handle = output_handle self.predictor = predictor def infer_likemodel(self, input_handle, output_handle, predictor, data): input_handle.reshape(data.shape) input_handle.copy_from_cpu(data.numpy()) predictor.run() return paddle.to_tensor(output_handle.copy_to_cpu()) def infer_model(self, data): return (self.infer_likemodel(self.input_handle, self.output_handle, self.predictor, data), ) class Predictor: def __init__(self, args): """ Prepare for prediction. The usage and docs of paddle inference, please refer to https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html """ self.args = args self.cfg = DeployConfig(args.cfg) self._init_base_config() if args.device == 'cpu': self._init_cpu_config() else: self._init_gpu_config() self.predictor = create_predictor(self.pred_cfg) if hasattr(args, 'benchmark') and args.benchmark: import auto_log pid = os.getpid() self.autolog = auto_log.AutoLogger( model_name=args.model_name, model_precision=args.precision, batch_size=args.batch_size, data_shape="dynamic", save_path=None, inference_config=self.pred_cfg, pids=pid, process_name=None, gpu_ids=0, time_keys=[ 'preprocess_time', 'inference_time', 'postprocess_time' ], warmup=0, logger=logger) def _init_base_config(self): "初始化基础配置" self.pred_cfg = PredictConfig(self.cfg.model, self.cfg.params) if not self.args.print_detail: self.pred_cfg.disable_glog_info() self.pred_cfg.enable_memory_optim() self.pred_cfg.switch_ir_optim(True) def _init_cpu_config(self): """ Init the config for x86 cpu. """ logger.info("Use CPU") self.pred_cfg.disable_gpu() if self.args.enable_mkldnn: logger.info("Use MKLDNN") # cache 10 different shapes for mkldnn self.pred_cfg.set_mkldnn_cache_capacity(10) self.pred_cfg.enable_mkldnn() self.pred_cfg.set_cpu_math_library_num_threads(self.args.cpu_threads) def _init_gpu_config(self): """ Init the config for nvidia gpu. """ logger.info("Use GPU") self.pred_cfg.enable_use_gpu(100, 0) precision_map = { "fp16": PrecisionType.Half, "fp32": PrecisionType.Float32, "int8": PrecisionType.Int8 } precision_mode = precision_map[self.args.precision] if self.args.use_trt: logger.info("Use TRT") self.pred_cfg.enable_tensorrt_engine( workspace_size=1 << 30, max_batch_size=1, min_subgraph_size=300, precision_mode=precision_mode, use_static=False, use_calib_mode=False) if use_auto_tune(self.args) and \ os.path.exists(self.args.auto_tuned_shape_file): logger.info("Use auto tuned dynamic shape") allow_build_at_runtime = True self.pred_cfg.enable_tuned_tensorrt_dynamic_shape( self.args.auto_tuned_shape_file, allow_build_at_runtime) else: logger.info("Use manual set dynamic shape") min_input_shape = {"x": [1, 3, 100, 100]} max_input_shape = {"x": [1, 3, 2000, 3000]} opt_input_shape = {"x": [1, 3, 512, 1024]} self.pred_cfg.set_trt_dynamic_shape_info( min_input_shape, max_input_shape, opt_input_shape) def run(self, imgs_path): if not isinstance(imgs_path, (list, tuple)): imgs_path = [imgs_path] input_names = self.predictor.get_input_names() input_handle = self.predictor.get_input_handle(input_names[0]) output_names = self.predictor.get_output_names() output_handle = self.predictor.get_output_handle(output_names[0]) results = [] args = self.args if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) for i in range(0, len(imgs_path), args.batch_size): if args.use_warmup: # warm up if i == 0 and args.benchmark: for j in range(5): if self.cfg.inference_helper is not None: data = self.cfg.inference_helper.preprocess( self.cfg, imgs_path, args.batch_size, 0) else: data = np.array([ self._preprocess(img) # load from original for img in imgs_path[0:args.batch_size] ]) input_handle.reshape(data.shape) input_handle.copy_from_cpu(data) self.predictor.run() results = output_handle.copy_to_cpu() results = self._postprocess(results) # inference if args.benchmark: self.autolog.times.start() if self.cfg.inference_helper is not None: data = self.cfg.inference_helper.preprocess(self.cfg, imgs_path, args.batch_size, i) else: data = np.array([ self._preprocess(p) for p in imgs_path[i:i + args.batch_size] ]) if args.benchmark: self.autolog.times.stamp() if args.use_swl: infer_like_model = ModelLikeInfer(input_handle, output_handle, self.predictor) data = paddle.to_tensor(data) if args.is_nhwd: data = paddle.squeeze(data, axis=1) if len(args.img_shape) == 1: results = sliding_window_inference( data, (int(args.img_shape[0]), int(args.img_shape[0]), int(args.img_shape[0])), 1, infer_like_model.infer_model) else: results = sliding_window_inference( data, (int(args.img_shape[0]), int(args.img_shape[1]), int(args.img_shape[2])), 1, infer_like_model.infer_model, "NCDHW") results = results[0] else: input_handle.reshape(data.shape) input_handle.copy_from_cpu(data) self.predictor.run() results = output_handle.copy_to_cpu() if args.benchmark: self.autolog.times.stamp() if self.cfg.inference_helper is not None: results = self.cfg.inference_helper.postprocess(results) else: results = self._postprocess(results) if args.benchmark: self.autolog.times.end(stamp=True) self._save_npy(results, imgs_path[i:i + args.batch_size]) logger.info("Finish") def _preprocess(self, img): """load img and transform it Args: Img(str): A batch of image path """ if not "npy" in img: image_files = get_image_list(img, None, None) warnings.warn( "The image path is {}, please make sure this is the images you want to infer". format(image_files)) savepath = os.path.dirname(img) pre = [ HUnorm, functools.partial( resample, # TODO: config preprocess in deply.yaml(export) to set params new_shape=[128, 128, 128], order=1) ] for f in image_files: f_nps = Prep.load_medical_data(f) for f_np in f_nps: if pre is not None: for op in pre: f_np = op(f_np) # Set image to a uniform format before save. if isinstance(f_np, tuple): f_np = f_np[0] f_np = f_np.astype("float32") np.save( os.path.join( savepath, f.split("/")[-1].split( ".", maxsplit=1)[0]), f_np) img = img.split(".", maxsplit=1)[0] + ".npy" return self.cfg.transforms(img)[0] def _postprocess(self, results): "results is numpy array, optionally postprocess with argmax" if self.args.with_argmax: results = np.argmax(results, axis=1) return results def _save_npy(self, results, imgs_path): for i in range(results.shape[0]): basename = os.path.basename(imgs_path[i]) basename, _ = os.path.splitext(basename) basename = f'{basename}.npy' np.save(os.path.join(self.args.save_dir, basename), results) def main(args): imgs_list = get_image_list( args.image_path) # get image list from image path # support autotune to collect dynamic shape, works only with trt on. if use_auto_tune(args): tune_img_nums = 10 auto_tune(args, imgs_list, tune_img_nums) # infer with paddle inference. predictor = Predictor(args) predictor.run(imgs_list) if use_auto_tune(args) and \ os.path.exists(args.auto_tuned_shape_file): os.remove(args.auto_tuned_shape_file) # test the speed. if args.benchmark: predictor.autolog.report() if __name__ == '__main__': args = parse_args() main(args)