Update tensorrt

3f2a16d7 · Weiyun1025 · zhe chen · d4dc2427 · 3f2a16d7 · 3f2a16d7
Commit 3f2a16d7 authored Mar 27, 2023 by Weiyun1025 Committed by zhe chen Mar 27, 2023
20 changed files
--- a/README.md
+++ b/README.md
@@ -166,72 +166,96 @@
 - 3D感知: [BEVFormer](https://github.com/fundamentalvision/BEVFormer)

 ## 开源视觉预训练模型
-|      name      | pretrain   | pre-training  resolution |  #param |       download      |
-| :------------: | :--------: | :--------: | :-----: | :-----------------: |
-| InternImage-L  | ImageNet-22K |  384x384   |  223M   |   [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22k_192to384.pth)            |
-| InternImage-XL | ImageNet-22K |  384x384   |  335M   |   [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22k_192to384.pth)            |
-| InternImage-H | Joint 427M |  384x384   |  1.08B   |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_h_jointto22k_384.pth)            |
-| InternImage-G | - |  384x384   |   3B   |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_g_pretrainto22k_384.pth)            | 
+|      name      |   pretrain   | pre-training  resolution | #param |                                               download                                                |
+| :------------: | :----------: | :----------------------: | :----: | :---------------------------------------------------------------------------------------------------: |
+| InternImage-L  | ImageNet-22K |         384x384          |  223M  |   [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22k_192to384.pth)    |
+| InternImage-XL | ImageNet-22K |         384x384          |  335M  |   [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22k_192to384.pth)   |
+| InternImage-H  |  Joint 427M  |         384x384          | 1.08B  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_h_jointto22k_384.pth)   |
+| InternImage-G  |      -       |         384x384          |   3B   | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_g_pretrainto22k_384.pth) |



 ## ImageNet-1K图像分类
-|      name      |   pretrain   | resolution | acc@1 | #param | FLOPs |           download       |
-| :------------: | :----------: | :--------: | :---: | :-----: | :---: |  :-----------------: |
-| InternImage-T  | ImageNet-1K  |  224x224   | 83.5  |   30M   |  5G   |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_t_1k_224.pth) \| [cfg](classification/configs/internimage_t_1k_224.yaml) |
-| InternImage-S  | ImageNet-1K  |  224x224   | 84.2  |   50M   |  8G   |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_s_1k_224.pth) \| [cfg](classification/configs/internimage_s_1k_224.yaml) |
-| InternImage-B  | ImageNet-1K  |  224x224   | 84.9  |   97M   |  16G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_b_1k_224.pth) \| [cfg](classification/configs/internimage_b_1k_224.yaml) |
-| InternImage-L  | ImageNet-22K |  384x384   | 87.7  |  223M   | 108G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22kto1k_384.pth) \| [cfg](classification/configs/internimage_l_22kto1k_384.yaml) |
-| InternImage-XL | ImageNet-22K |  384x384   | 88.0  |  335M   | 163G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22kto1k_384.pth) \| [cfg](classification/configs/internimage_xl_22kto1k_384.yaml) |
-| InternImage-H | Joint 427M |  640x640   | 89.6  |  1.08B   | 1478G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_h_22kto1k_640.pth) \| [cfg](classification/configs/internimage_h_22kto1k_640.yaml) |
-| InternImage-G | - | 512x512 | 90.1  |  3B   | 2700G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_g_22kto1k_512.pth) \| [cfg](classification/configs/internimage_g_22kto1k_512.yaml) |
+|      name      |   pretrain   | resolution | acc@1 | #param | FLOPs |                                                                             download                                                                              |
+| :------------: | :----------: | :--------: | :---: | :----: | :---: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------: |
+| InternImage-T  | ImageNet-1K  |  224x224   | 83.5  |  30M   |  5G   |       [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_t_1k_224.pth) \| [cfg](classification/configs/internimage_t_1k_224.yaml)       |
+| InternImage-S  | ImageNet-1K  |  224x224   | 84.2  |  50M   |  8G   |       [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_s_1k_224.pth) \| [cfg](classification/configs/internimage_s_1k_224.yaml)       |
+| InternImage-B  | ImageNet-1K  |  224x224   | 84.9  |  97M   |  16G  |       [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_b_1k_224.pth) \| [cfg](classification/configs/internimage_b_1k_224.yaml)       |
+| InternImage-L  | ImageNet-22K |  384x384   | 87.7  |  223M  | 108G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22kto1k_384.pth) \| [cfg](classification/configs/internimage_l_22kto1k_384.yaml)  |
+| InternImage-XL | ImageNet-22K |  384x384   | 88.0  |  335M  | 163G  | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22kto1k_384.pth) \| [cfg](classification/configs/internimage_xl_22kto1k_384.yaml) |
+| InternImage-H  |  Joint 427M  |  640x640   | 89.6  | 1.08B  | 1478G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_h_22kto1k_640.pth) \| [cfg](classification/configs/internimage_h_22kto1k_640.yaml)  |
+| InternImage-G  |      -       |  512x512   | 90.1  |   3B   | 2700G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_g_22kto1k_512.pth) \| [cfg](classification/configs/internimage_g_22kto1k_512.yaml)  |



 ## COCO目标检测和实例分割

-|    backbone    |       method       | schd | box mAP  | mask mAP  | #param | FLOPs | download | 
-| :------------: | :----------------: | :---------: | :-----: | :------: | :-----: | :---: | :---: | 
-| InternImage-T  |     Mask R-CNN     |     1x      |  47.2   |   42.5   |   49M   | 270G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_t_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_t_fpn_1x_coco.py) |
-| InternImage-T  |     Mask R-CNN     |     3x      |  49.1   |   43.7   |   49M   | 270G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_t_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_t_fpn_3x_coco.py) |
-| InternImage-S  |     Mask R-CNN     |     1x      |  47.8   |   43.3   |   69M   | 340G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_s_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_s_fpn_1x_coco.py) |
-| InternImage-S  |     Mask R-CNN     |     3x      |  49.7   |   44.5   |   69M   | 340G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_s_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_s_fpn_3x_coco.py) |
-| InternImage-B  |     Mask R-CNN     |     1x      |  48.8   |   44.0   |  115M   | 501G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_b_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_b_fpn_1x_coco.py) |
-| InternImage-B  |     Mask R-CNN     |     3x      |  50.3   |   44.8   |  115M   | 501G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_b_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_b_fpn_3x_coco.py) |
-| InternImage-L  |     Cascade        |     1x      |  54.9   |   47.7   |  277M   | 1399G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/cascade_internimage_l_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/cascade_internimage_l_fpn_1x_coco.py) |
-| InternImage-L  |     Cascade        |     3x      |  56.1   |   48.5   |  277M   | 1399G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/cascade_internimage_l_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/cascade_internimage_l_fpn_3x_coco.py) |
-| InternImage-XL |     Cascade        |     1x      |  55.3   |   48.1   |  387M   | 1782G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/cascade_internimage_xl_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/cascade_internimage_xl_fpn_1x_coco.py) |
-| InternImage-XL |     Cascade        |     3x      |  56.2   |   48.8   |  387M   | 1782G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/cascade_internimage_xl_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/cascade_internimage_xl_fpn_3x_coco.py) |
-
-|    backbone    |       method       |  box mAP (val/test) |  #param  | FLOPs | download |
-| :------------: | :----------------: |     :---------:     | :------: | :-----: | :-----: |
-| InternImage-H  |     DINO (TTA)     |      65.0 / 65.4     |   2.18B  | TODO | TODO |
-| InternImage-G  |     DINO (TTA)     |      65.3 / 65.5     |    3B    | TODO | TODO |
+|    backbone    |   method   | schd  | box mAP | mask mAP | #param | FLOPs |                                                                                     download                                                                                      |
+| :------------: | :--------: | :---: | :-----: | :------: | :----: | :---: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
+| InternImage-T  | Mask R-CNN |  1x   |  47.2   |   42.5   |  49M   | 270G  | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_t_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_t_fpn_1x_coco.py) |
+| InternImage-T  | Mask R-CNN |  3x   |  49.1   |   43.7   |  49M   | 270G  | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_t_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_t_fpn_3x_coco.py) |
+| InternImage-S  | Mask R-CNN |  1x   |  47.8   |   43.3   |  69M   | 340G  | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_s_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_s_fpn_1x_coco.py) |
+| InternImage-S  | Mask R-CNN |  3x   |  49.7   |   44.5   |  69M   | 340G  | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_s_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_s_fpn_3x_coco.py) |
+| InternImage-B  | Mask R-CNN |  1x   |  48.8   |   44.0   |  115M  | 501G  | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_b_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_b_fpn_1x_coco.py) |
+| InternImage-B  | Mask R-CNN |  3x   |  50.3   |   44.8   |  115M  | 501G  | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_b_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_b_fpn_3x_coco.py) |
+| InternImage-L  |  Cascade   |  1x   |  54.9   |   47.7   |  277M  | 1399G |   [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/cascade_internimage_l_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/cascade_internimage_l_fpn_1x_coco.py)   |
+| InternImage-L  |  Cascade   |  3x   |  56.1   |   48.5   |  277M  | 1399G |   [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/cascade_internimage_l_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/cascade_internimage_l_fpn_3x_coco.py)   |
+| InternImage-XL |  Cascade   |  1x   |  55.3   |   48.1   |  387M  | 1782G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/cascade_internimage_xl_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/cascade_internimage_xl_fpn_1x_coco.py)  |
+| InternImage-XL |  Cascade   |  3x   |  56.2   |   48.8   |  387M  | 1782G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/cascade_internimage_xl_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/cascade_internimage_xl_fpn_3x_coco.py)  |
+
+|   backbone    |   method   | box mAP (val/test) | #param | FLOPs | download |
+| :-----------: | :--------: | :----------------: | :----: | :---: | :------: |
+| InternImage-H | DINO (TTA) |    65.0 / 65.4     | 2.18B  | TODO  |   TODO   |
+| InternImage-G | DINO (TTA) |    65.3 / 65.5     |   3B   | TODO  |   TODO   |

 ## ADE20K语义分割

-|    backbone    | method     |   resolution | mIoU (ss/ms) | #param | FLOPs | download |
-| :------------: | :--------: | :--------: | :----------: | :-----: | :---: |   :---:  |
-| InternImage-T  |  UperNet   |   512x512   |     47.9 / 48.1     |   59M   | 944G  | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_t_512_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_t_512_160k_ade20k.py) |
-| InternImage-S  |  UperNet   |  512x512   |     50.1 / 50.9     |   80M   | 1017G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_s_512_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_s_512_160k_ade20k.py) |
-| InternImage-B  |  UperNet   |  512x512   |     50.8 / 51.3     |  128M   | 1185G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_b_512_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_b_512_160k_ade20k.py) |
-| InternImage-L  |  UperNet   |  640x640   |     53.9 / 54.1     |  256M   | 2526G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_640_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_l_640_160k_ade20k.py) |
-| InternImage-XL |  UperNet   |  640x640   |     55.0 / 55.3     |  368M   | 3142G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_640_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_xl_640_160k_ade20k.py) |
-| InternImage-H |  UperNet   |  896x896   |     59.9 / 60.3     |  1.12B   | 3566G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_h_896_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_h_896_160k_ade20k.py) |
-| InternImage-H |  Mask2Former   |  896x896   |     62.5 / 62.9     |  1.31B   | 4635G | TODO | 
+|    backbone    |   method    | resolution | mIoU (ss/ms) | #param | FLOPs |                                                                                           download                                                                                           |
+| :------------: | :---------: | :--------: | :----------: | :----: | :---: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
+| InternImage-T  |   UperNet   |  512x512   | 47.9 / 48.1  |  59M   | 944G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_t_512_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_t_512_160k_ade20k.py)  |
+| InternImage-S  |   UperNet   |  512x512   | 50.1 / 50.9  |  80M   | 1017G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_s_512_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_s_512_160k_ade20k.py)  |
+| InternImage-B  |   UperNet   |  512x512   | 50.8 / 51.3  |  128M  | 1185G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_b_512_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_b_512_160k_ade20k.py)  |
+| InternImage-L  |   UperNet   |  640x640   | 53.9 / 54.1  |  256M  | 2526G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_640_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_l_640_160k_ade20k.py)  |
+| InternImage-XL |   UperNet   |  640x640   | 55.0 / 55.3  |  368M  | 3142G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_640_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_xl_640_160k_ade20k.py) |
+| InternImage-H  |   UperNet   |  896x896   | 59.9 / 60.3  | 1.12B  | 3566G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_h_896_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_h_896_160k_ade20k.py)  |
+| InternImage-H  | Mask2Former |  896x896   | 62.5 / 62.9  | 1.31B  | 4635G |                                                                                             TODO                                                                                             |


 ## 模型推理速度

-[[TensorRT]](classification/export.py)
+[export classification model from pytorch to tensorrt](classification/README.md#export)
+
+[export detection model from pytorch to tensorrt](detection/README.md#export)
+
+[export segmentation model from pytorch to tensorrt](segmentation/README.md#export)

 |      name      | resolution | #param | FLOPs | batch 1 FPS (TensorRT) |
-| :------------: | :--------: | :-----: | :---: | :-------------------: |
-| InternImage-T  |  224x224   |   30M   |  5G   |          156          |
-| InternImage-S  |  224x224   |   50M   |  8G   |          129          |
-| InternImage-B  |  224x224   |   97M   |  16G  |          116          |
-| InternImage-L  |  384x384   |  223M   | 108G  |          56           |
-| InternImage-XL |  384x384   |  335M   | 163G  |          47           |
+| :------------: | :--------: | :----: | :---: | :--------------------: |
+| InternImage-T  |  224x224   |  30M   |  5G   |          156           |
+| InternImage-S  |  224x224   |  50M   |  8G   |          129           |
+| InternImage-B  |  224x224   |  97M   |  16G  |          116           |
+| InternImage-L  |  384x384   |  223M  | 108G  |           56           |
+| InternImage-XL |  384x384   |  335M  | 163G  |           47           |
+
+在使用`mmdeploy`将PyTorch模型转为TensorRT之前，请确保您已正确编译DCNv3的自定义算子，其安装方式如下：
+```shell
+export MMDEPLOY_DIR=/the/root/path/of/MMDeploy
+
+# prepare our custom ops, you can find it at InternImage/tensorrt/modulated_deform_conv_v3
+cp -r modulated_deform_conv_v3 ${MMDEPLOY_DIR}/csrc/mmdeploy/backend_ops/tensorrt
+
+# build custom ops
+cd ${MMDEPLOY_DIR}
+mkdir -p build && cd build
+cmake -DCMAKE_CXX_COMPILER=g++-7 -DMMDEPLOY_TARGET_BACKENDS=trt -DTENSORRT_DIR=${TENSORRT_DIR} -DCUDNN_DIR=${CUDNN_DIR} ..
+make -j$(nproc) && make install
+
+# install the mmdeploy after building custom ops
+cd ${MMDEPLOY_DIR}
+pip install -e .
+```
+关于`mmdeploy`编译自定义算子的更多细节，请参考这份[文档](https://github.com/open-mmlab/mmdeploy/blob/master/docs/en/01-how-to-build/linux-x86_64.md)。
+


 ## 引用

--- a/README_EN.md
+++ b/README_EN.md
@@ -170,72 +170,96 @@ InternImage, the visual backbone network of "INTERN-2.5", has a parameter size o


 ## Open-source Visual Pretrained Models
-|      name      | pretrain     | pre-training  resolution |  #param |       download      |
-| :------------: | :--------:   | :--------: | :-----: | :-----------------: |
-| InternImage-L  | ImageNet-22K |  384x384   |  223M   |   [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22k_192to384.pth)            |
-| InternImage-XL | ImageNet-22K |  384x384   |  335M   |   [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22k_192to384.pth)            |
-| InternImage-H  | Joint 427M   |  384x384   |  1.08B  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_h_jointto22k_384.pth)            |
-| InternImage-G  | -            |  384x384   |   3B    |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_g_pretrainto22k_384.pth)            | 
+|      name      |   pretrain   | pre-training  resolution | #param |                                               download                                                |
+| :------------: | :----------: | :----------------------: | :----: | :---------------------------------------------------------------------------------------------------: |
+| InternImage-L  | ImageNet-22K |         384x384          |  223M  |   [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22k_192to384.pth)    |
+| InternImage-XL | ImageNet-22K |         384x384          |  335M  |   [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22k_192to384.pth)   |
+| InternImage-H  |  Joint 427M  |         384x384          | 1.08B  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_h_jointto22k_384.pth)   |
+| InternImage-G  |      -       |         384x384          |   3B   | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_g_pretrainto22k_384.pth) |



 ## ImageNet-1K Image Classification
-|      name      |   pretrain   | resolution | acc@1 | #param | FLOPs |           download       |
-| :------------: | :----------: | :--------: | :---: | :-----: | :---: |  :-----------------: |
-| InternImage-T  | ImageNet-1K  |  224x224   | 83.5  |   30M   |  5G   |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_t_1k_224.pth) \| [cfg](classification/configs/internimage_t_1k_224.yaml) |
-| InternImage-S  | ImageNet-1K  |  224x224   | 84.2  |   50M   |  8G   |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_s_1k_224.pth) \| [cfg](classification/configs/internimage_s_1k_224.yaml) |
-| InternImage-B  | ImageNet-1K  |  224x224   | 84.9  |   97M   |  16G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_b_1k_224.pth) \| [cfg](classification/configs/internimage_b_1k_224.yaml) |
-| InternImage-L  | ImageNet-22K |  384x384   | 87.7  |  223M   | 108G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22kto1k_384.pth) \| [cfg](classification/configs/internimage_l_22kto1k_384.yaml) |
-| InternImage-XL | ImageNet-22K |  384x384   | 88.0  |  335M   | 163G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22kto1k_384.pth) \| [cfg](classification/configs/internimage_xl_22kto1k_384.yaml) |
-| InternImage-H | Joint 427M |  640x640   | 89.6  |  1.08B   | 1478G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_h_22kto1k_640.pth) \| [cfg](classification/configs/internimage_h_22kto1k_640.yaml) |
-| InternImage-G | - | 512x512 | 90.1  |  3B   | 2700G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_g_22kto1k_512.pth) \| [cfg](classification/configs/internimage_g_22kto1k_512.yaml) |
+|      name      |   pretrain   | resolution | acc@1 | #param | FLOPs |                                                                             download                                                                              |
+| :------------: | :----------: | :--------: | :---: | :----: | :---: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------: |
+| InternImage-T  | ImageNet-1K  |  224x224   | 83.5  |  30M   |  5G   |       [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_t_1k_224.pth) \| [cfg](classification/configs/internimage_t_1k_224.yaml)       |
+| InternImage-S  | ImageNet-1K  |  224x224   | 84.2  |  50M   |  8G   |       [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_s_1k_224.pth) \| [cfg](classification/configs/internimage_s_1k_224.yaml)       |
+| InternImage-B  | ImageNet-1K  |  224x224   | 84.9  |  97M   |  16G  |       [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_b_1k_224.pth) \| [cfg](classification/configs/internimage_b_1k_224.yaml)       |
+| InternImage-L  | ImageNet-22K |  384x384   | 87.7  |  223M  | 108G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22kto1k_384.pth) \| [cfg](classification/configs/internimage_l_22kto1k_384.yaml)  |
+| InternImage-XL | ImageNet-22K |  384x384   | 88.0  |  335M  | 163G  | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22kto1k_384.pth) \| [cfg](classification/configs/internimage_xl_22kto1k_384.yaml) |
+| InternImage-H  |  Joint 427M  |  640x640   | 89.6  | 1.08B  | 1478G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_h_22kto1k_640.pth) \| [cfg](classification/configs/internimage_h_22kto1k_640.yaml)  |
+| InternImage-G  |      -       |  512x512   | 90.1  |   3B   | 2700G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_g_22kto1k_512.pth) \| [cfg](classification/configs/internimage_g_22kto1k_512.yaml)  |


 ## COCO Object Detection and Instance Segmentation

-|    backbone    |       method       | schd | box mAP  | mask mAP  | #param | FLOPs | download | 
-| :------------: | :----------------: | :---------: | :-----: | :------: | :-----: | :---: | :---: | 
-| InternImage-T  |     Mask R-CNN     |     1x      |  47.2   |   42.5   |   49M   | 270G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_t_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_t_fpn_1x_coco.py) |
-| InternImage-T  |     Mask R-CNN     |     3x      |  49.1   |   43.7   |   49M   | 270G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_t_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_t_fpn_3x_coco.py) |
-| InternImage-S  |     Mask R-CNN     |     1x      |  47.8   |   43.3   |   69M   | 340G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_s_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_s_fpn_1x_coco.py) |
-| InternImage-S  |     Mask R-CNN     |     3x      |  49.7   |   44.5   |   69M   | 340G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_s_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_s_fpn_3x_coco.py) |
-| InternImage-B  |     Mask R-CNN     |     1x      |  48.8   |   44.0   |  115M   | 501G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_b_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_b_fpn_1x_coco.py) |
-| InternImage-B  |     Mask R-CNN     |     3x      |  50.3   |   44.8   |  115M   | 501G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_b_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_b_fpn_3x_coco.py) |
-| InternImage-L  |     Cascade        |     1x      |  54.9   |   47.7   |  277M   | 1399G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/cascade_internimage_l_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/cascade_internimage_l_fpn_1x_coco.py) |
-| InternImage-L  |     Cascade        |     3x      |  56.1   |   48.5   |  277M   | 1399G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/cascade_internimage_l_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/cascade_internimage_l_fpn_3x_coco.py) |
-| InternImage-XL |     Cascade        |     1x      |  55.3   |   48.1   |  387M   | 1782G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/cascade_internimage_xl_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/cascade_internimage_xl_fpn_1x_coco.py) |
-| InternImage-XL |     Cascade        |     3x      |  56.2   |   48.8   |  387M   | 1782G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/cascade_internimage_xl_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/cascade_internimage_xl_fpn_3x_coco.py) |
-
-|    backbone    |       method       |  box mAP (val/test) |  #param  | FLOPs | download | 
-| :------------: | :----------------: |     :---------:     | :------: | :-----: | :---: | 
-| InternImage-H  |     DINO (TTA)     |      65.0 / 65.4     |   2.18B  | TODO |  TODO |
-| InternImage-G  |     DINO (TTA)     |      65.3 / 65.5     |    3B    | TODO |  TODO |
+|    backbone    |   method   | schd  | box mAP | mask mAP | #param | FLOPs |                                                                                     download                                                                                      |
+| :------------: | :--------: | :---: | :-----: | :------: | :----: | :---: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
+| InternImage-T  | Mask R-CNN |  1x   |  47.2   |   42.5   |  49M   | 270G  | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_t_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_t_fpn_1x_coco.py) |
+| InternImage-T  | Mask R-CNN |  3x   |  49.1   |   43.7   |  49M   | 270G  | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_t_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_t_fpn_3x_coco.py) |
+| InternImage-S  | Mask R-CNN |  1x   |  47.8   |   43.3   |  69M   | 340G  | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_s_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_s_fpn_1x_coco.py) |
+| InternImage-S  | Mask R-CNN |  3x   |  49.7   |   44.5   |  69M   | 340G  | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_s_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_s_fpn_3x_coco.py) |
+| InternImage-B  | Mask R-CNN |  1x   |  48.8   |   44.0   |  115M  | 501G  | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_b_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_b_fpn_1x_coco.py) |
+| InternImage-B  | Mask R-CNN |  3x   |  50.3   |   44.8   |  115M  | 501G  | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_b_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/mask_rcnn_internimage_b_fpn_3x_coco.py) |
+| InternImage-L  |  Cascade   |  1x   |  54.9   |   47.7   |  277M  | 1399G |   [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/cascade_internimage_l_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/cascade_internimage_l_fpn_1x_coco.py)   |
+| InternImage-L  |  Cascade   |  3x   |  56.1   |   48.5   |  277M  | 1399G |   [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/cascade_internimage_l_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/cascade_internimage_l_fpn_3x_coco.py)   |
+| InternImage-XL |  Cascade   |  1x   |  55.3   |   48.1   |  387M  | 1782G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/cascade_internimage_xl_fpn_1x_coco.pth) \| [cfg](detection/configs/coco/cascade_internimage_xl_fpn_1x_coco.py)  |
+| InternImage-XL |  Cascade   |  3x   |  56.2   |   48.8   |  387M  | 1782G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/cascade_internimage_xl_fpn_3x_coco.pth) \| [cfg](detection/configs/coco/cascade_internimage_xl_fpn_3x_coco.py)  |
+
+|   backbone    |   method   | box mAP (val/test) | #param | FLOPs | download |
+| :-----------: | :--------: | :----------------: | :----: | :---: | :------: |
+| InternImage-H | DINO (TTA) |    65.0 / 65.4     | 2.18B  | TODO  |   TODO   |
+| InternImage-G | DINO (TTA) |    65.3 / 65.5     |   3B   | TODO  |   TODO   |

 ## ADE20K Semantic Segmentation


-|    backbone    | method     |   resolution | mIoU (ss/ms) | #param | FLOPs | download | 
-| :------------: | :--------: | :--------: | :----------: | :-----: | :---: |   :---:  |
-| InternImage-T  |  UperNet   |   512x512   |     47.9 / 48.1     |   59M   | 944G  | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_t_512_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_t_512_160k_ade20k.py) |
-| InternImage-S  |  UperNet   |  512x512   |     50.1 / 50.9     |   80M   | 1017G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_s_512_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_s_512_160k_ade20k.py) |
-| InternImage-B  |  UperNet   |  512x512   |     50.8 / 51.3     |  128M   | 1185G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_b_512_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_b_512_160k_ade20k.py) |
-| InternImage-L  |  UperNet   |  640x640   |     53.9 / 54.1     |  256M   | 2526G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_640_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_l_640_160k_ade20k.py) |
-| InternImage-XL |  UperNet   |  640x640   |     55.0 / 55.3     |  368M   | 3142G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_640_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_xl_640_160k_ade20k.py) |
-| InternImage-H |  UperNet   |  896x896   |     59.9 / 60.3     |  1.12B   | 3566G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_h_896_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_h_896_160k_ade20k.py) |
-| InternImage-H |  Mask2Former   |  896x896   |     62.5 / 62.9     |  1.31B   | 4635G | TODO |
+|    backbone    |   method    | resolution | mIoU (ss/ms) | #param | FLOPs |                                                                                           download                                                                                           |
+| :------------: | :---------: | :--------: | :----------: | :----: | :---: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
+| InternImage-T  |   UperNet   |  512x512   | 47.9 / 48.1  |  59M   | 944G  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_t_512_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_t_512_160k_ade20k.py)  |
+| InternImage-S  |   UperNet   |  512x512   | 50.1 / 50.9  |  80M   | 1017G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_s_512_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_s_512_160k_ade20k.py)  |
+| InternImage-B  |   UperNet   |  512x512   | 50.8 / 51.3  |  128M  | 1185G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_b_512_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_b_512_160k_ade20k.py)  |
+| InternImage-L  |   UperNet   |  640x640   | 53.9 / 54.1  |  256M  | 2526G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_640_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_l_640_160k_ade20k.py)  |
+| InternImage-XL |   UperNet   |  640x640   | 55.0 / 55.3  |  368M  | 3142G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_640_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_xl_640_160k_ade20k.py) |
+| InternImage-H  |   UperNet   |  896x896   | 59.9 / 60.3  | 1.12B  | 3566G |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_h_896_160k_ade20k.pth) \| [cfg](segmentation/configs/ade20k/upernet_internimage_h_896_160k_ade20k.py)  |
+| InternImage-H  | Mask2Former |  896x896   | 62.5 / 62.9  | 1.31B  | 4635G |                                                                                             TODO                                                                                             |


 ## Main Results of FPS 

-[[TensorRT]](classification/export.py)
+[export classification model from pytorch to tensorrt](classification/README.md#export)
+
+[export detection model from pytorch to tensorrt](detection/README.md#export)
+
+[export segmentation model from pytorch to tensorrt](segmentation/README.md#export)

 |      name      | resolution | #param | FLOPs | batch 1 FPS (TensorRT) |
-| :------------: | :--------: | :-----: | :---: | :-------------------: |
-| InternImage-T  |  224x224   |   30M   |  5G   |          156          |
-| InternImage-S  |  224x224   |   50M   |  8G   |          129          |
-| InternImage-B  |  224x224   |   97M   |  16G  |          116          |
-| InternImage-L  |  384x384   |  223M   | 108G  |          56           |
-| InternImage-XL |  384x384   |  335M   | 163G  |          47           |
+| :------------: | :--------: | :----: | :---: | :--------------------: |
+| InternImage-T  |  224x224   |  30M   |  5G   |          156           |
+| InternImage-S  |  224x224   |  50M   |  8G   |          129           |
+| InternImage-B  |  224x224   |  97M   |  16G  |          116           |
+| InternImage-L  |  384x384   |  223M  | 108G  |           56           |
+| InternImage-XL |  384x384   |  335M  | 163G  |           47           |
+
+Before using `mmdeploy` to convert our PyTorch models to TensorRT, please make sure you have the DCNv3 custom operator builded correctly. You can build it with the following command:
+```shell
+export MMDEPLOY_DIR=/the/root/path/of/MMDeploy
+
+# prepare our custom ops, you can find it at InternImage/tensorrt/modulated_deform_conv_v3
+cp -r modulated_deform_conv_v3 ${MMDEPLOY_DIR}/csrc/mmdeploy/backend_ops/tensorrt
+
+# build custom ops
+cd ${MMDEPLOY_DIR}
+mkdir -p build && cd build
+cmake -DCMAKE_CXX_COMPILER=g++-7 -DMMDEPLOY_TARGET_BACKENDS=trt -DTENSORRT_DIR=${TENSORRT_DIR} -DCUDNN_DIR=${CUDNN_DIR} ..
+make -j$(nproc) && make install
+
+# install the mmdeploy after building custom ops
+cd ${MMDEPLOY_DIR}
+pip install -e .
+```
+For more details on building custom ops, please refering to [this document](https://github.com/open-mmlab/mmdeploy/blob/master/docs/en/01-how-to-build/linux-x86_64.md).
+


 ## Citation

--- a/detection/README.md
+++ b/detection/README.md
@@ -100,3 +100,35 @@ For example, to train `InternImage-L` with 32 GPU on 4 node, run:
 ```bash
 GPUS=32 sh slurm_train.sh <partition> <job-name> configs/coco/cascade_internimage_xl_fpn_3x_coco.py work_dirs/cascade_internimage_xl_fpn_3x_coco
 ```
+
+### Export
+
+To export a detection model from PyTorch to TensorRT, run:
+```shell
+MODEL="model_name"
+CKPT_PATH="/path/to/model/ckpt.pth"
+
+python deploy.py \
+    "./deploy/configs/mmdet/instance-seg/instance-seg_tensorrt_dynamic-320x320-1344x1344.py" \
+    "./configs/coco/${MODEL}.py" \
+    "${CKPT_PATH}" \
+    "./deploy/demo.jpg" \
+    --work-dir "./work_dirs/mmdet/instance-seg/${MODEL}" \
+    --device cuda \
+    --dump-info
+```
+
+For example, to export `mask_rcnn_internimage_t_fpn_1x_coco` from PyTorch to TensorRT, run:
+```shell
+MODEL="mask_rcnn_internimage_t_fpn_1x_coco"
+CKPT_PATH="/path/to/model/ckpt/mask_rcnn_internimage_t_fpn_1x_coco.pth"
+
+python deploy.py \
+    "./deploy/configs/mmdet/instance-seg/instance-seg_tensorrt_dynamic-320x320-1344x1344.py" \
+    "./configs/coco/${MODEL}.py" \
+    "${CKPT_PATH}" \
+    "./deploy/demo.jpg" \
+    --work-dir "./work_dirs/mmdet/instance-seg/${MODEL}" \
+    --device cuda \
+    --dump-info
+```
--- a/detection/deploy.py
+++ b/detection/deploy.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import logging
+import os
+import os.path as osp
+from functools import partial
+
+import mmcv
+import torch.multiprocessing as mp
+from torch.multiprocessing import Process, set_start_method
+
+from mmdeploy.apis import (create_calib_input_data, extract_model,
+                           get_predefined_partition_cfg, torch2onnx,
+                           torch2torchscript, visualize_model)
+from mmdeploy.apis.core import PIPELINE_MANAGER
+from mmdeploy.apis.utils import to_backend
+from mmdeploy.backend.sdk.export_info import export2SDK
+from mmdeploy.utils import (IR, Backend, get_backend, get_calib_filename,
+                            get_ir_config, get_partition_config,
+                            get_root_logger, load_config, target_wrapper)
+
+import mmcv_custom
+import mmdet_custom
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Export model to backends.')
+    parser.add_argument('deploy_cfg', help='deploy config path')
+    parser.add_argument('model_cfg', help='model config path')
+    parser.add_argument('checkpoint', help='model checkpoint path')
+    parser.add_argument('img', help='image used to convert model model')
+    parser.add_argument(
+        '--test-img',
+        default=None,
+        type=str,
+        nargs='+',
+        help='image used to test model')
+    parser.add_argument(
+        '--work-dir',
+        default=os.getcwd(),
+        help='the dir to save logs and models')
+    parser.add_argument(
+        '--calib-dataset-cfg',
+        help=('dataset config path used to calibrate in int8 mode. If not '
+              'specified, it will use "val" dataset in model config instead.'),
+        default=None)
+    parser.add_argument(
+        '--device', help='device used for conversion', default='cpu')
+    parser.add_argument(
+        '--log-level',
+        help='set log level',
+        default='INFO',
+        choices=list(logging._nameToLevel.keys()))
+    parser.add_argument(
+        '--show', action='store_true', help='Show detection outputs')
+    parser.add_argument(
+        '--dump-info', action='store_true', help='Output information for SDK')
+    parser.add_argument(
+        '--quant-image-dir',
+        default=None,
+        help='Image directory for quantize model.')
+    parser.add_argument(
+        '--quant', action='store_true', help='Quantize model to low bit.')
+    parser.add_argument(
+        '--uri',
+        default='192.168.1.1:60000',
+        help='Remote ipv4:port or ipv6:port for inference on edge device.')
+    args = parser.parse_args()
+    return args
+
+
+def create_process(name, target, args, kwargs, ret_value=None):
+    logger = get_root_logger()
+    logger.info(f'{name} start.')
+    log_level = logger.level
+
+    wrap_func = partial(target_wrapper, target, log_level, ret_value)
+
+    process = Process(target=wrap_func, args=args, kwargs=kwargs)
+    process.start()
+    process.join()
+
+    if ret_value is not None:
+        if ret_value.value != 0:
+            logger.error(f'{name} failed.')
+            exit(1)
+        else:
+            logger.info(f'{name} success.')
+
+
+def torch2ir(ir_type: IR):
+    """Return the conversion function from torch to the intermediate
+    representation.
+
+    Args:
+        ir_type (IR): The type of the intermediate representation.
+    """
+    if ir_type == IR.ONNX:
+        return torch2onnx
+    elif ir_type == IR.TORCHSCRIPT:
+        return torch2torchscript
+    else:
+        raise KeyError(f'Unexpected IR type {ir_type}')
+
+
+def main():
+    args = parse_args()
+    set_start_method('spawn', force=True)
+    logger = get_root_logger()
+    log_level = logging.getLevelName(args.log_level)
+    logger.setLevel(log_level)
+
+    pipeline_funcs = [
+        torch2onnx, torch2torchscript, extract_model, create_calib_input_data
+    ]
+    PIPELINE_MANAGER.enable_multiprocess(True, pipeline_funcs)
+    PIPELINE_MANAGER.set_log_level(log_level, pipeline_funcs)
+
+    deploy_cfg_path = args.deploy_cfg
+    model_cfg_path = args.model_cfg
+    checkpoint_path = args.checkpoint
+    quant = args.quant
+    quant_image_dir = args.quant_image_dir
+
+    # load deploy_cfg
+    deploy_cfg, model_cfg = load_config(deploy_cfg_path, model_cfg_path)
+
+    # create work_dir if not
+    mmcv.mkdir_or_exist(osp.abspath(args.work_dir))
+
+    if args.dump_info:
+        export2SDK(
+            deploy_cfg,
+            model_cfg,
+            args.work_dir,
+            pth=checkpoint_path,
+            device=args.device)
+
+    ret_value = mp.Value('d', 0, lock=False)
+
+    # convert to IR
+    ir_config = get_ir_config(deploy_cfg)
+    ir_save_file = ir_config['save_file']
+    ir_type = IR.get(ir_config['type'])
+    torch2ir(ir_type)(
+        args.img,
+        args.work_dir,
+        ir_save_file,
+        deploy_cfg_path,
+        model_cfg_path,
+        checkpoint_path,
+        device=args.device)
+
+    # convert backend
+    ir_files = [osp.join(args.work_dir, ir_save_file)]
+
+    # partition model
+    partition_cfgs = get_partition_config(deploy_cfg)
+
+    if partition_cfgs is not None:
+
+        if 'partition_cfg' in partition_cfgs:
+            partition_cfgs = partition_cfgs.get('partition_cfg', None)
+        else:
+            assert 'type' in partition_cfgs
+            partition_cfgs = get_predefined_partition_cfg(
+                deploy_cfg, partition_cfgs['type'])
+
+        origin_ir_file = ir_files[0]
+        ir_files = []
+        for partition_cfg in partition_cfgs:
+            save_file = partition_cfg['save_file']
+            save_path = osp.join(args.work_dir, save_file)
+            start = partition_cfg['start']
+            end = partition_cfg['end']
+            dynamic_axes = partition_cfg.get('dynamic_axes', None)
+
+            extract_model(
+                origin_ir_file,
+                start,
+                end,
+                dynamic_axes=dynamic_axes,
+                save_file=save_path)
+
+            ir_files.append(save_path)
+
+    # calib data
+    calib_filename = get_calib_filename(deploy_cfg)
+    if calib_filename is not None:
+        calib_path = osp.join(args.work_dir, calib_filename)
+        create_calib_input_data(
+            calib_path,
+            deploy_cfg_path,
+            model_cfg_path,
+            checkpoint_path,
+            dataset_cfg=args.calib_dataset_cfg,
+            dataset_type='val',
+            device=args.device)
+
+    backend_files = ir_files
+    # convert backend
+    backend = get_backend(deploy_cfg)
+
+    # preprocess deploy_cfg
+    if backend == Backend.RKNN:
+        # TODO: Add this to task_processor in the future
+        import tempfile
+
+        from mmdeploy.utils import (get_common_config, get_normalization,
+                                    get_quantization_config,
+                                    get_rknn_quantization)
+        quantization_cfg = get_quantization_config(deploy_cfg)
+        common_params = get_common_config(deploy_cfg)
+        if get_rknn_quantization(deploy_cfg) is True:
+            transform = get_normalization(model_cfg)
+            common_params.update(
+                dict(
+                    mean_values=[transform['mean']],
+                    std_values=[transform['std']]))
+
+        dataset_file = tempfile.NamedTemporaryFile(suffix='.txt').name
+        with open(dataset_file, 'w') as f:
+            f.writelines([osp.abspath(args.img)])
+        quantization_cfg.setdefault('dataset', dataset_file)
+    if backend == Backend.ASCEND:
+        # TODO: Add this to backend manager in the future
+        if args.dump_info:
+            from mmdeploy.backend.ascend import update_sdk_pipeline
+            update_sdk_pipeline(args.work_dir)
+
+    # convert to backend
+    PIPELINE_MANAGER.set_log_level(log_level, [to_backend])
+    if backend == Backend.TENSORRT:
+        PIPELINE_MANAGER.enable_multiprocess(True, [to_backend])
+    backend_files = to_backend(
+        backend,
+        ir_files,
+        work_dir=args.work_dir,
+        deploy_cfg=deploy_cfg,
+        log_level=log_level,
+        device=args.device,
+        uri=args.uri)
+
+    # ncnn quantization
+    if backend == Backend.NCNN and quant:
+        from onnx2ncnn_quant_table import get_table
+
+        from mmdeploy.apis.ncnn import get_quant_model_file, ncnn2int8
+        model_param_paths = backend_files[::2]
+        model_bin_paths = backend_files[1::2]
+        backend_files = []
+        for onnx_path, model_param_path, model_bin_path in zip(
+                ir_files, model_param_paths, model_bin_paths):
+
+            deploy_cfg, model_cfg = load_config(deploy_cfg_path,
+                                                model_cfg_path)
+            quant_onnx, quant_table, quant_param, quant_bin = get_quant_model_file(  # noqa: E501
+                onnx_path, args.work_dir)
+
+            create_process(
+                'ncnn quant table',
+                target=get_table,
+                args=(onnx_path, deploy_cfg, model_cfg, quant_onnx,
+                      quant_table, quant_image_dir, args.device),
+                kwargs=dict(),
+                ret_value=ret_value)
+
+            create_process(
+                'ncnn_int8',
+                target=ncnn2int8,
+                args=(model_param_path, model_bin_path, quant_table,
+                      quant_param, quant_bin),
+                kwargs=dict(),
+                ret_value=ret_value)
+            backend_files += [quant_param, quant_bin]
+
+    if args.test_img is None:
+        args.test_img = args.img
+
+    extra = dict(
+        backend=backend,
+        output_file=osp.join(args.work_dir, f'output_{backend.value}.jpg'),
+        show_result=args.show)
+    if backend == Backend.SNPE:
+        extra['uri'] = args.uri
+
+    # get backend inference result, try render
+    create_process(
+        f'visualize {backend.value} model',
+        target=visualize_model,
+        args=(model_cfg_path, deploy_cfg_path, backend_files, args.test_img,
+              args.device),
+        kwargs=extra,
+        ret_value=ret_value)
+
+    # get pytorch model inference result, try visualize if possible
+    create_process(
+        'visualize pytorch model',
+        target=visualize_model,
+        args=(model_cfg_path, deploy_cfg_path, [checkpoint_path],
+              args.test_img, args.device),
+        kwargs=dict(
+            backend=Backend.PYTORCH,
+            output_file=osp.join(args.work_dir, 'output_pytorch.jpg'),
+            show_result=args.show),
+        ret_value=ret_value)
+    logger.info('All process success.')
+
+
+if __name__ == '__main__':
+    main()
--- a/detection/deploy/configs/_base_/backends/tensorrt-fp16.py
+++ b/detection/deploy/configs/_base_/backends/tensorrt-fp16.py
+backend_config = dict(
+    type='tensorrt', common_config=dict(fp16_mode=True, max_workspace_size=0))
--- a/detection/deploy/configs/_base_/backends/tensorrt.py
+++ b/detection/deploy/configs/_base_/backends/tensorrt.py
+backend_config = dict(
+    type='tensorrt', common_config=dict(fp16_mode=False, max_workspace_size=0))
--- a/detection/deploy/configs/_base_/onnx_config.py
+++ b/detection/deploy/configs/_base_/onnx_config.py
+onnx_config = dict(
+    type='onnx',
+    export_params=True,
+    keep_initializers_as_inputs=False,
+    opset_version=11,
+    save_file='end2end.onnx',
+    input_names=['input'],
+    output_names=['output'],
+    input_shape=None,
+    optimize=True)
--- a/detection/deploy/configs/mmdet/_base_/base_dynamic.py
+++ b/detection/deploy/configs/mmdet/_base_/base_dynamic.py
+_base_ = ['./base_static.py']
+onnx_config = dict(
+    dynamic_axes={
+        'input': {
+            0: 'batch',
+            2: 'height',
+            3: 'width'
+        },
+        'dets': {
+            0: 'batch',
+            1: 'num_dets',
+        },
+        'labels': {
+            0: 'batch',
+            1: 'num_dets',
+        },
+    }, )
--- a/detection/deploy/configs/mmdet/_base_/base_instance-seg_dynamic.py
+++ b/detection/deploy/configs/mmdet/_base_/base_instance-seg_dynamic.py
+_base_ = ['./base_instance-seg_static.py']
+onnx_config = dict(
+    dynamic_axes={
+        'input': {
+            0: 'batch',
+            2: 'height',
+            3: 'width'
+        },
+        'dets': {
+            0: 'batch',
+            1: 'num_dets',
+        },
+        'labels': {
+            0: 'batch',
+            1: 'num_dets',
+        },
+        'masks': {
+            0: 'batch',
+            1: 'num_dets',
+            2: 'height',
+            3: 'width'
+        },
+    })
--- a/detection/deploy/configs/mmdet/_base_/base_instance-seg_static.py
+++ b/detection/deploy/configs/mmdet/_base_/base_instance-seg_static.py
+_base_ = ['./base_static.py']
+
+onnx_config = dict(output_names=['dets', 'labels', 'masks'])
+codebase_config = dict(post_processing=dict(export_postprocess_mask=False))
--- a/detection/deploy/configs/mmdet/_base_/base_static.py
+++ b/detection/deploy/configs/mmdet/_base_/base_static.py
+_base_ = ['../../_base_/onnx_config.py']
+
+onnx_config = dict(output_names=['dets', 'labels'], input_shape=None)
+codebase_config = dict(
+    type='mmdet',
+    task='ObjectDetection',
+    model_type='end2end',
+    post_processing=dict(
+        score_threshold=0.05,
+        confidence_threshold=0.005,  # for YOLOv3
+        iou_threshold=0.5,
+        max_output_boxes_per_class=200,
+        pre_top_k=5000,
+        keep_top_k=100,
+        background_label_id=-1,
+    ))
--- a/detection/deploy/configs/mmdet/instance-seg/instance-seg_tensorrt_dynamic-320x320-1344x1344.py
+++ b/detection/deploy/configs/mmdet/instance-seg/instance-seg_tensorrt_dynamic-320x320-1344x1344.py
+_base_ = [
+    '../_base_/base_instance-seg_dynamic.py',
+    '../../_base_/backends/tensorrt.py'
+]
+
+backend_config = dict(
+    common_config=dict(max_workspace_size=1 << 30),
+    model_inputs=[
+        dict(
+            input_shapes=dict(
+                input=dict(
+                    min_shape=[1, 3, 320, 320],
+                    opt_shape=[1, 3, 800, 1344],
+                    max_shape=[1, 3, 1344, 1344])))
+    ])
--- a/detection/deploy/demo.jpg
+++ b/detection/deploy/demo.jpg
--- a/detection/ops_dcnv3/functions/dcnv3_func.py
+++ b/detection/ops_dcnv3/functions/dcnv3_func.py
@@ -60,6 +60,33 @@ class DCNv3Function(Function):
        return grad_input, grad_offset, grad_mask, \
            None, None, None, None, None, None, None, None, None, None, None, None

+    @staticmethod
+    def symbolic(g, input, offset, mask, kernel_h, kernel_w, stride_h,
+                 stride_w, pad_h, pad_w, dilation_h, dilation_w, group,
+                 group_channels, offset_scale, im2col_step):
+        """Symbolic function for mmdeploy::DCNv3.
+
+        Returns:
+            DCNv3 op for onnx.
+        """
+        return g.op(
+            'mmdeploy::TRTDCNv3',
+            input,
+            offset,
+            mask,
+            kernel_h_i=int(kernel_h),
+            kernel_w_i=int(kernel_w),
+            stride_h_i=int(stride_h),
+            stride_w_i=int(stride_w),
+            pad_h_i=int(pad_h),
+            pad_w_i=int(pad_w),
+            dilation_h_i=int(dilation_h),
+            dilation_w_i=int(dilation_w),
+            group_i=int(group),
+            group_channels_i=int(group_channels),
+            offset_scale_f=float(offset_scale),
+            im2col_step_i=int(im2col_step),
+        )

 def _get_reference_points(spatial_shapes, device, kernel_h, kernel_w, dilation_h, dilation_w, pad_h=0, pad_w=0, stride_h=1, stride_w=1):
    _, H_, W_, _ = spatial_shapes

--- a/segmentation/README.md
+++ b/segmentation/README.md
@@ -109,3 +109,35 @@ CUDA_VISIBLE_DEVICES=0 python image_demo.py \
  checkpoint_dir/seg/upernet_internimage_t_512_160k_ade20k.pth  \
  --palette ade20k 
 ```
+
+### Export
+
+To export a segmentation model from PyTorch to TensorRT, run:
+```shell
+MODEL="model_name"
+CKPT_PATH="/path/to/model/ckpt.pth"
+
+python deploy.py \
+    "./deploy/configs/mmseg/segmentation_tensorrt_static-512x512.py" \
+    "./configs/ade20k/${MODEL}.py" \
+    "${CKPT_PATH}" \
+    "./deploy/demo.png" \
+    --work-dir "./work_dirs/mmseg/${MODEL}" \
+    --device cuda \
+    --dump-info
+```
+
+For example, to export `upernet_internimage_t_512_160k_ade20k` from PyTorch to TensorRT, run:
+```shell
+MODEL="upernet_internimage_t_512_160k_ade20k"
+CKPT_PATH="/path/to/model/ckpt/upernet_internimage_t_512_160k_ade20k.pth"
+
+python deploy.py \
+    "./deploy/configs/mmseg/segmentation_tensorrt_static-512x512.py" \
+    "./configs/ade20k/${MODEL}.py" \
+    "${CKPT_PATH}" \
+    "./deploy/demo.png" \
+    --work-dir "./work_dirs/mmseg/${MODEL}" \
+    --device cuda \
+    --dump-info
+```
--- a/segmentation/deploy.py
+++ b/segmentation/deploy.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import logging
+import os
+import os.path as osp
+from functools import partial
+
+import mmcv
+import torch.multiprocessing as mp
+from torch.multiprocessing import Process, set_start_method
+
+from mmdeploy.apis import (create_calib_input_data, extract_model,
+                           get_predefined_partition_cfg, torch2onnx,
+                           torch2torchscript, visualize_model)
+from mmdeploy.apis.core import PIPELINE_MANAGER
+from mmdeploy.apis.utils import to_backend
+from mmdeploy.backend.sdk.export_info import export2SDK
+from mmdeploy.utils import (IR, Backend, get_backend, get_calib_filename,
+                            get_ir_config, get_partition_config,
+                            get_root_logger, load_config, target_wrapper)
+
+import mmcv_custom
+import mmseg_custom
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Export model to backends.')
+    parser.add_argument('deploy_cfg', help='deploy config path')
+    parser.add_argument('model_cfg', help='model config path')
+    parser.add_argument('checkpoint', help='model checkpoint path')
+    parser.add_argument('img', help='image used to convert model model')
+    parser.add_argument(
+        '--test-img',
+        default=None,
+        type=str,
+        nargs='+',
+        help='image used to test model')
+    parser.add_argument(
+        '--work-dir',
+        default=os.getcwd(),
+        help='the dir to save logs and models')
+    parser.add_argument(
+        '--calib-dataset-cfg',
+        help=('dataset config path used to calibrate in int8 mode. If not '
+              'specified, it will use "val" dataset in model config instead.'),
+        default=None)
+    parser.add_argument(
+        '--device', help='device used for conversion', default='cpu')
+    parser.add_argument(
+        '--log-level',
+        help='set log level',
+        default='INFO',
+        choices=list(logging._nameToLevel.keys()))
+    parser.add_argument(
+        '--show', action='store_true', help='Show detection outputs')
+    parser.add_argument(
+        '--dump-info', action='store_true', help='Output information for SDK')
+    parser.add_argument(
+        '--quant-image-dir',
+        default=None,
+        help='Image directory for quantize model.')
+    parser.add_argument(
+        '--quant', action='store_true', help='Quantize model to low bit.')
+    parser.add_argument(
+        '--uri',
+        default='192.168.1.1:60000',
+        help='Remote ipv4:port or ipv6:port for inference on edge device.')
+    args = parser.parse_args()
+    return args
+
+
+def create_process(name, target, args, kwargs, ret_value=None):
+    logger = get_root_logger()
+    logger.info(f'{name} start.')
+    log_level = logger.level
+
+    wrap_func = partial(target_wrapper, target, log_level, ret_value)
+
+    process = Process(target=wrap_func, args=args, kwargs=kwargs)
+    process.start()
+    process.join()
+
+    if ret_value is not None:
+        if ret_value.value != 0:
+            logger.error(f'{name} failed.')
+            exit(1)
+        else:
+            logger.info(f'{name} success.')
+
+
+def torch2ir(ir_type: IR):
+    """Return the conversion function from torch to the intermediate
+    representation.
+
+    Args:
+        ir_type (IR): The type of the intermediate representation.
+    """
+    if ir_type == IR.ONNX:
+        return torch2onnx
+    elif ir_type == IR.TORCHSCRIPT:
+        return torch2torchscript
+    else:
+        raise KeyError(f'Unexpected IR type {ir_type}')
+
+
+def main():
+    args = parse_args()
+    set_start_method('spawn', force=True)
+    logger = get_root_logger()
+    log_level = logging.getLevelName(args.log_level)
+    logger.setLevel(log_level)
+
+    pipeline_funcs = [
+        torch2onnx, torch2torchscript, extract_model, create_calib_input_data
+    ]
+    PIPELINE_MANAGER.enable_multiprocess(True, pipeline_funcs)
+    PIPELINE_MANAGER.set_log_level(log_level, pipeline_funcs)
+
+    deploy_cfg_path = args.deploy_cfg
+    model_cfg_path = args.model_cfg
+    checkpoint_path = args.checkpoint
+    quant = args.quant
+    quant_image_dir = args.quant_image_dir
+
+    # load deploy_cfg
+    deploy_cfg, model_cfg = load_config(deploy_cfg_path, model_cfg_path)
+
+    # create work_dir if not
+    mmcv.mkdir_or_exist(osp.abspath(args.work_dir))
+
+    if args.dump_info:
+        export2SDK(
+            deploy_cfg,
+            model_cfg,
+            args.work_dir,
+            pth=checkpoint_path,
+            device=args.device)
+
+    ret_value = mp.Value('d', 0, lock=False)
+
+    # convert to IR
+    ir_config = get_ir_config(deploy_cfg)
+    ir_save_file = ir_config['save_file']
+    ir_type = IR.get(ir_config['type'])
+    torch2ir(ir_type)(
+        args.img,
+        args.work_dir,
+        ir_save_file,
+        deploy_cfg_path,
+        model_cfg_path,
+        checkpoint_path,
+        device=args.device)
+
+    # convert backend
+    ir_files = [osp.join(args.work_dir, ir_save_file)]
+
+    # partition model
+    partition_cfgs = get_partition_config(deploy_cfg)
+
+    if partition_cfgs is not None:
+
+        if 'partition_cfg' in partition_cfgs:
+            partition_cfgs = partition_cfgs.get('partition_cfg', None)
+        else:
+            assert 'type' in partition_cfgs
+            partition_cfgs = get_predefined_partition_cfg(
+                deploy_cfg, partition_cfgs['type'])
+
+        origin_ir_file = ir_files[0]
+        ir_files = []
+        for partition_cfg in partition_cfgs:
+            save_file = partition_cfg['save_file']
+            save_path = osp.join(args.work_dir, save_file)
+            start = partition_cfg['start']
+            end = partition_cfg['end']
+            dynamic_axes = partition_cfg.get('dynamic_axes', None)
+
+            extract_model(
+                origin_ir_file,
+                start,
+                end,
+                dynamic_axes=dynamic_axes,
+                save_file=save_path)
+
+            ir_files.append(save_path)
+
+    # calib data
+    calib_filename = get_calib_filename(deploy_cfg)
+    if calib_filename is not None:
+        calib_path = osp.join(args.work_dir, calib_filename)
+        create_calib_input_data(
+            calib_path,
+            deploy_cfg_path,
+            model_cfg_path,
+            checkpoint_path,
+            dataset_cfg=args.calib_dataset_cfg,
+            dataset_type='val',
+            device=args.device)
+
+    backend_files = ir_files
+    # convert backend
+    backend = get_backend(deploy_cfg)
+
+    # preprocess deploy_cfg
+    if backend == Backend.RKNN:
+        # TODO: Add this to task_processor in the future
+        import tempfile
+
+        from mmdeploy.utils import (get_common_config, get_normalization,
+                                    get_quantization_config,
+                                    get_rknn_quantization)
+        quantization_cfg = get_quantization_config(deploy_cfg)
+        common_params = get_common_config(deploy_cfg)
+        if get_rknn_quantization(deploy_cfg) is True:
+            transform = get_normalization(model_cfg)
+            common_params.update(
+                dict(
+                    mean_values=[transform['mean']],
+                    std_values=[transform['std']]))
+
+        dataset_file = tempfile.NamedTemporaryFile(suffix='.txt').name
+        with open(dataset_file, 'w') as f:
+            f.writelines([osp.abspath(args.img)])
+        quantization_cfg.setdefault('dataset', dataset_file)
+    if backend == Backend.ASCEND:
+        # TODO: Add this to backend manager in the future
+        if args.dump_info:
+            from mmdeploy.backend.ascend import update_sdk_pipeline
+            update_sdk_pipeline(args.work_dir)
+
+    # convert to backend
+    PIPELINE_MANAGER.set_log_level(log_level, [to_backend])
+    if backend == Backend.TENSORRT:
+        PIPELINE_MANAGER.enable_multiprocess(True, [to_backend])
+    backend_files = to_backend(
+        backend,
+        ir_files,
+        work_dir=args.work_dir,
+        deploy_cfg=deploy_cfg,
+        log_level=log_level,
+        device=args.device,
+        uri=args.uri)
+
+    # ncnn quantization
+    if backend == Backend.NCNN and quant:
+        from onnx2ncnn_quant_table import get_table
+
+        from mmdeploy.apis.ncnn import get_quant_model_file, ncnn2int8
+        model_param_paths = backend_files[::2]
+        model_bin_paths = backend_files[1::2]
+        backend_files = []
+        for onnx_path, model_param_path, model_bin_path in zip(
+                ir_files, model_param_paths, model_bin_paths):
+
+            deploy_cfg, model_cfg = load_config(deploy_cfg_path,
+                                                model_cfg_path)
+            quant_onnx, quant_table, quant_param, quant_bin = get_quant_model_file(  # noqa: E501
+                onnx_path, args.work_dir)
+
+            create_process(
+                'ncnn quant table',
+                target=get_table,
+                args=(onnx_path, deploy_cfg, model_cfg, quant_onnx,
+                      quant_table, quant_image_dir, args.device),
+                kwargs=dict(),
+                ret_value=ret_value)
+
+            create_process(
+                'ncnn_int8',
+                target=ncnn2int8,
+                args=(model_param_path, model_bin_path, quant_table,
+                      quant_param, quant_bin),
+                kwargs=dict(),
+                ret_value=ret_value)
+            backend_files += [quant_param, quant_bin]
+
+    if args.test_img is None:
+        args.test_img = args.img
+
+    extra = dict(
+        backend=backend,
+        output_file=osp.join(args.work_dir, f'output_{backend.value}.jpg'),
+        show_result=args.show)
+    if backend == Backend.SNPE:
+        extra['uri'] = args.uri
+
+    # get backend inference result, try render
+    create_process(
+        f'visualize {backend.value} model',
+        target=visualize_model,
+        args=(model_cfg_path, deploy_cfg_path, backend_files, args.test_img,
+              args.device),
+        kwargs=extra,
+        ret_value=ret_value)
+
+    # get pytorch model inference result, try visualize if possible
+    create_process(
+        'visualize pytorch model',
+        target=visualize_model,
+        args=(model_cfg_path, deploy_cfg_path, [checkpoint_path],
+              args.test_img, args.device),
+        kwargs=dict(
+            backend=Backend.PYTORCH,
+            output_file=osp.join(args.work_dir, 'output_pytorch.jpg'),
+            show_result=args.show),
+        ret_value=ret_value)
+    logger.info('All process success.')
+
+
+if __name__ == '__main__':
+    main()
--- a/segmentation/deploy/configs/_base_/backends/tensorrt.py
+++ b/segmentation/deploy/configs/_base_/backends/tensorrt.py
+backend_config = dict(
+    type='tensorrt', common_config=dict(fp16_mode=False, max_workspace_size=0))
--- a/segmentation/deploy/configs/_base_/onnx_config.py
+++ b/segmentation/deploy/configs/_base_/onnx_config.py
+onnx_config = dict(
+    type='onnx',
+    export_params=True,
+    keep_initializers_as_inputs=False,
+    opset_version=11,
+    save_file='end2end.onnx',
+    input_names=['input'],
+    output_names=['output'],
+    input_shape=None,
+    optimize=True)
--- a/segmentation/deploy/configs/mmseg/segmentation_static.py
+++ b/segmentation/deploy/configs/mmseg/segmentation_static.py
+_base_ = ['../_base_/onnx_config.py']
+codebase_config = dict(type='mmseg', task='Segmentation', with_argmax=True)
--- a/segmentation/deploy/configs/mmseg/segmentation_tensorrt_static-512x512.py
+++ b/segmentation/deploy/configs/mmseg/segmentation_tensorrt_static-512x512.py
+_base_ = ['./segmentation_static.py', '../_base_/backends/tensorrt.py']
+
+onnx_config = dict(input_shape=[512, 512])
+backend_config = dict(
+    common_config=dict(max_workspace_size=1 << 30),
+    model_inputs=[
+        dict(
+            input_shapes=dict(
+                input=dict(
+                    min_shape=[1, 3, 512, 512],
+                    opt_shape=[1, 3, 512, 512],
+                    max_shape=[1, 3, 512, 512])))
+    ])